compiler/parser/Lexer.x

   1 -----------------------------------------------------------------------------
   2 -- (c) The University of Glasgow, 2006
   3 --
   4 -- GHC's lexer.
   5 --
   6 -- This is a combination of an Alex-generated lexer from a regex
   7 -- definition, with some hand-coded bits.
   8 --
   9 -- Completely accurate information about token-spans within the source
  10 -- file is maintained.  Every token has a start and end SrcLoc attached to it.
  11 --
  12 -----------------------------------------------------------------------------
  13
  14 --   ToDo / known bugs:
  15 --    - parsing integers is a bit slow
  16 --    - readRational is a bit slow
  17 --
  18 --   Known bugs, that were also in the previous version:
  19 --    - M... should be 3 tokens, not 1.
  20 --    - pragma-end should be only valid in a pragma
  21
  22 --   qualified operator NOTES.
  23 --
  24 --   - If M.(+) is a single lexeme, then..
  25 --     - Probably (+) should be a single lexeme too, for consistency.
  26 --       Otherwise ( + ) would be a prefix operator, but M.( + ) would not be.
  27 --     - But we have to rule out reserved operators, otherwise (..) becomes
  28 --       a different lexeme.
  29 --     - Should we therefore also rule out reserved operators in the qualified
  30 --       form?  This is quite difficult to achieve.  We don't do it for
  31 --       qualified varids.
  32
  33 {
  34 -- XXX The above flags turn off warnings in the generated code:
  35 {-# OPTIONS_GHC -fno-warn-unused-matches #-}
  36 {-# OPTIONS_GHC -fno-warn-unused-binds #-}
  37 {-# OPTIONS_GHC -fno-warn-unused-imports #-}
  38 {-# OPTIONS_GHC -fno-warn-missing-signatures #-}
  39 -- But alex still generates some code that causes the "lazy unlifted bindings"
  40 -- warning, and old compilers don't know about it so we can't easily turn
  41 -- it off, so for now we use the sledge hammer:
  42 {-# OPTIONS_GHC -w #-}
  43
  44 {-# OPTIONS_GHC -funbox-strict-fields #-}
  45
  46 module Lexer (
  47    Token(..), lexer, pragState, mkPState, PState(..),
  48    P(..), ParseResult(..), getSrcLoc,
  49    getPState, getDynFlags, withThisPackage,
  50    failLocMsgP, failSpanMsgP, srcParseFail,
  51    getMessages,
  52    popContext, pushCurrentContext, setLastToken, setSrcLoc,
  53    getLexState, popLexState, pushLexState,
  54    extension, bangPatEnabled, datatypeContextsEnabled,
  55    addWarning,
  56    lexTokenStream
  57   ) where
  58
  59 import Bag
  60 import ErrUtils
  61 import Outputable
  62 import StringBuffer
  63 import FastString
  64 import SrcLoc
  65 import UniqFM
  66 import DynFlags
  67 import Module
  68 import Ctype
  69 import BasicTypes       ( InlineSpec(..), RuleMatchInfo(..) )
  70 import Util             ( readRational )
  71
  72 import Control.Monad
  73 import Data.Bits
  74 import Data.Char
  75 import Data.List
  76 import Data.Maybe
  77 import Data.Map (Map)
  78 import qualified Data.Map as Map
  79 import Data.Ratio
  80 }
  81
  82 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
  83 $whitechar   = [\ \n\r\f\v $unispace]
  84 $white_no_nl = $whitechar # \n
  85 $tab         = \t
  86
  87 $ascdigit  = 0-9
  88 $unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
  89 $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
  90 $digit     = [$ascdigit $unidigit]
  91
  92 $special   = [\(\)\,\;\[\]\`\{\}]
  93 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
  94 $unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
  95 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
  96
  97 $unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
  98 $asclarge  = [A-Z]
  99 $large     = [$asclarge $unilarge]
 100
 101 $unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetChar.
 102 $ascsmall  = [a-z]
 103 $small     = [$ascsmall $unismall \_]
 104
 105 $unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetChar.
 106 $graphic   = [$small $large $symbol $digit $special $unigraphic \:\"\']
 107
 108 $octit     = 0-7
 109 $hexit     = [$decdigit A-F a-f]
 110 $symchar   = [$symbol \:]
 111 $nl        = [\n\r]
 112 $idchar    = [$small $large $digit \']
 113
 114 $pragmachar = [$small $large $digit]
 115
 116 $docsym    = [\| \^ \* \$]
 117
 118 @varid     = $small $idchar*
 119 @conid     = $large $idchar*
 120
 121 @varsym    = $symbol $symchar*
 122 @consym    = \: $symchar*
 123
 124 @decimal     = $decdigit+
 125 @octal       = $octit+
 126 @hexadecimal = $hexit+
 127 @exponent    = [eE] [\-\+]? @decimal
 128
 129 -- we support the hierarchical module name extension:
 130 @qual = (@conid \.)+
 131
 132 @floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
 133
 134 -- normal signed numerical literals can only be explicitly negative,
 135 -- not explicitly positive (contrast @exponent)
 136 @negative = \-
 137 @signed = @negative ?
 138
 139 haskell :-
 140
 141 -- everywhere: skip whitespace and comments
 142 $white_no_nl+                           ;
 143 $tab+         { warn Opt_WarnTabs (text "Warning: Tab character") }
 144
 145 -- Everywhere: deal with nested comments.  We explicitly rule out
 146 -- pragmas, "{-#", so that we don't accidentally treat them as comments.
 147 -- (this can happen even though pragmas will normally take precedence due to
 148 -- longest-match, because pragmas aren't valid in every state, but comments
 149 -- are). We also rule out nested Haddock comments, if the -haddock flag is
 150 -- set.
 151
 152 "{-" / { isNormalComment } { nested_comment lexToken }
 153
 154 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 155 -- more dashes followed by a symbol should be parsed as a varsym, so we
 156 -- have to exclude those.
 157
 158 -- Since Haddock comments aren't valid in every state, we need to rule them
 159 -- out here.
 160
 161 -- The following two rules match comments that begin with two dashes, but
 162 -- continue with a different character. The rules test that this character
 163 -- is not a symbol (in which case we'd have a varsym), and that it's not a
 164 -- space followed by a Haddock comment symbol (docsym) (in which case we'd
 165 -- have a Haddock comment). The rules then munch the rest of the line.
 166
 167 "-- " ~[$docsym \#] .* { lineCommentToken }
 168 "--" [^$symbol : \ ] .* { lineCommentToken }
 169
 170 -- Next, match Haddock comments if no -haddock flag
 171
 172 "-- " [$docsym \#] .* / { ifExtension (not . haddockEnabled) } { lineCommentToken }
 173
 174 -- Now, when we've matched comments that begin with 2 dashes and continue
 175 -- with a different character, we need to match comments that begin with three
 176 -- or more dashes (which clearly can't be Haddock comments). We only need to
 177 -- make sure that the first non-dash character isn't a symbol, and munch the
 178 -- rest of the line.
 179
 180 "---"\-* [^$symbol :] .* { lineCommentToken }
 181
 182 -- Since the previous rules all match dashes followed by at least one
 183 -- character, we also need to match a whole line filled with just dashes.
 184
 185 "--"\-* / { atEOL } { lineCommentToken }
 186
 187 -- We need this rule since none of the other single line comment rules
 188 -- actually match this case.
 189
 190 "-- " / { atEOL } { lineCommentToken }
 191
 192 -- 'bol' state: beginning of a line.  Slurp up all the whitespace (including
 193 -- blank lines) until we find a non-whitespace character, then do layout
 194 -- processing.
 195 --
 196 -- One slight wibble here: what if the line begins with {-#? In
 197 -- theory, we have to lex the pragma to see if it's one we recognise,
 198 -- and if it is, then we backtrack and do_bol, otherwise we treat it
 199 -- as a nested comment.  We don't bother with this: if the line begins
 200 -- with {-#, then we'll assume it's a pragma we know about and go for do_bol.
 201 <bol> {
 202   \n                                    ;
 203   ^\# (line)?                           { begin line_prag1 }
 204   ^\# pragma .* \n                      ; -- GCC 3.3 CPP generated, apparently
 205   ^\# \! .* \n                          ; -- #!, for scripts
 206   ()                                    { do_bol }
 207 }
 208
 209 -- after a layout keyword (let, where, do, of), we begin a new layout
 210 -- context if the curly brace is missing.
 211 -- Careful! This stuff is quite delicate.
 212 <layout, layout_do> {
 213   \{ / { notFollowedBy '-' }            { pop_and open_brace }
 214         -- we might encounter {-# here, but {- has been handled already
 215   \n                                    ;
 216   ^\# (line)?                           { begin line_prag1 }
 217 }
 218
 219 -- do is treated in a subtly different way, see new_layout_context
 220 <layout>    ()                          { new_layout_context True }
 221 <layout_do> ()                          { new_layout_context False }
 222
 223 -- after a new layout context which was found to be to the left of the
 224 -- previous context, we have generated a '{' token, and we now need to
 225 -- generate a matching '}' token.
 226 <layout_left>  ()                       { do_layout_left }
 227
 228 <0,option_prags> \n                             { begin bol }
 229
 230 "{-#" $whitechar* $pragmachar+ / { known_pragma linePrags }
 231                                 { dispatch_pragmas linePrags }
 232
 233 -- single-line line pragmas, of the form
 234 --    # <line> "<file>" <extra-stuff> \n
 235 <line_prag1> $decdigit+                 { setLine line_prag1a }
 236 <line_prag1a> \" [$graphic \ ]* \"      { setFile line_prag1b }
 237 <line_prag1b> .*                        { pop }
 238
 239 -- Haskell-style line pragmas, of the form
 240 --    {-# LINE <line> "<file>" #-}
 241 <line_prag2> $decdigit+                 { setLine line_prag2a }
 242 <line_prag2a> \" [$graphic \ ]* \"      { setFile line_prag2b }
 243 <line_prag2b> "#-}"|"-}"                { pop }
 244    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
 245    -- with older versions of GHC which generated these.
 246
 247 <0,option_prags> {
 248   "{-#" $whitechar* $pragmachar+
 249         $whitechar+ $pragmachar+ / { known_pragma twoWordPrags }
 250                                  { dispatch_pragmas twoWordPrags }
 251
 252   "{-#" $whitechar* $pragmachar+ / { known_pragma oneWordPrags }
 253                                  { dispatch_pragmas oneWordPrags }
 254
 255   -- We ignore all these pragmas, but don't generate a warning for them
 256   "{-#" $whitechar* $pragmachar+ / { known_pragma ignoredPrags }
 257                                  { dispatch_pragmas ignoredPrags }
 258
 259   -- ToDo: should only be valid inside a pragma:
 260   "#-}"                                 { endPrag }
 261 }
 262
 263 <option_prags> {
 264   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 265                                    { dispatch_pragmas fileHeaderPrags }
 266
 267   "-- #"                                 { multiline_doc_comment }
 268 }
 269
 270 <0> {
 271   -- In the "0" mode we ignore these pragmas
 272   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
 273                      { nested_comment lexToken }
 274 }
 275
 276 <0> {
 277   "-- #" .* { lineCommentToken }
 278 }
 279
 280 <0,option_prags> {
 281   "{-#"  { warnThen Opt_WarnUnrecognisedPragmas (text "Unrecognised pragma")
 282                     (nested_comment lexToken) }
 283 }
 284
 285 -- '0' state: ordinary lexemes
 286
 287 -- Haddock comments
 288
 289 <0,option_prags> {
 290   "-- " $docsym      / { ifExtension haddockEnabled } { multiline_doc_comment }
 291   "{-" \ ? $docsym   / { ifExtension haddockEnabled } { nested_doc_comment }
 292 }
 293
 294 -- "special" symbols
 295
 296 <0> {
 297   "[:" / { ifExtension parrEnabled }    { token ITopabrack }
 298   ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
 299 }
 300
 301 <0> {
 302   "[|"      / { ifExtension thEnabled } { token ITopenExpQuote }
 303   "[e|"     / { ifExtension thEnabled } { token ITopenExpQuote }
 304   "[p|"     / { ifExtension thEnabled } { token ITopenPatQuote }
 305   "[d|"     / { ifExtension thEnabled } { layout_token ITopenDecQuote }
 306   "[t|"     / { ifExtension thEnabled } { token ITopenTypQuote }
 307   "|]"      / { ifExtension thEnabled } { token ITcloseQuote }
 308   \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape }
 309   "$("      / { ifExtension thEnabled } { token ITparenEscape }
 310
 311 -- For backward compatibility, accept the old dollar syntax
 312   "[$" @varid "|"  / { ifExtension qqEnabled }
 313                      { lex_quasiquote_tok }
 314
 315   "[" @varid "|"  / { ifExtension qqEnabled }
 316                      { lex_quasiquote_tok }
 317 }
 318
 319 <0> {
 320   "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol }
 321                                         { special IToparenbar }
 322   "|)" / { ifExtension arrowsEnabled }  { special ITcparenbar }
 323 }
 324
 325 <0> {
 326   \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid }
 327 }
 328
 329 <0> {
 330   "(#" / { ifExtension unboxedTuplesEnabled `alexAndPred` notFollowedBySymbol }
 331          { token IToubxparen }
 332   "#)" / { ifExtension unboxedTuplesEnabled }
 333          { token ITcubxparen }
 334 }
 335
 336 <0> {
 337   "{|" / { ifExtension genericsEnabled } { token ITocurlybar }
 338   "|}" / { ifExtension genericsEnabled } { token ITccurlybar }
 339 }
 340
 341 <0,option_prags> {
 342   \(                                    { special IToparen }
 343   \)                                    { special ITcparen }
 344   \[                                    { special ITobrack }
 345   \]                                    { special ITcbrack }
 346   \,                                    { special ITcomma }
 347   \;                                    { special ITsemi }
 348   \`                                    { special ITbackquote }
 349
 350   \{                                    { open_brace }
 351   \}                                    { close_brace }
 352 }
 353
 354 <0,option_prags> {
 355   @qual @varid                  { idtoken qvarid }
 356   @qual @conid                  { idtoken qconid }
 357   @varid                        { varid }
 358   @conid                        { idtoken conid }
 359 }
 360
 361 <0> {
 362   @qual @varid "#"+ / { ifExtension magicHashEnabled } { idtoken qvarid }
 363   @qual @conid "#"+ / { ifExtension magicHashEnabled } { idtoken qconid }
 364   @varid "#"+       / { ifExtension magicHashEnabled } { varid }
 365   @conid "#"+       / { ifExtension magicHashEnabled } { idtoken conid }
 366 }
 367
 368 -- ToDo: - move `var` and (sym) into lexical syntax?
 369 --       - remove backquote from $special?
 370 <0> {
 371   @qual @varsym       / { ifExtension oldQualOps } { idtoken qvarsym }
 372   @qual @consym       / { ifExtension oldQualOps } { idtoken qconsym }
 373   @qual \( @varsym \) / { ifExtension newQualOps } { idtoken prefixqvarsym }
 374   @qual \( @consym \) / { ifExtension newQualOps } { idtoken prefixqconsym }
 375   @varsym                                          { varsym }
 376   @consym                                          { consym }
 377 }
 378
 379 -- For the normal boxed literals we need to be careful
 380 -- when trying to be close to Haskell98
 381 <0> {
 382   -- Normal integral literals (:: Num a => a, from Integer)
 383   @decimal           { tok_num positive 0 0 decimal }
 384   0[oO] @octal       { tok_num positive 2 2 octal }
 385   0[xX] @hexadecimal { tok_num positive 2 2 hexadecimal }
 386
 387   -- Normal rational literals (:: Fractional a => a, from Rational)
 388   @floating_point    { strtoken tok_float }
 389 }
 390
 391 <0> {
 392   -- Unboxed ints (:: Int#) and words (:: Word#)
 393   -- It's simpler (and faster?) to give separate cases to the negatives,
 394   -- especially considering octal/hexadecimal prefixes.
 395   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
 396   0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
 397   0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
 398   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
 399   @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
 400   @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 401
 402   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
 403   0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
 404   0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 405
 406   -- Unboxed floats and doubles (:: Float#, :: Double#)
 407   -- prim_{float,double} work with signed literals
 408   @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
 409   @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
 410 }
 411
 412 -- Strings and chars are lexed by hand-written code.  The reason is
 413 -- that even if we recognise the string or char here in the regex
 414 -- lexer, we would still have to parse the string afterward in order
 415 -- to convert it to a String.
 416 <0> {
 417   \'                            { lex_char_tok }
 418   \"                            { lex_string_tok }
 419 }
 420
 421 {
 422 -- -----------------------------------------------------------------------------
 423 -- The token type
 424
 425 data Token
 426   = ITas                        -- Haskell keywords
 427   | ITcase
 428   | ITclass
 429   | ITdata
 430   | ITdefault
 431   | ITderiving
 432   | ITdo
 433   | ITelse
 434   | IThiding
 435   | ITif
 436   | ITimport
 437   | ITin
 438   | ITinfix
 439   | ITinfixl
 440   | ITinfixr
 441   | ITinstance
 442   | ITlet
 443   | ITmodule
 444   | ITnewtype
 445   | ITof
 446   | ITqualified
 447   | ITthen
 448   | ITtype
 449   | ITwhere
 450   | ITscc                       -- ToDo: remove (we use {-# SCC "..." #-} now)
 451
 452   | ITforall                    -- GHC extension keywords
 453   | ITforeign
 454   | ITexport
 455   | ITlabel
 456   | ITdynamic
 457   | ITsafe
 458   | ITthreadsafe
 459   | ITinterruptible
 460   | ITunsafe
 461   | ITstdcallconv
 462   | ITccallconv
 463   | ITprimcallconv
 464   | ITmdo
 465   | ITfamily
 466   | ITgroup
 467   | ITby
 468   | ITusing
 469
 470         -- Pragmas
 471   | ITinline_prag InlineSpec RuleMatchInfo
 472   | ITspec_prag                 -- SPECIALISE
 473   | ITspec_inline_prag Bool     -- SPECIALISE INLINE (or NOINLINE)
 474   | ITsource_prag
 475   | ITrules_prag
 476   | ITwarning_prag
 477   | ITdeprecated_prag
 478   | ITline_prag
 479   | ITscc_prag
 480   | ITgenerated_prag
 481   | ITcore_prag                 -- hdaume: core annotations
 482   | ITunpack_prag
 483   | ITann_prag
 484   | ITclose_prag
 485   | IToptions_prag String
 486   | ITinclude_prag String
 487   | ITlanguage_prag
 488
 489   | ITdotdot                    -- reserved symbols
 490   | ITcolon
 491   | ITdcolon
 492   | ITequal
 493   | ITlam
 494   | ITvbar
 495   | ITlarrow
 496   | ITrarrow
 497   | ITat
 498   | ITtilde
 499   | ITdarrow
 500   | ITminus
 501   | ITbang
 502   | ITstar
 503   | ITdot
 504
 505   | ITbiglam                    -- GHC-extension symbols
 506
 507   | ITocurly                    -- special symbols
 508   | ITccurly
 509   | ITocurlybar                 -- {|, for type applications
 510   | ITccurlybar                 -- |}, for type applications
 511   | ITvocurly
 512   | ITvccurly
 513   | ITobrack
 514   | ITopabrack                  -- [:, for parallel arrays with -XParr
 515   | ITcpabrack                  -- :], for parallel arrays with -XParr
 516   | ITcbrack
 517   | IToparen
 518   | ITcparen
 519   | IToubxparen
 520   | ITcubxparen
 521   | ITsemi
 522   | ITcomma
 523   | ITunderscore
 524   | ITbackquote
 525
 526   | ITvarid   FastString        -- identifiers
 527   | ITconid   FastString
 528   | ITvarsym  FastString
 529   | ITconsym  FastString
 530   | ITqvarid  (FastString,FastString)
 531   | ITqconid  (FastString,FastString)
 532   | ITqvarsym (FastString,FastString)
 533   | ITqconsym (FastString,FastString)
 534   | ITprefixqvarsym (FastString,FastString)
 535   | ITprefixqconsym (FastString,FastString)
 536
 537   | ITdupipvarid   FastString   -- GHC extension: implicit param: ?x
 538
 539   | ITchar       Char
 540   | ITstring     FastString
 541   | ITinteger    Integer
 542   | ITrational   Rational
 543
 544   | ITprimchar   Char
 545   | ITprimstring FastString
 546   | ITprimint    Integer
 547   | ITprimword   Integer
 548   | ITprimfloat  Rational
 549   | ITprimdouble Rational
 550
 551   -- Template Haskell extension tokens
 552   | ITopenExpQuote              --  [| or [e|
 553   | ITopenPatQuote              --  [p|
 554   | ITopenDecQuote              --  [d|
 555   | ITopenTypQuote              --  [t|
 556   | ITcloseQuote                --  |]
 557   | ITidEscape   FastString     --  $x
 558   | ITparenEscape               --  $(
 559   | ITvarQuote                  --  '
 560   | ITtyQuote                   --  ''
 561   | ITquasiQuote (FastString,FastString,SrcSpan) --  [:...|...|]
 562
 563   -- Arrow notation extension
 564   | ITproc
 565   | ITrec
 566   | IToparenbar                 --  (|
 567   | ITcparenbar                 --  |)
 568   | ITlarrowtail                --  -<
 569   | ITrarrowtail                --  >-
 570   | ITLarrowtail                --  -<<
 571   | ITRarrowtail                --  >>-
 572
 573   | ITunknown String            -- Used when the lexer can't make sense of it
 574   | ITeof                       -- end of file token
 575
 576   -- Documentation annotations
 577   | ITdocCommentNext  String     -- something beginning '-- |'
 578   | ITdocCommentPrev  String     -- something beginning '-- ^'
 579   | ITdocCommentNamed String     -- something beginning '-- $'
 580   | ITdocSection      Int String -- a section heading
 581   | ITdocOptions      String     -- doc options (prune, ignore-exports, etc)
 582   | ITdocOptionsOld   String     -- doc options declared "-- # ..."-style
 583   | ITlineComment     String     -- comment starting by "--"
 584   | ITblockComment    String     -- comment in {- -}
 585
 586 #ifdef DEBUG
 587   deriving Show -- debugging
 588 #endif
 589
 590 {-
 591 isSpecial :: Token -> Bool
 592 -- If we see M.x, where x is a keyword, but
 593 -- is special, we treat is as just plain M.x,
 594 -- not as a keyword.
 595 isSpecial ITas          = True
 596 isSpecial IThiding      = True
 597 isSpecial ITqualified   = True
 598 isSpecial ITforall      = True
 599 isSpecial ITexport      = True
 600 isSpecial ITlabel       = True
 601 isSpecial ITdynamic     = True
 602 isSpecial ITsafe        = True
 603 isSpecial ITthreadsafe  = True
 604 isSpecial ITinterruptible = True
 605 isSpecial ITunsafe      = True
 606 isSpecial ITccallconv   = True
 607 isSpecial ITstdcallconv = True
 608 isSpecial ITprimcallconv = True
 609 isSpecial ITmdo         = True
 610 isSpecial ITfamily      = True
 611 isSpecial ITgroup   = True
 612 isSpecial ITby      = True
 613 isSpecial ITusing   = True
 614 isSpecial _             = False
 615 -}
 616
 617 -- the bitmap provided as the third component indicates whether the
 618 -- corresponding extension keyword is valid under the extension options
 619 -- provided to the compiler; if the extension corresponding to *any* of the
 620 -- bits set in the bitmap is enabled, the keyword is valid (this setup
 621 -- facilitates using a keyword in two different extensions that can be
 622 -- activated independently)
 623 --
 624 reservedWordsFM :: UniqFM (Token, Int)
 625 reservedWordsFM = listToUFM $
 626         map (\(x, y, z) -> (mkFastString x, (y, z)))
 627        [( "_",          ITunderscore,   0 ),
 628         ( "as",         ITas,           0 ),
 629         ( "case",       ITcase,         0 ),
 630         ( "class",      ITclass,        0 ),
 631         ( "data",       ITdata,         0 ),
 632         ( "default",    ITdefault,      0 ),
 633         ( "deriving",   ITderiving,     0 ),
 634         ( "do",         ITdo,           0 ),
 635         ( "else",       ITelse,         0 ),
 636         ( "hiding",     IThiding,       0 ),
 637         ( "if",         ITif,           0 ),
 638         ( "import",     ITimport,       0 ),
 639         ( "in",         ITin,           0 ),
 640         ( "infix",      ITinfix,        0 ),
 641         ( "infixl",     ITinfixl,       0 ),
 642         ( "infixr",     ITinfixr,       0 ),
 643         ( "instance",   ITinstance,     0 ),
 644         ( "let",        ITlet,          0 ),
 645         ( "module",     ITmodule,       0 ),
 646         ( "newtype",    ITnewtype,      0 ),
 647         ( "of",         ITof,           0 ),
 648         ( "qualified",  ITqualified,    0 ),
 649         ( "then",       ITthen,         0 ),
 650         ( "type",       ITtype,         0 ),
 651         ( "where",      ITwhere,        0 ),
 652         ( "_scc_",      ITscc,          0 ),            -- ToDo: remove
 653
 654     ( "forall", ITforall,        bit explicitForallBit .|. bit inRulePragBit),
 655         ( "mdo",        ITmdo,           bit recursiveDoBit),
 656         ( "family",     ITfamily,        bit tyFamBit),
 657     ( "group",  ITgroup,     bit transformComprehensionsBit),
 658     ( "by",     ITby,        bit transformComprehensionsBit),
 659     ( "using",  ITusing,     bit transformComprehensionsBit),
 660
 661         ( "foreign",    ITforeign,       bit ffiBit),
 662         ( "export",     ITexport,        bit ffiBit),
 663         ( "label",      ITlabel,         bit ffiBit),
 664         ( "dynamic",    ITdynamic,       bit ffiBit),
 665         ( "safe",       ITsafe,          bit ffiBit),
 666         ( "threadsafe", ITthreadsafe,    bit ffiBit),  -- ToDo: remove
 667         ( "interruptible", ITinterruptible, bit ffiBit),
 668         ( "unsafe",     ITunsafe,        bit ffiBit),
 669         ( "stdcall",    ITstdcallconv,   bit ffiBit),
 670         ( "ccall",      ITccallconv,     bit ffiBit),
 671         ( "prim",       ITprimcallconv,  bit ffiBit),
 672
 673         ( "rec",        ITrec,           bit recBit),
 674         ( "proc",       ITproc,          bit arrowsBit)
 675      ]
 676
 677 reservedSymsFM :: UniqFM (Token, Int -> Bool)
 678 reservedSymsFM = listToUFM $
 679     map (\ (x,y,z) -> (mkFastString x,(y,z)))
 680       [ ("..",  ITdotdot,   always)
 681         -- (:) is a reserved op, meaning only list cons
 682        ,(":",   ITcolon,    always)
 683        ,("::",  ITdcolon,   always)
 684        ,("=",   ITequal,    always)
 685        ,("\\",  ITlam,      always)
 686        ,("|",   ITvbar,     always)
 687        ,("<-",  ITlarrow,   always)
 688        ,("->",  ITrarrow,   always)
 689        ,("@",   ITat,       always)
 690        ,("~",   ITtilde,    always)
 691        ,("=>",  ITdarrow,   always)
 692        ,("-",   ITminus,    always)
 693        ,("!",   ITbang,     always)
 694
 695         -- For data T (a::*) = MkT
 696        ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i)
 697         -- For 'forall a . t'
 698        ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
 699
 700        ,("-<",  ITlarrowtail, arrowsEnabled)
 701        ,(">-",  ITrarrowtail, arrowsEnabled)
 702        ,("-<<", ITLarrowtail, arrowsEnabled)
 703        ,(">>-", ITRarrowtail, arrowsEnabled)
 704
 705        ,("∷",   ITdcolon, unicodeSyntaxEnabled)
 706        ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
 707        ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
 708                                 explicitForallEnabled i)
 709        ,("→",   ITrarrow, unicodeSyntaxEnabled)
 710        ,("←",   ITlarrow, unicodeSyntaxEnabled)
 711
 712        ,("⤙",   ITlarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 713        ,("⤚",   ITrarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 714        ,("⤛",   ITLarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 715        ,("⤜",   ITRarrowtail, \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
 716
 717        ,("★", ITstar, unicodeSyntaxEnabled)
 718
 719         -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
 720         -- form part of a large operator.  This would let us have a better
 721         -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
 722        ]
 723
 724 -- -----------------------------------------------------------------------------
 725 -- Lexer actions
 726
 727 type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token)
 728
 729 special :: Token -> Action
 730 special tok span _buf _len = return (L span tok)
 731
 732 token, layout_token :: Token -> Action
 733 token t span _buf _len = return (L span t)
 734 layout_token t span _buf _len = pushLexState layout >> return (L span t)
 735
 736 idtoken :: (StringBuffer -> Int -> Token) -> Action
 737 idtoken f span buf len = return (L span $! (f buf len))
 738
 739 skip_one_varid :: (FastString -> Token) -> Action
 740 skip_one_varid f span buf len
 741   = return (L span $! f (lexemeToFastString (stepOn buf) (len-1)))
 742
 743 strtoken :: (String -> Token) -> Action
 744 strtoken f span buf len =
 745   return (L span $! (f $! lexemeToString buf len))
 746
 747 init_strtoken :: Int -> (String -> Token) -> Action
 748 -- like strtoken, but drops the last N character(s)
 749 init_strtoken drop f span buf len =
 750   return (L span $! (f $! lexemeToString buf (len-drop)))
 751
 752 begin :: Int -> Action
 753 begin code _span _str _len = do pushLexState code; lexToken
 754
 755 pop :: Action
 756 pop _span _buf _len = do _ <- popLexState
 757                          lexToken
 758
 759 pop_and :: Action -> Action
 760 pop_and act span buf len = do _ <- popLexState
 761                               act span buf len
 762
 763 {-# INLINE nextCharIs #-}
 764 nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
 765 nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
 766
 767 notFollowedBy :: Char -> AlexAccPred Int
 768 notFollowedBy char _ _ _ (AI _ buf)
 769   = nextCharIs buf (/=char)
 770
 771 notFollowedBySymbol :: AlexAccPred Int
 772 notFollowedBySymbol _ _ _ (AI _ buf)
 773   = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
 774
 775 -- We must reject doc comments as being ordinary comments everywhere.
 776 -- In some cases the doc comment will be selected as the lexeme due to
 777 -- maximal munch, but not always, because the nested comment rule is
 778 -- valid in all states, but the doc-comment rules are only valid in
 779 -- the non-layout states.
 780 isNormalComment :: AlexAccPred Int
 781 isNormalComment bits _ _ (AI _ buf)
 782   | haddockEnabled bits = notFollowedByDocOrPragma
 783   | otherwise           = nextCharIs buf (/='#')
 784   where
 785     notFollowedByDocOrPragma
 786        = not $ spaceAndP buf (`nextCharIs` (`elem` "|^*$#"))
 787
 788 spaceAndP :: StringBuffer -> (StringBuffer -> Bool) -> Bool
 789 spaceAndP buf p = p buf || nextCharIs buf (==' ') && p (snd (nextChar buf))
 790
 791 {-
 792 haddockDisabledAnd p bits _ _ (AI _ buf)
 793   = if haddockEnabled bits then False else (p buf)
 794 -}
 795
 796 atEOL :: AlexAccPred Int
 797 atEOL _ _ _ (AI _ buf) = atEnd buf || currentChar buf == '\n'
 798
 799 ifExtension :: (Int -> Bool) -> AlexAccPred Int
 800 ifExtension pred bits _ _ _ = pred bits
 801
 802 multiline_doc_comment :: Action
 803 multiline_doc_comment span buf _len = withLexedDocType (worker "")
 804   where
 805     worker commentAcc input docType oneLine = case alexGetChar input of
 806       Just ('\n', input')
 807         | oneLine -> docCommentEnd input commentAcc docType buf span
 808         | otherwise -> case checkIfCommentLine input' of
 809           Just input -> worker ('\n':commentAcc) input docType False
 810           Nothing -> docCommentEnd input commentAcc docType buf span
 811       Just (c, input) -> worker (c:commentAcc) input docType oneLine
 812       Nothing -> docCommentEnd input commentAcc docType buf span
 813
 814     checkIfCommentLine input = check (dropNonNewlineSpace input)
 815       where
 816         check input = case alexGetChar input of
 817           Just ('-', input) -> case alexGetChar input of
 818             Just ('-', input) -> case alexGetChar input of
 819               Just (c, _) | c /= '-' -> Just input
 820               _ -> Nothing
 821             _ -> Nothing
 822           _ -> Nothing
 823
 824         dropNonNewlineSpace input = case alexGetChar input of
 825           Just (c, input')
 826             | isSpace c && c /= '\n' -> dropNonNewlineSpace input'
 827             | otherwise -> input
 828           Nothing -> input
 829
 830 lineCommentToken :: Action
 831 lineCommentToken span buf len = do
 832   b <- extension rawTokenStreamEnabled
 833   if b then strtoken ITlineComment span buf len else lexToken
 834
 835 {-
 836   nested comments require traversing by hand, they can't be parsed
 837   using regular expressions.
 838 -}
 839 nested_comment :: P (Located Token) -> Action
 840 nested_comment cont span _str _len = do
 841   input <- getInput
 842   go "" (1::Int) input
 843   where
 844     go commentAcc 0 input = do setInput input
 845                                b <- extension rawTokenStreamEnabled
 846                                if b
 847                                  then docCommentEnd input commentAcc ITblockComment _str span
 848                                  else cont
 849     go commentAcc n input = case alexGetChar input of
 850       Nothing -> errBrace input span
 851       Just ('-',input) -> case alexGetChar input of
 852         Nothing  -> errBrace input span
 853         Just ('\125',input) -> go commentAcc (n-1) input
 854         Just (_,_)          -> go ('-':commentAcc) n input
 855       Just ('\123',input) -> case alexGetChar input of
 856         Nothing  -> errBrace input span
 857         Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
 858         Just (_,_)       -> go ('\123':commentAcc) n input
 859       Just (c,input) -> go (c:commentAcc) n input
 860
 861 nested_doc_comment :: Action
 862 nested_doc_comment span buf _len = withLexedDocType (go "")
 863   where
 864     go commentAcc input docType _ = case alexGetChar input of
 865       Nothing -> errBrace input span
 866       Just ('-',input) -> case alexGetChar input of
 867         Nothing -> errBrace input span
 868         Just ('\125',input) ->
 869           docCommentEnd input commentAcc docType buf span
 870         Just (_,_) -> go ('-':commentAcc) input docType False
 871       Just ('\123', input) -> case alexGetChar input of
 872         Nothing  -> errBrace input span
 873         Just ('-',input) -> do
 874           setInput input
 875           let cont = do input <- getInput; go commentAcc input docType False
 876           nested_comment cont span buf _len
 877         Just (_,_) -> go ('\123':commentAcc) input docType False
 878       Just (c,input) -> go (c:commentAcc) input docType False
 879
 880 withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (Located Token))
 881                  -> P (Located Token)
 882 withLexedDocType lexDocComment = do
 883   input@(AI _ buf) <- getInput
 884   case prevChar buf ' ' of
 885     '|' -> lexDocComment input ITdocCommentNext False
 886     '^' -> lexDocComment input ITdocCommentPrev False
 887     '$' -> lexDocComment input ITdocCommentNamed False
 888     '*' -> lexDocSection 1 input
 889     '#' -> lexDocComment input ITdocOptionsOld False
 890     _ -> panic "withLexedDocType: Bad doc type"
 891  where
 892     lexDocSection n input = case alexGetChar input of
 893       Just ('*', input) -> lexDocSection (n+1) input
 894       Just (_,   _)     -> lexDocComment input (ITdocSection n) True
 895       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 896
 897 -- RULES pragmas turn on the forall and '.' keywords, and we turn them
 898 -- off again at the end of the pragma.
 899 rulePrag :: Action
 900 rulePrag span _buf _len = do
 901   setExts (.|. bit inRulePragBit)
 902   return (L span ITrules_prag)
 903
 904 endPrag :: Action
 905 endPrag span _buf _len = do
 906   setExts (.&. complement (bit inRulePragBit))
 907   return (L span ITclose_prag)
 908
 909 -- docCommentEnd
 910 -------------------------------------------------------------------------------
 911 -- This function is quite tricky. We can't just return a new token, we also
 912 -- need to update the state of the parser. Why? Because the token is longer
 913 -- than what was lexed by Alex, and the lexToken function doesn't know this, so
 914 -- it writes the wrong token length to the parser state. This function is
 915 -- called afterwards, so it can just update the state.
 916
 917 docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
 918                  SrcSpan -> P (Located Token)
 919 docCommentEnd input commentAcc docType buf span = do
 920   setInput input
 921   let (AI loc nextBuf) = input
 922       comment = reverse commentAcc
 923       span' = mkSrcSpan (srcSpanStart span) loc
 924       last_len = byteDiff buf nextBuf
 925
 926   span `seq` setLastToken span' last_len
 927   return (L span' (docType comment))
 928
 929 errBrace :: AlexInput -> SrcSpan -> P a
 930 errBrace (AI end _) span = failLocMsgP (srcSpanStart span) end "unterminated `{-'"
 931
 932 open_brace, close_brace :: Action
 933 open_brace span _str _len = do
 934   ctx <- getContext
 935   setContext (NoLayout:ctx)
 936   return (L span ITocurly)
 937 close_brace span _str _len = do
 938   popContext
 939   return (L span ITccurly)
 940
 941 qvarid, qconid :: StringBuffer -> Int -> Token
 942 qvarid buf len = ITqvarid $! splitQualName buf len False
 943 qconid buf len = ITqconid $! splitQualName buf len False
 944
 945 splitQualName :: StringBuffer -> Int -> Bool -> (FastString,FastString)
 946 -- takes a StringBuffer and a length, and returns the module name
 947 -- and identifier parts of a qualified name.  Splits at the *last* dot,
 948 -- because of hierarchical module names.
 949 splitQualName orig_buf len parens = split orig_buf orig_buf
 950   where
 951     split buf dot_buf
 952         | orig_buf `byteDiff` buf >= len  = done dot_buf
 953         | c == '.'                        = found_dot buf'
 954         | otherwise                       = split buf' dot_buf
 955       where
 956        (c,buf') = nextChar buf
 957
 958     -- careful, we might get names like M....
 959     -- so, if the character after the dot is not upper-case, this is
 960     -- the end of the qualifier part.
 961     found_dot buf -- buf points after the '.'
 962         | isUpper c    = split buf' buf
 963         | otherwise    = done buf
 964       where
 965        (c,buf') = nextChar buf
 966
 967     done dot_buf =
 968         (lexemeToFastString orig_buf (qual_size - 1),
 969          if parens -- Prelude.(+)
 970             then lexemeToFastString (stepOn dot_buf) (len - qual_size - 2)
 971             else lexemeToFastString dot_buf (len - qual_size))
 972       where
 973         qual_size = orig_buf `byteDiff` dot_buf
 974
 975 varid :: Action
 976 varid span buf len =
 977   fs `seq`
 978   case lookupUFM reservedWordsFM fs of
 979         Just (keyword,0)    -> do
 980                 maybe_layout keyword
 981                 return (L span keyword)
 982         Just (keyword,exts) -> do
 983                 b <- extension (\i -> exts .&. i /= 0)
 984                 if b then do maybe_layout keyword
 985                              return (L span keyword)
 986                      else return (L span (ITvarid fs))
 987         _other -> return (L span (ITvarid fs))
 988   where
 989         fs = lexemeToFastString buf len
 990
 991 conid :: StringBuffer -> Int -> Token
 992 conid buf len = ITconid fs
 993   where fs = lexemeToFastString buf len
 994
 995 qvarsym, qconsym, prefixqvarsym, prefixqconsym :: StringBuffer -> Int -> Token
 996 qvarsym buf len = ITqvarsym $! splitQualName buf len False
 997 qconsym buf len = ITqconsym $! splitQualName buf len False
 998 prefixqvarsym buf len = ITprefixqvarsym $! splitQualName buf len True
 999 prefixqconsym buf len = ITprefixqconsym $! splitQualName buf len True
1000
1001 varsym, consym :: Action
1002 varsym = sym ITvarsym
1003 consym = sym ITconsym
1004
1005 sym :: (FastString -> Token) -> SrcSpan -> StringBuffer -> Int
1006     -> P (Located Token)
1007 sym con span buf len =
1008   case lookupUFM reservedSymsFM fs of
1009         Just (keyword,exts) -> do
1010                 b <- extension exts
1011                 if b then return (L span keyword)
1012                      else return (L span $! con fs)
1013         _other -> return (L span $! con fs)
1014   where
1015         fs = lexemeToFastString buf len
1016
1017 -- Variations on the integral numeric literal.
1018 tok_integral :: (Integer -> Token)
1019      -> (Integer -> Integer)
1020  --    -> (StringBuffer -> StringBuffer) -> (Int -> Int)
1021      -> Int -> Int
1022      -> (Integer, (Char->Int)) -> Action
1023 tok_integral itint transint transbuf translen (radix,char_to_int) span buf len =
1024   return $ L span $ itint $! transint $ parseUnsignedInteger
1025      (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
1026
1027 -- some conveniences for use with tok_integral
1028 tok_num :: (Integer -> Integer)
1029         -> Int -> Int
1030         -> (Integer, (Char->Int)) -> Action
1031 tok_num = tok_integral ITinteger
1032 tok_primint :: (Integer -> Integer)
1033             -> Int -> Int
1034             -> (Integer, (Char->Int)) -> Action
1035 tok_primint = tok_integral ITprimint
1036 tok_primword :: Int -> Int
1037              -> (Integer, (Char->Int)) -> Action
1038 tok_primword = tok_integral ITprimword positive
1039 positive, negative :: (Integer -> Integer)
1040 positive = id
1041 negative = negate
1042 decimal, octal, hexadecimal :: (Integer, Char -> Int)
1043 decimal = (10,octDecDigit)
1044 octal = (8,octDecDigit)
1045 hexadecimal = (16,hexDigit)
1046
1047 -- readRational can understand negative rationals, exponents, everything.
1048 tok_float, tok_primfloat, tok_primdouble :: String -> Token
1049 tok_float        str = ITrational   $! readRational str
1050 tok_primfloat    str = ITprimfloat  $! readRational str
1051 tok_primdouble   str = ITprimdouble $! readRational str
1052
1053 -- -----------------------------------------------------------------------------
1054 -- Layout processing
1055
1056 -- we're at the first token on a line, insert layout tokens if necessary
1057 do_bol :: Action
1058 do_bol span _str _len = do
1059         pos <- getOffside
1060         case pos of
1061             LT -> do
1062                 --trace "layout: inserting '}'" $ do
1063                 popContext
1064                 -- do NOT pop the lex state, we might have a ';' to insert
1065                 return (L span ITvccurly)
1066             EQ -> do
1067                 --trace "layout: inserting ';'" $ do
1068                 _ <- popLexState
1069                 return (L span ITsemi)
1070             GT -> do
1071                 _ <- popLexState
1072                 lexToken
1073
1074 -- certain keywords put us in the "layout" state, where we might
1075 -- add an opening curly brace.
1076 maybe_layout :: Token -> P ()
1077 maybe_layout t = do -- If the alternative layout rule is enabled then
1078                     -- we never create an implicit layout context here.
1079                     -- Layout is handled XXX instead.
1080                     -- The code for closing implicit contexts, or
1081                     -- inserting implicit semi-colons, is therefore
1082                     -- irrelevant as it only applies in an implicit
1083                     -- context.
1084                     alr <- extension alternativeLayoutRule
1085                     unless alr $ f t
1086     where f ITdo    = pushLexState layout_do
1087           f ITmdo   = pushLexState layout_do
1088           f ITof    = pushLexState layout
1089           f ITlet   = pushLexState layout
1090           f ITwhere = pushLexState layout
1091           f ITrec   = pushLexState layout
1092           f _       = return ()
1093
1094 -- Pushing a new implicit layout context.  If the indentation of the
1095 -- next token is not greater than the previous layout context, then
1096 -- Haskell 98 says that the new layout context should be empty; that is
1097 -- the lexer must generate {}.
1098 --
1099 -- We are slightly more lenient than this: when the new context is started
1100 -- by a 'do', then we allow the new context to be at the same indentation as
1101 -- the previous context.  This is what the 'strict' argument is for.
1102 --
1103 new_layout_context :: Bool -> Action
1104 new_layout_context strict span _buf _len = do
1105     _ <- popLexState
1106     (AI l _) <- getInput
1107     let offset = srcLocCol l
1108     ctx <- getContext
1109     case ctx of
1110         Layout prev_off : _  |
1111            (strict     && prev_off >= offset  ||
1112             not strict && prev_off > offset) -> do
1113                 -- token is indented to the left of the previous context.
1114                 -- we must generate a {} sequence now.
1115                 pushLexState layout_left
1116                 return (L span ITvocurly)
1117         _ -> do
1118                 setContext (Layout offset : ctx)
1119                 return (L span ITvocurly)
1120
1121 do_layout_left :: Action
1122 do_layout_left span _buf _len = do
1123     _ <- popLexState
1124     pushLexState bol  -- we must be at the start of a line
1125     return (L span ITvccurly)
1126
1127 -- -----------------------------------------------------------------------------
1128 -- LINE pragmas
1129
1130 setLine :: Int -> Action
1131 setLine code span buf len = do
1132   let line = parseUnsignedInteger buf len 10 octDecDigit
1133   setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
1134         -- subtract one: the line number refers to the *following* line
1135   _ <- popLexState
1136   pushLexState code
1137   lexToken
1138
1139 setFile :: Int -> Action
1140 setFile code span buf len = do
1141   let file = lexemeToFastString (stepOn buf) (len-2)
1142   setAlrLastLoc noSrcSpan
1143   setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
1144   _ <- popLexState
1145   pushLexState code
1146   lexToken
1147
1148
1149 -- -----------------------------------------------------------------------------
1150 -- Options, includes and language pragmas.
1151
1152 lex_string_prag :: (String -> Token) -> Action
1153 lex_string_prag mkTok span _buf _len
1154     = do input <- getInput
1155          start <- getSrcLoc
1156          tok <- go [] input
1157          end <- getSrcLoc
1158          return (L (mkSrcSpan start end) tok)
1159     where go acc input
1160               = if isString input "#-}"
1161                    then do setInput input
1162                            return (mkTok (reverse acc))
1163                    else case alexGetChar input of
1164                           Just (c,i) -> go (c:acc) i
1165                           Nothing -> err input
1166           isString _ [] = True
1167           isString i (x:xs)
1168               = case alexGetChar i of
1169                   Just (c,i') | c == x    -> isString i' xs
1170                   _other -> False
1171           err (AI end _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma"
1172
1173
1174 -- -----------------------------------------------------------------------------
1175 -- Strings & Chars
1176
1177 -- This stuff is horrible.  I hates it.
1178
1179 lex_string_tok :: Action
1180 lex_string_tok span _buf _len = do
1181   tok <- lex_string ""
1182   end <- getSrcLoc
1183   return (L (mkSrcSpan (srcSpanStart span) end) tok)
1184
1185 lex_string :: String -> P Token
1186 lex_string s = do
1187   i <- getInput
1188   case alexGetChar' i of
1189     Nothing -> lit_error i
1190
1191     Just ('"',i)  -> do
1192         setInput i
1193         magicHash <- extension magicHashEnabled
1194         if magicHash
1195           then do
1196             i <- getInput
1197             case alexGetChar' i of
1198               Just ('#',i) -> do
1199                    setInput i
1200                    if any (> '\xFF') s
1201                     then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'"
1202                     else let s' = mkZFastString (reverse s) in
1203                          return (ITprimstring s')
1204                         -- mkZFastString is a hack to avoid encoding the
1205                         -- string in UTF-8.  We just want the exact bytes.
1206               _other ->
1207                 return (ITstring (mkFastString (reverse s)))
1208           else
1209                 return (ITstring (mkFastString (reverse s)))
1210
1211     Just ('\\',i)
1212         | Just ('&',i) <- next -> do
1213                 setInput i; lex_string s
1214         | Just (c,i) <- next, c <= '\x7f' && is_space c -> do
1215                            -- is_space only works for <= '\x7f' (#3751)
1216                 setInput i; lex_stringgap s
1217         where next = alexGetChar' i
1218
1219     Just (c, i1) -> do
1220         case c of
1221           '\\' -> do setInput i1; c' <- lex_escape; lex_string (c':s)
1222           c | isAny c -> do setInput i1; lex_string (c:s)
1223           _other -> lit_error i
1224
1225 lex_stringgap :: String -> P Token
1226 lex_stringgap s = do
1227   i <- getInput
1228   c <- getCharOrFail i
1229   case c of
1230     '\\' -> lex_string s
1231     c | is_space c -> lex_stringgap s
1232     _other -> lit_error i
1233
1234
1235 lex_char_tok :: Action
1236 -- Here we are basically parsing character literals, such as 'x' or '\n'
1237 -- but, when Template Haskell is on, we additionally spot
1238 -- 'x and ''T, returning ITvarQuote and ITtyQuote respectively,
1239 -- but WITHOUT CONSUMING the x or T part  (the parser does that).
1240 -- So we have to do two characters of lookahead: when we see 'x we need to
1241 -- see if there's a trailing quote
1242 lex_char_tok span _buf _len = do        -- We've seen '
1243    i1 <- getInput       -- Look ahead to first character
1244    let loc = srcSpanStart span
1245    case alexGetChar' i1 of
1246         Nothing -> lit_error  i1
1247
1248         Just ('\'', i2@(AI end2 _)) -> do       -- We've seen ''
1249                   th_exts <- extension thEnabled
1250                   if th_exts then do
1251                         setInput i2
1252                         return (L (mkSrcSpan loc end2)  ITtyQuote)
1253                    else lit_error i1
1254
1255         Just ('\\', i2@(AI _end2 _)) -> do      -- We've seen 'backslash
1256                   setInput i2
1257                   lit_ch <- lex_escape
1258                   i3 <- getInput
1259                   mc <- getCharOrFail i3 -- Trailing quote
1260                   if mc == '\'' then finish_char_tok loc lit_ch
1261                                 else lit_error i3
1262
1263         Just (c, i2@(AI _end2 _))
1264                 | not (isAny c) -> lit_error i1
1265                 | otherwise ->
1266
1267                 -- We've seen 'x, where x is a valid character
1268                 --  (i.e. not newline etc) but not a quote or backslash
1269            case alexGetChar' i2 of      -- Look ahead one more character
1270                 Just ('\'', i3) -> do   -- We've seen 'x'
1271                         setInput i3
1272                         finish_char_tok loc c
1273                 _other -> do            -- We've seen 'x not followed by quote
1274                                         -- (including the possibility of EOF)
1275                                         -- If TH is on, just parse the quote only
1276                         th_exts <- extension thEnabled
1277                         let (AI end _) = i1
1278                         if th_exts then return (L (mkSrcSpan loc end) ITvarQuote)
1279                                    else lit_error i2
1280
1281 finish_char_tok :: SrcLoc -> Char -> P (Located Token)
1282 finish_char_tok loc ch  -- We've already seen the closing quote
1283                         -- Just need to check for trailing #
1284   = do  magicHash <- extension magicHashEnabled
1285         i@(AI end _) <- getInput
1286         if magicHash then do
1287                 case alexGetChar' i of
1288                         Just ('#',i@(AI end _)) -> do
1289                                 setInput i
1290                                 return (L (mkSrcSpan loc end) (ITprimchar ch))
1291                         _other ->
1292                                 return (L (mkSrcSpan loc end) (ITchar ch))
1293             else do
1294                    return (L (mkSrcSpan loc end) (ITchar ch))
1295
1296 isAny :: Char -> Bool
1297 isAny c | c > '\x7f' = isPrint c
1298         | otherwise  = is_any c
1299
1300 lex_escape :: P Char
1301 lex_escape = do
1302   i0 <- getInput
1303   c <- getCharOrFail i0
1304   case c of
1305         'a'   -> return '\a'
1306         'b'   -> return '\b'
1307         'f'   -> return '\f'
1308         'n'   -> return '\n'
1309         'r'   -> return '\r'
1310         't'   -> return '\t'
1311         'v'   -> return '\v'
1312         '\\'  -> return '\\'
1313         '"'   -> return '\"'
1314         '\''  -> return '\''
1315         '^'   -> do i1 <- getInput
1316                     c <- getCharOrFail i1
1317                     if c >= '@' && c <= '_'
1318                         then return (chr (ord c - ord '@'))
1319                         else lit_error i1
1320
1321         'x'   -> readNum is_hexdigit 16 hexDigit
1322         'o'   -> readNum is_octdigit  8 octDecDigit
1323         x | is_decdigit x -> readNum2 is_decdigit 10 octDecDigit (octDecDigit x)
1324
1325         c1 ->  do
1326            i <- getInput
1327            case alexGetChar' i of
1328             Nothing -> lit_error i0
1329             Just (c2,i2) ->
1330               case alexGetChar' i2 of
1331                 Nothing -> do lit_error i0
1332                 Just (c3,i3) ->
1333                    let str = [c1,c2,c3] in
1334                    case [ (c,rest) | (p,c) <- silly_escape_chars,
1335                                      Just rest <- [stripPrefix p str] ] of
1336                           (escape_char,[]):_ -> do
1337                                 setInput i3
1338                                 return escape_char
1339                           (escape_char,_:_):_ -> do
1340                                 setInput i2
1341                                 return escape_char
1342                           [] -> lit_error i0
1343
1344 readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char
1345 readNum is_digit base conv = do
1346   i <- getInput
1347   c <- getCharOrFail i
1348   if is_digit c
1349         then readNum2 is_digit base conv (conv c)
1350         else lit_error i
1351
1352 readNum2 :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> P Char
1353 readNum2 is_digit base conv i = do
1354   input <- getInput
1355   read i input
1356   where read i input = do
1357           case alexGetChar' input of
1358             Just (c,input') | is_digit c -> do
1359                let i' = i*base + conv c
1360                if i' > 0x10ffff
1361                   then setInput input >> lexError "numeric escape sequence out of range"
1362                   else read i' input'
1363             _other -> do
1364               setInput input; return (chr i)
1365
1366
1367 silly_escape_chars :: [(String, Char)]
1368 silly_escape_chars = [
1369         ("NUL", '\NUL'),
1370         ("SOH", '\SOH'),
1371         ("STX", '\STX'),
1372         ("ETX", '\ETX'),
1373         ("EOT", '\EOT'),
1374         ("ENQ", '\ENQ'),
1375         ("ACK", '\ACK'),
1376         ("BEL", '\BEL'),
1377         ("BS", '\BS'),
1378         ("HT", '\HT'),
1379         ("LF", '\LF'),
1380         ("VT", '\VT'),
1381         ("FF", '\FF'),
1382         ("CR", '\CR'),
1383         ("SO", '\SO'),
1384         ("SI", '\SI'),
1385         ("DLE", '\DLE'),
1386         ("DC1", '\DC1'),
1387         ("DC2", '\DC2'),
1388         ("DC3", '\DC3'),
1389         ("DC4", '\DC4'),
1390         ("NAK", '\NAK'),
1391         ("SYN", '\SYN'),
1392         ("ETB", '\ETB'),
1393         ("CAN", '\CAN'),
1394         ("EM", '\EM'),
1395         ("SUB", '\SUB'),
1396         ("ESC", '\ESC'),
1397         ("FS", '\FS'),
1398         ("GS", '\GS'),
1399         ("RS", '\RS'),
1400         ("US", '\US'),
1401         ("SP", '\SP'),
1402         ("DEL", '\DEL')
1403         ]
1404
1405 -- before calling lit_error, ensure that the current input is pointing to
1406 -- the position of the error in the buffer.  This is so that we can report
1407 -- a correct location to the user, but also so we can detect UTF-8 decoding
1408 -- errors if they occur.
1409 lit_error :: AlexInput -> P a
1410 lit_error i = do setInput i; lexError "lexical error in string/character literal"
1411
1412 getCharOrFail :: AlexInput -> P Char
1413 getCharOrFail i =  do
1414   case alexGetChar' i of
1415         Nothing -> lexError "unexpected end-of-file in string/character literal"
1416         Just (c,i)  -> do setInput i; return c
1417
1418 -- -----------------------------------------------------------------------------
1419 -- QuasiQuote
1420
1421 lex_quasiquote_tok :: Action
1422 lex_quasiquote_tok span buf len = do
1423   let quoter = tail (lexemeToString buf (len - 1))
1424                 -- 'tail' drops the initial '[',
1425                 -- while the -1 drops the trailing '|'
1426   quoteStart <- getSrcLoc
1427   quote <- lex_quasiquote ""
1428   end <- getSrcLoc
1429   return (L (mkSrcSpan (srcSpanStart span) end)
1430            (ITquasiQuote (mkFastString quoter,
1431                           mkFastString (reverse quote),
1432                           mkSrcSpan quoteStart end)))
1433
1434 lex_quasiquote :: String -> P String
1435 lex_quasiquote s = do
1436   i <- getInput
1437   case alexGetChar' i of
1438     Nothing -> lit_error i
1439
1440     Just ('\\',i)
1441         | Just ('|',i) <- next -> do
1442                 setInput i; lex_quasiquote ('|' : s)
1443         | Just (']',i) <- next -> do
1444                 setInput i; lex_quasiquote (']' : s)
1445         where next = alexGetChar' i
1446
1447     Just ('|',i)
1448         | Just (']',i) <- next -> do
1449                 setInput i; return s
1450         where next = alexGetChar' i
1451
1452     Just (c, i) -> do
1453          setInput i; lex_quasiquote (c : s)
1454
1455 -- -----------------------------------------------------------------------------
1456 -- Warnings
1457
1458 warn :: DynFlag -> SDoc -> Action
1459 warn option warning srcspan _buf _len = do
1460     addWarning option srcspan warning
1461     lexToken
1462
1463 warnThen :: DynFlag -> SDoc -> Action -> Action
1464 warnThen option warning action srcspan buf len = do
1465     addWarning option srcspan warning
1466     action srcspan buf len
1467
1468 -- -----------------------------------------------------------------------------
1469 -- The Parse Monad
1470
1471 data LayoutContext
1472   = NoLayout
1473   | Layout !Int
1474   deriving Show
1475
1476 data ParseResult a
1477   = POk PState a
1478   | PFailed
1479         SrcSpan         -- The start and end of the text span related to
1480                         -- the error.  Might be used in environments which can
1481                         -- show this span, e.g. by highlighting it.
1482         Message         -- The error message
1483
1484 data PState = PState {
1485         buffer     :: StringBuffer,
1486         dflags     :: DynFlags,
1487         messages   :: Messages,
1488         last_loc   :: SrcSpan,  -- pos of previous token
1489         last_len   :: !Int,     -- len of previous token
1490         loc        :: SrcLoc,   -- current loc (end of prev token + 1)
1491         extsBitmap :: !Int,     -- bitmap that determines permitted extensions
1492         context    :: [LayoutContext],
1493         lex_state  :: [Int],
1494         -- Used in the alternative layout rule:
1495         -- These tokens are the next ones to be sent out. They are
1496         -- just blindly emitted, without the rule looking at them again:
1497         alr_pending_implicit_tokens :: [Located Token],
1498         -- This is the next token to be considered or, if it is Nothing,
1499         -- we need to get the next token from the input stream:
1500         alr_next_token :: Maybe (Located Token),
1501         -- This is what we consider to be the locatino of the last token
1502         -- emitted:
1503         alr_last_loc :: SrcSpan,
1504         -- The stack of layout contexts:
1505         alr_context :: [ALRContext],
1506         -- Are we expecting a '{'? If it's Just, then the ALRLayout tells
1507         -- us what sort of layout the '{' will open:
1508         alr_expecting_ocurly :: Maybe ALRLayout,
1509         -- Have we just had the '}' for a let block? If so, than an 'in'
1510         -- token doesn't need to close anything:
1511         alr_justClosedExplicitLetBlock :: Bool
1512      }
1513         -- last_loc and last_len are used when generating error messages,
1514         -- and in pushCurrentContext only.  Sigh, if only Happy passed the
1515         -- current token to happyError, we could at least get rid of last_len.
1516         -- Getting rid of last_loc would require finding another way to
1517         -- implement pushCurrentContext (which is only called from one place).
1518
1519 data ALRContext = ALRNoLayout Bool{- does it contain commas? -}
1520                               Bool{- is it a 'let' block? -}
1521                 | ALRLayout ALRLayout Int
1522 data ALRLayout = ALRLayoutLet
1523                | ALRLayoutWhere
1524                | ALRLayoutOf
1525                | ALRLayoutDo
1526
1527 newtype P a = P { unP :: PState -> ParseResult a }
1528
1529 instance Monad P where
1530   return = returnP
1531   (>>=) = thenP
1532   fail = failP
1533
1534 returnP :: a -> P a
1535 returnP a = a `seq` (P $ \s -> POk s a)
1536
1537 thenP :: P a -> (a -> P b) -> P b
1538 (P m) `thenP` k = P $ \ s ->
1539         case m s of
1540                 POk s1 a         -> (unP (k a)) s1
1541                 PFailed span err -> PFailed span err
1542
1543 failP :: String -> P a
1544 failP msg = P $ \s -> PFailed (last_loc s) (text msg)
1545
1546 failMsgP :: String -> P a
1547 failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg)
1548
1549 failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a
1550 failLocMsgP loc1 loc2 str = P $ \_ -> PFailed (mkSrcSpan loc1 loc2) (text str)
1551
1552 failSpanMsgP :: SrcSpan -> SDoc -> P a
1553 failSpanMsgP span msg = P $ \_ -> PFailed span msg
1554
1555 getPState :: P PState
1556 getPState = P $ \s -> POk s s
1557
1558 getDynFlags :: P DynFlags
1559 getDynFlags = P $ \s -> POk s (dflags s)
1560
1561 withThisPackage :: (PackageId -> a) -> P a
1562 withThisPackage f
1563  = do   pkg     <- liftM thisPackage getDynFlags
1564         return  $ f pkg
1565
1566 extension :: (Int -> Bool) -> P Bool
1567 extension p = P $ \s -> POk s (p $! extsBitmap s)
1568
1569 getExts :: P Int
1570 getExts = P $ \s -> POk s (extsBitmap s)
1571
1572 setExts :: (Int -> Int) -> P ()
1573 setExts f = P $ \s -> POk s{ extsBitmap = f (extsBitmap s) } ()
1574
1575 setSrcLoc :: SrcLoc -> P ()
1576 setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} ()
1577
1578 getSrcLoc :: P SrcLoc
1579 getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc
1580
1581 setLastToken :: SrcSpan -> Int -> P ()
1582 setLastToken loc len = P $ \s -> POk s {
1583   last_loc=loc,
1584   last_len=len
1585   } ()
1586
1587 data AlexInput = AI SrcLoc StringBuffer
1588
1589 alexInputPrevChar :: AlexInput -> Char
1590 alexInputPrevChar (AI _ buf) = prevChar buf '\n'
1591
1592 alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
1593 alexGetChar (AI loc s)
1594   | atEnd s   = Nothing
1595   | otherwise = adj_c `seq` loc' `seq` s' `seq`
1596                 --trace (show (ord c)) $
1597                 Just (adj_c, (AI loc' s'))
1598   where (c,s') = nextChar s
1599         loc'   = advanceSrcLoc loc c
1600
1601         non_graphic     = '\x0'
1602         upper           = '\x1'
1603         lower           = '\x2'
1604         digit           = '\x3'
1605         symbol          = '\x4'
1606         space           = '\x5'
1607         other_graphic   = '\x6'
1608
1609         adj_c
1610           | c <= '\x06' = non_graphic
1611           | c <= '\x7f' = c
1612           -- Alex doesn't handle Unicode, so when Unicode
1613           -- character is encountered we output these values
1614           -- with the actual character value hidden in the state.
1615           | otherwise =
1616                 case generalCategory c of
1617                   UppercaseLetter       -> upper
1618                   LowercaseLetter       -> lower
1619                   TitlecaseLetter       -> upper
1620                   ModifierLetter        -> other_graphic
1621                   OtherLetter           -> lower -- see #1103
1622                   NonSpacingMark        -> other_graphic
1623                   SpacingCombiningMark  -> other_graphic
1624                   EnclosingMark         -> other_graphic
1625                   DecimalNumber         -> digit
1626                   LetterNumber          -> other_graphic
1627                   OtherNumber           -> other_graphic
1628                   ConnectorPunctuation  -> symbol
1629                   DashPunctuation       -> symbol
1630                   OpenPunctuation       -> other_graphic
1631                   ClosePunctuation      -> other_graphic
1632                   InitialQuote          -> other_graphic
1633                   FinalQuote            -> other_graphic
1634                   OtherPunctuation      -> symbol
1635                   MathSymbol            -> symbol
1636                   CurrencySymbol        -> symbol
1637                   ModifierSymbol        -> symbol
1638                   OtherSymbol           -> symbol
1639                   Space                 -> space
1640                   _other                -> non_graphic
1641
1642 -- This version does not squash unicode characters, it is used when
1643 -- lexing strings.
1644 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
1645 alexGetChar' (AI loc s)
1646   | atEnd s   = Nothing
1647   | otherwise = c `seq` loc' `seq` s' `seq`
1648                 --trace (show (ord c)) $
1649                 Just (c, (AI loc' s'))
1650   where (c,s') = nextChar s
1651         loc'   = advanceSrcLoc loc c
1652
1653 getInput :: P AlexInput
1654 getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (AI l b)
1655
1656 setInput :: AlexInput -> P ()
1657 setInput (AI l b) = P $ \s -> POk s{ loc=l, buffer=b } ()
1658
1659 pushLexState :: Int -> P ()
1660 pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} ()
1661
1662 popLexState :: P Int
1663 popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls
1664
1665 getLexState :: P Int
1666 getLexState = P $ \s@PState{ lex_state=ls:_ } -> POk s ls
1667
1668 popNextToken :: P (Maybe (Located Token))
1669 popNextToken
1670     = P $ \s@PState{ alr_next_token = m } ->
1671               POk (s {alr_next_token = Nothing}) m
1672
1673 setAlrLastLoc :: SrcSpan -> P ()
1674 setAlrLastLoc l = P $ \s -> POk (s {alr_last_loc = l}) ()
1675
1676 getAlrLastLoc :: P SrcSpan
1677 getAlrLastLoc = P $ \s@(PState {alr_last_loc = l}) -> POk s l
1678
1679 getALRContext :: P [ALRContext]
1680 getALRContext = P $ \s@(PState {alr_context = cs}) -> POk s cs
1681
1682 setALRContext :: [ALRContext] -> P ()
1683 setALRContext cs = P $ \s -> POk (s {alr_context = cs}) ()
1684
1685 getJustClosedExplicitLetBlock :: P Bool
1686 getJustClosedExplicitLetBlock
1687  = P $ \s@(PState {alr_justClosedExplicitLetBlock = b}) -> POk s b
1688
1689 setJustClosedExplicitLetBlock :: Bool -> P ()
1690 setJustClosedExplicitLetBlock b
1691  = P $ \s -> POk (s {alr_justClosedExplicitLetBlock = b}) ()
1692
1693 setNextToken :: Located Token -> P ()
1694 setNextToken t = P $ \s -> POk (s {alr_next_token = Just t}) ()
1695
1696 popPendingImplicitToken :: P (Maybe (Located Token))
1697 popPendingImplicitToken
1698     = P $ \s@PState{ alr_pending_implicit_tokens = ts } ->
1699               case ts of
1700               [] -> POk s Nothing
1701               (t : ts') -> POk (s {alr_pending_implicit_tokens = ts'}) (Just t)
1702
1703 setPendingImplicitTokens :: [Located Token] -> P ()
1704 setPendingImplicitTokens ts = P $ \s -> POk (s {alr_pending_implicit_tokens = ts}) ()
1705
1706 getAlrExpectingOCurly :: P (Maybe ALRLayout)
1707 getAlrExpectingOCurly = P $ \s@(PState {alr_expecting_ocurly = b}) -> POk s b
1708
1709 setAlrExpectingOCurly :: Maybe ALRLayout -> P ()
1710 setAlrExpectingOCurly b = P $ \s -> POk (s {alr_expecting_ocurly = b}) ()
1711
1712 -- for reasons of efficiency, flags indicating language extensions (eg,
1713 -- -fglasgow-exts or -XParr) are represented by a bitmap stored in an unboxed
1714 -- integer
1715
1716 genericsBit :: Int
1717 genericsBit = 0 -- {| and |}
1718 ffiBit :: Int
1719 ffiBit     = 1
1720 parrBit :: Int
1721 parrBit    = 2
1722 arrowsBit :: Int
1723 arrowsBit  = 4
1724 thBit :: Int
1725 thBit      = 5
1726 ipBit :: Int
1727 ipBit      = 6
1728 explicitForallBit :: Int
1729 explicitForallBit = 7 -- the 'forall' keyword and '.' symbol
1730 bangPatBit :: Int
1731 bangPatBit = 8  -- Tells the parser to understand bang-patterns
1732                 -- (doesn't affect the lexer)
1733 tyFamBit :: Int
1734 tyFamBit   = 9  -- indexed type families: 'family' keyword and kind sigs
1735 haddockBit :: Int
1736 haddockBit = 10 -- Lex and parse Haddock comments
1737 magicHashBit :: Int
1738 magicHashBit = 11 -- "#" in both functions and operators
1739 kindSigsBit :: Int
1740 kindSigsBit = 12 -- Kind signatures on type variables
1741 recursiveDoBit :: Int
1742 recursiveDoBit = 13 -- mdo
1743 unicodeSyntaxBit :: Int
1744 unicodeSyntaxBit = 14 -- the forall symbol, arrow symbols, etc
1745 unboxedTuplesBit :: Int
1746 unboxedTuplesBit = 15 -- (# and #)
1747 datatypeContextsBit :: Int
1748 datatypeContextsBit = 16
1749 transformComprehensionsBit :: Int
1750 transformComprehensionsBit = 17
1751 qqBit :: Int
1752 qqBit      = 18 -- enable quasiquoting
1753 inRulePragBit :: Int
1754 inRulePragBit = 19
1755 rawTokenStreamBit :: Int
1756 rawTokenStreamBit = 20 -- producing a token stream with all comments included
1757 newQualOpsBit :: Int
1758 newQualOpsBit = 21 -- Haskell' qualified operator syntax, e.g. Prelude.(+)
1759 recBit :: Int
1760 recBit = 22 -- rec
1761 alternativeLayoutRuleBit :: Int
1762 alternativeLayoutRuleBit = 23
1763
1764 always :: Int -> Bool
1765 always           _     = True
1766 genericsEnabled :: Int -> Bool
1767 genericsEnabled  flags = testBit flags genericsBit
1768 parrEnabled :: Int -> Bool
1769 parrEnabled      flags = testBit flags parrBit
1770 arrowsEnabled :: Int -> Bool
1771 arrowsEnabled    flags = testBit flags arrowsBit
1772 thEnabled :: Int -> Bool
1773 thEnabled        flags = testBit flags thBit
1774 ipEnabled :: Int -> Bool
1775 ipEnabled        flags = testBit flags ipBit
1776 explicitForallEnabled :: Int -> Bool
1777 explicitForallEnabled flags = testBit flags explicitForallBit
1778 bangPatEnabled :: Int -> Bool
1779 bangPatEnabled   flags = testBit flags bangPatBit
1780 -- tyFamEnabled :: Int -> Bool
1781 -- tyFamEnabled     flags = testBit flags tyFamBit
1782 haddockEnabled :: Int -> Bool
1783 haddockEnabled   flags = testBit flags haddockBit
1784 magicHashEnabled :: Int -> Bool
1785 magicHashEnabled flags = testBit flags magicHashBit
1786 -- kindSigsEnabled :: Int -> Bool
1787 -- kindSigsEnabled  flags = testBit flags kindSigsBit
1788 unicodeSyntaxEnabled :: Int -> Bool
1789 unicodeSyntaxEnabled flags = testBit flags unicodeSyntaxBit
1790 unboxedTuplesEnabled :: Int -> Bool
1791 unboxedTuplesEnabled flags = testBit flags unboxedTuplesBit
1792 datatypeContextsEnabled :: Int -> Bool
1793 datatypeContextsEnabled flags = testBit flags datatypeContextsBit
1794 qqEnabled :: Int -> Bool
1795 qqEnabled        flags = testBit flags qqBit
1796 -- inRulePrag :: Int -> Bool
1797 -- inRulePrag       flags = testBit flags inRulePragBit
1798 rawTokenStreamEnabled :: Int -> Bool
1799 rawTokenStreamEnabled flags = testBit flags rawTokenStreamBit
1800 newQualOps :: Int -> Bool
1801 newQualOps       flags = testBit flags newQualOpsBit
1802 oldQualOps :: Int -> Bool
1803 oldQualOps flags = not (newQualOps flags)
1804 alternativeLayoutRule :: Int -> Bool
1805 alternativeLayoutRule flags = testBit flags alternativeLayoutRuleBit
1806
1807 -- PState for parsing options pragmas
1808 --
1809 pragState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1810 pragState dynflags buf loc = (mkPState dynflags buf loc) {
1811                                  lex_state = [bol, option_prags, 0]
1812                              }
1813
1814 -- create a parse state
1815 --
1816 mkPState :: DynFlags -> StringBuffer -> SrcLoc -> PState
1817 mkPState flags buf loc =
1818   PState {
1819       buffer          = buf,
1820       dflags        = flags,
1821       messages      = emptyMessages,
1822       last_loc      = mkSrcSpan loc loc,
1823       last_len      = 0,
1824       loc           = loc,
1825       extsBitmap    = fromIntegral bitmap,
1826       context       = [],
1827       lex_state     = [bol, 0],
1828       alr_pending_implicit_tokens = [],
1829       alr_next_token = Nothing,
1830       alr_last_loc = noSrcSpan,
1831       alr_context = [],
1832       alr_expecting_ocurly = Nothing,
1833       alr_justClosedExplicitLetBlock = False
1834     }
1835     where
1836       bitmap = genericsBit `setBitIf` xopt Opt_Generics flags
1837                .|. ffiBit            `setBitIf` xopt Opt_ForeignFunctionInterface flags
1838                .|. parrBit           `setBitIf` xopt Opt_PArr         flags
1839                .|. arrowsBit         `setBitIf` xopt Opt_Arrows       flags
1840                .|. thBit             `setBitIf` xopt Opt_TemplateHaskell flags
1841                .|. qqBit             `setBitIf` xopt Opt_QuasiQuotes flags
1842                .|. ipBit             `setBitIf` xopt Opt_ImplicitParams flags
1843                .|. explicitForallBit `setBitIf` xopt Opt_ExplicitForAll flags
1844                .|. bangPatBit        `setBitIf` xopt Opt_BangPatterns flags
1845                .|. tyFamBit          `setBitIf` xopt Opt_TypeFamilies flags
1846                .|. haddockBit        `setBitIf` dopt Opt_Haddock      flags
1847                .|. magicHashBit      `setBitIf` xopt Opt_MagicHash    flags
1848                .|. kindSigsBit       `setBitIf` xopt Opt_KindSignatures flags
1849                .|. recursiveDoBit    `setBitIf` xopt Opt_RecursiveDo flags
1850                .|. recBit            `setBitIf` xopt Opt_DoRec  flags
1851                .|. recBit            `setBitIf` xopt Opt_Arrows flags
1852                .|. unicodeSyntaxBit  `setBitIf` xopt Opt_UnicodeSyntax flags
1853                .|. unboxedTuplesBit  `setBitIf` xopt Opt_UnboxedTuples flags
1854                .|. datatypeContextsBit `setBitIf` xopt Opt_DatatypeContexts flags
1855                .|. transformComprehensionsBit `setBitIf` xopt Opt_TransformListComp flags
1856                .|. rawTokenStreamBit `setBitIf` dopt Opt_KeepRawTokenStream flags
1857                .|. newQualOpsBit `setBitIf` xopt Opt_NewQualifiedOperators flags
1858                .|. alternativeLayoutRuleBit `setBitIf` xopt Opt_AlternativeLayoutRule flags
1859       --
1860       setBitIf :: Int -> Bool -> Int
1861       b `setBitIf` cond | cond      = bit b
1862                         | otherwise = 0
1863
1864 addWarning :: DynFlag -> SrcSpan -> SDoc -> P ()
1865 addWarning option srcspan warning
1866  = P $ \s@PState{messages=(ws,es), dflags=d} ->
1867        let warning' = mkWarnMsg srcspan alwaysQualify warning
1868            ws' = if dopt option d then ws `snocBag` warning' else ws
1869        in POk s{messages=(ws', es)} ()
1870
1871 getMessages :: PState -> Messages
1872 getMessages PState{messages=ms} = ms
1873
1874 getContext :: P [LayoutContext]
1875 getContext = P $ \s@PState{context=ctx} -> POk s ctx
1876
1877 setContext :: [LayoutContext] -> P ()
1878 setContext ctx = P $ \s -> POk s{context=ctx} ()
1879
1880 popContext :: P ()
1881 popContext = P $ \ s@(PState{ buffer = buf, context = ctx,
1882                               last_len = len, last_loc = last_loc }) ->
1883   case ctx of
1884         (_:tl) -> POk s{ context = tl } ()
1885         []     -> PFailed last_loc (srcParseErr buf len)
1886
1887 -- Push a new layout context at the indentation of the last token read.
1888 -- This is only used at the outer level of a module when the 'module'
1889 -- keyword is missing.
1890 pushCurrentContext :: P ()
1891 pushCurrentContext = P $ \ s@PState{ last_loc=loc, context=ctx } ->
1892     POk s{context = Layout (srcSpanStartCol loc) : ctx} ()
1893
1894 getOffside :: P Ordering
1895 getOffside = P $ \s@PState{last_loc=loc, context=stk} ->
1896                 let offs = srcSpanStartCol loc in
1897                 let ord = case stk of
1898                         (Layout n:_) -> --trace ("layout: " ++ show n ++ ", offs: " ++ show offs) $
1899                                         compare offs n
1900                         _            -> GT
1901                 in POk s ord
1902
1903 -- ---------------------------------------------------------------------------
1904 -- Construct a parse error
1905
1906 srcParseErr
1907   :: StringBuffer       -- current buffer (placed just after the last token)
1908   -> Int                -- length of the previous token
1909   -> Message
1910 srcParseErr buf len
1911   = hcat [ if null token
1912              then ptext (sLit "parse error (possibly incorrect indentation)")
1913              else hcat [ptext (sLit "parse error on input "),
1914                         char '`', text token, char '\'']
1915     ]
1916   where token = lexemeToString (offsetBytes (-len) buf) len
1917
1918 -- Report a parse failure, giving the span of the previous token as
1919 -- the location of the error.  This is the entry point for errors
1920 -- detected during parsing.
1921 srcParseFail :: P a
1922 srcParseFail = P $ \PState{ buffer = buf, last_len = len,
1923                             last_loc = last_loc } ->
1924     PFailed last_loc (srcParseErr buf len)
1925
1926 -- A lexical error is reported at a particular position in the source file,
1927 -- not over a token range.
1928 lexError :: String -> P a
1929 lexError str = do
1930   loc <- getSrcLoc
1931   (AI end buf) <- getInput
1932   reportLexError loc end buf str
1933
1934 -- -----------------------------------------------------------------------------
1935 -- This is the top-level function: called from the parser each time a
1936 -- new token is to be read from the input.
1937
1938 lexer :: (Located Token -> P a) -> P a
1939 lexer cont = do
1940   alr <- extension alternativeLayoutRule
1941   let lexTokenFun = if alr then lexTokenAlr else lexToken
1942   tok@(L _span _tok__) <- lexTokenFun
1943   --trace ("token: " ++ show _tok__) $ do
1944   cont tok
1945
1946 lexTokenAlr :: P (Located Token)
1947 lexTokenAlr = do mPending <- popPendingImplicitToken
1948                  t <- case mPending of
1949                       Nothing ->
1950                           do mNext <- popNextToken
1951                              t <- case mNext of
1952                                   Nothing -> lexToken
1953                                   Just next -> return next
1954                              alternativeLayoutRuleToken t
1955                       Just t ->
1956                           return t
1957                  setAlrLastLoc (getLoc t)
1958                  case unLoc t of
1959                      ITwhere -> setAlrExpectingOCurly (Just ALRLayoutWhere)
1960                      ITlet   -> setAlrExpectingOCurly (Just ALRLayoutLet)
1961                      ITof    -> setAlrExpectingOCurly (Just ALRLayoutOf)
1962                      ITdo    -> setAlrExpectingOCurly (Just ALRLayoutDo)
1963                      ITmdo   -> setAlrExpectingOCurly (Just ALRLayoutDo)
1964                      ITrec   -> setAlrExpectingOCurly (Just ALRLayoutDo)
1965                      _       -> return ()
1966                  return t
1967
1968 alternativeLayoutRuleToken :: Located Token -> P (Located Token)
1969 alternativeLayoutRuleToken t
1970     = do context <- getALRContext
1971          lastLoc <- getAlrLastLoc
1972          mExpectingOCurly <- getAlrExpectingOCurly
1973          justClosedExplicitLetBlock <- getJustClosedExplicitLetBlock
1974          setJustClosedExplicitLetBlock False
1975          dflags <- getDynFlags
1976          let transitional = xopt Opt_AlternativeLayoutRuleTransitional dflags
1977              thisLoc = getLoc t
1978              thisCol = srcSpanStartCol thisLoc
1979              newLine = (lastLoc == noSrcSpan)
1980                     || (srcSpanStartLine thisLoc > srcSpanEndLine lastLoc)
1981          case (unLoc t, context, mExpectingOCurly) of
1982              -- This case handles a GHC extension to the original H98
1983              -- layout rule...
1984              (ITocurly, _, Just alrLayout) ->
1985                  do setAlrExpectingOCurly Nothing
1986                     let isLet = case alrLayout of
1987                                 ALRLayoutLet -> True
1988                                 _ -> False
1989                     setALRContext (ALRNoLayout (containsCommas ITocurly) isLet : context)
1990                     return t
1991              -- ...and makes this case unnecessary
1992              {-
1993              -- I think our implicit open-curly handling is slightly
1994              -- different to John's, in how it interacts with newlines
1995              -- and "in"
1996              (ITocurly, _, Just _) ->
1997                  do setAlrExpectingOCurly Nothing
1998                     setNextToken t
1999                     lexTokenAlr
2000              -}
2001              (_, ALRLayout _ col : ls, Just expectingOCurly)
2002               | (thisCol > col) ||
2003                 (thisCol == col &&
2004                  isNonDecreasingIntentation expectingOCurly) ->
2005                  do setAlrExpectingOCurly Nothing
2006                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2007                     setNextToken t
2008                     return (L thisLoc ITocurly)
2009               | otherwise ->
2010                  do setAlrExpectingOCurly Nothing
2011                     setPendingImplicitTokens [L lastLoc ITccurly]
2012                     setNextToken t
2013                     return (L lastLoc ITocurly)
2014              (_, _, Just expectingOCurly) ->
2015                  do setAlrExpectingOCurly Nothing
2016                     setALRContext (ALRLayout expectingOCurly thisCol : context)
2017                     setNextToken t
2018                     return (L thisLoc ITocurly)
2019              -- We do the [] cases earlier than in the spec, as we
2020              -- have an actual EOF token
2021              (ITeof, ALRLayout _ _ : ls, _) ->
2022                  do setALRContext ls
2023                     setNextToken t
2024                     return (L thisLoc ITccurly)
2025              (ITeof, _, _) ->
2026                  return t
2027              -- the other ITeof case omitted; general case below covers it
2028              (ITin, _, _)
2029               | justClosedExplicitLetBlock ->
2030                  return t
2031              (ITin, ALRLayout ALRLayoutLet _ : ls, _)
2032               | newLine ->
2033                  do setPendingImplicitTokens [t]
2034                     setALRContext ls
2035                     return (L thisLoc ITccurly)
2036              -- This next case is to handle a transitional issue:
2037              (ITwhere, ALRLayout _ col : ls, _)
2038               | newLine && thisCol == col && transitional ->
2039                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2040                                thisLoc
2041                                (transitionalAlternativeLayoutWarning
2042                                     "`where' clause at the same depth as implicit layout block")
2043                     setALRContext ls
2044                     setNextToken t
2045                     -- Note that we use lastLoc, as we may need to close
2046                     -- more layouts, or give a semicolon
2047                     return (L lastLoc ITccurly)
2048              -- This next case is to handle a transitional issue:
2049              (ITvbar, ALRLayout _ col : ls, _)
2050               | newLine && thisCol == col && transitional ->
2051                  do addWarning Opt_WarnAlternativeLayoutRuleTransitional
2052                                thisLoc
2053                                (transitionalAlternativeLayoutWarning
2054                                     "`|' at the same depth as implicit layout block")
2055                     setALRContext ls
2056                     setNextToken t
2057                     -- Note that we use lastLoc, as we may need to close
2058                     -- more layouts, or give a semicolon
2059                     return (L lastLoc ITccurly)
2060              (_, ALRLayout _ col : ls, _)
2061               | newLine && thisCol == col ->
2062                  do setNextToken t
2063                     return (L thisLoc ITsemi)
2064               | newLine && thisCol < col ->
2065                  do setALRContext ls
2066                     setNextToken t
2067                     -- Note that we use lastLoc, as we may need to close
2068                     -- more layouts, or give a semicolon
2069                     return (L lastLoc ITccurly)
2070              -- We need to handle close before open, as 'then' is both
2071              -- an open and a close
2072              (u, _, _)
2073               | isALRclose u ->
2074                  case context of
2075                  ALRLayout _ _ : ls ->
2076                      do setALRContext ls
2077                         setNextToken t
2078                         return (L thisLoc ITccurly)
2079                  ALRNoLayout _ isLet : ls ->
2080                      do let ls' = if isALRopen u
2081                                      then ALRNoLayout (containsCommas u) False : ls
2082                                      else ls
2083                         setALRContext ls'
2084                         when isLet $ setJustClosedExplicitLetBlock True
2085                         return t
2086                  [] ->
2087                      do let ls = if isALRopen u
2088                                     then [ALRNoLayout (containsCommas u) False]
2089                                     else ls
2090                         setALRContext ls
2091                         -- XXX This is an error in John's code, but
2092                         -- it looks reachable to me at first glance
2093                         return t
2094              (u, _, _)
2095               | isALRopen u ->
2096                  do setALRContext (ALRNoLayout (containsCommas u) False : context)
2097                     return t
2098              (ITin, ALRLayout ALRLayoutLet _ : ls, _) ->
2099                  do setALRContext ls
2100                     setPendingImplicitTokens [t]
2101                     return (L thisLoc ITccurly)
2102              (ITin, ALRLayout _ _ : ls, _) ->
2103                  do setALRContext ls
2104                     setNextToken t
2105                     return (L thisLoc ITccurly)
2106              -- the other ITin case omitted; general case below covers it
2107              (ITcomma, ALRLayout _ _ : ls, _)
2108               | topNoLayoutContainsCommas ls ->
2109                  do setALRContext ls
2110                     setNextToken t
2111                     return (L thisLoc ITccurly)
2112              (ITwhere, ALRLayout ALRLayoutDo _ : ls, _) ->
2113                  do setALRContext ls
2114                     setPendingImplicitTokens [t]
2115                     return (L thisLoc ITccurly)
2116              -- the other ITwhere case omitted; general case below covers it
2117              (_, _, _) -> return t
2118
2119 transitionalAlternativeLayoutWarning :: String -> SDoc
2120 transitionalAlternativeLayoutWarning msg
2121     = text "transitional layout will not be accepted in the future:"
2122    $$ text msg
2123
2124 isALRopen :: Token -> Bool
2125 isALRopen ITcase        = True
2126 isALRopen ITif          = True
2127 isALRopen ITthen        = True
2128 isALRopen IToparen      = True
2129 isALRopen ITobrack      = True
2130 isALRopen ITocurly      = True
2131 -- GHC Extensions:
2132 isALRopen IToubxparen   = True
2133 isALRopen ITparenEscape = True
2134 isALRopen _             = False
2135
2136 isALRclose :: Token -> Bool
2137 isALRclose ITof     = True
2138 isALRclose ITthen   = True
2139 isALRclose ITelse   = True
2140 isALRclose ITcparen = True
2141 isALRclose ITcbrack = True
2142 isALRclose ITccurly = True
2143 -- GHC Extensions:
2144 isALRclose ITcubxparen = True
2145 isALRclose _        = False
2146
2147 isNonDecreasingIntentation :: ALRLayout -> Bool
2148 isNonDecreasingIntentation ALRLayoutDo = True
2149 isNonDecreasingIntentation _           = False
2150
2151 containsCommas :: Token -> Bool
2152 containsCommas IToparen = True
2153 containsCommas ITobrack = True
2154 -- John doesn't have {} as containing commas, but records contain them,
2155 -- which caused a problem parsing Cabal's Distribution.Simple.InstallDirs
2156 -- (defaultInstallDirs).
2157 containsCommas ITocurly = True
2158 -- GHC Extensions:
2159 containsCommas IToubxparen = True
2160 containsCommas _        = False
2161
2162 topNoLayoutContainsCommas :: [ALRContext] -> Bool
2163 topNoLayoutContainsCommas [] = False
2164 topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
2165 topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
2166
2167 lexToken :: P (Located Token)
2168 lexToken = do
2169   inp@(AI loc1 buf) <- getInput
2170   sc <- getLexState
2171   exts <- getExts
2172   case alexScanUser exts inp sc of
2173     AlexEOF -> do
2174         let span = mkSrcSpan loc1 loc1
2175         setLastToken span 0
2176         return (L span ITeof)
2177     AlexError (AI loc2 buf) ->
2178         reportLexError loc1 loc2 buf "lexical error"
2179     AlexSkip inp2 _ -> do
2180         setInput inp2
2181         lexToken
2182     AlexToken inp2@(AI end buf2) _ t -> do
2183         setInput inp2
2184         let span = mkSrcSpan loc1 end
2185         let bytes = byteDiff buf buf2
2186         span `seq` setLastToken span bytes
2187         t span buf bytes
2188
2189 reportLexError :: SrcLoc -> SrcLoc -> StringBuffer -> [Char] -> P a
2190 reportLexError loc1 loc2 buf str
2191   | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
2192   | otherwise =
2193   let
2194         c = fst (nextChar buf)
2195   in
2196   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
2197     then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
2198     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
2199
2200 lexTokenStream :: StringBuffer -> SrcLoc -> DynFlags -> ParseResult [Located Token]
2201 lexTokenStream buf loc dflags = unP go initState
2202     where dflags' = dopt_set (dopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
2203           initState = mkPState dflags' buf loc
2204           go = do
2205             ltok <- lexer return
2206             case ltok of
2207               L _ ITeof -> return []
2208               _ -> liftM (ltok:) go
2209
2210 linePrags = Map.singleton "line" (begin line_prag2)
2211
2212 fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
2213                                  ("options_ghc", lex_string_prag IToptions_prag),
2214                                  ("options_haddock", lex_string_prag ITdocOptions),
2215                                  ("language", token ITlanguage_prag),
2216                                  ("include", lex_string_prag ITinclude_prag)])
2217
2218 ignoredPrags = Map.fromList (map ignored pragmas)
2219                where ignored opt = (opt, nested_comment lexToken)
2220                      impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
2221                      options_pragmas = map ("options_" ++) impls
2222                      -- CFILES is a hugs-only thing.
2223                      pragmas = options_pragmas ++ ["cfiles", "contract"]
2224
2225 oneWordPrags = Map.fromList([("rules", rulePrag),
2226                            ("inline", token (ITinline_prag Inline FunLike)),
2227                            ("inlinable", token (ITinline_prag Inlinable FunLike)),
2228                            ("inlineable", token (ITinline_prag Inlinable FunLike)),
2229                                           -- Spelling variant
2230                            ("notinline", token (ITinline_prag NoInline FunLike)),
2231                            ("specialize", token ITspec_prag),
2232                            ("source", token ITsource_prag),
2233                            ("warning", token ITwarning_prag),
2234                            ("deprecated", token ITdeprecated_prag),
2235                            ("scc", token ITscc_prag),
2236                            ("generated", token ITgenerated_prag),
2237                            ("core", token ITcore_prag),
2238                            ("unpack", token ITunpack_prag),
2239                            ("ann", token ITann_prag)])
2240
2241 twoWordPrags = Map.fromList([("inline conlike", token (ITinline_prag Inline ConLike)),
2242                              ("notinline conlike", token (ITinline_prag NoInline ConLike)),
2243                              ("specialize inline", token (ITspec_inline_prag True)),
2244                              ("specialize notinline", token (ITspec_inline_prag False))])
2245
2246
2247 dispatch_pragmas :: Map String Action -> Action
2248 dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
2249                                        Just found -> found span buf len
2250                                        Nothing -> lexError "unknown pragma"
2251
2252 known_pragma :: Map String Action -> AlexAccPred Int
2253 known_pragma prags _ _ len (AI _ buf) = (isJust $ Map.lookup (clean_pragma (lexemeToString (offsetBytes (- len) buf) len)) prags)
2254                                           && (nextCharIs buf (\c -> not (isAlphaNum c || c == '_')))
2255
2256 clean_pragma :: String -> String
2257 clean_pragma prag = canon_ws (map toLower (unprefix prag))
2258                     where unprefix prag' = case stripPrefix "{-#" prag' of
2259                                              Just rest -> rest
2260                                              Nothing -> prag'
2261                           canonical prag' = case prag' of
2262                                               "noinline" -> "notinline"
2263                                               "specialise" -> "specialize"
2264                                               "constructorlike" -> "conlike"
2265                                               _ -> prag'
2266                           canon_ws s = unwords (map canonical (words s))
2267 }