X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=e8f54ba6ad174b25b086d24a76cfb93910e9ecdd;hb=0345a8378429e10e0c4feb7a6be2f9f132699b81;hp=c813e3614546de7bf3589944dc5d5e5509a66a8b;hpb=230db59e57cb21a80f727b139bd619ef26eb020b;p=ghc-hetmet.git diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index c813e36..e8f54ba 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -70,11 +70,9 @@ import Util ( maybePrefixMatch, readRational ) import Control.Monad import Data.Bits -import Data.Char ( chr, ord, isSpace ) +import Data.Char import Data.Ratio import Debug.Trace - -import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper ) } $unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar. @@ -223,7 +221,8 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } <0,option_prags> \n { begin bol } -"{-#" $whitechar* (line|LINE) { begin line_prag2 } +"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar } + { begin line_prag2 } -- single-line line pragmas, of the form -- # "" \n @@ -240,31 +239,39 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } -- with older versions of GHC which generated these. <0,option_prags> { - "{-#" $whitechar* (RULES|rules) { rulePrag } - "{-#" $whitechar* (INLINE|inline) { token (ITinline_prag True) } - "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) + "{-#" $whitechar* (RULES|rules) / { notFollowedByPragmaChar } { rulePrag } + "{-#" $whitechar* (INLINE|inline) / { notFollowedByPragmaChar } + { token (ITinline_prag True) } + "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar } { token (ITinline_prag False) } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) + "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar } { token ITspec_prag } "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) - $whitechar* (INLINE|inline) { token (ITspec_inline_prag True) } + $whitechar+ (INLINE|inline) / { notFollowedByPragmaChar } + { token (ITspec_inline_prag True) } "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) - $whitechar* (NO(T?)INLINE|no(t?)inline) + $whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar } { token (ITspec_inline_prag False) } - "{-#" $whitechar* (SOURCE|source) { token ITsource_prag } - "{-#" $whitechar* (WARNING|warning) + "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar } + { token ITsource_prag } + "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar } { token ITwarning_prag } - "{-#" $whitechar* (DEPRECATED|deprecated) + "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar } { token ITdeprecated_prag } - "{-#" $whitechar* (SCC|scc) { token ITscc_prag } - "{-#" $whitechar* (GENERATED|generated) + "{-#" $whitechar* (SCC|scc) / { notFollowedByPragmaChar } + { token ITscc_prag } + "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar } { token ITgenerated_prag } - "{-#" $whitechar* (CORE|core) { token ITcore_prag } - "{-#" $whitechar* (UNPACK|unpack) { token ITunpack_prag } + "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar } + { token ITcore_prag } + "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar } + { token ITunpack_prag } + "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar } + { token ITann_prag } -- We ignore all these pragmas, but don't generate a warning for them -- CFILES is a hugs-only thing. - "{-#" $whitechar* (OPTIONS_HUGS|options_hugs|OPTIONS_NHC98|options_nhc98|OPTIONS_JHC|options_jhc|CFILES|cfiles) + "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles) / { notFollowedByPragmaChar } { nested_comment lexToken } -- ToDo: should only be valid inside a pragma: @@ -272,19 +279,23 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } } { - "{-#" $whitechar* (OPTIONS|options) { lex_string_prag IToptions_prag } - "{-#" $whitechar* (OPTIONS_GHC|options_ghc) + "{-#" $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar } + { lex_string_prag IToptions_prag } + "{-#" $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar } { lex_string_prag IToptions_prag } "{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock) + / { notFollowedByPragmaChar } { lex_string_prag ITdocOptions } "-- #" { multiline_doc_comment } - "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag } - "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag } + "{-#" $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar } + { token ITlanguage_prag } + "{-#" $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar } + { lex_string_prag ITinclude_prag } } <0> { -- In the "0" mode we ignore these pragmas - "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) + "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar } { nested_comment lexToken } } @@ -490,6 +501,7 @@ data Token | ITgenerated_prag | ITcore_prag -- hdaume: core annotations | ITunpack_prag + | ITann_prag | ITclose_prag | IToptions_prag String | ITinclude_prag String @@ -709,7 +721,6 @@ reservedSymsFM = listToUFM $ ,("-<<", ITLarrowtail, arrowsEnabled) ,(">>-", ITRarrowtail, arrowsEnabled) -#if __GLASGOW_HASKELL__ >= 605 ,("∷", ITdcolon, unicodeSyntaxEnabled) ,("⇒", ITdarrow, unicodeSyntaxEnabled) ,("∀", ITforall, \i -> unicodeSyntaxEnabled i && @@ -720,7 +731,6 @@ reservedSymsFM = listToUFM $ -- ToDo: ideally, → and ∷ should be "specials", so that they cannot -- form part of a large operator. This would let us have a better -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe). -#endif ] -- ----------------------------------------------------------------------------- @@ -769,6 +779,9 @@ notFollowedBy char _ _ _ (AI _ _ buf) notFollowedBySymbol _ _ _ (AI _ _ buf) = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~") +notFollowedByPragmaChar _ _ _ (AI _ _ buf) + = nextCharIs buf (\c -> not (isAlphaNum c || c == '_')) + -- We must reject doc comments as being ordinary comments everywhere. -- In some cases the doc comment will be selected as the lexeme due to -- maximal munch, but not always, because the nested comment rule is