X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=edfbecdc91735a8d3c14f3ab96379317c86a9ef6;hp=19927d1154481dd4e6d44ce65f256bb3b8bb87a8;hb=831a35dd00faff195cf938659c2dd736192b865f;hpb=c0b5a0fe78855d2f628a4b6f973425496315a44c diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index 19927d1..edfbecd 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -32,7 +32,7 @@ -- qualified varids. { -{-# OPTIONS -w #-} +{-# OPTIONS -Wwarn #-} -- The above warning supression flag is a temporary kludge. -- While working on this module you are encouraged to remove it and fix -- any warnings in the module. See @@ -70,11 +70,9 @@ import Util ( maybePrefixMatch, readRational ) import Control.Monad import Data.Bits -import Data.Char ( chr, ord, isSpace ) +import Data.Char import Data.Ratio import Debug.Trace - -import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper ) } $unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar. @@ -223,7 +221,8 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } <0,option_prags> \n { begin bol } -"{-#" $whitechar* (line|LINE) { begin line_prag2 } +"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar } + { begin line_prag2 } -- single-line line pragmas, of the form -- # "" \n @@ -240,31 +239,45 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } -- with older versions of GHC which generated these. <0,option_prags> { - "{-#" $whitechar* (RULES|rules) { rulePrag } - "{-#" $whitechar* (INLINE|inline) { token (ITinline_prag True) } - "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) + "{-#" $whitechar* (RULES|rules) / { notFollowedByPragmaChar } { rulePrag } + "{-#" $whitechar* (INLINE|inline) / { notFollowedByPragmaChar } + { token (ITinline_prag True) } + "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar } { token (ITinline_prag False) } - "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) + "{-#" $whitechar* (INLINE|inline) + $whitechar+ (CONLIKE|conlike) / { notFollowedByPragmaChar } + { token (ITinline_conlike_prag True) } + "{-#" $whitechar* (NO(T)?INLINE|no(t?)inline) + $whitechar+ (CONLIKE|constructorlike) / { notFollowedByPragmaChar } + { token (ITinline_conlike_prag False) } + "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar } { token ITspec_prag } "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) - $whitechar* (INLINE|inline) { token (ITspec_inline_prag True) } + $whitechar+ (INLINE|inline) / { notFollowedByPragmaChar } + { token (ITspec_inline_prag True) } "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) - $whitechar* (NO(T?)INLINE|no(t?)inline) + $whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar } { token (ITspec_inline_prag False) } - "{-#" $whitechar* (SOURCE|source) { token ITsource_prag } - "{-#" $whitechar* (WARNING|warning) + "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar } + { token ITsource_prag } + "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar } { token ITwarning_prag } - "{-#" $whitechar* (DEPRECATED|deprecated) + "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar } { token ITdeprecated_prag } - "{-#" $whitechar* (SCC|scc) { token ITscc_prag } - "{-#" $whitechar* (GENERATED|generated) + "{-#" $whitechar* (SCC|scc) / { notFollowedByPragmaChar } + { token ITscc_prag } + "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar } { token ITgenerated_prag } - "{-#" $whitechar* (CORE|core) { token ITcore_prag } - "{-#" $whitechar* (UNPACK|unpack) { token ITunpack_prag } + "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar } + { token ITcore_prag } + "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar } + { token ITunpack_prag } + "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar } + { token ITann_prag } -- We ignore all these pragmas, but don't generate a warning for them -- CFILES is a hugs-only thing. - "{-#" $whitechar* (OPTIONS_HUGS|options_hugs|OPTIONS_NHC98|options_nhc98|OPTIONS_JHC|options_jhc|CFILES|cfiles) + "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles|CONTRACT|contract) / { notFollowedByPragmaChar } { nested_comment lexToken } -- ToDo: should only be valid inside a pragma: @@ -272,19 +285,23 @@ $tab+ { warn Opt_WarnTabs (text "Tab character") } } { - "{-#" $whitechar* (OPTIONS|options) { lex_string_prag IToptions_prag } - "{-#" $whitechar* (OPTIONS_GHC|options_ghc) + "{-#" $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar } + { lex_string_prag IToptions_prag } + "{-#" $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar } { lex_string_prag IToptions_prag } "{-#" $whitechar* (OPTIONS_HADDOCK|options_haddock) + / { notFollowedByPragmaChar } { lex_string_prag ITdocOptions } "-- #" { multiline_doc_comment } - "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag } - "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag } + "{-#" $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar } + { token ITlanguage_prag } + "{-#" $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar } + { lex_string_prag ITinclude_prag } } <0> { -- In the "0" mode we ignore these pragmas - "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) + "{-#" $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar } { nested_comment lexToken } } @@ -479,6 +496,7 @@ data Token -- Pragmas | ITinline_prag Bool -- True <=> INLINE, False <=> NOINLINE + | ITinline_conlike_prag Bool -- same | ITspec_prag -- SPECIALISE | ITspec_inline_prag Bool -- SPECIALISE INLINE (or NOINLINE) | ITsource_prag @@ -490,6 +508,7 @@ data Token | ITgenerated_prag | ITcore_prag -- hdaume: core annotations | ITunpack_prag + | ITann_prag | ITclose_prag | IToptions_prag String | ITinclude_prag String @@ -700,16 +719,15 @@ reservedSymsFM = listToUFM $ ,("!", ITbang, always) -- For data T (a::*) = MkT - ,("*", ITstar, \i -> kindSigsEnabled i || tyFamEnabled i) + ,("*", ITstar, always) -- \i -> kindSigsEnabled i || tyFamEnabled i) -- For 'forall a . t' - ,(".", ITdot, \i -> explicitForallEnabled i || inRulePrag i) + ,(".", ITdot, always) -- \i -> explicitForallEnabled i || inRulePrag i) ,("-<", ITlarrowtail, arrowsEnabled) ,(">-", ITrarrowtail, arrowsEnabled) ,("-<<", ITLarrowtail, arrowsEnabled) ,(">>-", ITRarrowtail, arrowsEnabled) -#if __GLASGOW_HASKELL__ >= 605 ,("∷", ITdcolon, unicodeSyntaxEnabled) ,("⇒", ITdarrow, unicodeSyntaxEnabled) ,("∀", ITforall, \i -> unicodeSyntaxEnabled i && @@ -720,7 +738,6 @@ reservedSymsFM = listToUFM $ -- ToDo: ideally, → and ∷ should be "specials", so that they cannot -- form part of a large operator. This would let us have a better -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe). -#endif ] -- ----------------------------------------------------------------------------- @@ -769,6 +786,9 @@ notFollowedBy char _ _ _ (AI _ _ buf) notFollowedBySymbol _ _ _ (AI _ _ buf) = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~") +notFollowedByPragmaChar _ _ _ (AI _ _ buf) + = nextCharIs buf (\c -> not (isAlphaNum c || c == '_')) + -- We must reject doc comments as being ordinary comments everywhere. -- In some cases the doc comment will be selected as the lexeme due to -- maximal munch, but not always, because the nested comment rule is @@ -1230,11 +1250,11 @@ lex_char_tok span _buf _len = do -- We've seen ' -- We've seen 'x, where x is a valid character -- (i.e. not newline etc) but not a quote or backslash case alexGetChar' i2 of -- Look ahead one more character - Nothing -> lit_error Just ('\'', i3) -> do -- We've seen 'x' setInput i3 finish_char_tok loc c _other -> do -- We've seen 'x not followed by quote + -- (including the possibility of EOF) -- If TH is on, just parse the quote only th_exts <- extension thEnabled let (AI end _ _) = i1 @@ -1556,13 +1576,13 @@ alexGetChar (AI loc ofs s) DecimalNumber -> digit LetterNumber -> other_graphic OtherNumber -> other_graphic - ConnectorPunctuation -> other_graphic - DashPunctuation -> other_graphic + ConnectorPunctuation -> symbol + DashPunctuation -> symbol OpenPunctuation -> other_graphic ClosePunctuation -> other_graphic InitialQuote -> other_graphic FinalQuote -> other_graphic - OtherPunctuation -> other_graphic + OtherPunctuation -> symbol MathSymbol -> symbol CurrencySymbol -> symbol ModifierSymbol -> symbol