X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=b3b2336a632ccf8c53c6bd851296110a00550197;hb=97583b57b68d646a5735c995cf7be217a8e83ffe;hp=19927d1154481dd4e6d44ce65f256bb3b8bb87a8;hpb=c0b5a0fe78855d2f628a4b6f973425496315a44c;p=ghc-hetmet.git

diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 19927d1..b3b2336 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -70,11 +70,9 @@ import Util		( maybePrefixMatch, readRational )
 
 import Control.Monad
 import Data.Bits
-import Data.Char 	( chr, ord, isSpace )
+import Data.Char
 import Data.Ratio
 import Debug.Trace
-
-import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper )
 }
 
 $unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetChar.
@@ -223,7 +221,8 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 
 <0,option_prags> \n				{ begin bol }
 
-"{-#" $whitechar* (line|LINE) 		{ begin line_prag2 }
+"{-#" $whitechar* (line|LINE) / { notFollowedByPragmaChar }
+                            { begin line_prag2 }
 
 -- single-line line pragmas, of the form
 --    # <line> "<file>" <extra-stuff> \n
@@ -240,31 +239,39 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
    -- with older versions of GHC which generated these.
 
 <0,option_prags> {
-  "{-#" $whitechar* (RULES|rules)       { rulePrag }
-  "{-#" $whitechar* (INLINE|inline)	{ token (ITinline_prag True) }
-  "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline)
+  "{-#" $whitechar* (RULES|rules)  / { notFollowedByPragmaChar } { rulePrag }
+  "{-#" $whitechar* (INLINE|inline)	 / { notFollowedByPragmaChar }
+                    { token (ITinline_prag True) }
+  "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
   					{ token (ITinline_prag False) }
-  "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
+  "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) / { notFollowedByPragmaChar }
   					{ token ITspec_prag }
   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
-	$whitechar* (INLINE|inline)	{ token (ITspec_inline_prag True) }
+	$whitechar+ (INLINE|inline) / { notFollowedByPragmaChar }
+                    { token (ITspec_inline_prag True) }
   "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e)
-	$whitechar* (NO(T?)INLINE|no(t?)inline)
+	$whitechar+ (NO(T?)INLINE|no(t?)inline) / { notFollowedByPragmaChar }
 					{ token (ITspec_inline_prag False) }
-  "{-#" $whitechar* (SOURCE|source)	{ token ITsource_prag }
-  "{-#" $whitechar* (WARNING|warning)
+  "{-#" $whitechar* (SOURCE|source) / { notFollowedByPragmaChar }
+                    { token ITsource_prag }
+  "{-#" $whitechar* (WARNING|warning) / { notFollowedByPragmaChar }
   					{ token ITwarning_prag }
-  "{-#" $whitechar* (DEPRECATED|deprecated)
+  "{-#" $whitechar* (DEPRECATED|deprecated) / { notFollowedByPragmaChar }
   					{ token ITdeprecated_prag }
-  "{-#" $whitechar* (SCC|scc)		{ token ITscc_prag }
-  "{-#" $whitechar* (GENERATED|generated)
+  "{-#" $whitechar* (SCC|scc)  / { notFollowedByPragmaChar }
+                    { token ITscc_prag }
+  "{-#" $whitechar* (GENERATED|generated) / { notFollowedByPragmaChar }
   					{ token ITgenerated_prag }
-  "{-#" $whitechar* (CORE|core)		{ token ITcore_prag }
-  "{-#" $whitechar* (UNPACK|unpack)	{ token ITunpack_prag }
+  "{-#" $whitechar* (CORE|core) / { notFollowedByPragmaChar }
+                    { token ITcore_prag }
+  "{-#" $whitechar* (UNPACK|unpack) / { notFollowedByPragmaChar }
+                    { token ITunpack_prag }
+  "{-#" $whitechar* (ANN|ann) / { notFollowedByPragmaChar }
+                    { token ITann_prag }
 
   -- We ignore all these pragmas, but don't generate a warning for them
   -- CFILES is a hugs-only thing.
-  "{-#" $whitechar* (OPTIONS_HUGS|options_hugs|OPTIONS_NHC98|options_nhc98|OPTIONS_JHC|options_jhc|CFILES|cfiles)
+  "{-#" $whitechar* (OPTIONS_(HUGS|hugs|NHC98|nhc98|JHC|jhc|YHC|yhc|CATCH|catch|DERIVE|derive)|CFILES|cfiles) / { notFollowedByPragmaChar }
                     { nested_comment lexToken }
 
   -- ToDo: should only be valid inside a pragma:
@@ -272,19 +279,23 @@ $tab+         { warn Opt_WarnTabs (text "Tab character") }
 }
 
 <option_prags> {
-  "{-#"  $whitechar* (OPTIONS|options)   { lex_string_prag IToptions_prag }
-  "{-#"  $whitechar* (OPTIONS_GHC|options_ghc)
+  "{-#"  $whitechar* (OPTIONS|options) / { notFollowedByPragmaChar }
+                                        { lex_string_prag IToptions_prag }
+  "{-#"  $whitechar* (OPTIONS_GHC|options_ghc) / { notFollowedByPragmaChar }
                                         { lex_string_prag IToptions_prag }
   "{-#"  $whitechar* (OPTIONS_HADDOCK|options_haddock)
+                   / { notFollowedByPragmaChar }
                                          { lex_string_prag ITdocOptions }
   "-- #"                                 { multiline_doc_comment }
-  "{-#"  $whitechar* (LANGUAGE|language) { token ITlanguage_prag }
-  "{-#"  $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
+  "{-#"  $whitechar* (LANGUAGE|language) / { notFollowedByPragmaChar }
+                                         { token ITlanguage_prag }
+  "{-#"  $whitechar* (INCLUDE|include) / { notFollowedByPragmaChar }
+                                         { lex_string_prag ITinclude_prag }
 }
 
 <0> {
   -- In the "0" mode we ignore these pragmas
-  "{-#"  $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include)
+  "{-#"  $whitechar* (OPTIONS|options|OPTIONS_GHC|options_ghc|OPTIONS_HADDOCK|options_haddock|LANGUAGE|language|INCLUDE|include) / { notFollowedByPragmaChar }
                      { nested_comment lexToken }
 }
 
@@ -490,6 +501,7 @@ data Token
   | ITgenerated_prag
   | ITcore_prag                 -- hdaume: core annotations
   | ITunpack_prag
+  | ITann_prag
   | ITclose_prag
   | IToptions_prag String
   | ITinclude_prag String
@@ -709,7 +721,6 @@ reservedSymsFM = listToUFM $
        ,("-<<", ITLarrowtail, arrowsEnabled)
        ,(">>-", ITRarrowtail, arrowsEnabled)
 
-#if __GLASGOW_HASKELL__ >= 605
       ,("∷",   ITdcolon, unicodeSyntaxEnabled)
       ,("⇒",   ITdarrow, unicodeSyntaxEnabled)
       ,("∀",   ITforall, \i -> unicodeSyntaxEnabled i &&
@@ -720,7 +731,6 @@ reservedSymsFM = listToUFM $
        -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
        -- form part of a large operator.  This would let us have a better
        -- syntax for kinds: ɕ∷*→* would be a legal kind signature. (maybe).
-#endif
        ]
 
 -- -----------------------------------------------------------------------------
@@ -769,6 +779,9 @@ notFollowedBy char _ _ _ (AI _ _ buf)
 notFollowedBySymbol _ _ _ (AI _ _ buf)
   = nextCharIs buf (`notElem` "!#$%&*+./<=>?@\\^|-~")
 
+notFollowedByPragmaChar _ _ _ (AI _ _ buf)  -- Alex right-context predicate used by the pragma rules
+  = nextCharIs buf (\ch -> not (isAlphaNum ch) && ch /= '_')
+
 -- We must reject doc comments as being ordinary comments everywhere.
 -- In some cases the doc comment will be selected as the lexeme due to
 -- maximal munch, but not always, because the nested comment rule is
@@ -1230,11 +1243,11 @@ lex_char_tok span _buf _len = do	-- We've seen '
 		-- We've seen 'x, where x is a valid character
 		--  (i.e. not newline etc) but not a quote or backslash
 	   case alexGetChar' i2 of	-- Look ahead one more character
-		Nothing -> lit_error
 		Just ('\'', i3) -> do 	-- We've seen 'x'
 			setInput i3 
 			finish_char_tok loc c
 		_other -> do 		-- We've seen 'x not followed by quote
+		       	  		-- (including the possibility of EOF)
 					-- If TH is on, just parse the quote only
 			th_exts <- extension thEnabled	
 			let (AI end _ _) = i1
@@ -1556,13 +1569,13 @@ alexGetChar (AI loc ofs s)
 		  DecimalNumber         -> digit
 		  LetterNumber          -> other_graphic
 		  OtherNumber           -> other_graphic
-		  ConnectorPunctuation  -> other_graphic
-		  DashPunctuation       -> other_graphic
+		  ConnectorPunctuation  -> symbol
+		  DashPunctuation       -> symbol
 		  OpenPunctuation       -> other_graphic
 		  ClosePunctuation      -> other_graphic
 		  InitialQuote          -> other_graphic
 		  FinalQuote            -> other_graphic
-		  OtherPunctuation      -> other_graphic
+		  OtherPunctuation      -> symbol
 		  MathSymbol            -> symbol
 		  CurrencySymbol        -> symbol
 		  ModifierSymbol        -> symbol