X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2Fparser%2FLexer.x;h=fdbaeef3feaa773673b9bdec87c2e9d6220df075;hb=7ab880e6cbce4e095d8316d4289066aa2d50419b;hp=4c1b48efc0845f3009dc19f4d8e5e26eadb36103;hpb=0065d5ab628975892cea1ec7303f968c3338cbe1;p=ghc-hetmet.git

diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 4c1b48e..fdbaeef 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -65,7 +65,7 @@ $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
 $digit     = [$ascdigit $unidigit]
 
 $special   = [\(\)\,\;\[\]\`\{\}]
-$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~]
+$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~ \xa1-\xbf \xd7 \xf7]
 $unisymbol = \x04
 $symbol    = [$ascsymbol $unisymbol] # [$special \_\:\"\']
 
@@ -216,6 +216,11 @@ $white_no_nl+ 				;
   "{-#" $whitechar* (INCLUDE|include)   { lex_string_prag ITinclude_prag }
 }
 
+<0,option_prags,glaexts> {
+	-- This is to catch things like {-# OPTIONS OPTIONS_HUGS ... 
+  "{-#" $whitechar* $idchar+            { nested_comment }
+}
+
 -- '0' state: ordinary lexemes
 -- 'glaexts' state: glasgow extensions (postfix '#', etc.)
 
@@ -371,6 +376,8 @@ data Token
   | ITccallconv
   | ITdotnet
   | ITmdo
+  | ITiso
+  | ITfamily
 
 	-- Pragmas
   | ITinline_prag Bool		-- True <=> INLINE, False <=> NOINLINE
@@ -494,6 +501,8 @@ isSpecial ITunsafe    	= True
 isSpecial ITccallconv   = True
 isSpecial ITstdcallconv = True
 isSpecial ITmdo		= True
+isSpecial ITiso		= True
+isSpecial ITfamily	= True
 isSpecial _             = False
 
 -- the bitmap provided as the third component indicates whether the
@@ -534,6 +543,8 @@ reservedWordsFM = listToUFM $
 
       	( "forall",	ITforall,	 bit tvBit),
 	( "mdo",	ITmdo,		 bit glaExtsBit),
+	( "iso",	ITiso,		 bit glaExtsBit),
+	( "family",	ITfamily,	 bit glaExtsBit),
 
 	( "foreign",	ITforeign,	 bit ffiBit),
 	( "export",	ITexport,	 bit ffiBit),
@@ -583,6 +594,9 @@ reservedSymsFM = listToUFM $
        ,("â",   ITrarrow,	bit glaExtsBit)
        ,("â",   ITlarrow,	bit glaExtsBit)
        ,("â¯", 	ITdotdot,	bit glaExtsBit)
+        -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
+        -- form part of a large operator.  This would let us have a better
+        -- syntax for kinds: ɑ∷*→* would be a legal kind signature. (maybe).
 #endif
        ]
 
@@ -1210,6 +1224,7 @@ alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
 alexGetChar (AI loc ofs s) 
   | atEnd s   = Nothing
   | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq` 
+		--trace (show (ord c)) $
 		Just (adj_c, (AI loc' ofs' s'))
   where (c,s') = nextChar s
         loc'   = advanceSrcLoc loc c
@@ -1259,6 +1274,7 @@ alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
 alexGetChar' (AI loc ofs s) 
   | atEnd s   = Nothing
   | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` 
+		--trace (show (ord c)) $
 		Just (c, (AI loc' ofs' s'))
   where (c,s') = nextChar s
         loc'   = advanceSrcLoc loc c
@@ -1443,15 +1459,13 @@ lexToken = do
 	span `seq` setLastToken span bytes
 	t span buf bytes
 
--- ToDo: Alex reports the buffer at the start of the erroneous lexeme,
--- but it would be more informative to report the location where the
--- error was actually discovered, especially if this is a decoding
--- error.
-reportLexError loc1 loc2 buf str = 
+reportLexError loc1 loc2 buf str
+  | atEnd buf = failLocMsgP loc1 loc2 (str ++ " at end of input")
+  | otherwise =
   let 
 	c = fst (nextChar buf)
   in
   if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar#
-    then failLocMsgP loc2 loc2 "UTF-8 decoding error"
+    then failLocMsgP loc2 loc2 (str ++ " (UTF-8 decoding error)")
     else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c)
 }