X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2Fparser%2FLex.lhs;h=c15f46fa06b46c4450ae156c9b9b7f96c51bcd64;hb=0588967a4bc5f2e1b5e3bf433334fad59aa07221;hp=25aa14c5f5a650ee8bcfdb9d2d90833309142947;hpb=a33ecb97353d7e411c40edd662a4afcfc603fe28;p=ghc-hetmet.git

diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs
index 25aa14c..c15f46f 100644
--- a/ghc/compiler/parser/Lex.lhs
+++ b/ghc/compiler/parser/Lex.lhs
@@ -16,11 +16,10 @@ An example that provokes the error is
 --------------------------------------------------------
 
 \begin{code}
-{-# OPTIONS -#include "ctypes.h" #-}
 
 module Lex (
 
-	ifaceParseErr,
+	ifaceParseErr, srcParseErr,
 
 	-- Monad for parser
 	Token(..), lexer, ParseResult(..), PState(..),
@@ -55,12 +54,7 @@ import FastString
 import StringBuffer
 import GlaExts
 import ST		( runST )
-
-#if __GLASGOW_HASKELL__ >= 303
-import Bits
-import Word
-#endif
-
+import Ctypes
 import Char		( chr )
 import Addr
 import PrelRead 	( readRational__ ) -- Glasgow non-std
@@ -128,6 +122,9 @@ data Token
   | ITlabel
   | ITdynamic
   | ITunsafe
+  | ITwith
+  | ITstdcallconv
+  | ITccallconv
 
   | ITinterface			-- interface keywords
   | IT__export
@@ -142,20 +139,24 @@ data Token
   | ITbottom
   | ITinteger_lit 
   | ITfloat_lit
+  | ITword_lit
+  | ITword64_lit
+  | ITint64_lit
   | ITrational_lit
   | ITaddr_lit
   | ITlit_lit
   | ITstring_lit
   | ITtypeapp
-  | ITonce
-  | ITmany
+  | ITusage
+  | ITfuall
   | ITarity 
   | ITspecialise
   | ITnocaf
   | ITunfold InlinePragInfo
   | ITstrict ([Demand], Bool)
   | ITrules
-  | ITcprinfo (CprInfo)
+  | ITcprinfo
+  | ITdeprecated
   | IT__scc
   | ITsccAllCafs
 
@@ -164,6 +165,7 @@ data Token
   | ITinline_prag
   | ITnoinline_prag
   | ITrules_prag
+  | ITdeprecated_prag
   | ITline_prag
   | ITclose_prag
 
@@ -206,6 +208,8 @@ data Token
   | ITqvarsym (FAST_STRING,FAST_STRING)
   | ITqconsym (FAST_STRING,FAST_STRING)
 
+  | ITipvarid FAST_STRING	-- GHC extension: implicit param: ?x
+
   | ITpragma StringBuffer
 
   | ITchar       Char 
@@ -222,7 +226,7 @@ data Token
 
   | ITunknown String		-- Used when the lexer can't make sense of it
   | ITeof			-- end of file token
-  deriving Text -- debugging
+  deriving Show -- debugging
 \end{code}
 
 -----------------------------------------------------------------------------
@@ -239,7 +243,8 @@ pragmaKeywordsFM = listToUFM $
 	( "NOTINLINE",	ITnoinline_prag ),
 	( "LINE",       ITline_prag ),
 	( "RULES",	ITrules_prag ),
-	( "RULEZ",	ITrules_prag )	-- american spelling :-)
+	( "RULEZ",	ITrules_prag ),	-- american spelling :-)
+	( "DEPRECATED",	ITdeprecated_prag )
  	]
 
 haskellKeywordsFM = listToUFM $
@@ -272,6 +277,7 @@ haskellKeywordsFM = listToUFM $
 	( "_scc_",	ITscc )
      ]
 
+-- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
 ghcExtensionKeywordsFM = listToUFM $
 	map (\ (x,y) -> (_PK_ x,y))
      [	( "forall",	ITforall ),
@@ -280,6 +286,9 @@ ghcExtensionKeywordsFM = listToUFM $
 	( "label",	ITlabel ),
 	( "dynamic",	ITdynamic ),
 	( "unsafe",	ITunsafe ),
+	( "with",	ITwith ),
+	( "stdcall",    ITstdcallconv),
+	( "ccall",      ITccallconv),
         ("_ccall_",	ITccall (False, False, False)),
         ("_ccall_GC_",	ITccall (False, False, True)),
         ("_casm_",	ITccall (False, True,  False)),
@@ -299,18 +308,22 @@ ghcExtensionKeywordsFM = listToUFM $
 	("__bot",		ITbottom),
 	("__integer",		ITinteger_lit),
 	("__float",		ITfloat_lit),
+	("__int64",		ITint64_lit),
+	("__word",		ITword_lit),
+	("__word64",		ITword64_lit),
 	("__rational",		ITrational_lit),
 	("__addr",		ITaddr_lit),
 	("__litlit",		ITlit_lit),
 	("__string",		ITstring_lit),
 	("__a",			ITtypeapp),
-	("__o",			ITonce),
-	("__m",			ITmany),
+	("__u",			ITusage),
+	("__fuall",		ITfuall),
 	("__A",			ITarity),
 	("__P",			ITspecialise),
 	("__C",			ITnocaf),
 	("__R",			ITrules),
-        ("__u",			ITunfold NoInlinePragInfo),
+        ("__D",			ITdeprecated),
+        ("__U",			ITunfold NoInlinePragInfo),
 	
         ("__ccall",		ITccall (False, False, False)),
         ("__ccall_GC",		ITccall (False, False, True)),
@@ -365,7 +378,8 @@ lexer cont buf s@(PState{
 		})
 
 	-- first, start a new lexeme and lose all the whitespace
-  = tab line bol atbol (stepOverLexeme buf)
+  =  _scc_ "Lexer" 
+  tab line bol atbol (stepOverLexeme buf)
   where
 	line = srcLocLine loc
 
@@ -505,7 +519,6 @@ lexBOL cont buf s@(PState{
 lexToken :: (Token -> P a) -> Int# -> P a
 lexToken cont glaexts buf =
  --trace "lexToken" $
- _scc_ "Lexer" 
   case currentChar# buf of
 
     -- special symbols ----------------------------------------------------
@@ -523,7 +536,7 @@ lexToken cont glaexts buf =
     '}'# -> \ s@PState{context = ctx} ->
 	    case ctx of	
 		(_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
-		_ -> lexError "too many '}'s" buf s
+		_  	 -> lexError "too many '}'s" buf s
 
     '#'# -> case lookAhead# buf 1# of
 		')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
@@ -561,8 +574,8 @@ lexToken cont glaexts buf =
 			lex_demand cont (stepOnUntil (not . isSpace) 
 		                        (stepOnBy# buf 3#)) -- past __S
 	   	    'M'# -> 
-			lex_cpr cont (stepOnUntil (not . isSpace) 
-		                     (stepOnBy# buf 3#)) -- past __M
+			cont ITcprinfo (stepOnBy# buf 3#)	-- past __M
+
 	   	    's'# -> 
 			case prefixMatch (stepOnBy# buf 3#) "cc" of
 		               Just buf' -> lex_scc cont (stepOverLexeme buf')
@@ -586,6 +599,8 @@ lexToken cont glaexts buf =
 	       trace "lexIface: misplaced NUL?" $ 
 	       cont (ITunknown "\NUL") (stepOn buf)
 
+    '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
+	    lex_ip cont (incLexeme buf)
     c | is_digit  c -> lex_num cont glaexts 0 buf
       | is_symbol c -> lex_sym cont buf
       | is_upper  c -> lex_con cont glaexts buf
@@ -623,9 +638,9 @@ lex_string cont glaexts s buf
 
 	-- ignore \& in a string, deal with string gaps
 	'\\'# | next_ch `eqChar#` '&'# 
-		-> lex_string cont glaexts s (setCurrentPos# buf 2#)
+		-> lex_string cont glaexts s buf'
 	      | is_space next_ch
-		-> lex_stringgap cont glaexts s buf'
+		-> lex_stringgap cont glaexts s (incLexeme buf)
 
 	    where next_ch = lookAhead# buf 1#
 		  buf' = setCurrentPos# buf 2#
@@ -703,8 +718,8 @@ readNum cont buf is_digit base conv = read buf 0
 
 is_hexdigit c 
 	=  is_digit c 
-	|| (c `geChar#` 'a'# && c `leChar#` 'h'#)
-	|| (c `geChar#` 'A'# && c `leChar#` 'H'#)
+	|| (c `geChar#` 'a'# && c `leChar#` 'f'#)
+	|| (c `geChar#` 'A'# && c `leChar#` 'F'#)
 
 hex c | is_digit c = ord# c -# ord# '0'#
       | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
@@ -784,23 +799,6 @@ lex_demand cont buf =
    = case read_em [] buf of
       (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
 
-lex_cpr cont buf = 
- case read_em [] buf of { (cpr_inf,buf') -> 
-   ASSERT ( null (tail cpr_inf) )
-   cont (ITcprinfo $ head cpr_inf) buf'
- }
- where
-   -- code snatched from lex_demand above
-  read_em acc buf = 
-   case currentChar# buf of
-    '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
-    '('# -> do_unpack acc (stepOn buf)
-    ')'# -> (reverse acc, stepOn buf)
-    _    -> (reverse acc, buf)
-
-  do_unpack acc buf
-   = case read_em [] buf of
-      (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
 
 ------------------
 lex_scc cont buf =
@@ -860,39 +858,19 @@ after_lexnum cont glaexts i buf
 
 lex_cstring cont buf =
  case expandUntilMatch (stepOverLexeme buf) "\'\'" of
-   buf' -> cont (ITlitlit (lexemeToFastString 
+   Just buf' -> cont (ITlitlit (lexemeToFastString 
 				(setCurrentPos# buf' (negateInt# 2#))))
-           	(mergeLexemes buf buf')
-
-------------------------------------------------------------------------------
--- Character Classes
-
-is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
-
-{-# INLINE is_ctype #-}
-#if __GLASGOW_HASKELL__ >= 303
-is_ctype :: Word8 -> Char# -> Bool
-is_ctype mask = \c ->
-   (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
-#else
-is_ctype :: Int -> Char# -> Bool
-is_ctype (I# mask) = \c ->
-    let (A# ctype) = ``char_types'' :: Addr
-	flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
-    in
-	(flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
-#endif
-
-is_ident  = is_ctype 1
-is_symbol = is_ctype 2
-is_any    = is_ctype 4
-is_space  = is_ctype 8
-is_upper  = is_ctype 16
-is_digit  = is_ctype 32
+           	   (mergeLexemes buf buf')
+   Nothing   -> lexError "unterminated ``" buf
 
 -----------------------------------------------------------------------------
 -- identifiers, symbols etc.
 
+lex_ip cont buf =
+ case expandWhile# is_ident buf of
+   buf' -> cont (ITipvarid lexeme) buf'
+	   where lexeme = lexemeToFastString buf'
+
 lex_id cont glaexts buf =
  case expandWhile# is_ident buf of { buf1 -> 
 
@@ -979,10 +957,9 @@ lex_id3 cont glaexts mod buf just_a_conid
 	-- real lexeme is M.<sym>
        new_buf = mergeLexemes buf buf'
      in
-     case lookupUFM haskellKeySymsFM lexeme of {
-	Just kwd_token -> just_a_conid;	-- avoid M.:: etc.
-	Nothing        -> cont (mk_qvar_token mod lexeme) new_buf
-     }}
+     cont (mk_qvar_token mod lexeme) new_buf
+	-- wrong, but arguably morally right: M... is now a qvarsym
+     }
 
   | otherwise   =
      let 
@@ -1015,9 +992,6 @@ slurp_trailing_hashes buf glaexts
 
 mk_var_token pk_str
   | is_upper f		= ITconid pk_str
- 	-- _[A-Z] is treated as a constructor in interface files.
-  | f `eqChar#` '_'# && not (_NULL_ tl) 
-	&& (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
   | is_ident f		= ITvarid pk_str
   | f `eqChar#` ':'#	= ITconsym pk_str
   | otherwise		= ITvarsym pk_str
@@ -1199,13 +1173,25 @@ h = h
 	- we still need to insert another '}' followed by a ';',
 	  hence the atbol trick.
 
+There's also a special hack in here to deal with
+
+	do
+	   ....
+	   e $ do
+	   blah
+
+i.e. the inner context is at the same indentation level as the outer
+context.  This is strictly illegal according to Haskell 98, but
+there's a lot of existing code using this style and it doesn't make
+any sense to disallow it, since empty 'do' lists don't make sense.
 -}
 
-layoutOn :: P ()
-layoutOn buf s@(PState{ bol = bol, context = ctx }) =
+layoutOn :: Bool -> P ()
+layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
     let offset = lexemeIndex buf -# bol in
     case ctx of
-	Layout prev_off : _ | prev_off >=# offset ->
+	Layout prev_off : _ 
+	   | if strict then prev_off >=# offset else prev_off ># offset ->
 		--trace ("layout on, column: " ++  show (I# offset)) $
 		POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
 	other -> 
@@ -1217,10 +1203,10 @@ layoutOff buf s@(PState{ context = ctx }) =
     POk s{ context = NoLayout:ctx } ()
 
 popContext :: P ()
-popContext = \ buf s@(PState{ context = ctx }) ->
+popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
   case ctx of
 	(_:tl) -> POk s{ context = tl } ()
-	[]    -> panic "Lex.popContext: empty context"
+	[]     -> PFailed (srcParseErr buf loc)
 
 {- 
  Note that if the name of the file we're processing ends
@@ -1262,4 +1248,17 @@ ifaceVersionErr hi_vers l toks
         Nothing -> ptext SLIT("pre ghc-3.02 version")
 	Just v  -> ptext SLIT("version") <+> integer v
 
+-----------------------------------------------------------------------------
+
+srcParseErr :: StringBuffer -> SrcLoc -> Message
+srcParseErr s l
+  = hcat [ppr l, 
+	  if null token 
+	     then ptext SLIT(": parse error (possibly incorrect indentation)")
+	     else hcat [ptext SLIT(": parse error on input "),
+          	  	char '`', text token, char '\'']
+    ]
+  where 
+	token = lexemeToString s
+
 \end{code}