[project @ 2000-02-15 22:18:16 by panne]

[ghc-hetmet.git] / ghc / compiler / parser / Lex.lhs
diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs

index 25aa14c..13ace2b 100644 (file)
--- a/ghc/compiler/parser/Lex.lhs
+++ b/ghc/compiler/parser/Lex.lhs
@@ -128,6 +128,9 @@ data Token
    | ITlabel
    | ITdynamic
    | ITunsafe
+  | ITwith
+  | ITstdcallconv
+  | ITccallconv
  
    | ITinterface                        -- interface keywords
    | IT__export
@@ -147,14 +150,15 @@ data Token
    | ITlit_lit
    | ITstring_lit
    | ITtypeapp
-  | ITonce
-  | ITmany
+  | ITusage
+  | ITfuall
    | ITarity 
    | ITspecialise
    | ITnocaf
    | ITunfold InlinePragInfo
    | ITstrict ([Demand], Bool)
    | ITrules
+  | ITdeprecated
    | ITcprinfo (CprInfo)
    | IT__scc
    | ITsccAllCafs
@@ -164,6 +168,7 @@ data Token
    | ITinline_prag
    | ITnoinline_prag
    | ITrules_prag
+  | ITdeprecated_prag
    | ITline_prag
    | ITclose_prag
  
@@ -206,6 +211,8 @@ data Token
    | ITqvarsym (FAST_STRING,FAST_STRING)
    | ITqconsym (FAST_STRING,FAST_STRING)
  
+  | ITipvarid FAST_STRING      -- GHC extension: implicit param: ?x
+
    | ITpragma StringBuffer
  
    | ITchar       Char 
@@ -239,7 +246,8 @@ pragmaKeywordsFM = listToUFM $
         ( "NOTINLINE",  ITnoinline_prag ),
         ( "LINE",       ITline_prag ),
         ( "RULES",      ITrules_prag ),
-       ( "RULEZ",      ITrules_prag )  -- american spelling :-)
+       ( "RULEZ",      ITrules_prag ), -- american spelling :-)
+       ( "DEPRECATED", ITdeprecated_prag )
         ]
  
  haskellKeywordsFM = listToUFM $
@@ -280,6 +288,9 @@ ghcExtensionKeywordsFM = listToUFM $
         ( "label",      ITlabel ),
         ( "dynamic",    ITdynamic ),
         ( "unsafe",     ITunsafe ),
+       ( "with",       ITwith ),
+       ( "stdcall",    ITstdcallconv),
+       ( "ccall",      ITccallconv),
          ("_ccall_",    ITccall (False, False, False)),
          ("_ccall_GC_", ITccall (False, False, True)),
          ("_casm_",     ITccall (False, True,  False)),
@@ -304,13 +315,14 @@ ghcExtensionKeywordsFM = listToUFM $
         ("__litlit",            ITlit_lit),
         ("__string",            ITstring_lit),
         ("__a",                 ITtypeapp),
-       ("__o",                 ITonce),
-       ("__m",                 ITmany),
+       ("__u",                 ITusage),
+       ("__fuall",             ITfuall),
         ("__A",                 ITarity),
         ("__P",                 ITspecialise),
         ("__C",                 ITnocaf),
         ("__R",                 ITrules),
-        ("__u",                        ITunfold NoInlinePragInfo),
+        ("__D",                        ITdeprecated),
+        ("__U",                        ITunfold NoInlinePragInfo),
         
          ("__ccall",            ITccall (False, False, False)),
          ("__ccall_GC",         ITccall (False, False, True)),
@@ -365,7 +377,8 @@ lexer cont buf s@(PState{
                 })
  
         -- first, start a new lexeme and lose all the whitespace
-  = tab line bol atbol (stepOverLexeme buf)
+  =  _scc_ "Lexer" 
+  tab line bol atbol (stepOverLexeme buf)
    where
         line = srcLocLine loc
  
@@ -505,7 +518,6 @@ lexBOL cont buf s@(PState{
  lexToken :: (Token -> P a) -> Int# -> P a
  lexToken cont glaexts buf =
   --trace "lexToken" $
- _scc_ "Lexer" 
    case currentChar# buf of
  
      -- special symbols ----------------------------------------------------
@@ -523,7 +535,7 @@ lexToken cont glaexts buf =
      '}'# -> \ s@PState{context = ctx} ->
             case ctx of 
                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
-               _ -> lexError "too many '}'s" buf s
+               _        -> lexError "too many '}'s" buf s
  
      '#'# -> case lookAhead# buf 1# of
                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
@@ -586,6 +598,8 @@ lexToken cont glaexts buf =
                trace "lexIface: misplaced NUL?" $ 
                cont (ITunknown "\NUL") (stepOn buf)
  
+    '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
+           lex_ip cont (stepOn buf)
      c | is_digit  c -> lex_num cont glaexts 0 buf
        | is_symbol c -> lex_sym cont buf
        | is_upper  c -> lex_con cont glaexts buf
@@ -623,9 +637,9 @@ lex_string cont glaexts s buf
  
         -- ignore \& in a string, deal with string gaps
         '\\'# | next_ch `eqChar#` '&'# 
-               -> lex_string cont glaexts s (setCurrentPos# buf 2#)
+               -> lex_string cont glaexts s buf'
               | is_space next_ch
-               -> lex_stringgap cont glaexts s buf'
+               -> lex_stringgap cont glaexts s (incLexeme buf)
  
             where next_ch = lookAhead# buf 1#
                   buf' = setCurrentPos# buf 2#
@@ -703,8 +717,8 @@ readNum cont buf is_digit base conv = read buf 0
  
  is_hexdigit c 
         =  is_digit c 
-       || (c `geChar#` 'a'# && c `leChar#` 'h'#)
-       || (c `geChar#` 'A'# && c `leChar#` 'H'#)
+       || (c `geChar#` 'a'# && c `leChar#` 'f'#)
+       || (c `geChar#` 'A'# && c `leChar#` 'F'#)
  
  hex c | is_digit c = ord# c -# ord# '0'#
        | otherwise  = ord# (to_lower c) -# ord# 'a'# +# 10#
@@ -860,9 +874,10 @@ after_lexnum cont glaexts i buf
  
  lex_cstring cont buf =
   case expandUntilMatch (stepOverLexeme buf) "\'\'" of
-   buf' -> cont (ITlitlit (lexemeToFastString 
+   Just buf' -> cont (ITlitlit (lexemeToFastString 
                                 (setCurrentPos# buf' (negateInt# 2#))))
-               (mergeLexemes buf buf')
+                  (mergeLexemes buf buf')
+   Nothing   -> lexError "unterminated ``" buf
  
  ------------------------------------------------------------------------------
  -- Character Classes
@@ -887,12 +902,18 @@ is_ident  = is_ctype 1
  is_symbol = is_ctype 2
  is_any    = is_ctype 4
  is_space  = is_ctype 8
-is_upper  = is_ctype 16
-is_digit  = is_ctype 32
+is_lower  = is_ctype 16
+is_upper  = is_ctype 32
+is_digit  = is_ctype 64
  
  -----------------------------------------------------------------------------
  -- identifiers, symbols etc.
  
+lex_ip cont buf =
+ case expandWhile# is_ident buf of
+   buf' -> cont (ITipvarid lexeme) buf'
+          where lexeme = lexemeToFastString buf'
+
  lex_id cont glaexts buf =
   case expandWhile# is_ident buf of { buf1 -> 
  
@@ -979,10 +1000,9 @@ lex_id3 cont glaexts mod buf just_a_conid
         -- real lexeme is M.<sym>
         new_buf = mergeLexemes buf buf'
       in
-     case lookupUFM haskellKeySymsFM lexeme of {
-       Just kwd_token -> just_a_conid; -- avoid M.:: etc.
-       Nothing        -> cont (mk_qvar_token mod lexeme) new_buf
-     }}
+     cont (mk_qvar_token mod lexeme) new_buf
+       -- wrong, but arguably morally right: M... is now a qvarsym
+     }
  
    | otherwise   =
       let 
@@ -1199,13 +1219,25 @@ h = h
         - we still need to insert another '}' followed by a ';',
           hence the atbol trick.
  
+There's also a special hack in here to deal with
+
+       do
+          ....
+          e $ do
+          blah
+
+i.e. the inner context is at the same indentation level as the outer
+context.  This is strictly illegal according to Haskell 98, but
+there's a lot of existing code using this style and it doesn't make
+any sense to disallow it, since empty 'do' lists don't make sense.
  -}
  
-layoutOn :: P ()
-layoutOn buf s@(PState{ bol = bol, context = ctx }) =
+layoutOn :: Bool -> P ()
+layoutOn strict buf s@(PState{ bol = bol, context = ctx }) =
      let offset = lexemeIndex buf -# bol in
      case ctx of
-       Layout prev_off : _ | prev_off >=# offset ->
+       Layout prev_off : _ 
+          | if strict then prev_off >=# offset else prev_off ># offset ->
                 --trace ("layout on, column: " ++  show (I# offset)) $
                 POk s{ context = Layout (offset +# 1#) : ctx, atbol = 1# } ()
         other ->