[project @ 2002-02-04 03:40:31 by chak]

[ghc-hetmet.git] / ghc / compiler / parser / Lex.lhs
diff --git a/ghc/compiler/parser/Lex.lhs b/ghc/compiler/parser/Lex.lhs

index b2f04b0..dfc3945 100644 (file)
--- a/ghc/compiler/parser/Lex.lhs
+++ b/ghc/compiler/parser/Lex.lhs
@@ -16,11 +16,10 @@ An example that provokes the error is
  --------------------------------------------------------
  
  \begin{code}
-{-# OPTIONS -#include "ctypes.h" #-}
  
  module Lex (
  
-       ifaceParseErr,
+       ifaceParseErr, srcParseErr,
  
         -- Monad for parser
         Token(..), lexer, ParseResult(..), PState(..),
@@ -28,41 +27,33 @@ module Lex (
         StringBuffer,
  
         P, thenP, thenP_, returnP, mapP, failP, failMsgP,
-       getSrcLocP, getSrcFile,
+       getSrcLocP, setSrcLocP, getSrcFile,
         layoutOn, layoutOff, pushContext, popContext
      ) where
  
  #include "HsVersions.h"
  
-import Char            ( ord, isSpace, toUpper )
+import Char            ( isSpace, toUpper )
  import List             ( isSuffixOf )
  
-import IdInfo          ( InlinePragInfo(..), CprInfo(..) )
-import Name            ( isLowerISO, isUpperISO )
-import PrelMods                ( mkTupNameStr, mkUbxTupNameStr )
-import CmdLineOpts     ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
-import Demand          ( Demand(..) {- instance Read -} )
-import UniqFM           ( UniqFM, listToUFM, lookupUFM)
-import BasicTypes      ( NewOrData(..) )
+import PrelNames       ( mkTupNameStr )
+import CmdLineOpts     ( opt_HiVersion, opt_NoHiCheck )
+import ForeignCall     ( Safety(..) )
+import NewDemand       ( StrictSig(..), Demand(..), Demands(..),
+                         DmdResult(..), mkTopDmdType, evalDmd, lazyDmd )
+import UniqFM           ( listToUFM, lookupUFM )
+import BasicTypes      ( Boxity(..) )
  import SrcLoc          ( SrcLoc, incSrcLine, srcLocFile, srcLocLine,
                           replaceSrcLine, mkSrcLoc )
  
-import Maybes          ( MaybeErr(..) )
  import ErrUtils                ( Message )
  import Outputable
  
  import FastString
  import StringBuffer
  import GlaExts
-import ST              ( runST )
-
-#if __GLASGOW_HASKELL__ >= 303
-import Bits
-import Word
-#endif
-
-import Char            ( chr )
-import Addr
+import Ctype
+import Char            ( chr, ord )
  import PrelRead        ( readRational__ ) -- Glasgow non-std
  \end{code}
  
@@ -120,17 +111,19 @@ data Token
    | ITthen
    | ITtype
    | ITwhere
-  | ITscc
+  | ITscc                      -- ToDo: remove (we use {-# SCC "..." #-} now)
  
    | ITforall                   -- GHC extension keywords
    | ITforeign
    | ITexport
    | ITlabel
    | ITdynamic
+  | ITsafe
    | ITunsafe
    | ITwith
    | ITstdcallconv
    | ITccallconv
+  | ITdotnet
  
    | ITinterface                        -- interface keywords
    | IT__export
@@ -140,13 +133,17 @@ data Token
    | ITcoerce
    | ITinlineMe
    | ITinlineCall
-  | ITccall (Bool,Bool,Bool)   -- (is_dyn, is_casm, may_gc)
+  | ITccall (Bool,Bool,Safety) -- (is_dyn, is_casm, may_gc)
    | ITdefaultbranch
    | ITbottom
    | ITinteger_lit 
    | ITfloat_lit
+  | ITword_lit
+  | ITword64_lit
+  | ITint64_lit
    | ITrational_lit
    | ITaddr_lit
+  | ITlabel_lit
    | ITlit_lit
    | ITstring_lit
    | ITtypeapp
@@ -155,11 +152,11 @@ data Token
    | ITarity 
    | ITspecialise
    | ITnocaf
-  | ITunfold InlinePragInfo
-  | ITstrict ([Demand], Bool)
+  | ITunfold
+  | ITstrict StrictSig
    | ITrules
+  | ITcprinfo
    | ITdeprecated
-  | ITcprinfo (CprInfo)
    | IT__scc
    | ITsccAllCafs
  
@@ -170,6 +167,7 @@ data Token
    | ITrules_prag
    | ITdeprecated_prag
    | ITline_prag
+  | ITscc_prag
    | ITclose_prag
  
    | ITdotdot                   -- reserved symbols
@@ -190,6 +188,8 @@ data Token
  
    | ITocurly                   -- special symbols
    | ITccurly
+  | ITocurlybar                 -- {|, for type applications
+  | ITccurlybar                 -- |}, for type applications
    | ITvccurly
    | ITobrack
    | ITcbrack
@@ -211,16 +211,17 @@ data Token
    | ITqvarsym (FAST_STRING,FAST_STRING)
    | ITqconsym (FAST_STRING,FAST_STRING)
  
-  | ITipvarid FAST_STRING      -- GHC extension: implicit param: ?x
+  | ITdupipvarid   FAST_STRING -- GHC extension: implicit param: ?x
+  | ITsplitipvarid FAST_STRING -- GHC extension: implicit param: %x
  
    | ITpragma StringBuffer
  
-  | ITchar       Char 
+  | ITchar       Int
    | ITstring     FAST_STRING
-  | ITinteger    Integer 
+  | ITinteger    Integer
    | ITrational   Rational
  
-  | ITprimchar   Char
+  | ITprimchar   Int
    | ITprimstring FAST_STRING
    | ITprimint    Integer
    | ITprimfloat  Rational
@@ -229,7 +230,7 @@ data Token
  
    | ITunknown String           -- Used when the lexer can't make sense of it
    | ITeof                      -- end of file token
-  deriving Text -- debugging
+  deriving Show -- debugging
  \end{code}
  
  -----------------------------------------------------------------------------
@@ -247,6 +248,7 @@ pragmaKeywordsFM = listToUFM $
         ( "LINE",       ITline_prag ),
         ( "RULES",      ITrules_prag ),
         ( "RULEZ",      ITrules_prag ), -- american spelling :-)
+       ( "SCC",        ITscc_prag ),
         ( "DEPRECATED", ITdeprecated_prag )
         ]
  
@@ -277,9 +279,27 @@ haskellKeywordsFM = listToUFM $
         ( "then",       ITthen ),     
         ( "type",       ITtype ),     
         ( "where",      ITwhere ),
-       ( "_scc_",      ITscc )
+       ( "_scc_",      ITscc )         -- ToDo: remove
       ]
  
+isSpecial :: Token -> Bool
+-- If we see M.x, where x is a keyword, but
+-- is special, we treat is as just plain M.x, 
+-- not as a keyword.
+isSpecial ITas         = True
+isSpecial IThiding     = True
+isSpecial ITqualified  = True
+isSpecial ITforall     = True
+isSpecial ITexport     = True
+isSpecial ITlabel      = True
+isSpecial ITdynamic    = True
+isSpecial ITsafe       = True
+isSpecial ITunsafe     = True
+isSpecial ITwith       = True
+isSpecial ITccallconv   = True
+isSpecial ITstdcallconv = True
+isSpecial _             = False
+
  -- IMPORTANT: Keep this in synch with ParseIface.y's var_fs production! (SUP)
  ghcExtensionKeywordsFM = listToUFM $
         map (\ (x,y) -> (_PK_ x,y))
@@ -288,14 +308,16 @@ ghcExtensionKeywordsFM = listToUFM $
         ( "export",     ITexport ),
         ( "label",      ITlabel ),
         ( "dynamic",    ITdynamic ),
+       ( "safe",       ITunsafe ),
         ( "unsafe",     ITunsafe ),
         ( "with",       ITwith ),
         ( "stdcall",    ITstdcallconv),
         ( "ccall",      ITccallconv),
-        ("_ccall_",    ITccall (False, False, False)),
-        ("_ccall_GC_", ITccall (False, False, True)),
-        ("_casm_",     ITccall (False, True,  False)),
-        ("_casm_GC_",  ITccall (False, True,  True)),
+       ( "dotnet",     ITdotnet),
+        ("_ccall_",    ITccall (False, False, PlayRisky)),
+        ("_ccall_GC_", ITccall (False, False, PlaySafe)),
+        ("_casm_",     ITccall (False, True,  PlayRisky)),
+        ("_casm_GC_",  ITccall (False, True,  PlaySafe)),
  
         -- interface keywords
          ("__interface",                ITinterface),
@@ -311,8 +333,12 @@ ghcExtensionKeywordsFM = listToUFM $
         ("__bot",               ITbottom),
         ("__integer",           ITinteger_lit),
         ("__float",             ITfloat_lit),
+       ("__int64",             ITint64_lit),
+       ("__word",              ITword_lit),
+       ("__word64",            ITword64_lit),
         ("__rational",          ITrational_lit),
         ("__addr",              ITaddr_lit),
+       ("__label",             ITlabel_lit),
         ("__litlit",            ITlit_lit),
         ("__string",            ITstring_lit),
         ("__a",                 ITtypeapp),
@@ -323,16 +349,16 @@ ghcExtensionKeywordsFM = listToUFM $
         ("__C",                 ITnocaf),
         ("__R",                 ITrules),
          ("__D",                        ITdeprecated),
-        ("__U",                        ITunfold NoInlinePragInfo),
+        ("__U",                        ITunfold),
         
-        ("__ccall",            ITccall (False, False, False)),
-        ("__ccall_GC",         ITccall (False, False, True)),
-        ("__dyn_ccall",                ITccall (True,  False, False)),
-        ("__dyn_ccall_GC",     ITccall (True,  False, True)),
-        ("__casm",             ITccall (False, True,  False)),
-        ("__dyn_casm",         ITccall (True,  True,  False)),
-        ("__casm_GC",          ITccall (False, True,  True)),
-        ("__dyn_casm_GC",      ITccall (True,  True,  True)),
+        ("__ccall",            ITccall (False, False, PlayRisky)),
+        ("__ccall_GC",         ITccall (False, False, PlaySafe)),
+        ("__dyn_ccall",                ITccall (True,  False, PlayRisky)),
+        ("__dyn_ccall_GC",     ITccall (True,  False, PlaySafe)),
+        ("__casm",             ITccall (False, True,  PlayRisky)),
+        ("__dyn_casm",         ITccall (True,  True,  PlayRisky)),
+        ("__casm_GC",          ITccall (False, True,  PlaySafe)),
+        ("__dyn_casm_GC",      ITccall (True,  True,  PlaySafe)),
  
          ("/\\",                        ITbiglam)
       ]
@@ -401,17 +427,15 @@ lexer cont buf s@(PState{
                           if next `eqChar#` '-'# then trundle (n +# 1#)
                           else if is_symbol next || n <# 2#
                                 then is_a_token
-                               else case untilChar# (stepOnBy# buf n) '\n'# of 
-                                   { buf' -> tab y bol atbol (stepOverLexeme buf')
-                                   }
+                               else tab y bol atbol 
+                                        (stepOnUntilChar# (stepOnBy# buf n) '\n'#)
                     in trundle 1#
  
                 -- comments and pragmas.  We deal with LINE pragmas here,
                 -- and throw out any unrecognised pragmas as comments.  Any
                 -- pragmas we know about are dealt with later (after any layout
                 -- processing if necessary).
-
-           '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
+            '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
                 if lookAhead# buf 2# `eqChar#` '#'# then
                   if lookAhead# buf 3# `eqChar#` '#'# then is_a_token else
                   case expandWhile# is_space (setCurrentPos# buf 3#) of { buf1->
@@ -419,14 +443,34 @@ lexer cont buf s@(PState{
                   let lexeme = mkFastString -- ToDo: too slow
                                   (map toUpper (lexemeToString buf2)) in
                   case lookupUFM pragmaKeywordsFM lexeme of
-                       Just ITline_prag -> line_prag (lexer cont) buf2 s'
+                       -- ignore RULES pragmas when -fglasgow-exts is off
+                       Just ITrules_prag | not (flag glaexts) ->
+                          skip_to_end (stepOnBy# buf 2#) s'
+                       Just ITline_prag -> 
+                          line_prag skip_to_end buf2 s'
                         Just other -> is_a_token
-                       Nothing -> skip_to_end (stepOnBy# buf 2#)
+                       Nothing -> skip_to_end (stepOnBy# buf 2#) s'
                   }}
-               
-               else skip_to_end (stepOnBy# buf 2#)
+
+               else skip_to_end (stepOnBy# buf 2#) s'
+               where
+                   skip_to_end = skipNestedComment (lexer cont)
+
+               -- special GHC extension: we grok cpp-style #line pragmas
+           '#'# | lexemeIndex buf ==# bol ->   -- the '#' must be in column 0
+               let buf1 | lookAhead# buf 1# `eqChar#` 'l'# &&
+                          lookAhead# buf 2# `eqChar#` 'i'# &&
+                          lookAhead# buf 3# `eqChar#` 'n'# &&
+                          lookAhead# buf 4# `eqChar#` 'e'#  = stepOnBy# buf 5#
+                        | otherwise = stepOn buf
+               in
+               case expandWhile# is_space buf1 of { buf2 ->
+               if is_digit (currentChar# buf2) 
+                       then line_prag next_line buf2 s'
+                       else is_a_token
+               }
                 where
-                   skip_to_end buf = nested_comment (lexer cont) buf s'
+               next_line buf = lexer cont (stepOnUntilChar# buf '\n'#)
  
                 -- tabs have been expanded beforehand
             c | is_space c -> tab y bol atbol (stepOn buf)
@@ -440,45 +484,56 @@ lexer cont buf s@(PState{
                             | otherwise    = lexToken cont glaexts buf s'
  
  -- {-# LINE .. #-} pragmas.  yeuch.
-line_prag cont buf =
+line_prag cont buf s@PState{loc=loc} =
    case expandWhile# is_space buf               of { buf1 ->
    case scanNumLit 0 (stepOverLexeme buf1)      of { (line,buf2) ->
    -- subtract one: the line number refers to the *following* line.
    let real_line = line - 1 in
    case fromInteger real_line                   of { i@(I# l) -> 
+       -- ToDo, if no filename then we skip the newline.... d'oh
    case expandWhile# is_space buf2              of { buf3 ->
    case currentChar# buf3                       of
       '\"'#{-"-} -> 
         case untilEndOfString# (stepOn (stepOverLexeme buf3)) of { buf4 ->
-       let file = lexemeToFastString buf4 in
-       \s@PState{loc=loc} -> skipToEnd buf4 s{loc = mkSrcLoc file i}
+       let 
+           file = lexemeToFastString buf4 
+           new_buf = stepOn (stepOverLexeme buf4)
+       in
+       if nullFastString file
+               then cont new_buf s{loc = replaceSrcLine loc l}
+               else cont new_buf s{loc = mkSrcLoc file i}
         }
-     other -> \s@PState{loc=loc} -> skipToEnd buf3 s{loc = replaceSrcLine loc l}
+     _other -> cont (stepOverLexeme buf3) s{loc = replaceSrcLine loc l}
    }}}}
-  where
-       skipToEnd buf = nested_comment cont buf
  
-nested_comment :: P a -> P a
-nested_comment cont buf = loop buf
+skipNestedComment :: P a -> P a
+skipNestedComment cont buf state = skipNestedComment' (loc state) cont buf state
+
+skipNestedComment' :: SrcLoc -> P a -> P a
+skipNestedComment' orig_loc cont buf = loop buf
   where
     loop buf = 
       case currentChar# buf of
-       '\NUL'# | bufferExhausted (stepOn buf) -> 
-               lexError "unterminated `{-'" buf
-
-       '-'# | lookAhead# buf 1# `eqChar#` '}'# ->
-               cont (stepOnBy# buf 2#)
+       '-'# | lookAhead# buf 1# `eqChar#` '}'# -> cont (stepOnBy# buf 2#)
  
         '{'# | lookAhead# buf 1# `eqChar#` '-'# ->
-             nested_comment (nested_comment cont) (stepOnBy# buf 2#)
+             skipNestedComment 
+               (skipNestedComment' orig_loc cont) 
+               (stepOnBy# buf 2#)
  
         '\n'# -> \ s@PState{loc=loc} ->
                  let buf' = stepOn buf in
-                nested_comment cont buf'
-                       s{loc = incSrcLine loc, bol = currentIndex# buf',
-                         atbol = 1#}
+                loop buf' s{loc = incSrcLine loc, 
+                            bol = currentIndex# buf',
+                            atbol = 1#}
+
+       -- pass the original SrcLoc to lexError so that the error is
+       -- reported at the line it was originally on, not the line at
+       -- the end of the file.
+       '\NUL'# | bufferExhausted (stepOn buf) -> 
+               \s -> lexError "unterminated `{-'" buf s{loc=orig_loc} -- -}
  
-       _   -> nested_comment cont (stepOn buf)
+       _   -> loop (stepOn buf)
  
  -- When we are lexing the first token of a line, check whether we need to
  -- insert virtual semicolons or close braces due to layout.
@@ -518,7 +573,7 @@ lexBOL cont buf s@(PState{
  
  lexToken :: (Token -> P a) -> Int# -> P a
  lexToken cont glaexts buf =
- --trace "lexToken" $
+-- trace "lexToken" $
    case currentChar# buf of
  
      -- special symbols ----------------------------------------------------
@@ -532,12 +587,16 @@ lexToken cont glaexts buf =
      ']'# -> cont ITcbrack    (incLexeme buf)
      ','# -> cont ITcomma     (incLexeme buf)
      ';'# -> cont ITsemi      (incLexeme buf)
-
      '}'# -> \ s@PState{context = ctx} ->
             case ctx of 
                 (_:ctx') -> cont ITccurly (incLexeme buf) s{context=ctx'}
                 _        -> lexError "too many '}'s" buf s
+    '|'# -> case lookAhead# buf 1# of
+                '}'#  | flag glaexts -> cont ITccurlybar 
+                                              (setCurrentPos# buf 2#)
+                 _                    -> lex_sym cont (incLexeme buf)
  
+                
      '#'# -> case lookAhead# buf 1# of
                 ')'#  | flag glaexts -> cont ITcubxparen (setCurrentPos# buf 2#)
                 '-'# -> case lookAhead# buf 2# of
@@ -551,19 +610,23 @@ lexToken cont glaexts buf =
                 -> cont ITbackquote (incLexeme buf)
  
      '{'# ->    -- look for "{-##" special iface pragma
-       case lookAhead# buf 1# of
+            case lookAhead# buf 1# of
+           '|'# | flag glaexts 
+                -> cont ITocurlybar (setCurrentPos# buf 2#)
            '-'# -> case lookAhead# buf 2# of
                     '#'# -> case lookAhead# buf 3# of
-                               '#'# ->  
-                                  let (lexeme, buf') 
-                                         = doDiscard False (stepOnBy# (stepOverLexeme buf) 4#) in
-                                  cont (ITpragma lexeme) buf'
+                               '#'# -> 
+                                  lexPragma
+                                     cont
+                                     (\ cont lexeme buf' -> cont (ITpragma lexeme) buf')
+                                     0#
+                                     (stepOnBy# (stepOverLexeme buf) 4#)
                                 _ -> lex_prag cont (setCurrentPos# buf 3#)
-                   _    -> cont ITocurly (incLexeme buf)
-          _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf)
+                   _    -> cont ITocurly (incLexeme buf) 
+          _ -> (layoutOff `thenP_` cont ITocurly)  (incLexeme buf) 
  
      -- strings/characters -------------------------------------------------
-    '\"'#{-"-} -> lex_string cont glaexts "" (incLexeme buf)
+    '\"'#{-"-} -> lex_string cont glaexts [] (incLexeme buf)
      '\''#      -> lex_char (char_end cont) glaexts (incLexeme buf)
  
      -- strictness and cpr pragmas and __scc treated specially.
@@ -574,8 +637,8 @@ lexToken cont glaexts buf =
                         lex_demand cont (stepOnUntil (not . isSpace) 
                                         (stepOnBy# buf 3#)) -- past __S
                     'M'# -> 
-                       lex_cpr cont (stepOnUntil (not . isSpace) 
-                                    (stepOnBy# buf 3#)) -- past __M
+                       cont ITcprinfo (stepOnBy# buf 3#)       -- past __M
+
                     's'# -> 
                         case prefixMatch (stepOnBy# buf 3#) "cc" of
                                Just buf' -> lex_scc cont (stepOverLexeme buf')
@@ -600,7 +663,9 @@ lexToken cont glaexts buf =
                cont (ITunknown "\NUL") (stepOn buf)
  
      '?'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
-           lex_ip cont (incLexeme buf)
+           lex_ip ITdupipvarid cont (incLexeme buf)
+    '%'# | flag glaexts && is_lower (lookAhead# buf 1#) ->
+           lex_ip ITsplitipvarid cont (incLexeme buf)
      c | is_digit  c -> lex_num cont glaexts 0 buf
        | is_symbol c -> lex_sym cont buf
        | is_upper  c -> lex_con cont glaexts buf
@@ -631,9 +696,12 @@ lex_prag cont buf
  lex_string cont glaexts s buf
    = case currentChar# buf of
         '"'#{-"-} -> 
-          let buf' = incLexeme buf; s' = mkFastString (reverse s) in
-          case currentChar# buf' of
-               '#'# | flag glaexts -> cont (ITprimstring s') (incLexeme buf')
+          let buf' = incLexeme buf
+               s' = mkFastStringNarrow (map chr (reverse s)) 
+           in case currentChar# buf' of
+               '#'# | flag glaexts -> if all (<= 0xFF) s
+                    then cont (ITprimstring s') (incLexeme buf')
+                    else lexError "primitive string literal must contain only characters <= \'\\xFF\'" buf'
                 _                   -> cont (ITstring s') buf'
  
         -- ignore \& in a string, deal with string gaps
@@ -658,11 +726,11 @@ lex_stringgap cont glaexts s buf
  
  lex_next_string cont s glaexts c buf = lex_string cont glaexts (c:s) buf
  
-lex_char :: (Int# -> Char -> P a) -> Int# -> P a
+lex_char :: (Int# -> Int -> P a) -> Int# -> P a
  lex_char cont glaexts buf
    = case currentChar# buf of
         '\\'# -> lex_escape (cont glaexts) (incLexeme buf)
-       c | is_any c -> cont glaexts (C# c) (incLexeme buf)
+       c | is_any c -> cont glaexts (I# (ord# c)) (incLexeme buf)
         other -> charError buf
  
  char_end cont glaexts c buf
@@ -677,19 +745,19 @@ char_end cont glaexts c buf
  lex_escape cont buf
    = let buf' = incLexeme buf in
      case currentChar# buf of
-       'a'#       -> cont '\a' buf'
-       'b'#       -> cont '\b' buf'
-       'f'#       -> cont '\f' buf'
-       'n'#       -> cont '\n' buf'
-       'r'#       -> cont '\r' buf'
-       't'#       -> cont '\t' buf'
-       'v'#       -> cont '\v' buf'
-       '\\'#      -> cont '\\' buf'
-       '"'#       -> cont '\"' buf'
-       '\''#      -> cont '\'' buf'
+       'a'#       -> cont (ord '\a') buf'
+       'b'#       -> cont (ord '\b') buf'
+       'f'#       -> cont (ord '\f') buf'
+       'n'#       -> cont (ord '\n') buf'
+       'r'#       -> cont (ord '\r') buf'
+       't'#       -> cont (ord '\t') buf'
+       'v'#       -> cont (ord '\v') buf'
+       '\\'#      -> cont (ord '\\') buf'
+       '"'#       -> cont (ord '\"') buf'
+       '\''#      -> cont (ord '\'') buf'
         '^'#       -> let c = currentChar# buf' in
                       if c `geChar#` '@'# && c `leChar#` '_'#
-                       then cont (C# (chr# (ord# c -# ord# '@'#))) (incLexeme buf')
+                       then cont (I# (ord# c -# ord# '@'#)) (incLexeme buf')
                         else charError buf'
  
         'x'#      -> readNum (after_charnum cont) buf' is_hexdigit 16 hex
@@ -699,13 +767,12 @@ lex_escape cont buf
  
         _          -> case [ (c,buf2) | (p,c) <- silly_escape_chars,
                                        Just buf2 <- [prefixMatch buf p] ] of
-                           (c,buf2):_ -> cont c buf2
+                           (c,buf2):_ -> cont (ord c) buf2
                             [] -> charError buf'
  
-after_charnum cont i buf 
-  = let int = fromInteger i in
-    if i >= 0 && i <= 255 
-       then cont (chr int) buf
+after_charnum cont i buf
+  = if i >= 0 && i <= 0x10FFFF
+       then cont (fromInteger i) buf
         else charError buf
  
  readNum cont buf is_digit base conv = read buf 0
@@ -776,46 +843,44 @@ silly_escape_chars = [
  lex_demand cont buf = 
   case read_em [] buf of { (ls,buf') -> 
   case currentChar# buf' of
-   'B'# -> cont (ITstrict (ls, True )) (incLexeme buf')
-   _    -> cont (ITstrict (ls, False)) buf'
+   'b'# -> cont (ITstrict (StrictSig (mkTopDmdType ls BotRes))) (incLexeme buf')
+   'm'# -> cont (ITstrict (StrictSig (mkTopDmdType ls RetCPR))) (incLexeme buf')
+   _    -> cont (ITstrict (StrictSig (mkTopDmdType ls TopRes))) buf'
   }
   where
-   -- code snatched from Demand.lhs
    read_em acc buf = 
     case currentChar# buf of
-    'L'# -> read_em (WwLazy False : acc) (stepOn buf)
-    'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
-    'S'# -> read_em (WwStrict     : acc) (stepOn buf)
-    'P'# -> read_em (WwPrim       : acc) (stepOn buf)
-    'E'# -> read_em (WwEnum       : acc) (stepOn buf)
-    ')'# -> (reverse acc, stepOn buf)
-    'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
-    'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
-    'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
-    'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
-    _    -> (reverse acc, buf)
+    'T'# -> read_em (Top     : acc) (stepOn buf)
+    'L'# -> read_em (lazyDmd : acc) (stepOn buf)
+    'A'# -> read_em (Abs     : acc) (stepOn buf)
+    'V'# -> read_em (evalDmd : acc) (stepOn buf)       -- Temporary, until
+                                                       -- we've recompiled prelude etc
+    'C'# -> do_unary Call  acc (stepOnBy# buf 2#)      -- Skip 'C('
  
-  do_unpack new_or_data wrapper_unpacks acc buf
-   = case read_em [] buf of
-      (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
+    'U'# -> do_seq1 Eval        acc (stepOnBy# buf 1#)
+    'D'# -> do_seq1 Defer       acc (stepOnBy# buf 1#)
+    'S'# -> do_seq1 (Box . Eval) acc (stepOnBy# buf 1#)
  
-lex_cpr cont buf = 
- case read_em [] buf of { (cpr_inf,buf') -> 
-   ASSERT ( null (tail cpr_inf) )
-   cont (ITcprinfo $ head cpr_inf) buf'
- }
- where
-   -- code snatched from lex_demand above
-  read_em acc buf = 
-   case currentChar# buf of
-    '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
-    '('# -> do_unpack acc (stepOn buf)
-    ')'# -> (reverse acc, stepOn buf)
      _    -> (reverse acc, buf)
  
-  do_unpack acc buf
-   = case read_em [] buf of
-      (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
+  do_seq1 fn acc buf
+    = case currentChar# buf of
+       '('# -> do_seq2 fn acc (stepOnBy# buf 1#)
+       _    -> read_em (fn (Poly Abs) : acc) buf
+
+  do_seq2 fn acc buf
+    = case read_em [] buf of { (dmds, buf) -> 
+      case currentChar# buf of
+       ')'# -> read_em (fn (Prod dmds) : acc)
+                       (stepOn buf) 
+       '*'# -> ASSERT( length dmds == 1 )
+               read_em (fn (Poly (head dmds)) : acc)
+                       (stepOnBy# buf 2#)      -- Skip '*)'
+      }
+       
+  do_unary fn acc buf
+    = case read_em [] buf of
+        ([dmd], rest) -> read_em (fn dmd : acc) (stepOn rest)  -- Skip ')'
  
  ------------------
  lex_scc cont buf =
@@ -880,43 +945,17 @@ lex_cstring cont buf =
                    (mergeLexemes buf buf')
     Nothing   -> lexError "unterminated ``" buf
  
-------------------------------------------------------------------------------
--- Character Classes
-
-is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
-
-{-# INLINE is_ctype #-}
-#if __GLASGOW_HASKELL__ >= 303
-is_ctype :: Word8 -> Char# -> Bool
-is_ctype mask = \c ->
-   (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
-#else
-is_ctype :: Int -> Char# -> Bool
-is_ctype (I# mask) = \c ->
-    let (A# ctype) = ``char_types'' :: Addr
-       flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
-    in
-       (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
-#endif
-
-is_ident  = is_ctype 1
-is_symbol = is_ctype 2
-is_any    = is_ctype 4
-is_space  = is_ctype 8
-is_lower  = is_ctype 16
-is_upper  = is_ctype 32
-is_digit  = is_ctype 64
-
  -----------------------------------------------------------------------------
  -- identifiers, symbols etc.
  
-lex_ip cont buf =
+lex_ip ip_constr cont buf =
   case expandWhile# is_ident buf of
-   buf' -> cont (ITipvarid lexeme) buf'
-          where lexeme = lexemeToFastString buf'
+   buf' -> cont (ip_constr (tailFS lexeme)) buf'
+       where lexeme = lexemeToFastString buf'
  
  lex_id cont glaexts buf =
- case expandWhile# is_ident buf of { buf1 -> 
+ let buf1 = expandWhile# is_ident buf in
+ seq buf1 $
  
   case (if flag glaexts 
         then expandWhile# (eqChar# '#'#) buf1 -- slurp trailing hashes
@@ -929,7 +968,7 @@ lex_id cont glaexts buf =
                           cont kwd_token buf';
         Nothing        -> 
  
- let var_token = cont (mk_var_token lexeme) buf' in
+ let var_token = cont (ITvarid lexeme) buf' in
  
   if not (flag glaexts)
     then var_token
@@ -939,9 +978,10 @@ lex_id cont glaexts buf =
         Just kwd_token -> cont kwd_token buf';
         Nothing        -> var_token
  
- }}}}
+ }}}
  
  lex_sym cont buf =
+ -- trace "lex_sym" $
   case expandWhile# is_symbol buf of
     buf' -> case lookupUFM haskellKeySymsFM lexeme of {
                 Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
@@ -952,22 +992,36 @@ lex_sym cont buf =
         where lexeme = lexemeToFastString buf'
  
  
-lex_con cont glaexts buf = 
- case expandWhile# is_ident buf          of { buf1 ->
- case slurp_trailing_hashes buf1 glaexts of { buf' ->
+-- lex_con recursively collects components of a qualified identifer.
+-- The argument buf is the StringBuffer representing the lexeme
+-- identified so far, where the next character is upper-case.
  
- case currentChar# buf' of
-     '.'# -> munch
+lex_con cont glaexts buf =
+ -- trace ("con: "{-++unpackFS lexeme-}) $
+ let empty_buf = stepOverLexeme buf in
+ case expandWhile# is_ident empty_buf    of { buf1 ->
+ case slurp_trailing_hashes buf1 glaexts of { con_buf ->
+
+ let all_buf = mergeLexemes buf con_buf
+     
+     con_lexeme = lexemeToFastString con_buf
+     mod_lexeme = lexemeToFastString (decLexeme buf)
+     all_lexeme = lexemeToFastString all_buf
+
+     just_a_conid
+       | emptyLexeme buf = cont (ITconid con_lexeme)               all_buf
+       | otherwise       = cont (ITqconid (mod_lexeme,con_lexeme)) all_buf
+ in
+
+ case currentChar# all_buf of
+     '.'# -> maybe_qualified cont glaexts all_lexeme 
+               (incLexeme all_buf) just_a_conid
       _    -> just_a_conid
- 
-   where
-    just_a_conid = --trace ("con: "++unpackFS lexeme) $
-                  cont (ITconid lexeme) buf'
-    lexeme = lexemeToFastString buf'
-    munch = lex_qid cont glaexts lexeme (incLexeme buf') just_a_conid
- }}
-
-lex_qid cont glaexts mod buf just_a_conid =
+  }}
+
+
+maybe_qualified cont glaexts mod buf just_a_conid =
+ -- trace ("qid: "{-++unpackFS lexeme-}) $
   case currentChar# buf of
    '['# ->      -- Special case for []
      case lookAhead# buf 1# of
@@ -986,15 +1040,21 @@ lex_qid cont glaexts mod buf just_a_conid =
       _    -> just_a_conid
  
    '-'# -> case lookAhead# buf 1# of
-            '>'# -> cont (ITqconid (mod,SLIT("->"))) (setCurrentPos# buf 2#)
+            '>'# -> cont (ITqconid (mod,SLIT("(->)"))) (setCurrentPos# buf 2#)
              _    -> lex_id3 cont glaexts mod buf just_a_conid
+
    _    -> lex_id3 cont glaexts mod buf just_a_conid
  
+
  lex_id3 cont glaexts mod buf just_a_conid
+  | is_upper (currentChar# buf) =
+     lex_con cont glaexts buf
+
    | is_symbol (currentChar# buf) =
       let 
         start_new_lexeme = stepOverLexeme buf
       in
+     -- trace ("lex_id31 "{-++unpackFS lexeme-}) $
       case expandWhile# is_symbol start_new_lexeme of { buf' ->
       let
         lexeme  = lexemeToFastString buf'
@@ -1009,6 +1069,7 @@ lex_id3 cont glaexts mod buf just_a_conid
       let 
         start_new_lexeme = stepOverLexeme buf
       in
+     -- trace ("lex_id32 "{-++unpackFS lexeme-}) $
       case expandWhile# is_ident start_new_lexeme of { buf1 ->
       if emptyLexeme buf1 
             then just_a_conid
@@ -1017,17 +1078,18 @@ lex_id3 cont glaexts mod buf just_a_conid
       case slurp_trailing_hashes buf1 glaexts of { buf' ->
  
       let
-      lexeme  = lexemeToFastString buf'
-      new_buf = mergeLexemes buf buf'
+      lexeme     = lexemeToFastString buf'
+      new_buf     = mergeLexemes buf buf'
        is_a_qvarid = cont (mk_qvar_token mod lexeme) new_buf
       in
       case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
-           Just kwd_token -> just_a_conid; -- avoid M.where etc.
-           Nothing        -> is_a_qvarid
-       -- TODO: special ids (as, qualified, hiding) shouldn't be
-       -- recognised as keywords here.  ie.  M.as is a qualified varid.
-     }}}
+           Nothing          -> is_a_qvarid ;
  
+           Just kwd_token | isSpecial kwd_token   -- special ids (as, qualified, hiding) shouldn't be
+                          -> is_a_qvarid          --  recognised as keywords here.
+                          | otherwise
+                          -> just_a_conid         -- avoid M.where etc.
+     }}}
  
  slurp_trailing_hashes buf glaexts
    | flag glaexts = expandWhile# (`eqChar#` '#'#) buf
@@ -1036,17 +1098,15 @@ slurp_trailing_hashes buf glaexts
  
  mk_var_token pk_str
    | is_upper f         = ITconid pk_str
-       -- _[A-Z] is treated as a constructor in interface files.
-  | f `eqChar#` '_'# && not (_NULL_ tl) 
-       && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
    | is_ident f         = ITvarid pk_str
    | f `eqChar#` ':'#   = ITconsym pk_str
    | otherwise          = ITvarsym pk_str
    where
        (C# f) = _HEAD_ pk_str
-      tl     = _TAIL_ pk_str
+      -- tl     = _TAIL_ pk_str
  
  mk_qvar_token m token =
+-- trace ("mk_qvar ") $ 
   case mk_var_token token of
     ITconid n  -> ITqconid  (m,n)
     ITvarid n  -> ITqvarid  (m,n)
@@ -1065,7 +1125,7 @@ lex_tuple cont mod buf back_off =
     go n buf =
      case currentChar# buf of
        ','# -> go (n+1) (stepOn buf)
-      ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
+      ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Boxed n))) (stepOn buf)
        _    -> back_off
  
  lex_ubx_tuple cont mod buf back_off =
@@ -1075,46 +1135,60 @@ lex_ubx_tuple cont mod buf back_off =
      case currentChar# buf of
        ','# -> go (n+1) (stepOn buf)
        '#'# -> case lookAhead# buf 1# of
-               ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
+               ')'# -> cont (ITqconid (mod, snd (mkTupNameStr Unboxed n)))
                                  (stepOnBy# buf 2#)
                 _    -> back_off
        _    -> back_off
  \end{code}
  
  -----------------------------------------------------------------------------
-doDiscard rips along really fast, looking for a '#-}', 
+'lexPragma' rips along really fast, looking for a '##-}', 
  indicating the end of the pragma we're skipping
  
  \begin{code}
-doDiscard inStr buf =
+lexPragma cont contf inStr buf =
   case currentChar# buf of
-   '#'# | not inStr ->
+   '#'# | inStr ==# 0# ->
         case lookAhead# buf 1# of { '#'# -> 
         case lookAhead# buf 2# of { '-'# ->
         case lookAhead# buf 3# of { '}'# -> 
-          (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
-       _    -> doDiscard inStr (incLexeme buf) };
-        _    -> doDiscard inStr (incLexeme buf) };
-        _    -> doDiscard inStr (incLexeme buf) }
+           contf cont (lexemeToBuffer buf)
+                     (stepOverLexeme (setCurrentPos# buf 4#));
+       _    -> lexPragma cont contf inStr (incLexeme buf) };
+        _    -> lexPragma cont contf inStr (incLexeme buf) };
+        _    -> lexPragma cont contf inStr (incLexeme buf) }
+
     '"'# ->
         let
          odd_slashes buf flg i# =
            case lookAhead# buf i# of
            '\\'# -> odd_slashes buf (not flg) (i# -# 1#)
            _     -> flg
+
+       not_inStr = if inStr ==# 0# then 1# else 0#
         in
         case lookAhead# buf (negateInt# 1#) of --backwards, actually
          '\\'# -> -- escaping something..
-          if odd_slashes buf True (negateInt# 2#) then
-              -- odd number of slashes, " is escaped.
-             doDiscard inStr (incLexeme buf)
-          else
-              -- even number of slashes, \ is escaped.
-             doDiscard (not inStr) (incLexeme buf)
-         _ -> case inStr of -- forced to avoid build-up
-              True  -> doDiscard False (incLexeme buf)
-               False -> doDiscard True  (incLexeme buf)
-   _ -> doDiscard inStr (incLexeme buf)
+          if odd_slashes buf True (negateInt# 2#) 
+               then  -- odd number of slashes, " is escaped.
+                     lexPragma cont contf inStr (incLexeme buf)
+               else  -- even number of slashes, \ is escaped.
+                     lexPragma cont contf not_inStr (incLexeme buf)
+         _ -> lexPragma cont contf not_inStr (incLexeme buf)
+
+   '\''# | inStr ==# 0# ->
+       case lookAhead# buf 1# of { '"'# ->
+       case lookAhead# buf 2# of { '\''# ->
+          lexPragma cont contf inStr (setCurrentPos# buf 3#);
+       _ -> lexPragma cont contf inStr (incLexeme buf) };
+       _ -> lexPragma cont contf inStr (incLexeme buf) }
+
+    -- a sign that the input is ill-formed, since pragmas are
+    -- assumed to always be properly closed (in .hi files).
+   '\NUL'# -> trace "lexPragma: unexpected end-of-file" $ 
+             cont (ITunknown "\NUL") buf
+
+   _ -> lexPragma cont contf inStr (incLexeme buf)
  
  \end{code}
  
@@ -1173,12 +1247,16 @@ lexError str buf s@PState{ loc = loc }
  getSrcLocP :: P SrcLoc
  getSrcLocP buf s@(PState{ loc = loc }) = POk s loc
  
+-- use a temporary SrcLoc for the duration of the argument
+setSrcLocP :: SrcLoc -> P a -> P a
+setSrcLocP new_loc p buf s = 
+  case p buf s{ loc=new_loc } of
+      POk _ a   -> POk s a
+      PFailed e -> PFailed e
+  
  getSrcFile :: P FAST_STRING
  getSrcFile buf s@(PState{ loc = loc }) = POk s (srcLocFile loc)
  
-getContext :: P [LayoutContext]
-getContext buf s@(PState{ context = ctx }) = POk s ctx
-
  pushContext :: LayoutContext -> P ()
  pushContext ctxt buf s@(PState{ context = ctx }) = POk s{context = ctxt:ctx} ()
  
@@ -1250,10 +1328,10 @@ layoutOff buf s@(PState{ context = ctx }) =
      POk s{ context = NoLayout:ctx } ()
  
  popContext :: P ()
-popContext = \ buf s@(PState{ context = ctx }) ->
+popContext = \ buf s@(PState{ context = ctx, loc = loc }) ->
    case ctx of
         (_:tl) -> POk s{ context = tl } ()
-       []    -> panic "Lex.popContext: empty context"
+       []     -> PFailed (srcParseErr buf loc)
  
  {- 
   Note that if the name of the file we're processing ends
@@ -1295,4 +1373,17 @@ ifaceVersionErr hi_vers l toks
          Nothing -> ptext SLIT("pre ghc-3.02 version")
         Just v  -> ptext SLIT("version") <+> integer v
  
+-----------------------------------------------------------------------------
+
+srcParseErr :: StringBuffer -> SrcLoc -> Message
+srcParseErr s l
+  = hcat [ppr l, 
+         if null token 
+            then ptext SLIT(": parse error (possibly incorrect indentation)")
+            else hcat [ptext SLIT(": parse error on input "),
+                       char '`', text token, char '\'']
+    ]
+  where 
+       token = lexemeToString s
+
  \end{code}