[project @ 1999-01-18 19:04:55 by sof]

[ghc-hetmet.git] / ghc / compiler / reader / Lex.lhs
diff --git a/ghc/compiler/reader/Lex.lhs b/ghc/compiler/reader/Lex.lhs

index 23cc723..11d5774 100644 (file)
--- a/ghc/compiler/reader/Lex.lhs
+++ b/ghc/compiler/reader/Lex.lhs
@@ -1,149 +1,86 @@
  %
-% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
  %
  \section[Lexical analysis]{Lexical analysis}
  
+--------------------------------------------------------
+[Jan 98]
+There's a known bug in here:
+
+       If an interface file ends prematurely, Lex tries to
+       do headFS of an empty FastString.
+
+An example that provokes the error is
+
+       f _:_ _forall_ [a] <<<END OF FILE>>>
+--------------------------------------------------------
+
  \begin{code}
-#include "HsVersions.h"
+{-# OPTIONS -#include "ctypes.h" #-}
  
  module Lex (
  
-       isLexCon, isLexVar, isLexId, isLexSym,
-       isLexConId, isLexConSym, isLexVarId, isLexVarSym,
-       mkTupNameStr, ifaceParseErr,
+       ifaceParseErr,
  
         -- Monad for parser
-       IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError,
+       IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
+       checkVersion, 
+       happyError,
         StringBuffer
  
      ) where
  
+#include "HsVersions.h"
  
-IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper,isLower, isSpace, ord))
-IMPORT_DELOOPER(Ubiq)
-IMPORT_DELOOPER(IdLoop)    -- get the CostCentre type&constructors from here
+import Char            ( ord, isSpace )
+import List             ( isSuffixOf )
  
-import CmdLineOpts     ( opt_IgnoreIfacePragmas )
+import CostCentre      -- Pretty much all of it
+import IdInfo          ( InlinePragInfo(..) )
+import Name            ( isLowerISO, isUpperISO, mkModule )
+
+import PrelMods                ( mkTupNameStr, mkUbxTupNameStr )
+import CmdLineOpts     ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  import Demand          ( Demand(..) {- instance Read -} )
  import UniqFM           ( UniqFM, listToUFM, lookupUFM)
---import FiniteMap     ( FiniteMap, listToFM, lookupFM )
-#if __GLASGOW_HASKELL__ >= 202
-import Maybes          ( MaybeErr(..) )
-#else
-import Maybes          ( Maybe(..), MaybeErr(..) )
-#endif
-import Pretty
-
+import BasicTypes      ( NewOrData(..), IfaceFlavour(..) )
+import SrcLoc          ( SrcLoc, incSrcLine, srcLocFile )
  
-
-import ErrUtils                ( Error(..) )
-import Outputable      ( Outputable(..) )
-import PprStyle                ( PprStyle(..) )
-import Util            ( nOfThem, panic )
+import Maybes          ( MaybeErr(..) )
+import ErrUtils                ( Message )
+import Outputable
  
  import FastString
  import StringBuffer
-
-#if __GLASGOW_HASKELL__ <= 201
-import PreludeGlaST 
-#else
  import GlaExts
-#endif
-\end{code}
-
-%************************************************************************
-%*                                                                     *
-\subsection{Lexical categories}
-%*                                                                     *
-%************************************************************************
-
-These functions test strings to see if they fit the lexical categories
-defined in the Haskell report.  Normally applied as in e.g. @isCon
-(getLocalName foo)@.
-
-\begin{code}
-isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
- isLexVarId, isLexVarSym  :: FAST_STRING -> Bool
-
-isLexCon cs = isLexConId  cs || isLexConSym cs
-isLexVar cs = isLexVarId  cs || isLexVarSym cs
-
-isLexId  cs = isLexConId  cs || isLexVarId  cs
-isLexSym cs = isLexConSym cs || isLexVarSym cs
-
--------------
-
-isLexConId cs
-  | _NULL_ cs       = False
-  | cs == SLIT("[]") = True
-  | c  == '('       = True     -- (), (,), (,,), ...
-  | otherwise       = isUpper c || isUpperISO c
-  where                                        
-    c = _HEAD_ cs
-
-isLexVarId cs
-  | _NULL_ cs   = False
-  | otherwise    = isLower c || isLowerISO c
-  where
-    c = _HEAD_ cs
-
-isLexConSym cs
-  | _NULL_ cs  = False
-  | otherwise  = c  == ':'
-              || cs == SLIT("->")
-  where
-    c = _HEAD_ cs
-
-isLexVarSym cs
-  | _NULL_ cs = False
-  | otherwise = isSymbolASCII c
-            || isSymbolISO c
-  where
-    c = _HEAD_ cs
-
--------------
-isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
-isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
-isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
---0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
-isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
---0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
-\end{code}
+import ST              ( runST )
  
+#if __GLASGOW_HASKELL__ >= 303
+import Bits
+import Word
+#endif
  
-%************************************************************************
-%*                                                                     *
-\subsection{Tuple strings -- ugh!}
-%*                                                                     *
-%************************************************************************
+import Addr
  
-\begin{code}
-mkTupNameStr 0 = SLIT("()")
-mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
-mkTupNameStr 2 = _PK_ "(,)"   -- not strictly necessary
-mkTupNameStr 3 = _PK_ "(,,)"  -- ditto
-mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
-mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
+import PrelRead                ( readRational__ ) -- Glasgow non-std
  \end{code}
  
-
-
  %************************************************************************
  %*                                                                     *
  \subsection{Data types}
  %*                                                                     *
  %************************************************************************
  
-The token data type, fairly un-interesting except from two constructors,
-@ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
-strictness, unfolding etc) and types for id decls. 
+The token data type, fairly un-interesting except from one
+constructor, @ITidinfo@, which is used to lazily lex id info (arity,
+strictness, unfolding etc).
  
  The Idea/Observation here is that the renamer needs to scan through
  all of an interface file before it can continue. But only a fraction
  of the information contained in the file turns out to be useful, so
  delaying as much as possible of the scanning and parsing of an
  interface file Makes Sense (Heap profiles of the compiler 
-show at a reduction in heap usage by at least a factor of two,
+show a reduction in heap usage by at least a factor of two,
  post-renamer). 
  
  Hence, the interface file lexer spots when value declarations are
@@ -158,65 +95,99 @@ Laziness, you know it makes sense :-)
  
  \begin{code}
  data IfaceToken
-  = ITinterface                -- keywords
-  | ITusages
-  | ITversions
-  | ITexports
-  | ITinstance_modules
-  | ITinstances
-  | ITfixities
-  | ITdeclarations
-  | ITpragmas
+  = ITcase                     -- Haskell keywords
+  | ITclass
    | ITdata
-  | ITtype
-  | ITnewtype
+  | ITdefault
    | ITderiving
-  | ITclass
-  | ITwhere
-  | ITinstance
+  | ITdo
+  | ITelse
+  | ITif
+  | ITimport
+  | ITin
+  | ITinfix
    | ITinfixl
    | ITinfixr
-  | ITinfix
+  | ITinstance
+  | ITlet
+  | ITmodule
+  | ITnewtype
+  | ITof
+  | ITthen
+  | ITtype
+  | ITwhere
+  | ITas
+  | ITqualified
+  | IThiding
+
+  | ITinterface                        -- GHC-extension keywords
+  | ITexport
+  | ITinstimport
    | ITforall
-  | ITbang             -- magic symbols
-  | ITvbar
+  | ITletrec 
+  | ITcoerce
+  | ITinline
+  | ITccall (Bool,Bool,Bool)   -- (is_dyn, is_casm, may_gc)
+  | ITdefaultbranch
+  | ITbottom
+  | ITinteger_lit 
+  | ITfloat_lit
+  | ITrational_lit
+  | ITaddr_lit
+  | ITlit_lit
+  | ITstring_lit
+  | ITtypeapp
+  | ITarity 
+  | ITspecialise
+  | ITnocaf
+  | ITunfold InlinePragInfo
+  | ITstrict ([Demand], Bool)
+  | ITscc CostCentre
+
+  | ITdotdot                   -- reserved symbols
    | ITdcolon
-  | ITcomma
-  | ITdarrow
-  | ITdotdot
    | ITequal
-  | ITocurly
-  | ITobrack
-  | IToparen
+  | ITlam
+  | ITvbar
+  | ITlarrow
    | ITrarrow
+  | ITat
+  | ITtilde
+  | ITdarrow
+  | ITminus
+  | ITbang
+
+  | ITbiglam                   -- GHC-extension symbols
+
+  | ITocurly                   -- special symbols
    | ITccurly
+  | ITobrack
    | ITcbrack
+  | IToparen
    | ITcparen
+  | IToubxparen
+  | ITcubxparen
    | ITsemi
-  | ITvarid   FAST_STRING
+  | ITcomma
+
+  | ITvarid   FAST_STRING      -- identifiers
    | ITconid   FAST_STRING
    | ITvarsym  FAST_STRING
    | ITconsym  FAST_STRING
-  | ITqvarid  (FAST_STRING,FAST_STRING)
-  | ITqconid  (FAST_STRING,FAST_STRING)
-  | ITqvarsym (FAST_STRING,FAST_STRING)
-  | ITqconsym (FAST_STRING,FAST_STRING)
-
-  | ITidinfo [IfaceToken]  -- lazily return the stream of tokens for
-                          -- the info attached to an id.
-  | ITtysig [IfaceToken]   -- lazily return the stream of tokens for
-                          -- the info attached to an id.
-       -- Stuff for reading unfoldings
-  | ITarity | ITstrict | ITunfold
-  | ITdemand [Demand] | ITbottom
-  | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
-  | ITcoerce_in | ITcoerce_out | ITatsign
-  | ITccall (Bool,Bool)                -- (is_casm, may_gc)
-  | ITscc CostCentre 
-  | ITchar Char | ITstring FAST_STRING
-  | ITinteger Integer | ITdouble Double
-  | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
+  | ITqvarid  (FAST_STRING,FAST_STRING,IfaceFlavour)
+  | ITqconid  (FAST_STRING,FAST_STRING,IfaceFlavour)
+  | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
+  | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
+
+  | ITpragma StringBuffer
+
+  | ITchar Char 
+  | ITstring FAST_STRING
+  | ITinteger Integer 
+  | ITrational Rational
+
    | ITunknown String           -- Used when the lexer can't make sense of it
+  | ITeof                      -- end of file token
    deriving Text -- debugging
  
  instance Text CostCentre -- cheat!
@@ -230,58 +201,65 @@ instance Text CostCentre -- cheat!
  %************************************************************************
  
  \begin{code}
-lexIface :: StringBuffer -> [IfaceToken]
-lexIface buf =
+lexIface :: (IfaceToken -> IfM a) -> IfM a
+lexIface cont buf =
   _scc_ "Lexer" 
  -- if bufferExhausted buf then
  --  []
  -- else
---  _trace ("Lexer: "++[C# (currentChar# buf)]) $
+--  trace ("Lexer: '"++[C# (currentChar# buf)]++"'") $
    case currentChar# buf of
        -- whitespace and comments, ignore.
-    ' '#  -> lexIface (stepOn buf)
-    '\t'# -> lexIface (stepOn buf)
-    '\n'# -> lexIface (stepOn buf)
+    ' '#  -> lexIface cont (stepOn buf)
+    '\t'# -> lexIface cont (stepOn buf)
+    '\n'# -> \ loc -> lexIface cont (stepOn buf) (incSrcLine loc)
  
  -- Numbers and comments
      '-'#  ->
        case lookAhead# buf 1# of
-        '-'# -> lex_comment (stepOnBy# buf 2#)
+--        '-'# -> lex_comment cont (stepOnBy# buf 2#)
          c    -> 
-         if isDigit (C# c)
-          then lex_num (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
-         else lex_id buf
-
--- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
---    '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
-
+         if is_digit c
+          then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
+         else lex_sym cont buf
+
+    '{'# ->                            -- look for "{-##" special iface pragma
+       case lookAhead# buf 1# of
+          '-'# -> case lookAhead# buf 2# of
+                   '#'# -> case lookAhead# buf 3# of
+                               '#'# ->  
+                                  let (lexeme, buf') 
+                                         = doDiscard False (stepOnBy# buf 4#) in
+                                  cont (ITpragma lexeme) buf'
+                               _ ->  lex_nested_comment (lexIface cont) buf
+                   _    -> cont ITocurly (stepOn buf)
+                           -- lex_nested_comment (lexIface cont) buf
+          _ -> cont ITocurly (stepOn buf)
+
+    -- special symbols ----------------------------------------------------
      '('# -> 
          case prefixMatch (stepOn buf) "..)" of
-          Just buf' ->  ITdotdot : lexIface (stepOverLexeme buf')
+          Just buf' ->  cont ITdotdot (stepOverLexeme buf')
             Nothing ->
              case lookAhead# buf 1# of
-              ','# -> lex_tuple Nothing  (stepOnBy# buf 2#)
-              ')'# -> ITconid SLIT("()") : lexIface (stepOnBy# buf 2#)
-             _    -> IToparen : lexIface (stepOn buf)
-
-    '{'# -> ITocurly : lexIface (stepOn buf)
-    '}'# -> ITccurly : lexIface (stepOn buf)
-    ')'# -> ITcparen : lexIface (stepOn buf)
-    '['# -> 
-      case lookAhead# buf 1# of
-       ']'# -> ITconid SLIT("[]") : lexIface (stepOnBy# buf 2#)
-        _    -> ITobrack : lexIface (stepOn buf)
-    ']'# -> ITcbrack    : lexIface (stepOn buf)
-    ','# -> ITcomma     : lexIface (stepOn buf)
-    ':'# -> case lookAhead# buf 1# of
-              ':'# -> ITdcolon  : lexIface (stepOnBy# buf 2#)
-              _    -> lex_id (incLexeme buf)
-    ';'#  -> ITsemi    : lexIface (stepOn buf)
-    '\"'# -> case untilEndOfString# (stepOn buf) of
+             '#'# -> cont IToubxparen (stepOnBy# buf 2#)
+             _    -> cont IToparen (stepOn buf)
+    ')'# -> cont ITcparen (stepOn buf)
+    '}'# -> cont ITccurly (stepOn buf)
+    '#'# -> case lookAhead# buf 1# of
+               ')'# -> cont ITcubxparen (stepOnBy# buf 2#)
+               _    -> lex_sym cont (incLexeme buf)
+    '['# -> cont ITobrack (stepOn buf)
+    ']'# -> cont ITcbrack (stepOn buf)
+    ','# -> cont ITcomma  (stepOn buf)
+    ';'# -> cont ITsemi   (stepOn buf)
+
+    -- strings/characters -------------------------------------------------
+    '\"'#{-"-} -> case untilEndOfString# (stepOn buf) of
               buf' ->
                   -- the string literal does *not* include the dquotes
                 case lexemeToFastString buf' of
-                v -> ITstring v : lexIface (stepOn (stepOverLexeme buf'))
+                v -> cont (ITstring v) (stepOn (stepOverLexeme buf'))
  
      '\''# -> --
              -- untilEndOfChar# extends the current lexeme until
@@ -292,50 +270,75 @@ lexIface buf =
              --
              case untilEndOfChar# (stepOn buf) of
                buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
-                       [  (ch, rest)] -> ITchar ch : lexIface (stepOverLexeme (incLexeme buf'))
+                       [  (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
+
+    -- strictness pragma and __scc treated specially.
+    '_'# ->
+        case lookAhead# buf 1# of
+          '_'# -> case lookAhead# buf 2# of
+                   'S'# -> 
+                       lex_demand cont (stepOnUntil (not . isSpace) 
+                                       (stepOnBy# buf 3#)) -- past __S
+                   's'# -> 
+                       case prefixMatch (stepOnBy# buf 3#) "cc" of
+                              Just buf' -> lex_scc cont 
+                                               (stepOnUntil (not . isSpace) 
+                                               (stepOverLexeme buf'))
+                              Nothing   -> lex_id cont buf
+                   _ -> lex_id cont buf
+          _    -> lex_id cont buf
  
  -- ``thingy'' form for casm
      '`'# ->
             case lookAhead# buf 1# of
-             '`'# -> lex_cstring (stepOnBy# buf 2#) -- remove the `` and go.
-             _    -> lex_id (incLexeme buf)         -- add ` to lexeme and assume
+             '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `s and go.
+             _    -> lex_sym cont (incLexeme buf)         -- add ` to lexeme and assume
                                                      -- scanning an id of some sort.
--- Keywords
-    '_'# ->
-        case lookAhead# buf 1# of
-          'S'# -> case lookAhead# buf 2# of
-                   '_'# -> ITstrict : 
-                           lex_demand (stepOnUntil (not . isSpace) 
-                                                   (stepOnBy# buf 3#)) -- past _S_
-          's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
-                    Just buf' -> lex_scc (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
-                    Nothing   -> lex_keyword (stepOnBy# buf 1#) -- drop the '_' and assume
-                                                                -- it is a keyword.
-          _    -> lex_keyword (stepOn buf)
  
      '\NUL'# ->
             if bufferExhausted (stepOn buf) then
-              []
+              cont ITeof buf
             else
-              lex_id buf
-    c ->
-       if isDigit (C# c) then
-          lex_num (id) (ord# c -# ord# '0'#) (incLexeme buf)
-        else
-          lex_id buf
+              trace "lexIface: misplaced NUL?" $ 
+              cont (ITunknown "\NUL") (stepOn buf)
+
+    c | is_digit  c -> lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
+      | is_symbol c -> lex_sym cont buf
+      | is_upper  c -> lex_con cont buf
+      | is_ident  c -> lex_id  cont buf
+
  --  where
-lex_comment buf = 
+lex_comment cont buf = 
  --   _trace ("comment: "++[C# (currentChar# buf)]) $
-   case untilChar# buf '\n'# of {buf' -> lexIface (stepOverLexeme buf')}
+   case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
  
-------------------
-lex_demand buf = 
--- _trace ("demand: "++[C# (currentChar# buf)]) $
- case read_em [] buf of { (ls,buf') -> ITdemand ls : lexIface (stepOverLexeme buf')}
+-------------------------------------------------------------------------------
+
+lex_nested_comment cont buf =
+  case currentChar# buf of
+       '-'# -> case lookAhead# buf 1# of
+                '}'# -> cont (stepOnBy# buf 2#)
+                _    -> lex_nested_comment cont (stepOn buf)
+
+       '{'# -> case lookAhead# buf 1# of
+                '-'# -> lex_nested_comment
+                               (lex_nested_comment cont) 
+                               (stepOnBy# buf 2#)
+                _    -> lex_nested_comment cont (stepOn buf)
+
+       _   -> lex_nested_comment cont (stepOn buf)
+
+-------------------------------------------------------------------------------
+
+lex_demand cont buf = 
+ case read_em [] buf of { (ls,buf') -> 
+ case currentChar# buf' of
+   'B'# -> cont (ITstrict (ls, True )) (stepOverLexeme (stepOn buf'))
+   _    -> cont (ITstrict (ls, False)) (stepOverLexeme buf')
+ }
   where
     -- code snatched from Demand.lhs
    read_em acc buf = 
---   _trace ("read_em: "++[C# (currentChar# buf)]) $
     case currentChar# buf of
      'L'# -> read_em (WwLazy False : acc) (stepOn buf)
      'A'# -> read_em (WwLazy True  : acc) (stepOn buf)
@@ -343,69 +346,72 @@ lex_demand buf =
      'P'# -> read_em (WwPrim       : acc) (stepOn buf)
      'E'# -> read_em (WwEnum       : acc) (stepOn buf)
      ')'# -> (reverse acc, stepOn buf)
-    'U'# -> do_unpack True  acc (stepOnBy# buf 2#)
-    'u'# -> do_unpack False acc (stepOnBy# buf 2#)
+    'U'# -> do_unpack DataType True  acc (stepOnBy# buf 2#)
+    'u'# -> do_unpack DataType False acc (stepOnBy# buf 2#)
+    'N'# -> do_unpack NewType True  acc (stepOnBy# buf 2#)
+    'n'# -> do_unpack NewType False acc (stepOnBy# buf 2#)
      _    -> (reverse acc, buf)
  
-  do_unpack wrapper_unpacks acc buf
+  do_unpack new_or_data wrapper_unpacks acc buf
     = case read_em [] buf of
-      (stuff, rest) -> read_em (WwUnpack wrapper_unpacks stuff : acc) rest
+      (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
  
  ------------------
-lex_scc buf =
--- _trace ("scc: "++[C# (currentChar# buf)]) $
+lex_scc cont buf =
   case currentChar# buf of
    '"'# ->
-      -- YUCK^2
-     case prefixMatch (stepOn buf) "NO_CC\"" of
-      Just buf' -> ITscc noCostCentre : lexIface (stepOverLexeme buf')
-      Nothing -> 
-       case prefixMatch (stepOn buf) "CURRENT_CC\"" of
-        Just buf' -> ITscc useCurrentCostCentre : lexIface (stepOverLexeme buf')
-        Nothing   ->
-         case prefixMatch (stepOn buf) "OVERHEAD\"" of
-         Just buf' -> ITscc overheadCostCentre : lexIface (stepOverLexeme buf')
-         Nothing   ->
-          case prefixMatch (stepOn buf) "DONT_CARE\"" of
-           Just buf' -> ITscc dontCareCostCentre : lexIface (stepOverLexeme buf')
-           Nothing   ->
-            case prefixMatch (stepOn buf) "SUBSUMED\"" of
-             Just buf' -> ITscc subsumedCosts : lexIface (stepOverLexeme buf')
+        case prefixMatch (stepOn buf) "CAFs." of
+         Just buf' ->
+          case untilChar# (stepOverLexeme buf') '\"'# of
+           buf'' -> cont (ITscc (mkAllCafsCC (mkModule (lexemeToString buf'')) _NIL_)) 
+                        (stepOn (stepOverLexeme buf''))
+         Nothing ->
+            case prefixMatch (stepOn buf) "DICTs." of
+             Just buf' ->
+              case untilChar# (stepOverLexeme buf') '\"'# of
+               buf'' -> cont (ITscc (mkAllDictsCC (mkModule (lexemeToString buf'')) _NIL_ True)) 
+                        (stepOn (stepOverLexeme buf''))
               Nothing ->
-              case prefixMatch (stepOn buf) "CAFs_in_...\"" of
-               Just buf' -> ITscc preludeCafsCostCentre : lexIface (stepOverLexeme buf')
+             let
+              match_user_cc buf =
+                case untilChar# buf '/'# of
+                 buf' -> 
+                  let mod_name = mkModule (lexemeToString buf') in
+--                       case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
+--                        buf'' -> 
+--                            let grp_name = lexemeToFastString buf'' in
+                   case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
+                    buf'' ->
+                      -- The label may contain arbitrary characters, so it
+                      -- may have been escaped etc., hence we `read' it in to get
+                      -- rid of these meta-chars in the string and then pack it (again.)
+                      -- ToDo: do the same for module name (single quotes allowed in m-names).
+                      -- BTW, the code in this module is totally gruesome..
+                      let upk_label = _UNPK_ (lexemeToFastString buf'') in
+                      case reads ('"':upk_label++"\"") of
+                       ((cc_label,_):_) -> 
+                           let cc_name = _PK_ cc_label in
+                           (mkUserCC cc_name mod_name _NIL_{-grp_name-}, 
+                            stepOn (stepOverLexeme buf''))
+                       _ -> 
+                         trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring") 
+                         (mkUserCC _NIL_ mod_name _NIL_{-grp_name-}, 
+                          stepOn (stepOverLexeme buf''))
+              in
+              case prefixMatch (stepOn buf) "CAF:" of
+               Just buf' ->
+                case match_user_cc (stepOverLexeme buf') of
+                 (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
                 Nothing ->
-                case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
-                 Just buf' ->
-                 case untilChar# (stepOverLexeme buf') '\"'# of
-                  buf'' -> ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_): 
-                           lexIface (stepOn (stepOverLexeme buf''))
-                 Nothing ->
-                  case prefixMatch (stepOn buf) "DICTs_in_...\"" of
-                   Just buf' -> ITscc (preludeDictsCostCentre True) : lexIface (stepOverLexeme buf')
-                   Nothing ->
-                    case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
-                     Just buf' ->
-                     case untilChar# (stepOverLexeme buf') '\"'# of
-                      buf'' -> ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True): 
-                               lexIface (stepOn (stepOverLexeme buf''))
-                     Nothing ->
-                      case prefixMatch (stepOn buf) "CAF:" of
-                       Just buf' ->              
-                       case untilChar# (stepOverLexeme buf') '\"'# of
-                        buf'' -> ITscc (cafifyCC (mkUserCC (lexemeToFastString buf'') _NIL_ _NIL_)): 
-                                 lexIface (stepOn (stepOverLexeme buf''))
-                       Nothing ->
-                       case untilChar# (stepOn buf) '\"'# of
-                          buf' -> ITscc (mkUserCC (lexemeToFastString buf') _NIL_ _NIL_): 
-                                   lexIface (stepOn (stepOverLexeme buf'))
-  c -> ITunknown [C# c] : lexIface (stepOn buf)
+                 case match_user_cc (stepOn buf) of
+                 (cc, buf'') -> cont (ITscc cc) buf''
+  c -> cont (ITunknown [C# c]) (stepOn buf)
  
  
  -----------
-lex_num :: (Int -> Int) -> Int# -> StringBuffer -> [IfaceToken]
-lex_num minus acc# buf =
--- _trace ("lex_num: "++[C# (currentChar# buf)]) $
+lex_num :: (IfaceToken -> IfM a) -> (Int -> Int) -> Int# -> IfM a
+lex_num cont minus acc# buf =
+ --trace ("lex_num: "++[C# (currentChar# buf)]) $
   case scanNumLit (I# acc#) buf of
       (acc',buf') ->
         case currentChar# buf' of
@@ -413,371 +419,318 @@ lex_num minus acc# buf =
               -- this case is not optimised at all, as the
               -- presence of floating point numbers in interface
               -- files is not that common. (ToDo)
-           case expandWhile (isDigit) (incLexeme buf') of
-              buf'' -> -- points to first non digit char
-               case reads (lexemeToString buf'') of
-                 [(v,_)] -> ITdouble v : lexIface (stepOverLexeme buf'')
-         _ -> ITinteger (fromInt (minus acc')) : lexIface (stepOverLexeme buf')
-
---        case reads (lexemeToString buf') of
---          [(i,_)] -> ITinteger i : lexIface (stepOverLexeme buf')
-
-------------
-lex_keyword buf =
--- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
- case currentChar# buf of
-  ':'# -> case lookAhead# buf 1# of
-           '_'# -> -- a binding, type (and other id-info) follows,
-                   -- to make the parser ever so slightly, we push
-                   -- 
-               lex_decl (stepOnBy# buf 2#)
-           v# -> ITunknown (['_',':',C# v#]) : lexIface (stepOnBy# buf 2#)
-  _ ->
-    case expandWhile (is_kwd_char) buf of
-     buf' ->
-      let kw = lexemeToFastString buf' in
---    _trace ("kw: "++lexemeToString buf') $
-      case lookupUFM ifaceKeywordsFM kw of
-       Nothing -> ITunknown (_UNPK_ kw) : -- (minor) sigh 
-                 lexIface (stepOverLexeme buf')
-       Just xx -> xx : lexIface (stepOverLexeme buf')
-
-lex_decl buf =
- case doDiscard False buf of -- spin until ;; is found
-   buf' ->
-      {- _trace (show (lexemeToString buf')) $ -}
-      case currentChar# buf' of
-       '\n'# -> -- newline, no id info.
-          ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) : 
-          lexIface (stepOverLexeme buf')
-       '\r'# -> -- just to be sure for those Win* boxes..
-          ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) : 
-          lexIface (stepOverLexeme buf')
-       '\NUL'# ->
-          ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) : 
-          lexIface (stepOverLexeme buf')
-       c     -> -- run all over the id info
-        case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
-          buf'' -> 
-                   --_trace ((C# c):show (lexemeToString (decLexeme buf')))  $
-                   --_trace (show (lexemeToString (decLexeme buf''))) $
-                   ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))):
-                   let ls = lexIface (stepOverLexeme buf'') in
-                   if opt_IgnoreIfacePragmas then
-                       ls
-                   else
-                       let is = lexIface (lexemeToBuffer (decLexeme buf'')) in
-                       --_trace (show is) $
-                       ITidinfo is : ls
-                   
--- ToDo: hammer!
-is_kwd_char c@(C# c#) = 
- isAlphanum c || -- OLD: c `elem` "_@/\\"
- (case c# of
-   '_'#  -> True
-   '@'#  -> True
-   '/'#  -> True
-   '\\'# -> True
-   _     -> False)
-
-
+           case expandWhile# is_digit (incLexeme buf') of
+              buf2 -> -- points to first non digit char
+               let l = case currentChar# buf2 of
+                         'e'# -> let buf3 = incLexeme buf2 in
+                             case currentChar# buf3 of
+                               '-'# -> expandWhile# is_digit (incLexeme buf3)
+                               _    -> expandWhile# is_digit buf3
+                         _ -> buf2
+               in let v = readRational__ (lexemeToString l) in
+                  cont (ITrational v) (stepOverLexeme l)
+
+         _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
  
  -----------
-lex_cstring buf =
--- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $
+lex_cstring cont buf =
   case expandUntilMatch buf "\'\'" of
-   buf' -> ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))) :
-           lexIface (stepOverLexeme buf')
-       
------------
-lex_tuple module_dot buf =
--- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $
-  go 2 buf
-  where
-   go n buf =
-    case currentChar# buf of
-      ','# -> go (n+1) (stepOn buf)
-      ')'# -> end_lex_id module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
-      _    -> ITunknown ("tuple " ++ show n) : lexIface buf
-
--- Similarly ' itself is ok inside an identifier, but not at the start
-
-id_arr :: _ByteArray Int
-id_arr =
- unsafePerformPrimIO (
-  newCharArray (0,255) `thenPrimIO` \ barr ->
-  let
-   loop 256# = returnPrimIO ()
-   loop i# =
-    if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
-       writeCharArray barr (I# i#) '\1' `seqPrimIO`
-       loop (i# +# 1#)
-    else
-       writeCharArray barr (I# i#) '\0' `seqPrimIO`
-       loop (i# +# 1#)
-  in
-  loop 0#                    `seqPrimIO`
-  unsafeFreezeByteArray barr)
-
-is_id_char (C# c#) = 
- let
-  _ByteArray _ arr# = id_arr
- in
- case ord# (indexCharArray# arr# (ord# c#)) of
-  0# -> False
-  1# -> True
-
---is_id_char c@(C# c#)  = isAlphanum c || is_sym c#
-
-is_sym c#=
- case c# of {
-   ':'# -> True; '_'#  -> True; '\''# -> True; '!'# -> True; 
-   '#'# -> True; '$'#  -> True; ':'#  -> True; '%'# -> True; 
-   '&'# -> True; '*'#  -> True; '+'#  -> True; '.'# -> True; 
-   '/'# -> True; '<'#  -> True; '='#  -> True; '>'# -> True; 
-   '?'# -> True; '\\'# -> True; '^'#  -> True; '|'# -> True; 
-   '-'# -> True; '~'#  -> True; '@'#  -> True; _    -> False }
-
---isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
-
-
-mod_arr :: _ByteArray Int
-mod_arr =
- unsafePerformPrimIO (
-  newCharArray (0,255) `thenPrimIO` \ barr ->
-  let
-   loop 256# = returnPrimIO ()
-   loop i# =
-    if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
-       writeCharArray barr (I# i#) '\1' `seqPrimIO`
-       loop (i# +# 1#)
-    else
-       writeCharArray barr (I# i#) '\0' `seqPrimIO`
-       loop (i# +# 1#)
-  in
-  loop 0#                    `seqPrimIO`
-  unsafeFreezeByteArray barr)
-
-             
-is_mod_char (C# c#) = 
- let
-  _ByteArray _ arr# = mod_arr
- in
- case ord# (indexCharArray# arr# (ord# c#)) of
-  0# -> False
-  1# -> True
-
---isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
-
-{-
-lex_id cs = 
- case _scc_ "lex_id.span" my_span' (is_mod_char) cs of
-   (xs, len, cs') ->
-    case cs' of
-     [] -> case xs of
-           [] -> lex_id2 Nothing cs
-           _  -> lex_id3 Nothing len xs cs
-
-     '.':cs'' ->
-        case xs of
-         [] -> lex_id2 Nothing cs
-         _  ->
-           let
-            pk_str = _PK_ (xs::String)
-            len = lengthPS pk_str
-           in
-           if len==len+1 then
-              error "Well, I never!"
-           else
-              lex_id2 (Just pk_str) cs''
-     _ -> case xs of
-           [] -> lex_id2 Nothing cs
-           _  -> lex_id3 Nothing len xs cs'
+   buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
+           (stepOverLexeme buf')       
  
--}
+------------------------------------------------------------------------------
+-- Character Classes
+
+is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
+
+{-# INLINE is_ctype #-}
+#if __GLASGOW_HASKELL__ >= 303
+is_ctype :: Word8 -> Char# -> Bool
+is_ctype mask = \c ->
+   (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
+#else
+is_ctype :: Int -> Char# -> Bool
+is_ctype (I# mask) = \c ->
+    let (A# ctype) = ``char_types'' :: Addr
+       flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
+    in
+       (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
+#endif
  
-lex_id buf = 
--- _trace ("lex_id: "++[C# (currentChar# buf)]) $
- case expandWhile (is_mod_char) buf of
-   buf' ->
-    case currentChar# buf' of
-     '.'# ->
-       if not (emptyLexeme buf') then
---        _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $ 
-          case lexemeToFastString buf' of
-            l@(FastString u# l# ba#) -> lex_id2 (Just (FastString u# l# ba#)) 
-                                                (stepOn (stepOverLexeme buf'))
-       else
-          lex_id2 Nothing buf'         
-     _  -> lex_id2 Nothing buf'
-
--- Dealt with the Module.part
-lex_id2 module_dot buf =
--- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
+is_ident  = is_ctype 1
+is_symbol = is_ctype 2
+is_any    = is_ctype 4
+is_space  = is_ctype 8
+is_upper  = is_ctype 16
+is_digit  = is_ctype 32
+
+-----------------------------------------------------------------------------
+-- identifiers, symbols etc.
+
+lex_id cont buf =
+ case expandWhile# is_ident buf of { buf1 -> 
+ case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
+ let new_buf = stepOverLexeme buf' 
+     lexeme  = lexemeToFastString buf'
+ in
+ case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
+       Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
+                         cont kwd_token new_buf;
+       Nothing        -> 
+ case lookupUFM ifaceKeywordsFM lexeme of {
+       Just kwd_token -> --trace ("ifacekeywd: "++_UNPK_(lexeme)) $
+                         cont kwd_token new_buf;
+       Nothing        -> --trace ("id: "++_UNPK_(lexeme)) $
+                         cont (mk_var_token lexeme) new_buf
+ }}}}
+
+lex_sym cont buf =
+ case expandWhile# is_symbol buf of
+   buf'
+     | is_comment lexeme -> lex_comment cont new_buf
+     | otherwise         ->
+          case lookupUFM haskellKeySymsFM lexeme of {
+               Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
+                                 cont kwd_token new_buf ;
+               Nothing        -> --trace ("sym: "++unpackFS lexeme) $
+                                 cont (mk_var_token lexeme) new_buf
+           }
+       where lexeme = lexemeToFastString buf'
+             new_buf = stepOverLexeme buf'
+
+             is_comment fs 
+               | len < 2   = False
+               | otherwise = trundle 0
+                 where
+                  len = lengthFS fs
+                  
+                  trundle n | n == len  = True
+                            | otherwise = indexFS fs n == '-' && trundle (n+1)
+
+lex_con cont buf = 
+ case expandWhile# is_ident buf of       { buf1 ->
+ case expandWhile# (eqChar# '#'#) buf1 of { buf' ->
+ case currentChar# buf' of
+     '.'# -> munch HiFile
+     '!'# -> munch HiBootFile
+     _    -> just_a_conid
+ 
+   where
+    just_a_conid = --trace ("con: "++unpackFS lexeme) $
+                  cont (ITconid lexeme) new_buf
+    lexeme = lexemeToFastString buf'
+    new_buf = stepOverLexeme buf'
+    munch hif = lex_qid cont lexeme hif (stepOn new_buf) just_a_conid
+ }}
+
+lex_qid cont mod hif buf just_a_conid =
   case currentChar# buf of
-  '['# -> 
+  '['# ->      -- Special case for []
      case lookAhead# buf 1# of
-     ']'# -> end_lex_id module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
-     _    -> lex_id3 module_dot buf
-  '('# ->
+     ']'# -> cont (ITqconid  (mod,SLIT("[]"),hif)) (stepOnBy# buf 2#)
+     _    -> just_a_conid
+
+  '('# ->  -- Special case for (,,,)
+          -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
      case lookAhead# buf 1# of
-     ')'# -> end_lex_id module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
-     ','# -> lex_tuple module_dot (stepOnBy# buf 2#)
-     _    -> lex_id3 module_dot buf
-  ':'# -> lex_id3 module_dot (incLexeme buf)
-  _    -> lex_id3 module_dot buf
-
-
-
--- Dealt with [], (), : special cases
-
-lex_id3 module_dot buf =
--- _trace ("lex_id3: "++[C# (currentChar# buf)]) $
- case expandWhile (is_id_char) buf of
-  buf' ->
-    case module_dot of
-     Just _ ->
-       end_lex_id module_dot (mk_var_token lexeme) (stepOverLexeme buf')
-     Nothing ->
-       case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
-         Just kwd_token -> kwd_token           : lexIface new_buf
-        Nothing        -> mk_var_token lexeme : lexIface new_buf
-    where
-     lexeme  = lexemeToFastString buf'
-     new_buf = stepOverLexeme buf'
+     '#'# -> case lookAhead# buf 2# of
+               ','# -> lex_ubx_tuple cont mod hif (stepOnBy# buf 3#) 
+                               just_a_conid
+               _    -> just_a_conid
+     ')'# -> cont (ITqconid (mod,SLIT("()"),hif)) (stepOnBy# buf 2#)
+     ','# -> lex_tuple cont mod hif (stepOnBy# buf 2#) just_a_conid
+     _    -> just_a_conid
+
+  '-'# -> case lookAhead# buf 1# of
+            '>'# -> cont (ITqconid (mod,SLIT("->"),hif)) (stepOnBy# buf 2#)
+            _    -> lex_id3 cont mod hif buf just_a_conid
+  _    -> lex_id3 cont mod hif buf just_a_conid
+
+lex_id3 cont mod hif buf just_a_conid
+  | is_symbol c =
+     case expandWhile# is_symbol buf of { buf' ->
+     let
+      lexeme  = lexemeToFastString buf'
+      new_buf = stepOverLexeme buf'
+     in
+     case lookupUFM haskellKeySymsFM lexeme of {
+       Just kwd_token -> just_a_conid; -- avoid M.:: etc.
+       Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
+     }}
+
+  | otherwise   =
+     case expandWhile# is_ident buf of { buf1 ->
+     if emptyLexeme buf1 
+           then just_a_conid
+           else
+     case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
+     let
+      lexeme  = lexemeToFastString buf'
+      new_buf = stepOverLexeme buf'
+     in
+     case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
+           Just kwd_token -> just_a_conid; -- avoid M.where etc.
+           Nothing        -> 
+     case lookupUFM ifaceKeywordsFM lexeme of {        -- only for iface files
+           Just kwd_token -> just_a_conid;
+           Nothing        -> cont (mk_qvar_token mod hif lexeme) new_buf
+     }}}}
+  where c = currentChar# buf
+
+mk_var_token pk_str
+  | is_upper f         = ITconid pk_str
+       -- _[A-Z] is treated as a constructor in interface files.
+  | f `eqChar#` '_'# && not (_NULL_ tl) 
+       && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
+  | is_ident f         = ITvarid pk_str
+  | f `eqChar#` ':'#   = ITconsym pk_str
+  | otherwise          = ITvarsym pk_str
+  where
+      (C# f) = _HEAD_ pk_str
+      tl     = _TAIL_ pk_str
+
+mk_qvar_token m hif token =
+ case mk_var_token token of
+   ITconid n  -> ITqconid  (m,n,hif)
+   ITvarid n  -> ITqvarid  (m,n,hif)
+   ITconsym n -> ITqconsym (m,n,hif)
+   ITvarsym n -> ITqvarsym (m,n,hif)
+   _         -> ITunknown (show token)
+\end{code}
  
+----------------------------------------------------------------------------
+Horrible stuff for dealing with M.(,,,)
  
-{- OLD:
-lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs
-lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs
-lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs
-lex_id2 module_dot [] (':' : cs)     = lex_id3 module_dot [':'] cs
-lex_id2 module_dot xs cs            = lex_id3 module_dot xs cs
--}
+\begin{code}
+lex_tuple cont mod hif buf back_off =
+  go 2 buf
+  where
+   go n buf =
+    case currentChar# buf of
+      ','# -> go (n+1) (stepOn buf)
+      ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n),hif)) (stepOn buf)
+      _    -> back_off
  
+lex_ubx_tuple cont mod hif buf back_off =
+  go 2 buf
+  where
+   go n buf =
+    case currentChar# buf of
+      ','# -> go (n+1) (stepOn buf)
+      '#'# -> case lookAhead# buf 1# of
+               ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n), hif))
+                                (stepOnBy# buf 2#)
+               _    -> back_off
+      _    -> back_off
+\end{code}
  
--- Dealt with [], (), : special cases
+-----------------------------------------------------------------------------
+Keyword Lists
  
-{-
-lex_id3 module_dot len_xs xs cs =
- case my_span' (is_id_char) cs of
-   (xs1,len_xs1,rest) ->
-    case module_dot of
-     Just m  -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest
-     Nothing -> 
-      case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of
-       Just kwd_token -> kwd_token         : lexIface rest
-       other         -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest
-    where
-     rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs)
--}
-mk_var_token pk_str =
-     let
-      f = _HEAD_ pk_str
-     in
-     --
-     -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
-     -- remove the second half of disjunction when using a 1.3 prelude.
-     --
-     if      isUpper f    then ITconid pk_str
-     else if isLower f   then ITvarid pk_str
-     else if f == ':'    then ITconsym pk_str
-     else if isLowerISO f then ITvarid pk_str
-     else if isUpperISO f then ITconid pk_str
-     else ITvarsym pk_str
-
-{-
-    mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n
-                         | f == ':'              = ITconsym n
-                         | isAlpha f             = ITvarid n
-                         | otherwise             = ITvarsym n 
-               where
-                     n = _PK_ xs
--}
-                           
-end_lex_id Nothing token buf  = token : lexIface buf
-end_lex_id (Just m) token buf =
- case token of
-   ITconid n  -> ITqconid  (m,n)         : lexIface buf
-   ITvarid n  -> ITqvarid  (m,n)         : lexIface buf
-   ITconsym n -> ITqconsym (m,n)         : lexIface buf
-   ITvarsym n -> ITqvarsym (m,n)         : lexIface buf
-   ITbang     -> ITqvarsym (m,SLIT("!")) : lexIface buf
-   _         -> ITunknown (show token)  : lexIface buf
-
-------------
+\begin{code}
  ifaceKeywordsFM :: UniqFM IfaceToken
  ifaceKeywordsFM = listToUFM $
        map (\ (x,y) -> (_PK_ x,y))
-       [("/\\_",               ITbiglam)
-       ,("@_",                 ITatsign)
-       ,("letrec_",            ITletrec)
-       ,("interface_",         ITinterface)
-       ,("usages_",            ITusages)
-       ,("versions_",          ITversions)
-       ,("exports_",           ITexports)
-       ,("instance_modules_",  ITinstance_modules)
-       ,("instances_",         ITinstances)
-       ,("fixities_",          ITfixities)
-       ,("declarations_",      ITdeclarations)
-       ,("pragmas_",           ITpragmas)
-       ,("forall_",            ITforall)
-       ,("U_",                 ITunfold)
-       ,("A_",                 ITarity)
-       ,("coerce_in_",         ITcoerce_in)
-       ,("coerce_out_",                ITcoerce_out)
-       ,("bot_",               ITbottom)
-       ,("integer_",           ITinteger_lit)
-       ,("rational_",          ITrational_lit)
-       ,("addr_",              ITaddr_lit)
-       ,("float_",             ITfloat_lit)
-       ,("string_",            ITstring_lit)
-       ,("litlit_",            ITlit_lit)
-       ,("ccall_",             ITccall (False, False))
-       ,("ccall_GC_",          ITccall (False, True))
-       ,("casm_",              ITccall (True,  False))
-       ,("casm_GC_",           ITccall (True,  True))
+     [  ("__interface",                ITinterface),
+       ("__export",            ITexport),
+       ("__instimport",        ITinstimport),
+       ("__forall",            ITforall),
+       ("__letrec",            ITletrec),
+       ("__coerce",            ITcoerce),
+       ("__inline",            ITinline),
+       ("__DEFAULT",           ITdefaultbranch),
+       ("__bot",               ITbottom),
+       ("__integer",           ITinteger_lit),
+       ("__float",             ITfloat_lit),
+       ("__rational",          ITrational_lit),
+       ("__addr",              ITaddr_lit),
+       ("__litlit",            ITlit_lit),
+       ("__string",            ITstring_lit),
+       ("__a",                 ITtypeapp),
+       ("__A",                 ITarity),
+       ("__P",                 ITspecialise),
+       ("__C",                 ITnocaf),
+        ("__u",                        ITunfold NoInlinePragInfo),
+        ("__U",                        ITunfold IWantToBeINLINEd),
+        ("__UU",               ITunfold IMustBeINLINEd),
+        ("__Unot",             ITunfold IMustNotBeINLINEd),
+        ("__Ux",               ITunfold IAmALoopBreaker),
+       
+        ("__ccall",            ITccall (False, False, False)),
+        ("__dyn_ccall",                ITccall (True,  False, False)),
+        ("__dyn_ccall_GC",     ITccall (True,  False, True)),
+        ("__casm",             ITccall (False, True,  False)),
+        ("__dyn_casm",         ITccall (True,  True,  False)),
+        ("__casm_GC",          ITccall (False, True,  True)),
+        ("__dyn_casm_GC",      ITccall (True,  True,  True)),
+
+        ("/\\",                        ITbiglam)
         ]
  
  haskellKeywordsFM = listToUFM $
        map (\ (x,y) -> (_PK_ x,y))
-      [ ("data",               ITdata)
-       ,("type",               ITtype)
-       ,("newtype",            ITnewtype)
-       ,("class",              ITclass)
-       ,("where",              ITwhere)
-       ,("instance",           ITinstance)
-       ,("infixl",             ITinfixl)
-       ,("infixr",             ITinfixr)
-       ,("infix",              ITinfix)
-       ,("case",               ITcase)
-       ,("case#",              ITprim_case)
-       ,("of",                 ITof)
-       ,("in",                 ITin)
-       ,("let",                        ITlet)
-       ,("deriving",           ITderiving)
-
-       ,("->",                 ITrarrow)
+       [( "case",      ITcase ),     
+       ( "class",      ITclass ),    
+       ( "data",       ITdata ),     
+       ( "default",    ITdefault ),  
+       ( "deriving",   ITderiving ), 
+       ( "do",         ITdo ),       
+       ( "else",       ITelse ),     
+       ( "if",         ITif ),       
+       ( "import",     ITimport ),   
+       ( "in",         ITin ),       
+       ( "infix",      ITinfix ),    
+       ( "infixl",     ITinfixl ),   
+       ( "infixr",     ITinfixr ),   
+       ( "instance",   ITinstance ), 
+       ( "let",        ITlet ),      
+       ( "module",     ITmodule ),   
+       ( "newtype",    ITnewtype ),  
+       ( "of",         ITof ),       
+       ( "then",       ITthen ),     
+       ( "type",       ITtype ),     
+       ( "where",      ITwhere )
+
+--     These three aren't Haskell keywords at all
+--     and 'as' is often used as a variable name
+--     ( "as",         ITas ),       
+--     ( "qualified",  ITqualified ),
+--     ( "hiding",     IThiding )
+
+     ]
+
+haskellKeySymsFM = listToUFM $
+       map (\ (x,y) -> (_PK_ x,y))
+      [ ("..",                 ITdotdot)
+       ,("::",                 ITdcolon)
+       ,("=",                  ITequal)
         ,("\\",                 ITlam)
         ,("|",                  ITvbar)
-       ,("!",                  ITbang)
+       ,("<-",                 ITlarrow)
+       ,("->",                 ITrarrow)
+       ,("@",                  ITat)
+       ,("~",                  ITtilde)
         ,("=>",                 ITdarrow)
-       ,("=",                  ITequal)
+       ,("-",                  ITminus)
+       ,("!",                  ITbang)
         ]
+\end{code}
  
+-----------------------------------------------------------------------------
+doDiscard rips along really fast, looking for a '#-}', 
+indicating the end of the pragma we're skipping
  
--- doDiscard rips along really fast looking for a double semicolon, 
--- indicating the end of the pragma we're skipping
+\begin{code}
  doDiscard inStr buf =
--- _trace (show (C# (currentChar# buf))) $
   case currentChar# buf of
-   ';'# ->
-     if not inStr then
-       case lookAhead# buf 1# of
-        ';'# -> incLexeme (incLexeme buf)
-        _    -> doDiscard inStr (incLexeme buf)
-     else
-       doDiscard inStr (incLexeme buf)
+   '#'# | not inStr ->
+       case lookAhead# buf 1# of { '#'# -> 
+       case lookAhead# buf 2# of { '-'# ->
+       case lookAhead# buf 3# of { '}'# -> 
+          (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
+       _    -> doDiscard inStr (incLexeme buf) };
+        _    -> doDiscard inStr (incLexeme buf) };
+        _    -> doDiscard inStr (incLexeme buf) }
     '"'# ->
         let
          odd_slashes buf flg i# =
@@ -800,43 +753,69 @@ doDiscard inStr buf =
  
  \end{code}
  
-begin{code}
-my_span :: (a -> Bool) -> [a] -> ([a],[a])
-my_span p xs = go [] xs
-  where
-    go so_far (x:xs') | p x = go (x:so_far) xs'
-    go so_far xs            = (reverse so_far, xs)
-
-my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
-my_span' p xs = go [] 0 xs
-  where
-    go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
-    go so_far n xs            = (reverse so_far,n, xs)
-end{code}
-
-
-%************************************************************************
-%*                                                                     *
-\subsection{Other utility functions
-%*                                                                     *
-%************************************************************************
+-----------------------------------------------------------------------------
  
  \begin{code}
-type IfM a = MaybeErr a Error
+type IfM a = StringBuffer      -- Input string
+         -> SrcLoc
+         -> MaybeErr a {-error-}Message
  
  returnIf   :: a -> IfM a
+returnIf a s l = Succeeded a
+
  thenIf    :: IfM a -> (a -> IfM b) -> IfM b
-happyError :: Int -> [IfaceToken] -> IfM a
+m `thenIf` k = \s l ->
+       case m s l of
+               Succeeded a -> k a s l
+               Failed err  -> Failed err
+
+getSrcLocIf :: IfM SrcLoc
+getSrcLocIf s l = Succeeded l
+
+happyError :: IfM a
+happyError s l = Failed (ifaceParseErr s l)
+
  
-returnIf a = Succeeded a
+{- 
+ Note that if the name of the file we're processing ends
+ with `hi-boot', we accept it on faith as having the right
+ version. This is done so that .hi-boot files that comes
+ with hsc don't have to be updated before every release,
+ *and* it allows us to share .hi-boot files with versions
+ of hsc that don't have .hi version checking (e.g., ghc-2.10's)
  
-thenIf (Succeeded a) k = k a
-thenIf (Failed  err) _ = Failed err
+ If the version number is 0, the checking is also turned off.
+ (needed to deal with GHC.hi only!)
  
-happyError ln toks = Failed (ifaceParseErr ln toks)
+ Once we can assume we're compiling with a version of ghc that
+ supports interface file checking, we can drop the special
+ pleading
+-}
+checkVersion :: Maybe Integer -> IfM ()
+checkVersion mb@(Just v) s l
+ | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
+ | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
+checkVersion mb@Nothing  s l 
+ | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
+ | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
  
  -----------------------------------------------------------------
  
-ifaceParseErr ln toks sty
-  = hsep [ptext SLIT("Interface-file parse error: line"), int ln, ptext SLIT("toks="), text (show (take 10 toks))]
+ifaceParseErr :: StringBuffer -> SrcLoc -> Message
+ifaceParseErr s l
+  = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
+          ptext SLIT("current input ="), text first_bit]
+  where
+    first_bit = lexemeToString (stepOnBy# s 100#) 
+
+ifaceVersionErr hi_vers l toks
+  = hsep [ppr l, ptext SLIT("Interface file version error;"),
+          ptext SLIT("Expected"), int opt_HiVersion, 
+         ptext SLIT("found "), pp_version]
+    where
+     pp_version =
+      case hi_vers of
+        Nothing -> ptext SLIT("pre ghc-3.02 version")
+       Just v  -> ptext SLIT("version") <+> integer v
+
  \end{code}