[project @ 1999-06-01 16:15:42 by simonmar]

[ghc-hetmet.git] / ghc / compiler / reader / Lex.lhs
diff --git a/ghc/compiler/reader/Lex.lhs b/ghc/compiler/reader/Lex.lhs

index 3eee37d..5e57258 100644 (file)
--- a/ghc/compiler/reader/Lex.lhs
+++ b/ghc/compiler/reader/Lex.lhs
@@ -1,3 +1,8 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section[Lexical analysis]{Lexical analysis}
+
  --------------------------------------------------------
  [Jan 98]
  There's a known bug in here:
@@ -10,18 +15,12 @@ An example that provokes the error is
         f _:_ _forall_ [a] <<<END OF FILE>>>
  --------------------------------------------------------
  
-
-%
-% (c) The GRASP/AQUA Project, Glasgow University, 1992-1996
-%
-\section[Lexical analysis]{Lexical analysis}
-
  \begin{code}
+{-# OPTIONS -#include "ctypes.h" #-}
+
  module Lex (
  
-       isLexCon, isLexVar, isLexId, isLexSym,
-       isLexConId, isLexConSym, isLexVarId, isLexVarSym,
-       mkTupNameStr, ifaceParseErr,
+       ifaceParseErr,
  
         -- Monad for parser
         IfaceToken(..), lexIface, IfM, thenIf, returnIf, getSrcLocIf,
@@ -33,116 +32,45 @@ module Lex (
  
  #include "HsVersions.h"
  
-import Char            (isDigit, isAlphanum, isUpper,isLower, isSpace, ord )
+import Char            ( ord, isSpace )
  import List             ( isSuffixOf )
  
-import {-# SOURCE #-} CostCentre
-
-import CmdLineOpts     ( opt_IgnoreIfacePragmas, opt_NoHiCheck )
+import IdInfo          ( InlinePragInfo(..), CprInfo(..) )
+import Name            ( isLowerISO, isUpperISO )
+import PrelMods                ( mkTupNameStr, mkUbxTupNameStr )
+import CmdLineOpts     ( opt_IgnoreIfacePragmas, opt_HiVersion, opt_NoHiCheck )
  import Demand          ( Demand(..) {- instance Read -} )
  import UniqFM           ( UniqFM, listToUFM, lookupUFM)
-import BasicTypes      ( NewOrData(..), IfaceFlavour(..) )
+import BasicTypes      ( NewOrData(..) )
  import SrcLoc          ( SrcLoc, incSrcLine, srcLocFile )
  
  import Maybes          ( MaybeErr(..) )
-import ErrUtils                ( ErrMsg(..) )
+import ErrUtils                ( Message )
  import Outputable
-import Util            ( nOfThem, panic )
  
  import FastString
  import StringBuffer
  import GlaExts
  import ST              ( runST )
  
-import PrelRead                ( readRational__ ) -- Glasgow non-std
-\end{code}
-
-%************************************************************************
-%*                                                                     *
-\subsection{Lexical categories}
-%*                                                                     *
-%************************************************************************
-
-These functions test strings to see if they fit the lexical categories
-defined in the Haskell report.  Normally applied as in e.g. @isCon
-(getLocalName foo)@.
-
-\begin{code}
-isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym,
- isLexVarId, isLexVarSym  :: FAST_STRING -> Bool
-
-isLexCon cs = isLexConId  cs || isLexConSym cs
-isLexVar cs = isLexVarId  cs || isLexVarSym cs
+#if __GLASGOW_HASKELL__ >= 303
+import Bits
+import Word
+#endif
  
-isLexId  cs = isLexConId  cs || isLexVarId  cs
-isLexSym cs = isLexConSym cs || isLexVarSym cs
-
--------------
-
-isLexConId cs
-  | _NULL_ cs       = False
-  | cs == SLIT("[]") = True
-  | c  == '('       = True     -- (), (,), (,,), ...
-  | otherwise       = isUpper c || isUpperISO c
-  where                                        
-    c = _HEAD_ cs
-
-isLexVarId cs
-  | _NULL_ cs   = False
-  | otherwise    = isLower c || isLowerISO c
-  where
-    c = _HEAD_ cs
-
-isLexConSym cs
-  | _NULL_ cs  = False
-  | otherwise  = c  == ':'
-              || cs == SLIT("->")
-  where
-    c = _HEAD_ cs
-
-isLexVarSym cs
-  | _NULL_ cs = False
-  | otherwise = isSymbolASCII c
-            || isSymbolISO c
-  where
-    c = _HEAD_ cs
-
--------------
-isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
-isSymbolISO   c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf])
-isUpperISO    (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'#
---0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c
-isLowerISO    (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'#
---0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c
-\end{code}
-
-
-%************************************************************************
-%*                                                                     *
-\subsection{Tuple strings -- ugh!}
-%*                                                                     *
-%************************************************************************
-
-\begin{code}
-mkTupNameStr 0 = SLIT("()")
-mkTupNameStr 1 = panic "Name.mkTupNameStr: 1 ???"
-mkTupNameStr 2 = _PK_ "(,)"   -- not strictly necessary
-mkTupNameStr 3 = _PK_ "(,,)"  -- ditto
-mkTupNameStr 4 = _PK_ "(,,,)" -- ditto
-mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")")
+import Addr
+import PrelRead                ( readRational__ ) -- Glasgow non-std
  \end{code}
  
-
-
  %************************************************************************
  %*                                                                     *
  \subsection{Data types}
  %*                                                                     *
  %************************************************************************
  
-The token data type, fairly un-interesting except from two constructors,
-@ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity,
-strictness, unfolding etc) and types for id decls. 
+The token data type, fairly un-interesting except from one
+constructor, @ITidinfo@, which is used to lazily lex id info (arity,
+strictness, unfolding etc).
  
  The Idea/Observation here is that the renamer needs to scan through
  all of an interface file before it can continue. But only a fraction
@@ -164,71 +92,106 @@ Laziness, you know it makes sense :-)
  
  \begin{code}
  data IfaceToken
-  = ITinterface                -- keywords
-  | ITusages
-  | ITversions
-  | ITexports
-  | ITinstance_modules
-  | ITinstances
-  | ITfixities
-  | ITdeclarations
-  | ITpragmas
+  = ITcase                     -- Haskell keywords
+  | ITclass
    | ITdata
-  | ITtype
-  | ITnewtype
+  | ITdefault
    | ITderiving
-  | ITclass
-  | ITwhere
-  | ITinstance
+  | ITdo
+  | ITelse
+  | ITif
+  | ITimport
+  | ITin
+  | ITinfix
    | ITinfixl
    | ITinfixr
-  | ITinfix
+  | ITinstance
+  | ITlet
+  | ITmodule
+  | ITnewtype
+  | ITof
+  | ITthen
+  | ITtype
+  | ITwhere
+  | ITas
+  | ITqualified
+  | IThiding
+
+  | ITinterface                        -- GHC-extension keywords
+  | ITexport
+  | ITdepends
    | ITforall
-  | ITbang             -- magic symbols
-  | ITvbar
+  | ITletrec 
+  | ITcoerce
+  | ITinlineCall 
+  | ITinlineMe
+  | ITccall (Bool,Bool,Bool)   -- (is_dyn, is_casm, may_gc)
+  | ITdefaultbranch
+  | ITbottom
+  | ITinteger_lit 
+  | ITfloat_lit
+  | ITrational_lit
+  | ITaddr_lit
+  | ITlit_lit
+  | ITstring_lit
+  | ITtypeapp
+  | ITonce                     -- usage annotations
+  | ITmany
+  | ITarity 
+  | ITrules
+  | ITspecialise
+  | ITnocaf
+  | ITunfold InlinePragInfo
+  | ITstrict ([Demand], Bool)
+  | ITcprinfo (CprInfo)
+  | ITscc
+  | ITsccAllCafs
+
+  | ITdotdot                   -- reserved symbols
    | ITdcolon
-  | ITcomma
-  | ITdarrow
-  | ITdotdot
    | ITequal
-  | ITocurly
-  | ITobrack
-  | IToparen
+  | ITlam
+  | ITvbar
+  | ITlarrow
    | ITrarrow
+  | ITat
+  | ITtilde
+  | ITdarrow
+  | ITminus
+  | ITbang
+
+  | ITbiglam                   -- GHC-extension symbols
+
+  | ITocurly                   -- special symbols
    | ITccurly
+  | ITobrack
    | ITcbrack
+  | IToparen
    | ITcparen
+  | IToubxparen
+  | ITcubxparen
    | ITsemi
-  | ITvarid   FAST_STRING
+  | ITcomma
+
+  | ITvarid   FAST_STRING      -- identifiers
    | ITconid   FAST_STRING
    | ITvarsym  FAST_STRING
    | ITconsym  FAST_STRING
-  | ITqvarid  (FAST_STRING,FAST_STRING,IfaceFlavour)
-  | ITqconid  (FAST_STRING,FAST_STRING,IfaceFlavour)
-  | ITqvarsym (FAST_STRING,FAST_STRING,IfaceFlavour)
-  | ITqconsym (FAST_STRING,FAST_STRING,IfaceFlavour)
-
-  | ITtysig StringBuffer (Maybe StringBuffer)
-                          -- lazily return the stream of tokens for
-                          -- the info attached to an id.
-       -- Stuff for reading unfoldings
-  | ITarity 
-  | ITunfold Bool              -- True <=> there's an INLINE pragma on this Id
-  | ITstrict [Demand] | ITbottom
-  | ITspecialise
-  | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof
-  | ITcoerce | ITinline | ITatsign 
-  | ITccall (Bool,Bool)                -- (is_casm, may_gc)
-  | ITscc CostCentre 
-  | ITchar Char | ITstring FAST_STRING
-  | ITinteger Integer | ITrational Rational
-  | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit
-  | ITunknown String           -- Used when the lexer can't make sense of it
-  | ITeof                              -- end of file token
-  deriving Text -- debugging
+  | ITqvarid  (FAST_STRING,FAST_STRING)
+  | ITqconid  (FAST_STRING,FAST_STRING)
+  | ITqvarsym (FAST_STRING,FAST_STRING)
+  | ITqconsym (FAST_STRING,FAST_STRING)
+
+  | ITpragma StringBuffer
  
-instance Text CostCentre -- cheat!
+  | ITchar Char 
+  | ITstring FAST_STRING
+  | ITinteger Integer 
+  | ITrational Rational
  
+  | ITunknown String           -- Used when the lexer can't make sense of it
+  | ITeof                      -- end of file token
+  deriving Text -- debugging
  \end{code}
  
  %************************************************************************
@@ -244,7 +207,7 @@ lexIface cont buf =
  -- if bufferExhausted buf then
  --  []
  -- else
---  _trace ("Lexer: "++[C# (currentChar# buf)]) $
+--  trace ("Lexer: '"++[C# (currentChar# buf)]++"'") $
    case currentChar# buf of
        -- whitespace and comments, ignore.
      ' '#  -> lexIface cont (stepOn buf)
@@ -254,35 +217,45 @@ lexIface cont buf =
  -- Numbers and comments
      '-'#  ->
        case lookAhead# buf 1# of
-        '-'# -> lex_comment cont (stepOnBy# buf 2#)
+--        '-'# -> lex_comment cont (stepOnBy# buf 2#)
          c    -> 
-         if isDigit (C# c)
+         if is_digit c
            then lex_num cont (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf))
-         else lex_id cont buf
-
--- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake?
---    '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs
-
+         else lex_sym cont buf
+
+    '{'# ->                            -- look for "{-##" special iface pragma
+       case lookAhead# buf 1# of
+          '-'# -> case lookAhead# buf 2# of
+                   '#'# -> case lookAhead# buf 3# of
+                               '#'# ->  
+                                  let (lexeme, buf') 
+                                         = doDiscard False (stepOnBy# buf 4#) in
+                                  cont (ITpragma lexeme) buf'
+                               _ ->  lex_nested_comment (lexIface cont) buf
+                   _    -> cont ITocurly (stepOn buf)
+                           -- lex_nested_comment (lexIface cont) buf
+          _ -> cont ITocurly (stepOn buf)
+
+    -- special symbols ----------------------------------------------------
      '('# -> 
          case prefixMatch (stepOn buf) "..)" of
            Just buf' ->  cont ITdotdot (stepOverLexeme buf')
             Nothing ->
              case lookAhead# buf 1# of
-              ','# -> lex_tuple cont Nothing  (stepOnBy# buf 2#)
-              ')'# -> cont (ITconid SLIT("()")) (stepOnBy# buf 2#)
+             '#'# -> cont IToubxparen (stepOnBy# buf 2#)
               _    -> cont IToparen (stepOn buf)
-
-    '{'# -> cont ITocurly (stepOn buf)
-    '}'# -> cont ITccurly (stepOn buf)
      ')'# -> cont ITcparen (stepOn buf)
-    '['# -> 
-      case lookAhead# buf 1# of
-       ']'# -> cont (ITconid SLIT("[]")) (stepOnBy# buf 2#)
-        _    -> cont ITobrack (stepOn buf)
+    '}'# -> cont ITccurly (stepOn buf)
+    '#'# -> case lookAhead# buf 1# of
+               ')'# -> cont ITcubxparen (stepOnBy# buf 2#)
+               _    -> lex_sym cont (incLexeme buf)
+    '['# -> cont ITobrack (stepOn buf)
      ']'# -> cont ITcbrack (stepOn buf)
      ','# -> cont ITcomma  (stepOn buf)
-    ';'#  -> cont ITsemi (stepOn buf)
-    '\"'# -> case untilEndOfString# (stepOn buf) of
+    ';'# -> cont ITsemi   (stepOn buf)
+
+    -- strings/characters -------------------------------------------------
+    '\"'#{-"-} -> case untilEndOfString# (stepOn buf) of
               buf' ->
                   -- the string literal does *not* include the dquotes
                 case lexemeToFastString buf' of
@@ -299,43 +272,71 @@ lexIface cont buf =
                buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of
                         [  (ch, rest)] -> cont (ITchar ch) (stepOverLexeme (incLexeme buf'))
  
+    -- strictness and cpr pragmas and __scc treated specially.
+    '_'# ->
+        case lookAhead# buf 1# of
+          '_'# -> case lookAhead# buf 2# of
+                   'S'# -> 
+                       lex_demand cont (stepOnUntil (not . isSpace) 
+                                       (stepOnBy# buf 3#)) -- past __S
+                   'M'# -> 
+                       lex_cpr cont (stepOnUntil (not . isSpace) 
+                                    (stepOnBy# buf 3#)) -- past __M
+                   's'# -> 
+                       case prefixMatch (stepOnBy# buf 3#) "cc" of
+                              Just buf' -> lex_scc cont (stepOverLexeme buf')
+                              Nothing   -> lex_id cont buf
+                   _ -> lex_id cont buf
+          _    -> lex_id cont buf
+
  -- ``thingy'' form for casm
      '`'# ->
             case lookAhead# buf 1# of
-             '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `` and go.
-             _    -> lex_id cont (incLexeme buf)         -- add ` to lexeme and assume
+             '`'# -> lex_cstring cont (stepOnBy# buf 2#) -- remove the `s and go.
+             _    -> lex_sym cont (incLexeme buf)         -- add ` to lexeme and assume
                                                      -- scanning an id of some sort.
--- Keywords
-    '_'# ->
-        case lookAhead# buf 1# of
-          'S'# -> case lookAhead# buf 2# of
-                   '_'# ->
-                           lex_demand cont (stepOnUntil (not . isSpace) 
-                                           (stepOnBy# buf 3#)) -- past _S_
-          's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of
-                    Just buf' -> lex_scc cont (stepOnUntil (not . isSpace) (stepOverLexeme buf'))
-                    Nothing   -> lex_keyword cont (stepOnBy# buf 1#) -- drop the '_' and assume
-                                                                -- it is a keyword.
-          _    -> lex_keyword cont (stepOn buf)
  
      '\NUL'# ->
             if bufferExhausted (stepOn buf) then
                cont ITeof buf
             else
-              lex_id cont buf
-    c ->
-       if isDigit (C# c) then
-          lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
-        else
-          lex_id cont buf
+              trace "lexIface: misplaced NUL?" $ 
+              cont (ITunknown "\NUL") (stepOn buf)
+
+    c | is_digit  c -> lex_num cont (id) (ord# c -# ord# '0'#) (incLexeme buf)
+      | is_symbol c -> lex_sym cont buf
+      | is_upper  c -> lex_con cont buf
+      | is_ident  c -> lex_id  cont buf
+
  --  where
  lex_comment cont buf = 
  --   _trace ("comment: "++[C# (currentChar# buf)]) $
     case untilChar# buf '\n'# of {buf' -> lexIface cont (stepOverLexeme buf')}
  
-------------------
+-------------------------------------------------------------------------------
+
+lex_nested_comment cont buf =
+  case currentChar# buf of
+       '-'# -> case lookAhead# buf 1# of
+                '}'# -> cont (stepOnBy# buf 2#)
+                _    -> lex_nested_comment cont (stepOn buf)
+
+       '{'# -> case lookAhead# buf 1# of
+                '-'# -> lex_nested_comment
+                               (lex_nested_comment cont) 
+                               (stepOnBy# buf 2#)
+                _    -> lex_nested_comment cont (stepOn buf)
+
+       _   -> lex_nested_comment cont (stepOn buf)
+
+-------------------------------------------------------------------------------
+
  lex_demand cont buf = 
- case read_em [] buf of { (ls,buf') -> cont (ITstrict ls) (stepOverLexeme buf')}
+ case read_em [] buf of { (ls,buf') -> 
+ case currentChar# buf' of
+   'B'# -> cont (ITstrict (ls, True )) (stepOverLexeme (stepOn buf'))
+   _    -> cont (ITstrict (ls, False)) (stepOverLexeme buf')
+ }
   where
     -- code snatched from Demand.lhs
    read_em acc buf = 
@@ -356,84 +357,34 @@ lex_demand cont buf =
     = case read_em [] buf of
        (stuff, rest) -> read_em (WwUnpack new_or_data wrapper_unpacks stuff : acc) rest
  
+lex_cpr cont buf = 
+ case read_em [] buf of { (cpr_inf,buf') -> 
+   ASSERT ( null (tail cpr_inf) )
+   cont (ITcprinfo $ head cpr_inf) (stepOverLexeme buf')
+ }
+ where
+   -- code snatched from lex_demand above
+  read_em acc buf = 
+   case currentChar# buf of
+    '-'# -> read_em (NoCPRInfo : acc) (stepOn buf)
+    '('# -> do_unpack acc (stepOn buf)
+    ')'# -> (reverse acc, stepOn buf)
+    _    -> (reverse acc, buf)
+
+  do_unpack acc buf
+   = case read_em [] buf of
+      (stuff, rest) -> read_em ((CPRInfo stuff)  : acc) rest
+
  ------------------
  lex_scc cont buf =
   case currentChar# buf of
-  '"'# ->
-      -- YUCK^2
-     case prefixMatch (stepOn buf) "NO_CC\"" of
-      Just buf' -> cont (ITscc noCostCentre) (stepOverLexeme buf')
-      Nothing -> 
-       case prefixMatch (stepOn buf) "CURRENT_CC\"" of
-        Just buf' -> cont (ITscc useCurrentCostCentre) (stepOverLexeme buf')
-        Nothing   ->
-         case prefixMatch (stepOn buf) "OVERHEAD\"" of
-         Just buf' -> cont (ITscc overheadCostCentre) (stepOverLexeme buf')
-         Nothing   ->
-          case prefixMatch (stepOn buf) "DONT_CARE\"" of
-           Just buf' -> cont (ITscc dontCareCostCentre) (stepOverLexeme buf')
-           Nothing   ->
-            case prefixMatch (stepOn buf) "SUBSUMED\"" of
-             Just buf' -> cont (ITscc subsumedCosts) (stepOverLexeme buf')
-             Nothing ->
-              case prefixMatch (stepOn buf) "CAFs_in_...\"" of
-               Just buf' -> cont (ITscc preludeCafsCostCentre) (stepOverLexeme buf')
-               Nothing ->
-                case prefixMatch (stepOn buf) "CC_CAFs_in_..." of
-                 Just buf' ->
-                 case untilChar# (stepOverLexeme buf') '\"'# of
-                  buf'' -> cont (ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_)) (stepOn (stepOverLexeme buf''))
-                 Nothing ->
-                  case prefixMatch (stepOn buf) "DICTs_in_...\"" of
-                   Just buf' -> cont (ITscc (preludeDictsCostCentre True)) (stepOverLexeme buf')
-                   Nothing ->
-                    case prefixMatch (stepOn buf) "CC_DICTs_in_..." of
-                     Just buf' ->
-                     case untilChar# (stepOverLexeme buf') '\"'# of
-                      buf'' -> cont (ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True)) 
-                               (stepOn (stepOverLexeme buf''))
-                     Nothing ->
-                     let
-                      match_user_cc buf =
-                       case untilChar# buf '/'# of
-                        buf' -> 
-                          let mod_name = lexemeToFastString buf' in
---                       case untilChar# (stepOn (stepOverLexeme buf')) '/'# of
---                        buf'' -> 
---                            let grp_name = lexemeToFastString buf'' in
-                           case untilChar# (stepOn (stepOverLexeme buf')) '\"'# of
-                            buf'' ->
-                              -- The label may contain arbitrary characters, so it
-                              -- may have been escaped etc., hence we `read' it in to get
-                              -- rid of these meta-chars in the string and then pack it (again.)
-                              -- ToDo: do the same for module name (single quotes allowed in m-names).
-                              -- BTW, the code in this module is totally gruesome..
-                              let upk_label = _UNPK_ (lexemeToFastString buf'') in
-                              case reads ('"':upk_label++"\"") of
-                               ((cc_label,_):_) -> 
-                                   let cc_name = _PK_ cc_label in
-                                   (mkUserCC cc_name mod_name _NIL_{-grp_name-}, 
-                                    stepOn (stepOverLexeme buf''))
-                               _ -> 
-                                 trace ("trouble lexing scc label: " ++ upk_label ++ " , ignoring") 
-                                 (mkUserCC _NIL_ mod_name _NIL_{-grp_name-}, 
-                                  stepOn (stepOverLexeme buf''))
-                      in
-                      case prefixMatch (stepOn buf) "CAF:" of
-                       Just buf' ->
-                        case match_user_cc (stepOverLexeme buf') of
-                         (cc, buf'') -> cont (ITscc (cafifyCC cc)) buf''
-                       Nothing ->
-                        case match_user_cc (stepOn buf) of
-                         (cc, buf'') -> cont (ITscc cc) buf''
-  c -> cont (ITunknown [C# c]) (stepOn buf)
-
+  'C'# -> cont ITsccAllCafs  (stepOverLexeme (stepOn buf))
+  other -> cont ITscc buf
  
  -----------
-lex_num :: (IfaceToken -> IfM a) -> 
-       (Int -> Int) -> Int# -> IfM a
+lex_num :: (IfaceToken -> IfM a) -> (Int -> Int) -> Int# -> IfM a
  lex_num cont minus acc# buf =
--- _trace ("lex_num: "++[C# (currentChar# buf)]) $
+ --trace ("lex_num: "++[C# (currentChar# buf)]) $
   case scanNumLit (I# acc#) buf of
       (acc',buf') ->
         case currentChar# buf' of
@@ -441,349 +392,317 @@ lex_num cont minus acc# buf =
               -- this case is not optimised at all, as the
               -- presence of floating point numbers in interface
               -- files is not that common. (ToDo)
-           case expandWhile (isDigit) (incLexeme buf') of
+           case expandWhile# is_digit (incLexeme buf') of
                buf2 -> -- points to first non digit char
                 let l = case currentChar# buf2 of
                           'e'# -> let buf3 = incLexeme buf2 in
                               case currentChar# buf3 of
-                               '-'# -> expandWhile (isDigit) (incLexeme buf3)
-                               _    -> expandWhile (isDigit) buf3
+                               '-'# -> expandWhile# is_digit (incLexeme buf3)
+                               _    -> expandWhile# is_digit buf3
                           _ -> buf2
                 in let v = readRational__ (lexemeToString l) in
                    cont (ITrational v) (stepOverLexeme l)
  
           _ -> cont (ITinteger (fromInt (minus acc'))) (stepOverLexeme buf')
  
-
-
-------------
-lex_keyword cont buf =
--- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $
- case currentChar# buf of
-  ':'# -> case lookAhead# buf 1# of
-           '_'# -> -- a binding, type (and other id-info) follows,
-                   -- to make the parser ever so slightly, we push
-                   -- 
-               lex_decl cont (stepOnBy# buf 2#)
-           v# -> cont (ITunknown (['_',':',C# v#])) (stepOnBy# buf 2#)
-  _ ->
-    case expandWhile (is_kwd_char) buf of
-     buf' ->
-      let kw = lexemeToFastString buf' in
---    _trace ("kw: "++lexemeToString buf') $
-      case lookupUFM ifaceKeywordsFM kw of
-       Nothing -> cont (ITunknown (_UNPK_ kw)) -- (minor) sigh 
-                 (stepOverLexeme buf')
-       Just xx -> cont xx (stepOverLexeme buf')
-
-lex_decl cont buf =
- case doDiscard False buf of -- spin until ;; is found
-   buf' ->
-      {- _trace (show (lexemeToString buf')) $ -}
-      case currentChar# buf' of
-       '\n'# -> -- newline, no id info.
-          cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
-               (stepOverLexeme buf')
-       '\r'# -> -- just to be sure for those Win* boxes..
-          cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
-               (stepOverLexeme buf')
-       '\NUL'# ->
-          cont (ITtysig (lexemeToBuffer (decLexeme buf')) Nothing)
-               (stepOverLexeme buf')
-       c     -> -- run all over the id info
-        case doDiscard False (stepOverLexeme buf') of -- spin until ;; is found (outside a string!)
-          buf'' -> 
-                   --_trace ((C# c):show (lexemeToString (decLexeme buf')))  $
-                   --_trace (show (lexemeToString (decLexeme buf''))) $
-                   let idinfo = 
-                           if opt_IgnoreIfacePragmas then
-                               Nothing
-                           else
-                               Just (lexemeToBuffer (decLexeme buf''))
-                       --_trace (show is) $
-                   in
-                    cont (ITtysig (lexemeToBuffer (decLexeme buf')) idinfo)
-                       (stepOverLexeme buf'')
-                   
--- ToDo: hammer!
-is_kwd_char c@(C# c#) = 
- isAlphanum c || -- OLD: c `elem` "_@/\\"
- (case c# of
-   '_'#  -> True
-   '@'#  -> True
-   '/'#  -> True
-   '\\'# -> True
-   _     -> False)
-
-
-
  -----------
  lex_cstring cont buf =
   case expandUntilMatch buf "\'\'" of
     buf' -> cont (ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))))
-           (stepOverLexeme buf')
-       
------------
-lex_tuple cont module_dot buf =
-  go 2 buf
-  where
-   go n buf =
-    case currentChar# buf of
-      ','# -> go (n+1) (stepOn buf)
-      ')'# -> end_lex_id cont module_dot (ITconid (mkTupNameStr n)) (stepOn buf)
-      _    -> cont (ITunknown ("tuple " ++ show n)) buf
-
--- Similarly ' itself is ok inside an identifier, but not at the start
-
--- id_arr is an array of bytes, indexed by characters,
--- containing 0 if the character isn't a valid character from an identifier
--- and 1 if it is.  It's just a memo table for is_id_char.
-id_arr :: ByteArray Int
-id_arr =
- runST (
-  newCharArray (0,255) >>= \ barr ->
-  let
-   loop 256# = return ()
-   loop i# =
-    if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then
-       writeCharArray barr (I# i#) '\1'                >>
-       loop (i# +# 1#)
-    else
-       writeCharArray barr (I# i#) '\0'                >>
-       loop (i# +# 1#)
-  in
-  loop 0#                                      >>
-  unsafeFreezeByteArray barr)
-
-is_id_char (C# c#) = 
- let
-  ByteArray _ arr# = id_arr
- in
- case ord# (indexCharArray# arr# (ord# c#)) of
-  0# -> False
-  1# -> True
-
---OLD: is_id_char c@(C# c#)  = isAlphanum c || is_sym c#
-
-is_sym c# =
- case c# of {
-   ':'# -> True; '_'#  -> True; '\''# -> True; '!'# -> True; 
-   '#'# -> True; '$'#  -> True; '%'# -> True; 
-   '&'# -> True; '*'#  -> True; '+'#  -> True; '.'# -> True; 
-   '/'# -> True; '<'#  -> True; '='#  -> True; '>'# -> True; 
-   '?'# -> True; '\\'# -> True; '^'#  -> True; '|'# -> True; 
-   '-'# -> True; '~'#  -> True; '@'#  -> True; _    -> False }
-
---isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic
-
-
--- mod_arr is an array of bytes, indexed by characters,
--- containing 0 if the character isn't a valid character from a module name,
--- and 1 if it is.
-mod_arr :: ByteArray Int
-mod_arr =
- runST (
-  newCharArray (0,255) >>= \ barr ->
-  let
-   loop 256# = return ()
-   loop i# =
-    if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then
-       writeCharArray barr (I# i#) '\1'        >>
-       loop (i# +# 1#)
-    else
-       writeCharArray barr (I# i#) '\0'                >>
-       loop (i# +# 1#)
-  in
-  loop 0#                                      >>
-  unsafeFreezeByteArray barr)
-
-             
-is_mod_char (C# c#) = 
- let
-  ByteArray _ arr# = mod_arr
+           (stepOverLexeme buf')       
+
+------------------------------------------------------------------------------
+-- Character Classes
+
+is_ident, is_symbol, is_any, is_upper, is_digit :: Char# -> Bool
+
+{-# INLINE is_ctype #-}
+#if __GLASGOW_HASKELL__ >= 303
+is_ctype :: Word8 -> Char# -> Bool
+is_ctype mask = \c ->
+   (indexWord8OffAddr (``char_types'' :: Addr) (ord (C# c)) .&. mask) /= 0
+#else
+is_ctype :: Int -> Char# -> Bool
+is_ctype (I# mask) = \c ->
+    let (A# ctype) = ``char_types'' :: Addr
+       flag_word  = int2Word# (ord# (indexCharOffAddr# ctype (ord# c)))
+    in
+       (flag_word `and#` (int2Word# mask)) `neWord#` (int2Word# 0#)
+#endif
+
+is_ident  = is_ctype 1
+is_symbol = is_ctype 2
+is_any    = is_ctype 4
+is_space  = is_ctype 8
+is_upper  = is_ctype 16
+is_digit  = is_ctype 32
+
+-----------------------------------------------------------------------------
+-- identifiers, symbols etc.
+
+lex_id cont buf =
+ case expandWhile# is_ident buf of { buf1 -> 
+ case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
+ let new_buf = stepOverLexeme buf' 
+     lexeme  = lexemeToFastString buf'
   in
- case ord# (indexCharArray# arr# (ord# c#)) of
-  0# -> False
-  1# -> True
-
---isAlphanum c || c == '_' || c== '\'' --`elem` "_'"
-
-lex_id cont buf = 
--- _trace ("lex_id: "++[C# (currentChar# buf)]) $
- case expandWhile (is_mod_char) buf of
-   buf' ->
-    case currentChar# buf' of
-     '.'# -> munch buf' HiFile
-     '!'# -> munch buf' HiBootFile
-     _    -> lex_id2 cont Nothing buf'
+ case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
+       Just kwd_token -> --trace ("hkeywd: "++_UNPK_(lexeme)) $
+                         cont kwd_token new_buf;
+       Nothing        -> 
+ case lookupUFM ifaceKeywordsFM lexeme of {
+       Just kwd_token -> --trace ("ifacekeywd: "++_UNPK_(lexeme)) $
+                         cont kwd_token new_buf;
+       Nothing        -> --trace ("id: "++_UNPK_(lexeme)) $
+                         cont (mk_var_token lexeme) new_buf
+ }}}}
+
+lex_sym cont buf =
+ case expandWhile# is_symbol buf of
+   buf'
+     | is_comment lexeme -> lex_comment cont new_buf
+     | otherwise         ->
+          case lookupUFM haskellKeySymsFM lexeme of {
+               Just kwd_token -> --trace ("keysym: "++unpackFS lexeme) $
+                                 cont kwd_token new_buf ;
+               Nothing        -> --trace ("sym: "++unpackFS lexeme) $
+                                 cont (mk_var_token lexeme) new_buf
+           }
+       where lexeme = lexemeToFastString buf'
+             new_buf = stepOverLexeme buf'
+
+             is_comment fs 
+               | len < 2   = False
+               | otherwise = trundle 0
+                 where
+                  len = lengthFS fs
+                  
+                  trundle n | n == len  = True
+                            | otherwise = indexFS fs n == '-' && trundle (n+1)
+
+lex_con cont buf = 
+ case expandWhile# is_ident buf of       { buf1 ->
+ case expandWhile# (eqChar# '#'#) buf1 of { buf' ->
+ case currentChar# buf' of
+     '.'# -> lex_qid cont lexeme (stepOn new_buf) just_a_conid
+     _    -> just_a_conid
+ 
     where
-    munch buf' hif = 
-       if not (emptyLexeme buf') then
---        _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $ 
-          case lexemeToFastString buf' of
-            l@(FastString u# l# ba#) -> lex_id2 cont (Just (FastString u# l# ba#, hif)) 
-                                                (stepOn (stepOverLexeme buf'))
-       else
-          lex_id2 cont Nothing buf'            
-       
+    just_a_conid = --trace ("con: "++unpackFS lexeme) $
+                  cont (ITconid lexeme) new_buf
+    lexeme = lexemeToFastString buf'
+    new_buf = stepOverLexeme buf'
+ }}
  
--- Dealt with the Module.part
-lex_id2 cont module_dot buf =
--- _trace ("lex_id2: "++[C# (currentChar# buf)]) $
+lex_qid cont mod buf just_a_conid =
   case currentChar# buf of
-
    '['# ->      -- Special case for []
      case lookAhead# buf 1# of
-     ']'# -> end_lex_id cont module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#)
-     _    -> lex_id3 cont module_dot buf
+     ']'# -> cont (ITqconid (mod,SLIT("[]"))) (stepOnBy# buf 2#)
+     _    -> just_a_conid
  
-  '('# ->      -- Special case for (,,,)
+  '('# ->  -- Special case for (,,,)
+          -- This *is* necessary to deal with e.g. "instance C PrelBase.(,,)"
      case lookAhead# buf 1# of
-     ')'# -> end_lex_id cont module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#)
-     ','# -> lex_tuple cont module_dot (stepOnBy# buf 2#)
-     _    -> lex_id3 cont module_dot buf
-  ':'# -> lex_id3 cont module_dot (incLexeme buf)
-  '-'# ->
-     case module_dot of
-       Nothing  -> lex_id3 cont module_dot buf
-       Just ghc -> -- this should be "GHC" (current home of (->))
-         case lookAhead# buf 1# of
-          '>'# -> end_lex_id cont module_dot (ITconid SLIT("->")) 
-                       (stepOnBy# buf 2#)
-          _    -> lex_id3 cont module_dot buf
-  _    -> lex_id3 cont module_dot buf
-
-
-
--- Dealt with [], (), : special cases
-
-lex_id3 cont module_dot buf =
- case expandWhile (is_id_char) buf of
-  buf' ->
-    case module_dot of
-     Just _ ->
-       end_lex_id cont module_dot (mk_var_token lexeme) new_buf
-     Nothing ->
-       case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of
-         Just kwd_token -> cont kwd_token new_buf
-        Nothing        -> cont (mk_var_token lexeme) new_buf
-    where
-     lexeme  = lexemeToFastString buf'
-     new_buf = stepOverLexeme buf'
-
-
--- Dealt with [], (), : special cases
-mk_var_token pk_str =
+     '#'# -> case lookAhead# buf 2# of
+               ','# -> lex_ubx_tuple cont mod (stepOnBy# buf 3#) 
+                               just_a_conid
+               _    -> just_a_conid
+     ')'# -> cont (ITqconid (mod,SLIT("()"))) (stepOnBy# buf 2#)
+     ','# -> lex_tuple cont mod (stepOnBy# buf 2#) just_a_conid
+     _    -> just_a_conid
+
+  '-'# -> case lookAhead# buf 1# of
+            '>'# -> cont (ITqconid (mod,SLIT("->"))) (stepOnBy# buf 2#)
+            _    -> lex_id3 cont mod buf just_a_conid
+  _    -> lex_id3 cont mod buf just_a_conid
+
+lex_id3 cont mod buf just_a_conid
+  | is_symbol c =
+     case expandWhile# is_symbol buf of { buf' ->
       let
-      f = _HEAD_ pk_str
+      lexeme  = lexemeToFastString buf'
+      new_buf = stepOverLexeme buf'
       in
-     --
-     -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower,
-     -- remove the second half of disjunction when using a 1.3 prelude.
-     --
-     if      isUpper f    then ITconid pk_str
-     else if isLower f   then ITvarid pk_str
-     else if f == ':'    then ITconsym pk_str
-     else if isLowerISO f then ITvarid pk_str
-     else if isUpperISO f then ITconid pk_str
-     else ITvarsym pk_str
-
-end_lex_id cont Nothing token buf  = cont token buf
-end_lex_id cont (Just (m,hif)) token buf =
- case token of
-   ITconid n  -> cont (ITqconid  (m,n,hif))         buf
-   ITvarid n  -> cont (ITqvarid  (m,n,hif))         buf
-   ITconsym n -> cont (ITqconsym (m,n,hif))         buf
-       
-       -- Special case for ->
-       -- "->" by itself is a special token (ITrarrow),
-       -- but M.-> is a ITqconid
-   ITvarsym n |  n == SLIT("->")
-             -> cont (ITqconsym (m,n,hif))         buf
+     case lookupUFM haskellKeySymsFM lexeme of {
+       Just kwd_token -> just_a_conid; -- avoid M.:: etc.
+       Nothing        -> cont (mk_qvar_token mod lexeme) new_buf
+     }}
+
+  | otherwise   =
+     case expandWhile# is_ident buf of { buf1 ->
+     if emptyLexeme buf1 
+           then just_a_conid
+           else
+     case expandWhile# (eqChar# '#'#) buf1 of { buf' -> -- only if GHC extns on
+     let
+      lexeme  = lexemeToFastString buf'
+      new_buf = stepOverLexeme buf'
+     in
+     case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of {
+           Just kwd_token -> just_a_conid; -- avoid M.where etc.
+           Nothing        -> 
+     case lookupUFM ifaceKeywordsFM lexeme of {        -- only for iface files
+           Just kwd_token -> just_a_conid;
+           Nothing        -> cont (mk_qvar_token mod lexeme) new_buf
+     }}}}
+  where c = currentChar# buf
+
+mk_var_token pk_str
+  | is_upper f         = ITconid pk_str
+       -- _[A-Z] is treated as a constructor in interface files.
+  | f `eqChar#` '_'# && not (_NULL_ tl) 
+       && (case _HEAD_ tl of { C# g -> is_upper g }) = ITconid pk_str
+  | is_ident f         = ITvarid pk_str
+  | f `eqChar#` ':'#   = ITconsym pk_str
+  | otherwise          = ITvarsym pk_str
+  where
+      (C# f) = _HEAD_ pk_str
+      tl     = _TAIL_ pk_str
+
+mk_qvar_token m token =
+ case mk_var_token token of
+   ITconid n  -> ITqconid  (m,n)
+   ITvarid n  -> ITqvarid  (m,n)
+   ITconsym n -> ITqconsym (m,n)
+   ITvarsym n -> ITqvarsym (m,n)
+   _         -> ITunknown (show token)
+\end{code}
  
-   ITvarsym n -> cont (ITqvarsym (m,n,hif))         buf
+----------------------------------------------------------------------------
+Horrible stuff for dealing with M.(,,,)
  
--- ITbang can't happen here I think
---   ITbang     -> cont (ITqvarsym (m,SLIT("!"),hif)) buf
+\begin{code}
+lex_tuple cont mod buf back_off =
+  go 2 buf
+  where
+   go n buf =
+    case currentChar# buf of
+      ','# -> go (n+1) (stepOn buf)
+      ')'# -> cont (ITqconid (mod, snd (mkTupNameStr n))) (stepOn buf)
+      _    -> back_off
+
+lex_ubx_tuple cont mod buf back_off =
+  go 2 buf
+  where
+   go n buf =
+    case currentChar# buf of
+      ','# -> go (n+1) (stepOn buf)
+      '#'# -> case lookAhead# buf 1# of
+               ')'# -> cont (ITqconid (mod, snd (mkUbxTupNameStr n)))
+                                (stepOnBy# buf 2#)
+               _    -> back_off
+      _    -> back_off
+\end{code}
  
-   _         -> cont (ITunknown (show token))      buf
+-----------------------------------------------------------------------------
+Keyword Lists
  
-------------
+\begin{code}
  ifaceKeywordsFM :: UniqFM IfaceToken
  ifaceKeywordsFM = listToUFM $
        map (\ (x,y) -> (_PK_ x,y))
-       [("/\\_",               ITbiglam)
-       ,("@_",                 ITatsign)
-       ,("letrec_",            ITletrec)
-       ,("interface_",         ITinterface)
-       ,("usages_",            ITusages)
-       ,("versions_",          ITversions)
-       ,("exports_",           ITexports)
-       ,("instance_modules_",  ITinstance_modules)
-       ,("instances_",         ITinstances)
-       ,("fixities_",          ITfixities)
-       ,("declarations_",      ITdeclarations)
-       ,("pragmas_",           ITpragmas)
-       ,("forall_",            ITforall)
-       ,("u_",                 ITunfold False)
-       ,("U_",                 ITunfold True)
-       ,("A_",                 ITarity)
-       ,("P_",                 ITspecialise)
-       ,("coerce_",            ITcoerce)
-       ,("inline_",            ITinline)
-       ,("bot_",               ITbottom)
-       ,("integer_",           ITinteger_lit)
-       ,("rational_",          ITrational_lit)
-       ,("addr_",              ITaddr_lit)
-       ,("float_",             ITfloat_lit)
-       ,("string_",            ITstring_lit)
-       ,("litlit_",            ITlit_lit)
-       ,("ccall_",             ITccall (False, False))
-       ,("ccall_GC_",          ITccall (False, True))
-       ,("casm_",              ITccall (True,  False))
-       ,("casm_GC_",           ITccall (True,  True))
+     [  ("__interface",                ITinterface),
+       ("__export",            ITexport),
+       ("__depends",           ITdepends),
+       ("__forall",            ITforall),
+       ("__letrec",            ITletrec),
+       ("__coerce",            ITcoerce),
+       ("__inline_me",         ITinlineMe),
+       ("__inline_call",       ITinlineCall),
+       ("__DEFAULT",           ITdefaultbranch),
+       ("__bot",               ITbottom),
+       ("__integer",           ITinteger_lit),
+       ("__float",             ITfloat_lit),
+       ("__rational",          ITrational_lit),
+       ("__addr",              ITaddr_lit),
+       ("__litlit",            ITlit_lit),
+       ("__string",            ITstring_lit),
+       ("__R",                 ITrules),
+       ("__a",                 ITtypeapp),
+       ("__o",                 ITonce),
+       ("__m",                 ITmany),
+       ("__A",                 ITarity),
+       ("__P",                 ITspecialise),
+       ("__C",                 ITnocaf),
+        ("__u",                        ITunfold NoInlinePragInfo),
+       
+        ("__ccall",            ITccall (False, False, False)),
+        ("__ccall_GC",         ITccall (False, False, True)),
+        ("__dyn_ccall",                ITccall (True,  False, False)),
+        ("__dyn_ccall_GC",     ITccall (True,  False, True)),
+        ("__casm",             ITccall (False, True,  False)),
+        ("__dyn_casm",         ITccall (True,  True,  False)),
+        ("__casm_GC",          ITccall (False, True,  True)),
+        ("__dyn_casm_GC",      ITccall (True,  True,  True)),
+
+        ("/\\",                        ITbiglam)
         ]
  
  haskellKeywordsFM = listToUFM $
        map (\ (x,y) -> (_PK_ x,y))
-      [ ("data",               ITdata)
-       ,("type",               ITtype)
-       ,("newtype",            ITnewtype)
-       ,("class",              ITclass)
-       ,("where",              ITwhere)
-       ,("instance",           ITinstance)
-       ,("infixl",             ITinfixl)
-       ,("infixr",             ITinfixr)
-       ,("infix",              ITinfix)
-       ,("case",               ITcase)
-       ,("case#",              ITprim_case)
-       ,("of",                 ITof)
-       ,("in",                 ITin)
-       ,("let",                        ITlet)
-       ,("deriving",           ITderiving)
-
-       ,("->",                 ITrarrow)
+       [( "case",      ITcase ),     
+       ( "class",      ITclass ),    
+       ( "data",       ITdata ),     
+       ( "default",    ITdefault ),  
+       ( "deriving",   ITderiving ), 
+       ( "do",         ITdo ),       
+       ( "else",       ITelse ),     
+       ( "if",         ITif ),       
+       ( "import",     ITimport ),   
+       ( "in",         ITin ),       
+       ( "infix",      ITinfix ),    
+       ( "infixl",     ITinfixl ),   
+       ( "infixr",     ITinfixr ),   
+       ( "instance",   ITinstance ), 
+       ( "let",        ITlet ),      
+       ( "module",     ITmodule ),   
+       ( "newtype",    ITnewtype ),  
+       ( "of",         ITof ),       
+       ( "then",       ITthen ),     
+       ( "type",       ITtype ),     
+       ( "where",      ITwhere )
+
+--     These three aren't Haskell keywords at all
+--     and 'as' is often used as a variable name
+--     ( "as",         ITas ),       
+--     ( "qualified",  ITqualified ),
+--     ( "hiding",     IThiding )
+
+     ]
+
+haskellKeySymsFM = listToUFM $
+       map (\ (x,y) -> (_PK_ x,y))
+      [ ("..",                 ITdotdot)
+       ,("::",                 ITdcolon)
+       ,("=",                  ITequal)
         ,("\\",                 ITlam)
         ,("|",                  ITvbar)
-       ,("!",                  ITbang)
+       ,("<-",                 ITlarrow)
+       ,("->",                 ITrarrow)
+       ,("@",                  ITat)
+       ,("~",                  ITtilde)
         ,("=>",                 ITdarrow)
-       ,("=",                  ITequal)
-       ,("::",                 ITdcolon)
+       ,("-",                  ITminus)
+       ,("!",                  ITbang)
         ]
+\end{code}
  
+-----------------------------------------------------------------------------
+doDiscard rips along really fast, looking for a '#-}', 
+indicating the end of the pragma we're skipping
  
--- doDiscard rips along really fast, looking for a double semicolon, 
--- indicating the end of the pragma we're skipping
+\begin{code}
  doDiscard inStr buf =
--- _trace (show (C# (currentChar# buf))) $
   case currentChar# buf of
-   ';'# ->
-     if not inStr then
-       case lookAhead# buf 1# of
-        ';'# -> incLexeme (incLexeme buf)
-        _    -> doDiscard inStr (incLexeme buf)
-     else
-       doDiscard inStr (incLexeme buf)
+   '#'# | not inStr ->
+       case lookAhead# buf 1# of { '#'# -> 
+       case lookAhead# buf 2# of { '-'# ->
+       case lookAhead# buf 3# of { '}'# -> 
+          (lexemeToBuffer buf, stepOverLexeme (setCurrentPos# buf 4#));
+       _    -> doDiscard inStr (incLexeme buf) };
+        _    -> doDiscard inStr (incLexeme buf) };
+        _    -> doDiscard inStr (incLexeme buf) }
     '"'# ->
         let
          odd_slashes buf flg i# =
@@ -806,31 +725,12 @@ doDiscard inStr buf =
  
  \end{code}
  
-begin{code}
-my_span :: (a -> Bool) -> [a] -> ([a],[a])
-my_span p xs = go [] xs
-  where
-    go so_far (x:xs') | p x = go (x:so_far) xs'
-    go so_far xs            = (reverse so_far, xs)
-
-my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a])
-my_span' p xs = go [] 0 xs
-  where
-    go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs'
-    go so_far n xs            = (reverse so_far,n, xs)
-end{code}
-
-
-%************************************************************************
-%*                                                                     *
-\subsection{Other utility functions
-%*                                                                     *
-%************************************************************************
+-----------------------------------------------------------------------------
  
  \begin{code}
  type IfM a = StringBuffer      -- Input string
           -> SrcLoc
-         -> MaybeErr a ErrMsg
+         -> MaybeErr a {-error-}Message
  
  returnIf   :: a -> IfM a
  returnIf a s l = Succeeded a
@@ -845,7 +745,7 @@ getSrcLocIf :: IfM SrcLoc
  getSrcLocIf s l = Succeeded l
  
  happyError :: IfM a
-happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
+happyError s l = Failed (ifaceParseErr s l)
  
  
  {- 
@@ -865,7 +765,7 @@ happyError s l = Failed (ifaceParseErr l ([]::[IfaceToken]){-Todo-})
  -}
  checkVersion :: Maybe Integer -> IfM ()
  checkVersion mb@(Just v) s l
- | (v==0) || (v == PROJECTVERSION) || opt_NoHiCheck = Succeeded ()
+ | (v==0) || (v == fromInt opt_HiVersion) || opt_NoHiCheck = Succeeded ()
   | otherwise = Failed (ifaceVersionErr mb l ([]::[IfaceToken]){-Todo-})
  checkVersion mb@Nothing  s l 
   | "hi-boot" `isSuffixOf` (_UNPK_ (srcLocFile l)) = Succeeded ()
@@ -873,14 +773,17 @@ checkVersion mb@Nothing  s l
  
  -----------------------------------------------------------------
  
-ifaceParseErr l toks
+ifaceParseErr :: StringBuffer -> SrcLoc -> Message
+ifaceParseErr s l
    = hsep [ppr l, ptext SLIT("Interface-file parse error;"),
-          ptext SLIT("toks="), text (show (take 10 toks))]
+          ptext SLIT("current input ="), text first_bit]
+  where
+    first_bit = lexemeToString (stepOnBy# s 100#) 
  
  ifaceVersionErr hi_vers l toks
    = hsep [ppr l, ptext SLIT("Interface file version error;"),
-          ptext SLIT("Expected"), int PROJECTVERSION, 
-         ptext SLIT(" found "), pp_version]
+          ptext SLIT("Expected"), int opt_HiVersion, 
+         ptext SLIT("found "), pp_version]
      where
       pp_version =
        case hi_vers of