X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=ghc%2Fcompiler%2Freader%2FLex.lhs;h=32f20e9e1f5a207347d1ee52941f5eee6feebba2;hp=ec761e4659f25f0c3a2222b661dae876eecd371c;hb=1fb1ab5d53a09607e7f6d2450806760688396387;hpb=fa6fb09e2e4e6918eebc79ed187f32c88817c9db diff --git a/ghc/compiler/reader/Lex.lhs b/ghc/compiler/reader/Lex.lhs index ec761e4..32f20e9 100644 --- a/ghc/compiler/reader/Lex.lhs +++ b/ghc/compiler/reader/Lex.lhs @@ -10,27 +10,39 @@ module Lex ( isLexCon, isLexVar, isLexId, isLexSym, isLexConId, isLexConSym, isLexVarId, isLexVarSym, - mkTupNameStr, + mkTupNameStr, ifaceParseErr, -- Monad for parser - IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError + IfaceToken(..), lexIface, SYN_IE(IfM), thenIf, returnIf, happyError, + StringBuffer ) where IMPORT_1_3(Char(isDigit, isAlpha, isAlphanum, isUpper)) +IMPORT_DELOOPER(Ubiq) +IMPORT_DELOOPER(IdLoop) -- get the CostCentre type&constructors from here import CmdLineOpts ( opt_IgnoreIfacePragmas ) -import Demand ( Demand {- instance Read -} ) -import FiniteMap ( FiniteMap, listToFM, lookupFM ) +import Demand ( Demand(..) {- instance Read -} ) +import UniqFM ( UniqFM, listToUFM, lookupUFM) +--import FiniteMap ( FiniteMap, listToFM, lookupFM ) import Maybes ( Maybe(..), MaybeErr(..) ) import Pretty import CharSeq ( CSeq ) + + + import ErrUtils ( Error(..) ) import Outputable ( Outputable(..) ) import PprStyle ( PprStyle(..) ) import Util ( nOfThem, panic ) +import FastString +import StringBuffer + +import PreludeGlaST + \end{code} %************************************************************************ @@ -86,8 +98,10 @@ isLexVarSym cs ------------- isSymbolASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-" isSymbolISO c = ord c `elem` (0xd7 : 0xf7 : [0xa1 .. 0xbf]) -isUpperISO c = 0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c -isLowerISO c = 0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c +isUpperISO (C# c#) = c# `geChar#` '\xc0'# && c# `leChar#` '\xde'# && c# `neChar#` '\xd7'# +--0xc0 <= oc && oc <= 0xde && oc /= 0xd7 where oc = ord c +isLowerISO (C# c#) = c# `geChar#` '\xdf'# && c# `leChar#` '\xff'# && c# `neChar#` '\xf7'# +--0xdf <= oc && oc <= 0xff && oc /= 0xf7 where oc = ord c \end{code} @@ -114,6 +128,28 @@ mkTupNameStr n = _PK_ ("(" ++ nOfThem (n-1) ',' ++ ")") %* * %************************************************************************ +The token data type, fairly un-interesting except from two constructors, +@ITidinfo@ and @ITtype@, which are used to lazily lex id info (arity, +strictness, unfolding etc) and types for id decls. + +The Idea/Observation here is that the renamer needs to scan through +all of an interface file before it can continue. But only a fraction +of the information contained in the file turns out to be useful, so +delaying as much as possible of the scanning and parsing of an +interface file Makes Sense (Heap profiles of the compiler +show at a reduction in heap usage by at least a factor of two, +post-renamer). + +Hence, the interface file lexer spots when value declarations are +being scanned and return the @ITidinfo@ and @ITtype@ constructors +for the type and any other id info for that binding (unfolding, strictness +etc). These constructors are applied to the result of lexing these sub-chunks. + +The lexing of the type and id info is all done lazily, of course, so +the scanning (and subsequent parsing) will be done *only* on the ids the +renamer finds out that it is interested in. The rest will just be junked. 
+Laziness, you know it makes sense :-) + \begin{code} data IfaceToken = ITinterface -- keywords @@ -144,8 +180,6 @@ data IfaceToken | ITdotdot | ITequal | ITocurly - | ITdccurly - | ITdocurly | ITobrack | IToparen | ITrarrow @@ -162,17 +196,25 @@ data IfaceToken | ITqvarsym (FAST_STRING,FAST_STRING) | ITqconsym (FAST_STRING,FAST_STRING) + | ITidinfo [IfaceToken] -- lazily return the stream of tokens for + -- the info attached to an id. + | ITtysig [IfaceToken] -- lazily return the stream of tokens for + -- the info attached to an id. -- Stuff for reading unfoldings | ITarity | ITstrict | ITunfold | ITdemand [Demand] | ITbottom | ITlam | ITbiglam | ITcase | ITprim_case | ITlet | ITletrec | ITin | ITof | ITcoerce_in | ITcoerce_out | ITatsign | ITccall (Bool,Bool) -- (is_casm, may_gc) - + | ITscc CostCentre | ITchar Char | ITstring FAST_STRING | ITinteger Integer | ITdouble Double | ITinteger_lit | ITfloat_lit | ITrational_lit | ITaddr_lit | ITlit_lit | ITstring_lit + | ITunknown String -- Used when the lexer can't make sense of it deriving Text -- debugging + +instance Text CostCentre -- cheat! + \end{code} %************************************************************************ @@ -182,144 +224,487 @@ data IfaceToken %************************************************************************ \begin{code} -lexIface :: String -> [IfaceToken] - -lexIface input - = _scc_ "Lexer" - case input of - [] -> [] - - -- whitespace and comments - ' ' : cs -> lexIface cs - '\t' : cs -> lexIface cs - '\n' : cs -> lexIface cs - '-' : '-' : cs -> lex_comment cs +lexIface :: StringBuffer -> [IfaceToken] +lexIface buf = + _scc_ "Lexer" +-- if bufferExhausted buf then +-- [] +-- else +-- _trace ("Lexer: "++[C# (currentChar# buf)]) $ + case currentChar# buf of + -- whitespace and comments, ignore. + ' '# -> lexIface (stepOn buf) + '\t'# -> lexIface (stepOn buf) + '\n'# -> lexIface (stepOn buf) + +-- Numbers and comments + '-'# -> + case lookAhead# buf 1# of + '-'# -> lex_comment (stepOnBy# buf 2#) + c -> + if isDigit (C# c) + then lex_num (negate) (ord# c -# ord# '0'#) (incLexeme (incLexeme buf)) + else lex_id buf -- Leave out nested comments for now; danger of finding { and - juxtaposed by mistake? -- '{' : '-' : cs -> lex_nested_comment 1{-one seen-} cs - '(' : '.' : '.' 
: ')' : cs -> ITdotdot : lexIface cs - '{' : cs -> ITocurly : lexIface cs - '}' : cs -> ITccurly : lexIface cs - '(' : ',' : cs -> lex_tuple Nothing cs - '(' : ')' : cs -> ITconid SLIT("()") : lexIface cs - '(' : cs -> IToparen : lexIface cs - ')' : cs -> ITcparen : lexIface cs - '[' : ']' : cs -> ITconid SLIT("[]") : lexIface cs - '[' : cs -> ITobrack : lexIface cs - ']' : cs -> ITcbrack : lexIface cs - ',' : cs -> ITcomma : lexIface cs - ':' : ':' : cs -> ITdcolon : lexIface cs - ';' : cs -> ITsemi : lexIface cs - '\"' : cs -> case reads input of - [(str, rest)] -> ITstring (_PK_ (str::String)) : lexIface rest - '\'' : cs -> case reads input of - [(ch, rest)] -> ITchar ch : lexIface rest + '('# -> + case prefixMatch (stepOn buf) "..)" of + Just buf' -> ITdotdot : lexIface (stepOverLexeme buf') + Nothing -> + case lookAhead# buf 1# of + ','# -> lex_tuple Nothing (stepOnBy# buf 2#) + ')'# -> ITconid SLIT("()") : lexIface (stepOnBy# buf 2#) + _ -> IToparen : lexIface (stepOn buf) + + '{'# -> ITocurly : lexIface (stepOn buf) + '}'# -> ITccurly : lexIface (stepOn buf) + ')'# -> ITcparen : lexIface (stepOn buf) + '['# -> + case lookAhead# buf 1# of + ']'# -> ITconid SLIT("[]") : lexIface (stepOnBy# buf 2#) + _ -> ITobrack : lexIface (stepOn buf) + ']'# -> ITcbrack : lexIface (stepOn buf) + ','# -> ITcomma : lexIface (stepOn buf) + ':'# -> case lookAhead# buf 1# of + ':'# -> ITdcolon : lexIface (stepOnBy# buf 2#) + _ -> lex_id (incLexeme buf) + ';'# -> ITsemi : lexIface (stepOn buf) + '\"'# -> case untilEndOfString# (stepOn buf) of + buf' -> + -- the string literal does *not* include the dquotes + case lexemeToFastString buf' of + v -> ITstring v : lexIface (stepOn (stepOverLexeme buf')) + + '\''# -> -- + -- untilEndOfChar# extends the current lexeme until + -- it hits a non-escaped single quote. The lexeme of the + -- StringBuffer returned does *not* include the closing quote, + -- hence we augment the lexeme and make sure to add the + -- starting quote, before `read'ing the string. + -- + case untilEndOfChar# (stepOn buf) of + buf' -> case reads ('\'':lexemeToString (incLexeme buf')) of + [ (ch, rest)] -> ITchar ch : lexIface (stepOverLexeme (incLexeme buf')) -- ``thingy'' form for casm - '`' : '`' : cs -> lex_cstring "" cs - + '`'# -> + case lookAhead# buf 1# of + '`'# -> lex_cstring (stepOnBy# buf 2#) -- remove the `` and go. + _ -> lex_id (incLexeme buf) -- add ` to lexeme and assume + -- scanning an id of some sort. -- Keywords - '_' : 'S' : '_' : cs -> ITstrict : lex_demand cs - '_' : cs -> lex_keyword cs - --- Numbers - '-' : c : cs | isDigit c -> lex_num "-" (c:cs) - c : cs | isDigit c -> lex_num "" (c:cs) - - other -> lex_id input - where - lex_comment str - = case (span ((/=) '\n') str) of { (junk, rest) -> - lexIface rest } - - ------------------ - lex_demand (c:cs) | isSpace c = lex_demand cs - | otherwise = case readList (c:cs) of - ((demand,rest) : _) -> ITdemand demand : lexIface rest - - ----------- - lex_num minus str - = case (span isDigit str) of { (num, rest) -> - case rest of - '.' 
: str2 -> case (span isDigit str2) of { (num2,rest2) -> - ITdouble (read (minus ++ num ++ ('.':num2))) : lexIface rest2 - } - - other -> ITinteger (read (minus ++ num)) : lexIface rest - } - - ------------ - lex_keyword str - = case (span is_kwd_mod_char str) of { (kw, rest) -> - case (lookupFM ifaceKeywordsFM kw) of - Nothing -> panic ("lex_keyword:"++str) - - Just xx | startDiscard xx && - opt_IgnoreIfacePragmas -> lexIface (doDiscard rest) - | otherwise -> xx : lexIface rest - } - - is_kwd_mod_char c = isAlphanum c || c `elem` "_@/\\" - - ----------- - lex_cstring so_far ('\'' : '\'' : cs) = ITstring (_PK_ (reverse (so_far::String))) : lexIface cs - lex_cstring so_far (c : cs) = lex_cstring (c:so_far) cs + '_'# -> + case lookAhead# buf 1# of + 'S'# -> case lookAhead# buf 2# of + '_'# -> ITstrict : + lex_demand (stepOnUntil (not . isSpace) + (stepOnBy# buf 3#)) -- past _S_ + 's'# -> case prefixMatch (stepOnBy# buf 2#) "cc_" of + Just buf' -> lex_scc (stepOnUntil (not . isSpace) + (stepOverLexeme buf')) + Nothing -> lex_keyword (stepOnBy# buf 1#) -- drop the '_' and assume + -- it is a keyword. + _ -> lex_keyword (stepOn buf) + + '\NUL'# -> + if bufferExhausted (stepOn buf) then + [] + else + lex_id buf + c -> + if isDigit (C# c) then + lex_num (id) (ord# c -# ord# '0'#) (incLexeme buf) + else + lex_id buf +-- where +lex_comment buf = +-- _trace ("comment: "++[C# (currentChar# buf)]) $ + case untilChar# buf '\n'# of {buf' -> lexIface (stepOverLexeme buf')} + +------------------ +lex_demand buf = +-- _trace ("demand: "++[C# (currentChar# buf)]) $ + case read_em [] buf of { (ls,buf') -> ITdemand ls : lexIface (stepOverLexeme buf')} + where + -- code snatched from Demand.lhs + read_em acc buf = +-- _trace ("read_em: "++[C# (currentChar# buf)]) $ + case currentChar# buf of + 'L'# -> read_em (WwLazy False : acc) (stepOn buf) + 'A'# -> read_em (WwLazy True : acc) (stepOn buf) + 'S'# -> read_em (WwStrict : acc) (stepOn buf) + 'P'# -> read_em (WwPrim : acc) (stepOn buf) + 'E'# -> read_em (WwEnum : acc) (stepOn buf) + ')'# -> (reverse acc, stepOn buf) + 'U'# -> do_unpack True acc (stepOnBy# buf 2#) + 'u'# -> do_unpack False acc (stepOnBy# buf 2#) + _ -> (reverse acc, buf) + + do_unpack wrapper_unpacks acc buf + = case read_em [] buf of + (stuff, rest) -> read_em (WwUnpack wrapper_unpacks stuff : acc) rest + +------------------ +lex_scc buf = +-- _trace ("scc: "++[C# (currentChar# buf)]) $ + case currentChar# buf of + '"'# -> + -- YUCK^2 + case prefixMatch (stepOn buf) "NO_CC\"" of + Just buf' -> ITscc noCostCentre : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "CURRENT_CC\"" of + Just buf' -> ITscc useCurrentCostCentre : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "OVERHEAD\"" of + Just buf' -> ITscc overheadCostCentre : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "DONT_CARE\"" of + Just buf' -> ITscc dontCareCostCentre : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "SUBSUMED\"" of + Just buf' -> ITscc subsumedCosts : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "CAFs_in_...\"" of + Just buf' -> ITscc preludeCafsCostCentre : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "CC_CAFs_in_..." 
of + Just buf' -> + case untilChar# (stepOverLexeme buf') '\"'# of + buf'' -> ITscc (mkAllCafsCC ({-module-}lexemeToFastString buf'') _NIL_): + lexIface (stepOverLexeme buf'') + Nothing -> + case prefixMatch (stepOn buf) "DICTs_in_...\"" of + Just buf' -> ITscc (preludeDictsCostCentre True) : lexIface (stepOverLexeme buf') + Nothing -> + case prefixMatch (stepOn buf) "CC_DICTs_in_..." of + Just buf' -> + case untilChar# (stepOverLexeme buf') '\"'# of + buf'' -> ITscc (mkAllDictsCC (lexemeToFastString buf'') _NIL_ True): + lexIface (stepOverLexeme buf'') + Nothing -> + case prefixMatch (stepOn buf) "CAF:" of + Just buf' -> + case untilChar# (stepOverLexeme buf') '\"'# of + buf'' -> ITscc (cafifyCC (mkUserCC (lexemeToFastString buf'') _NIL_ _NIL_)): + lexIface (stepOverLexeme buf'') + Nothing -> + case untilChar# (stepOn buf) '\"'# of + buf' -> ITscc (mkUserCC (lexemeToFastString buf') _NIL_ _NIL_): + lexIface (stepOverLexeme buf') + c -> ITunknown [C# c] : lexIface (stepOn buf) + + +----------- +lex_num :: (Int -> Int) -> Int# -> StringBuffer -> [IfaceToken] +lex_num minus acc# buf = +-- _trace ("lex_num: "++[C# (currentChar# buf)]) $ + case scanNumLit (I# acc#) buf of + (acc',buf') -> + case currentChar# buf' of + '.'# -> + -- this case is not optimised at all, as the + -- presence of floating point numbers in interface + -- files is not that common. (ToDo) + case expandWhile (isDigit) (incLexeme buf') of + buf'' -> -- points to first non digit char + case reads (lexemeToString buf'') of + [(v,_)] -> ITdouble v : lexIface (stepOverLexeme buf'') + _ -> ITinteger (fromInt (minus acc')) : lexIface (stepOverLexeme buf') + +-- case reads (lexemeToString buf') of +-- [(i,_)] -> ITinteger i : lexIface (stepOverLexeme buf') + +------------ +lex_keyword buf = +-- _trace ("lex_keyword: "++[C# (currentChar# buf)]) $ + case currentChar# buf of + ':'# -> case lookAhead# buf 1# of + '_'# -> -- a binding, type (and other id-info) follows, + -- to make the parser ever so slightly, we push + -- + lex_decl (stepOnBy# buf 2#) + v# -> ITunknown (['_',':',C# v#]) : lexIface (stepOnBy# buf 2#) + _ -> + case expandWhile (is_kwd_char) buf of + buf' -> + let kw = lexemeToFastString buf' in +-- _trace ("kw: "++lexemeToString buf') $ + case lookupUFM ifaceKeywordsFM kw of + Nothing -> ITunknown (_UNPK_ kw) : -- (minor) sigh + lexIface (stepOverLexeme buf') + Just xx -> xx : lexIface (stepOverLexeme buf') + +lex_decl buf = + case expandUntilMatch buf ";;" of + buf' -> +-- _trace (show (lexemeToString buf')) $ + case currentChar# buf' of + '\n'# -> -- newline, no id info. + ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) : + lexIface (stepOverLexeme buf') + '\r'# -> -- just to be sure for those Win* boxes.. + ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) : + lexIface (stepOverLexeme buf') + '\NUL'# -> + ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))) : + lexIface (stepOverLexeme buf') + c -> -- run all over the id info + case expandUntilMatch (stepOverLexeme buf') ";;" of + buf'' -> + --_trace ((C# c):show (lexemeToString (decLexeme buf'))) $ + --_trace (show (lexemeToString (decLexeme buf''))) $ + ITtysig (lexIface (lexemeToBuffer (decLexeme buf'))): + let ls = lexIface (stepOverLexeme buf'') in + if opt_IgnoreIfacePragmas then + ls + else + let is = lexIface (lexemeToBuffer (decLexeme buf'')) in + --_trace (show is) $ + ITidinfo is : ls + +-- ToDo: hammer! 
+is_kwd_char c@(C# c#) = + isAlphanum c || -- OLD: c `elem` "_@/\\" + (case c# of + '_'# -> True + '@'# -> True + '/'# -> True + '\\'# -> True + _ -> False) + + + +----------- +lex_cstring buf = +-- _trace ("lex_cstring: "++[C# (currentChar# buf)]) $ + case expandUntilMatch buf "\'\'" of + buf' -> ITstring (lexemeToFastString (setCurrentPos# buf' (negateInt# 2#))) : + lexIface (stepOverLexeme buf') - - ----------- - lex_tuple module_dot orig_cs = go 2 orig_cs - where - go n (',':cs) = go (n+1) cs - go n (')':cs) = end_lex_id module_dot (ITconid (mkTupNameStr n)) cs - go n other = panic ("lex_tuple" ++ orig_cs) - - -- Similarly ' itself is ok inside an identifier, but not at the start - is_id_char c = isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic - - lex_id cs = go [] cs - where - go xs (f :cs) | is_kwd_mod_char f = go (f : xs) cs - go xs ('.':cs) | not (null xs) = lex_id2 (Just (_PK_ (reverse xs))) [] cs - go xs cs = lex_id2 Nothing xs cs - - -- Dealt with the Module.part - lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs - lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs - lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs - lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs - lex_id2 module_dot xs cs = lex_id3 module_dot xs cs - - -- Dealt with [], (), : special cases - lex_id3 module_dot xs (f:cs) | is_id_char f = lex_id3 module_dot (f : xs) cs - - lex_id3 Nothing xs rest = case lookupFM haskellKeywordsFM rxs of - Just kwd_token -> kwd_token : lexIface rest - other -> (mk_var_token rxs) : lexIface rest - where - rxs = reverse xs - - lex_id3 (Just m) xs rest = end_lex_id (Just m) (mk_var_token (reverse xs)) rest - +----------- +lex_tuple module_dot buf = +-- _trace ("lex_tuple: "++[C# (currentChar# buf)]) $ + go 2 buf + where + go n buf = + case currentChar# buf of + ','# -> go (n+1) (stepOn buf) + ')'# -> end_lex_id module_dot (ITconid (mkTupNameStr n)) (stepOn buf) + _ -> ITunknown ("tuple " ++ show n) : lexIface buf + +-- Similarly ' itself is ok inside an identifier, but not at the start + +id_arr :: _ByteArray Int +id_arr = + unsafePerformPrimIO ( + newCharArray (0,255) `thenPrimIO` \ barr -> + let + loop 256# = returnPrimIO () + loop i# = + if isAlphanum (C# (chr# i#)) || is_sym (chr# i#) then + writeCharArray barr (I# i#) '\1' `seqPrimIO` + loop (i# +# 1#) + else + writeCharArray barr (I# i#) '\0' `seqPrimIO` + loop (i# +# 1#) + in + loop 0# `seqPrimIO` + unsafeFreezeByteArray barr) + +is_id_char (C# c#) = + let + _ByteArray _ arr# = id_arr + in + case ord# (indexCharArray# arr# (ord# c#)) of + 0# -> False + 1# -> True + +--is_id_char c@(C# c#) = isAlphanum c || is_sym c# + +is_sym c#= + case c# of { + ':'# -> True; '_'# -> True; '\''# -> True; '!'# -> True; + '#'# -> True; '$'# -> True; ':'# -> True; '%'# -> True; + '&'# -> True; '*'# -> True; '+'# -> True; '.'# -> True; + '/'# -> True; '<'# -> True; '='# -> True; '>'# -> True; + '?'# -> True; '\\'# -> True; '^'# -> True; '|'# -> True; + '-'# -> True; '~'# -> True; _ -> False } + +--isAlphanum c || c `elem` ":_'!#$%&*+./<=>?@\\^|-~" -- ToDo: add ISOgraphic + + +mod_arr :: _ByteArray Int +mod_arr = + unsafePerformPrimIO ( + newCharArray (0,255) `thenPrimIO` \ barr -> + let + loop 256# = returnPrimIO () + loop i# = + if isAlphanum (C# (chr# i#)) || i# ==# (ord# '_'#) || i# ==# (ord# '\''#) then + writeCharArray barr (I# i#) '\1' `seqPrimIO` + loop (i# +# 1#) + else + writeCharArray barr (I# i#) '\0' 
`seqPrimIO` + loop (i# +# 1#) + in + loop 0# `seqPrimIO` + unsafeFreezeByteArray barr) + + +is_mod_char (C# c#) = + let + _ByteArray _ arr# = mod_arr + in + case ord# (indexCharArray# arr# (ord# c#)) of + 0# -> False + 1# -> True + +--isAlphanum c || c == '_' || c== '\'' --`elem` "_'" + +{- +lex_id cs = + case _scc_ "lex_id.span" my_span' (is_mod_char) cs of + (xs, len, cs') -> + case cs' of + [] -> case xs of + [] -> lex_id2 Nothing cs + _ -> lex_id3 Nothing len xs cs + + '.':cs'' -> + case xs of + [] -> lex_id2 Nothing cs + _ -> + let + pk_str = _PK_ (xs::String) + len = lengthPS pk_str + in + if len==len+1 then + error "Well, I never!" + else + lex_id2 (Just pk_str) cs'' + _ -> case xs of + [] -> lex_id2 Nothing cs + _ -> lex_id3 Nothing len xs cs' + +-} + +lex_id buf = +-- _trace ("lex_id: "++[C# (currentChar# buf)]) $ + case expandWhile (is_mod_char) buf of + buf' -> + case currentChar# buf' of + '.'# -> + if not (emptyLexeme buf') then +-- _trace ("lex_id: "++(C# (currentChar# (stepOverLexeme buf'))):show (lexemeToFastString buf')) $ + case lexemeToFastString buf' of + l@(FastString u# l# ba#) -> lex_id2 (Just (FastString u# l# ba#)) + (stepOn (stepOverLexeme buf')) + else + lex_id2 Nothing buf' + _ -> lex_id2 Nothing buf' + +-- Dealt with the Module.part +lex_id2 module_dot buf = +-- _trace ("lex_id2: "++[C# (currentChar# buf)]) $ + case currentChar# buf of + '['# -> + case lookAhead# buf 1# of + ']'# -> end_lex_id module_dot (ITconid SLIT("[]")) (stepOnBy# buf 2#) + _ -> lex_id3 module_dot buf + '('# -> + case lookAhead# buf 1# of + ')'# -> end_lex_id module_dot (ITconid SLIT("()")) (stepOnBy# buf 2#) + ','# -> lex_tuple module_dot (stepOnBy# buf 2#) + _ -> lex_id3 module_dot buf + ':'# -> lex_id3 module_dot (incLexeme buf) + _ -> lex_id3 module_dot buf + + + +-- Dealt with [], (), : special cases + +lex_id3 module_dot buf = +-- _trace ("lex_id3: "++[C# (currentChar# buf)]) $ + case expandWhile (is_id_char) buf of + buf' -> + case module_dot of + Just _ -> + end_lex_id module_dot (mk_var_token lexeme) (stepOverLexeme buf') + Nothing -> + case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM lexeme of + Just kwd_token -> kwd_token : lexIface new_buf + Nothing -> mk_var_token lexeme : lexIface new_buf + where + lexeme = lexemeToFastString buf' + new_buf = stepOverLexeme buf' + + +{- OLD: +lex_id2 module_dot [] ('[': ']': cs) = end_lex_id module_dot (ITconid SLIT("[]")) cs +lex_id2 module_dot [] ('(': ')': cs) = end_lex_id module_dot (ITconid SLIT("()")) cs +lex_id2 module_dot [] ('(': ',': cs) = lex_tuple module_dot cs +lex_id2 module_dot [] (':' : cs) = lex_id3 module_dot [':'] cs +lex_id2 module_dot xs cs = lex_id3 module_dot xs cs +-} + + +-- Dealt with [], (), : special cases + +{- +lex_id3 module_dot len_xs xs cs = + case my_span' (is_id_char) cs of + (xs1,len_xs1,rest) -> + case module_dot of + Just m -> end_lex_id (Just m) (mk_var_token rxs) rest --OLD:_PK_ (reverse xs))) rest + Nothing -> + case _scc_ "Lex.haskellKeyword" lookupUFM haskellKeywordsFM rxs of + Just kwd_token -> kwd_token : lexIface rest + other -> token : lexIface cs end_lex_id Nothing (mk_var_token rxs) rest + where + rxs = packNChars (len_xs+len_xs1) (xs++xs1) -- OLD: _PK_ (reverse xs) +-} +mk_var_token pk_str = + let + f = _HEAD_ pk_str + in + -- + -- These tests assume a non- ISO-8859-1 impl of isUpper&isLower, + -- remove the second half of disjunction when using a 1.3 prelude. 
+ -- + if isUpper f then ITconid pk_str + else if isLower f then ITvarid pk_str + else if f == ':' then ITconsym pk_str + else if isLowerISO f then ITvarid pk_str + else if isUpperISO f then ITconid pk_str + else ITvarsym pk_str + +{- mk_var_token xs@(f:_) | isUpper f || isUpperISO f = ITconid n | f == ':' = ITconsym n | isAlpha f = ITvarid n | otherwise = ITvarsym n where n = _PK_ xs +-} - end_lex_id (Just m) (ITconid n) cs = ITqconid (m,n) : lexIface cs - end_lex_id (Just m) (ITvarid n) cs = ITqvarid (m,n) : lexIface cs - end_lex_id (Just m) (ITconsym n) cs = ITqconsym (m,n): lexIface cs - end_lex_id (Just m) (ITvarsym n) cs = ITqvarsym (m,n): lexIface cs - end_lex_id (Just m) ITbang cs = ITqvarsym (m,SLIT("!")) : lexIface cs - end_lex_id (Just m) token cs = panic ("end_lex_id:" ++ show token) - end_lex_id Nothing token cs = token : lexIface cs - - ------------ - ifaceKeywordsFM :: FiniteMap String IfaceToken - ifaceKeywordsFM = listToFM [ - ("/\\_", ITbiglam) +end_lex_id Nothing token buf = token : lexIface buf +end_lex_id (Just m) token buf = + case token of + ITconid n -> ITqconid (m,n) : lexIface buf + ITvarid n -> ITqvarid (m,n) : lexIface buf + ITconsym n -> ITqconsym (m,n) : lexIface buf + ITvarsym n -> ITqvarsym (m,n) : lexIface buf + ITbang -> ITqvarsym (m,SLIT("!")) : lexIface buf + _ -> ITunknown (show token) : lexIface buf + +------------ +ifaceKeywordsFM :: UniqFM IfaceToken +ifaceKeywordsFM = listToUFM $ + map (\ (x,y) -> (_PK_ x,y)) + [("/\\_", ITbiglam) ,("@_", ITatsign) ,("interface_", ITinterface) ,("usages_", ITusages) @@ -348,8 +733,9 @@ lexIface input ,("casm_GC_", ITccall (True, True)) ] - haskellKeywordsFM = listToFM [ - ("data", ITdata) +haskellKeywordsFM = listToUFM $ + map (\ (x,y) -> (_PK_ x,y)) + [ ("data", ITdata) ,("type", ITtype) ,("newtype", ITnewtype) ,("class", ITclass) @@ -374,18 +760,33 @@ lexIface input ,("=", ITequal) ] -startDiscard ITarity = True -startDiscard ITunfold = True -startDiscard ITstrict = True -startDiscard other = False -- doDiscard rips along really fast looking for a double semicolon, -- indicating the end of the pragma we're skipping -doDiscard rest@(';' : ';' : _) = rest -doDiscard ( _ : rest) = doDiscard rest -doDiscard [] = [] +doDiscard buf = + case currentChar# buf of + ';'# -> + case lookAhead# buf 1# of + ';'# -> stepOnBy# buf 2# + _ -> doDiscard (stepOn buf) + _ -> doDiscard (stepOn buf) + \end{code} +begin{code} +my_span :: (a -> Bool) -> [a] -> ([a],[a]) +my_span p xs = go [] xs + where + go so_far (x:xs') | p x = go (x:so_far) xs' + go so_far xs = (reverse so_far, xs) + +my_span' :: (a -> Bool) -> [a] -> ([a],Int,[a]) +my_span' p xs = go [] 0 xs + where + go so_far n (x:xs') | p x = go (x:so_far) (n+1) xs' + go so_far n xs = (reverse so_far,n, xs) +end{code} + %************************************************************************ %* * @@ -410,5 +811,5 @@ happyError ln toks = Failed (ifaceParseErr ln toks) ----------------------------------------------------------------- ifaceParseErr ln toks sty - = ppCat [ppPStr SLIT("Interface-file parse error: line"), ppInt ln, ppStr "toks=", ppStr (show (take 10 toks))] + = ppCat [ppPStr SLIT("Interface-file parse error: line"), ppInt ln, ppPStr SLIT("toks="), ppStr (show (take 10 toks))] \end{code}
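
A minimal standalone sketch of the lazy declaration-lexing idea described in the comment added by this patch (the @ITtysig@/@ITidinfo@ constructors). It is not taken from the patch: the names @Token@, @TTySig@, @TIdInfo@, @lexAll@ and @splitAtTerminator@ are hypothetical, plain @String@ stands in for @StringBuffer@, and the @_:_ ... ;; ... ;;@ shape only loosely mirrors what @lex_decl@ scans for with @expandUntilMatch buf ";;"@.

\begin{code}
-- Hypothetical, simplified model: String instead of StringBuffer.
module LazyLexSketch where

import Data.Char (isSpace)
import Data.List (isPrefixOf)

data Token
  = TTySig  [Token]   -- lazily lexed type-signature chunk
  | TIdInfo [Token]   -- lazily lexed id-info (pragma) chunk
  | TWord   String    -- everything else, grossly simplified
  deriving Show

-- Take input up to (and consuming) the first ";;" terminator,
-- this sketch's stand-in for expandUntilMatch buf ";;".
splitAtTerminator :: String -> (String, String)
splitAtTerminator = go []
  where
    go acc (';':';':rest) = (reverse acc, rest)
    go acc (c:cs)         = go (c:acc) cs
    go acc []             = (reverse acc, [])

-- A declaration of the form "_:_ <type sig> ;; <id info> ;;" becomes a
-- TTySig token and a TIdInfo token whose payloads are thunks: the two
-- chunks are only lexed if somebody looks inside them.
lexAll :: String -> [Token]
lexAll [] = []
lexAll s@(c:cs)
  | isSpace c            = lexAll cs
  | "_:_" `isPrefixOf` s =
      let (sig,  rest1) = splitAtTerminator (drop 3 s)
          (info, rest2) = splitAtTerminator rest1
      in  TTySig (lexAll sig) : TIdInfo (lexAll info) : lexAll rest2
  | otherwise            =
      let (w, rest) = break isSpace s
      in  TWord w : lexAll rest
\end{code}

Forcing only the spine of the result (say, @length (lexAll input)@) never lexes the payload of a @TTySig@ or @TIdInfo@ token; that work happens only when a consumer pattern-matches on the payload, which is how the uninteresting id info gets junked cheaply.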
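
The demand-string reader in @lex_demand@ can be modelled in isolation the same way. A sketch, assuming the character codes shown in @read_em@ above ('L', 'A', 'S', 'P', 'E', a closing ')', and 'U'(...)/'u'(...) for unpacks); the @Demand@ constructors follow the patch, but @readDemands@ over plain @String@ is a hypothetical stand-in for the @StringBuffer@ version:

\begin{code}
-- Hypothetical stand-in for the StringBuffer-based reader; Demand here
-- is a cut-down copy of the constructors this patch reads.
module DemandSketch where

data Demand
  = WwLazy Bool            -- 'L' (lazy) / 'A' (absent)
  | WwStrict               -- 'S'
  | WwPrim                 -- 'P'
  | WwEnum                 -- 'E'
  | WwUnpack Bool [Demand] -- 'U'(..) wrapper unpacks / 'u'(..) it does not
  deriving Show

-- Read demand codes until a ')' or an unrecognised character,
-- returning the demands read and the remaining input.
readDemands :: String -> ([Demand], String)
readDemands = go []
  where
    go acc ('L':cs)     = go (WwLazy False : acc) cs
    go acc ('A':cs)     = go (WwLazy True  : acc) cs
    go acc ('S':cs)     = go (WwStrict     : acc) cs
    go acc ('P':cs)     = go (WwPrim       : acc) cs
    go acc ('E':cs)     = go (WwEnum       : acc) cs
    go acc (')':cs)     = (reverse acc, cs)
    go acc ('U':'(':cs) = unpack True  acc cs
    go acc ('u':'(':cs) = unpack False acc cs
    go acc cs           = (reverse acc, cs)

    -- A 'U'/'u' group recursively reads demands up to its closing ')'.
    unpack wrapper acc cs =
      case readDemands cs of
        (inner, rest) -> go (WwUnpack wrapper inner : acc) rest
\end{code}

For example, @readDemands "SLU(SS)E"@ yields @([WwStrict, WwLazy False, WwUnpack True [WwStrict,WwStrict], WwEnum], "")@.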