X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FcoreSyn%2FCoreArity.lhs;h=0fa1c381e9efe45e2c42acfb407756faa9c3d8b3;hp=d57c895d151befba393ef365c83a56955bc8e087;hb=2c8aabcad1d2f2c469cb8a10afa7b66beeaedd45;hpb=ea84860ef56d72da1f4c63d661b7ad333109237d

diff --git a/compiler/coreSyn/CoreArity.lhs b/compiler/coreSyn/CoreArity.lhs
index d57c895..0fa1c38 100644
--- a/compiler/coreSyn/CoreArity.lhs
+++ b/compiler/coreSyn/CoreArity.lhs
@@ -8,8 +8,8 @@
 \begin{code}
 -- | Arit and eta expansion
 module CoreArity (
-	manifestArity, exprArity, 
-	exprEtaExpandArity, etaExpand
+	manifestArity, exprArity, exprBotStrictness_maybe,
+	exprEtaExpandArity, CheapFun, etaExpand
     ) where
 
 #include "HsVersions.h"
@@ -17,26 +17,19 @@ module CoreArity (
 import CoreSyn
 import CoreFVs
 import CoreUtils
-import NewDemand
-import TyCon	( isRecursiveTyCon )
-import qualified CoreSubst
-import CoreSubst ( Subst, substBndr, substBndrs, substExpr
-       		 , mkEmptySubst, isEmptySubst )
+import CoreSubst
+import Demand
 import Var
 import VarEnv
 import Id
 import Type
-import TcType	( isDictLikeTy )
+import TyCon	( isRecursiveTyCon, isClassTyCon )
 import Coercion
 import BasicTypes
 import Unique
 import Outputable
-import DynFlags
-import StaticFlags	( opt_NoStateHack )
 import FastString
-import Maybes
-
-import GHC.Exts		-- For `xori` 
+import Pair
 \end{code}
 
 %************************************************************************
@@ -69,13 +62,88 @@ And in any case it seems more robust to have exprArity be a bit more intelligent
 But note that 	(\x y z -> f x y z)
 should have arity 3, regardless of f's arity.
 
+\begin{code}
+manifestArity :: CoreExpr -> Arity
+-- ^ manifestArity sees how many leading value lambdas there are
+manifestArity (Lam v e) | isId v    	= 1 + manifestArity e
+			| otherwise 	= manifestArity e
+manifestArity (Note n e) | notSccNote n	= manifestArity e
+manifestArity (Cast e _)            	= manifestArity e
+manifestArity _                     	= 0
+
+---------------
+exprArity :: CoreExpr -> Arity
+-- ^ An approximate, fast, version of 'exprEtaExpandArity'
+exprArity e = go e
+  where
+    go (Var v) 	       	           = idArity v
+    go (Lam x e) | isId x    	   = go e + 1
+    		 | otherwise 	   = go e
+    go (Note n e) | notSccNote n   = go e
+    go (Cast e co)                 = go e `min` length (typeArity (pSnd (coercionKind co)))
+                                        -- Note [exprArity invariant]
+    go (App e (Type _))            = go e
+    go (App f a) | exprIsTrivial a = (go f - 1) `max` 0
+        -- See Note [exprArity for applications]
+	-- NB: coercions count as a value argument
+
+    go _		       	   = 0
+
+
+---------------
+typeArity :: Type -> [OneShot]
+-- How many value arrows are visible in the type?
+-- We look through foralls, and newtypes
+-- See Note [exprArity invariant]
+typeArity ty 
+  | Just (_, ty')  <- splitForAllTy_maybe ty 
+  = typeArity ty'
+
+  | Just (arg,res) <- splitFunTy_maybe ty    
+  = isStateHackType arg : typeArity res
+
+  | Just (tc,tys) <- splitTyConApp_maybe ty 
+  , Just (ty', _) <- instNewTyCon_maybe tc tys
+  , not (isRecursiveTyCon tc)
+  , not (isClassTyCon tc)	-- Do not eta-expand through newtype classes
+    		      		-- See Note [Newtype classes and eta expansion]
+  = typeArity ty'
+  	-- Important to look through non-recursive newtypes, so that, eg 
+  	--	(f x)   where f has arity 2, f :: Int -> IO ()
+  	-- Here we want to get arity 1 for the result!
+
+  | otherwise
+  = []
+
+---------------
+exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, StrictSig)
+-- A cheap and cheerful function that identifies bottoming functions
+-- and gives them a suitable strictness signatures.  It's used during
+-- float-out
+exprBotStrictness_maybe e
+  = case getBotArity (arityType is_cheap e) of
+	Nothing -> Nothing
+	Just ar -> Just (ar, mkStrictSig (mkTopDmdType (replicate ar topDmd) BotRes))
+  where
+    is_cheap _ _ = False  -- Irrelevant for this purpose
+\end{code}
+
 Note [exprArity invariant]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 exprArity has the following invariant:
-	(exprArity e) = n, then manifestArity (etaExpand e n) = n
 
-That is, if exprArity says "the arity is n" then etaExpand really can get
-"n" manifest lambdas to the top.
+  * If typeArity (exprType e) = n,
+    then manifestArity (etaExpand e n) = n
+ 
+    That is, etaExpand can always expand as much as typeArity says
+    So the case analysis in etaExpand and in typeArity must match
+ 
+  * exprArity e <= typeArity (exprType e)      
+
+  * Hence if (exprArity e) = n, then manifestArity (etaExpand e n) = n
+
+    That is, if exprArity says "the arity is n" then etaExpand really 
+    can get "n" manifest lambdas to the top.
 
 Why is this important?  Because 
   - In TidyPgm we use exprArity to fix the *final arity* of 
@@ -88,61 +156,72 @@ An alternative would be to do the eta-expansion in TidyPgm, at least
 for top-level bindings, in which case we would not need the trim_arity
 in exprArity.  That is a less local change, so I'm going to leave it for today!
 
+Note [Newtype classes and eta expansion]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We have to be careful when eta-expanding through newtypes.  In general
+it's a good idea, but annoyingly it interacts badly with the class-op 
+rule mechanism.  Consider
+ 
+   class C a where { op :: a -> a }
+   instance C b => C [b] where
+     op x = ...
 
-\begin{code}
-manifestArity :: CoreExpr -> Arity
--- ^ manifestArity sees how many leading value lambdas there are
-manifestArity (Lam v e) | isId v    = 1 + manifestArity e
-			| otherwise = manifestArity e
-manifestArity (Note _ e)	    = manifestArity e
-manifestArity (Cast e _)            = manifestArity e
-manifestArity _                     = 0
+These translate to
 
-exprArity :: CoreExpr -> Arity
--- ^ An approximate, fast, version of 'exprEtaExpandArity'
-exprArity e = go e
-  where
-    go (Var v) 	       	         = idArity v
-    go (Lam x e) | isId x    	 = go e + 1
-    		 | otherwise 	 = go e
-    go (Note _ e)                = go e
-    go (Cast e co)               = trim_arity (go e) 0 (snd (coercionKind co))
-    go (App e (Type _))          = go e
-    go (App f a) | exprIsCheap a = (go f - 1) `max` 0
-    	-- NB: exprIsCheap a!  
-    	--	f (fac x) does not have arity 2, 
-    	-- 	even if f has arity 3!
-    	-- NB: `max 0`!  (\x y -> f x) has arity 2, even if f is
-    	--		 unknown, hence arity 0
-    go _		       	   = 0
+   co :: forall a. (a->a) ~ C a
+
+   $copList :: C b -> [b] -> [b]
+   $copList d x = ...
+
+   $dfList :: C b -> C [b]
+   {-# DFunUnfolding = [$copList] #-}
+   $dfList d = $copList d |> co@[b]
+
+Now suppose we have:
+
+   dCInt :: C Int    
+
+   blah :: [Int] -> [Int]
+   blah = op ($dfList dCInt)
+
+Now we want the built-in op/$dfList rule will fire to give
+   blah = $copList dCInt
+
+But with eta-expansion 'blah' might (and in Trac #3772, which is
+slightly more complicated, does) turn into
+
+   blah = op (\eta. ($dfList dCInt |> sym co) eta)
+
+and now it is *much* harder for the op/$dfList rule to fire, becuase
+exprIsConApp_maybe won't hold of the argument to op.  I considered
+trying to *make* it hold, but it's tricky and I gave up.
+
+The test simplCore/should_compile/T3722 is an excellent example.
+
+
+Note [exprArity for applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we come to an application we check that the arg is trivial.
+   eg  f (fac x) does not have arity 2, 
+                 even if f has arity 3!
+
+* We require that is trivial rather merely cheap.  Suppose f has arity 2.
+  Then    f (Just y)
+  has arity 0, because if we gave it arity 1 and then inlined f we'd get
+          let v = Just y in \w. <f-body>
+  which has arity 0.  And we try to maintain the invariant that we don't
+  have arity decreases.
+
+*  The `max 0` is important!  (\x y -> f x) has arity 2, even if f is
+   unknown, hence arity 0
 
-	-- Note [exprArity invariant]
-    trim_arity n a ty
-	| n==a					      = a
-	| Just (_, ty') <- splitForAllTy_maybe ty     = trim_arity n a     ty'
-	| Just (_, ty') <- splitFunTy_maybe ty        = trim_arity n (a+1) ty'
-	| Just (ty',_)  <- splitNewTypeRepCo_maybe ty = trim_arity n a     ty'
-	| otherwise				      = a
-\end{code}
 
 %************************************************************************
 %*									*
-	   Eta expansion
+	   Computing the "arity" of an expression
 %*									*
 %************************************************************************
 
-\begin{code}
--- ^ The Arity returned is the number of value args the 
--- expression can be applied to without doing much work
-exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity
--- exprEtaExpandArity is used when eta expanding
--- 	e  ==>  \xy -> e x y
-exprEtaExpandArity dflags e
-    = applyStateHack e (arityType dicts_cheap e)
-  where
-    dicts_cheap = dopt Opt_DictsCheap dflags
-\end{code}	
-
 Note [Definition of arity]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 The "arity" of an expression 'e' is n if
@@ -163,7 +242,6 @@ Or, to put it another way, in any context C
          is as efficient as
    C[ e ]
 
-
 It's all a bit more subtle than it looks:
 
 Note [Arity of case expressions]
@@ -185,7 +263,6 @@ This should diverge!  But if we eta-expand, it won't.   Again, we ignore this
 "problem", because being scrupulous would lose an important transformation for
 many programs.
 
-
 1.  Note [One-shot lambdas]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Consider one-shot lambdas
@@ -206,7 +283,6 @@ should diverge, but it'll converge if we eta-expand f.  Nevertheless, we
 do so; it improves some programs significantly, and increasing convergence
 isn't a bad thing.  Hence the ABot/ATop in ArityType.
 
-
 4. Note [Newtype arity]
 ~~~~~~~~~~~~~~~~~~~~~~~~
 Non-recursive newtypes are transparent, and should not get in the way.
@@ -247,38 +323,6 @@ Then we expect that if f is applied to one arg, it'll be applied to two
 (that's the hack -- we don't really know, and sometimes it's false)
 See also Id.isOneShotBndr.
 
-\begin{code}
-applyStateHack :: CoreExpr -> ArityType -> Arity
-applyStateHack e (AT orig_arity is_bot)
-  | opt_NoStateHack = orig_arity
-  | ABot <- is_bot  = orig_arity   -- Note [State hack and bottoming functions]
-  | otherwise       = go orig_ty orig_arity
-  where			-- Note [The state-transformer hack]
-    orig_ty = exprType e
-    go :: Type -> Arity -> Arity
-    go ty arity		-- This case analysis should match that in eta_expand
-	| Just (_, ty') <- splitForAllTy_maybe ty   = go ty' arity
-
-	| Just (tc,tys) <- splitTyConApp_maybe ty 
-	, Just (ty', _) <- instNewTyCon_maybe tc tys
-	, not (isRecursiveTyCon tc)	            = go ty' arity
-		-- Important to look through non-recursive newtypes, so that, eg 
-		--	(f x)   where f has arity 2, f :: Int -> IO ()
-		-- Here we want to get arity 1 for the result!
-
-	| Just (arg,res) <- splitFunTy_maybe ty
-	, arity > 0 || isStateHackType arg = 1 + go res (arity-1)
-{-
-	= if arity > 0 then 1 + go res (arity-1)
-	  else if isStateHackType arg then
-		pprTrace "applystatehack" (vcat [ppr orig_arity, ppr orig_ty,
-						ppr ty, ppr res, ppr e]) $
-	        1 + go res (arity-1)
-          else WARN( arity > 0, ppr arity ) 0
--}						 
-	| otherwise = WARN( arity > 0, ppr arity ) 0
-\end{code}
-
 Note [State hack and bottoming functions]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 It's a terrible idea to use the state hack on a bottoming function.
@@ -311,67 +355,208 @@ Extrude g1.g3
 And now we can repeat the whole loop.  Aargh!  The bug is in applying the
 state hack to a function which then swallows the argument.
 
+This arose in another guise in Trac #3959.  Here we had
+
+     catch# (throw exn >> return ())
+
+Note that (throw :: forall a e. Exn e => e -> a) is called with [a = IO ()].
+After inlining (>>) we get 
+
+     catch# (\_. throw {IO ()} exn)
+
+We must *not* eta-expand to 
+
+     catch# (\_ _. throw {...} exn)
+
+because 'catch#' expects to get a (# _,_ #) after applying its argument to
+a State#, not another function!  
+
+In short, we use the state hack to allow us to push let inside a lambda,
+but not to introduce a new lambda.
+
+
+Note [ArityType]
+~~~~~~~~~~~~~~~~
+ArityType is the result of a compositional analysis on expressions,
+from which we can decide the real arity of the expression (extracted
+with function exprEtaExpandArity).
+
+Here is what the fields mean. If an arbitrary expression 'f' has 
+ArityType 'at', then
+
+ * If at = ABot n, then (f x1..xn) definitely diverges. Partial
+   applications to fewer than n args may *or may not* diverge.
+
+   We allow ourselves to eta-expand bottoming functions, even
+   if doing so may lose some `seq` sharing, 
+       let x = <expensive> in \y. error (g x y)
+       ==> \y. let x = <expensive> in error (g x y)
+
+ * If at = ATop as, and n=length as, 
+   then expanding 'f' to (\x1..xn. f x1 .. xn) loses no sharing, 
+   assuming the calls of f respect the one-shot-ness of of
+   its definition.  
+
+   NB 'f' is an arbitary expression, eg (f = g e1 e2).  This 'f'
+   can have ArityType as ATop, with length as > 0, only if e1 e2 are 
+   themselves.
+
+ * In both cases, f, (f x1), ... (f x1 ... f(n-1)) are definitely
+   really functions, or bottom, but *not* casts from a data type, in
+   at least one case branch.  (If it's a function in one case branch but
+   an unsafe cast from a data type in another, the program is bogus.)
+   So eta expansion is dynamically ok; see Note [State hack and
+   bottoming functions], the part about catch#
+
+Example: 
+      f = \x\y. let v = <expensive> in 
+          \s(one-shot) \t(one-shot). blah
+      'f' has ArityType [ManyShot,ManyShot,OneShot,OneShot]
+      The one-shot-ness means we can, in effect, push that
+      'let' inside the \st.
+
+
+Suppose f = \xy. x+y
+Then  f             :: AT [False,False] ATop
+      f v           :: AT [False] 	ATop
+      f <expensive> :: AT [] 	        ATop
 
 -------------------- Main arity code ----------------------------
 \begin{code}
--- If e has ArityType (AT n r), then the term 'e'
---  * Must be applied to at least n *value* args 
---	before doing any significant work
---  * It will not diverge before being applied to n
---	value arguments
---  * If 'r' is ABot, then it guarantees to diverge if 
---	applied to n arguments (or more)
-
-data ArityType = AT Arity ArityRes
-data ArityRes  = ATop			-- Know nothing
-	       | ABot			-- Diverges
+-- See Note [ArityType]
+data ArityType = ATop [OneShot] | ABot Arity
+     -- There is always an explicit lambda
+     -- to justify the [OneShot], or the Arity
+
+type OneShot = Bool    -- False <=> Know nothing
+                       -- True  <=> Can definitely float inside this lambda
+	               -- The 'True' case can arise either because a binder
+		       -- is marked one-shot, or because it's a state lambda
+		       -- and we have the state hack on
 
 vanillaArityType :: ArityType
-vanillaArityType = AT 0 ATop	-- Totally uninformative
+vanillaArityType = ATop []	-- Totally uninformative
 
-incArity :: ArityType -> ArityType
-incArity (AT a r) = AT (a+1) r
+-- ^ The Arity returned is the number of value args the [_$_]
+-- expression can be applied to without doing much work
+exprEtaExpandArity :: CheapFun -> CoreExpr -> Arity
+-- exprEtaExpandArity is used when eta expanding
+-- 	e  ==>  \xy -> e x y
+exprEtaExpandArity cheap_fun e
+  = case (arityType cheap_fun e) of
+      ATop (os:oss) 
+        | os || has_lam e -> 1 + length oss	-- Note [Eta expanding thunks]
+        | otherwise       -> 0
+      ATop []             -> 0
+      ABot n              -> n
+  where
+    has_lam (Note _ e) = has_lam e
+    has_lam (Lam b e)  = isId b || has_lam e
+    has_lam _          = False
+
+getBotArity :: ArityType -> Maybe Arity
+-- Arity of a divergent function
+getBotArity (ABot n) = Just n
+getBotArity _        = Nothing
+\end{code}
 
-decArity :: ArityType -> ArityType
-decArity (AT 0 r) = AT 0     r
-decArity (AT a r) = AT (a-1) r
+Note [Eta expanding thunks]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we see
+     f = case y of p -> \x -> blah
+should we eta-expand it? Well, if 'x' is a one-shot state token 
+then 'yes' because 'f' will only be applied once.  But otherwise
+we (conservatively) say no.  My main reason is to avoid expanding
+PAPSs
+	f = g d  ==>  f = \x. g d x
+because that might in turn make g inline (if it has an inline pragma), 
+which we might not want.  After all, INLINE pragmas say "inline only
+when saturate" so we don't want to be too gung-ho about saturating!
+
+\begin{code}
+arityLam :: Id -> ArityType -> ArityType
+arityLam id (ATop as) = ATop (isOneShotBndr id : as)
+arityLam _  (ABot n)  = ABot (n+1)
+
+floatIn :: Bool -> ArityType -> ArityType
+-- We have something like (let x = E in b), 
+-- where b has the given arity type.  
+floatIn _     (ABot n)  = ABot n
+floatIn True  (ATop as) = ATop as
+floatIn False (ATop as) = ATop (takeWhile id as)
+   -- If E is not cheap, keep arity only for one-shots
+
+arityApp :: ArityType -> Bool -> ArityType
+-- Processing (fun arg) where at is the ArityType of fun,
+-- Knock off an argument and behave like 'let'
+arityApp (ABot 0)      _     = ABot 0
+arityApp (ABot n)      _     = ABot (n-1)
+arityApp (ATop [])     _     = ATop []
+arityApp (ATop (_:as)) cheap = floatIn cheap (ATop as)
 
 andArityType :: ArityType -> ArityType -> ArityType   -- Used for branches of a 'case'
-andArityType (AT a1 ATop) (AT a2 ATop) = AT (a1 `min` a2) ATop
-andArityType (AT _  ABot) (AT a2 ATop) = AT a2		  ATop
-andArityType (AT a1 ATop) (AT _  ABot) = AT a1		  ATop
-andArityType (AT a1 ABot) (AT a2 ABot) = AT (a1 `max` a2) ABot
-
-trimArity :: Bool -> ArityType -> ArityType
--- We have something like (let x = E in b), where b has the given
--- arity type.  Then
---	* If E is cheap we can push it inside as far as we like
---	* If b eventually diverges, we allow ourselves to push inside
---	  arbitrarily, even though that is not quite right
-trimArity _cheap (AT a ABot) = AT a ABot
-trimArity True   (AT a ATop) = AT a ATop
-trimArity False  (AT _ ATop) = AT 0 ATop	-- Bale out
+andArityType (ABot n1) (ABot n2) 
+  = ABot (n1 `min` n2)
+andArityType (ATop as)  (ABot _)  = ATop as
+andArityType (ABot _)   (ATop bs) = ATop bs
+andArityType (ATop as)  (ATop bs) = ATop (as `combine` bs)
+  where	     -- See Note [Combining case branches]
+    combine (a:as) (b:bs) = (a && b) : combine as bs
+    combine []     bs     = take_one_shots bs
+    combine as     []     = take_one_shots as
+
+    take_one_shots [] = []
+    take_one_shots (one_shot : as) 
+      | one_shot  = True : take_one_shots as
+      | otherwise = [] 
+\end{code}
 
+Note [Combining case branches]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider    
+  go = \x. let z = go e0
+               go2 = \x. case x of
+                           True  -> z
+                           False -> \s(one-shot). e1
+           in go2 x
+We *really* want to eta-expand go and go2.  
+When combining the barnches of the case we have
+     ATop [] `andAT` ATop [True]
+and we want to get ATop [True].  But if the inner
+lambda wasn't one-shot we don't want to do this.
+(We need a proper arity analysis to justify that.)
+
+
+\begin{code}
 ---------------------------
-arityType :: Bool -> CoreExpr -> ArityType
+type CheapFun = CoreExpr -> Maybe Type -> Bool
+     	-- How to decide if an expression is cheap
+	-- If the Maybe is Just, the type is the type
+	-- of the expression; Nothing means "don't know"
+
+arityType :: CheapFun -> CoreExpr -> ArityType
 arityType _ (Var v)
-  | Just strict_sig <- idNewStrictness_maybe v
+  | Just strict_sig <- idStrictness_maybe v
   , (ds, res) <- splitStrictSig strict_sig
-  , isBotRes res
-  = AT (length ds) ABot	-- Function diverges
+  , let arity = length ds
+  = if isBotRes res then ABot arity
+                    else ATop (take arity one_shots)
   | otherwise
-  = AT (idArity v) ATop
+  = ATop (take (idArity v) one_shots)
+  where
+    one_shots :: [Bool]	    -- One-shot-ness derived from the type
+    one_shots = typeArity (idType v)
 
 	-- Lambdas; increase arity
-arityType dicts_cheap (Lam x e)
-  | isId x    = incArity (arityType dicts_cheap e)
-  | otherwise = arityType dicts_cheap e
+arityType cheap_fn (Lam x e)
+  | isId x    = arityLam x (arityType cheap_fn e)
+  | otherwise = arityType cheap_fn e
 
-	-- Applications; decrease arity
-arityType dicts_cheap (App fun (Type _))
-   = arityType dicts_cheap fun
-arityType dicts_cheap (App fun arg )
-   = trimArity (exprIsCheap arg) (decArity (arityType dicts_cheap fun))
+	-- Applications; decrease arity, except for types
+arityType cheap_fn (App fun (Type _))
+   = arityType cheap_fn fun
+arityType cheap_fn (App fun arg )
+   = arityApp (arityType cheap_fn fun) (cheap_fn arg Nothing) 
 
 	-- Case/Let; keep arity if either the expression is cheap
 	-- or it's a 1-shot lambda
@@ -380,40 +565,21 @@ arityType dicts_cheap (App fun arg )
 	--  ===>
 	--	f x y = case x of { (a,b) -> e }
 	-- The difference is observable using 'seq'
-arityType dicts_cheap (Case scrut _ _ alts)
-  = trimArity (exprIsCheap scrut)
-	      (foldr1 andArityType [arityType dicts_cheap rhs | (_,_,rhs) <- alts])
+arityType cheap_fn (Case scrut bndr _ alts)
+  = floatIn (cheap_fn scrut (Just (idType bndr)))
+	    (foldr1 andArityType [arityType cheap_fn rhs | (_,_,rhs) <- alts])
 
-arityType dicts_cheap (Let b e) 
-  = trimArity (cheap_bind b) (arityType dicts_cheap e)
+arityType cheap_fn (Let b e) 
+  = floatIn (cheap_bind b) (arityType cheap_fn e)
   where
     cheap_bind (NonRec b e) = is_cheap (b,e)
     cheap_bind (Rec prs)    = all is_cheap prs
-    is_cheap (b,e) = (dicts_cheap && isDictLikeTy (idType b))
-		   || exprIsCheap e
-	-- If the experimental -fdicts-cheap flag is on, we eta-expand through
-	-- dictionary bindings.  This improves arities. Thereby, it also
-	-- means that full laziness is less prone to floating out the
-  	-- application of a function to its dictionary arguments, which
-	-- can thereby lose opportunities for fusion.  Example:
-	-- 	foo :: Ord a => a -> ...
-	--	foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
-	--		-- So foo has arity 1
-	--
-	--	f = \x. foo dInt $ bar x
-	--
-	-- The (foo DInt) is floated out, and makes ineffective a RULE 
-	--	foo (bar x) = ...
-  	--
-	-- One could go further and make exprIsCheap reply True to any
-	-- dictionary-typed expression, but that's more work.
-	-- 
-	-- See Note [Dictionary-like types] in TcType.lhs for why we use
-	-- isDictLikeTy here rather than isDictTy
-
-arityType dicts_cheap (Note _ e) = arityType dicts_cheap e
-arityType dicts_cheap (Cast e _) = arityType dicts_cheap e
-arityType _           _          = vanillaArityType
+    is_cheap (b,e) = cheap_fn e (Just (idType b))
+
+arityType cheap_fn (Note n e) 
+  | notSccNote n              = arityType cheap_fn e
+arityType cheap_fn (Cast e _) = arityType cheap_fn e
+arityType _           _       = vanillaArityType
 \end{code}
   
   
@@ -423,16 +589,54 @@ arityType _           _          = vanillaArityType
 %*									*
 %************************************************************************
 
-IMPORTANT NOTE: The eta expander is careful not to introduce "crap".
-In particular, given a CoreExpr satisfying the 'CpeRhs' invariant (in
-CorePrep), it returns a CoreExpr satisfying the same invariant. See
-Note [Eta expansion and the CorePrep invariants] in CorePrep.
+We go for:
+   f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
+				 (n >= 0)
+
+where (in both cases) 
+
+	* The xi can include type variables
+
+	* The yi are all value variables
+
+	* N is a NORMAL FORM (i.e. no redexes anywhere)
+	  wanting a suitable number of extra args.
+
+The biggest reason for doing this is for cases like
+
+	f = \x -> case x of
+		    True  -> \y -> e1
+		    False -> \y -> e2
+
+Here we want to get the lambdas together.  A good exmaple is the nofib
+program fibheaps, which gets 25% more allocation if you don't do this
+eta-expansion.
+
+We may have to sandwich some coerces between the lambdas
+to make the types work.   exprEtaExpandArity looks through coerces
+when computing arity; and etaExpand adds the coerces as necessary when
+actually computing the expansion.
+
+
+Note [No crap in eta-expanded code]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The eta expander is careful not to introduce "crap".  In particular,
+given a CoreExpr satisfying the 'CpeRhs' invariant (in CorePrep), it
+returns a CoreExpr satisfying the same invariant. See Note [Eta
+expansion and the CorePrep invariants] in CorePrep.
 
 This means the eta-expander has to do a bit of on-the-fly
 simplification but it's not too hard.  The alernative, of relying on 
 a subsequent clean-up phase of the Simplifier to de-crapify the result,
 means you can't really use it in CorePrep, which is painful.
 
+Note [Eta expansion and SCCs]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note that SCCs are not treated specially by etaExpand.  If we have
+	etaExpand 2 (\x -> scc "foo" e)
+	= (\xy -> (scc "foo" e) y)
+So the costs of evaluating 'e' (not 'e y') are attributed to "foo"
+
 \begin{code}
 -- | @etaExpand n us e ty@ returns an expression with
 -- the same meaning as @e@, but with arity @n@.
@@ -447,11 +651,6 @@ means you can't really use it in CorePrep, which is painful.
 etaExpand :: Arity	  	-- ^ Result should have this number of value args
 	  -> CoreExpr	        -- ^ Expression to expand
 	  -> CoreExpr
--- Note that SCCs are not treated specially.  If we have
---	etaExpand 2 (\x -> scc "foo" e)
---	= (\xy -> (scc "foo" e) y)
--- So the costs of evaluating 'e' (not 'e y') are attributed to "foo"
-
 -- etaExpand deals with for-alls. For example:
 --		etaExpand 1 E
 -- where  E :: forall a. a -> a
@@ -462,22 +661,19 @@ etaExpand :: Arity	  	-- ^ Result should have this number of value args
 -- so perhaps the extra code isn't worth it
 
 etaExpand n orig_expr
-  | manifestArity orig_expr >= n = orig_expr	-- The no-op case
-  | otherwise 		    
   = go n orig_expr
   where
-      -- Strip off existing lambdas
+      -- Strip off existing lambdas and casts
       -- Note [Eta expansion and SCCs]
     go 0 expr = expr
     go n (Lam v body) | isTyVar v = Lam v (go n     body)
        	              | otherwise = Lam v (go (n-1) body)
-    go n (Note InlineMe expr) = Note InlineMe (go n expr)
     go n (Cast expr co) = Cast (go n expr) co
     go n expr           = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, ppr etas]) $
        	 		  etaInfoAbs etas (etaInfoApp subst' expr etas)
     	  		where
 			    in_scope = mkInScopeSet (exprFreeVars expr)
-			    (in_scope', etas) = mkEtaWW n in_scope (exprType expr)
+			    (in_scope', etas) = mkEtaWW n orig_expr in_scope (exprType expr)
 			    subst' = mkEmptySubst in_scope'
 
       	      	       	        -- Wrapper    Unwrapper
@@ -492,10 +688,10 @@ instance Outputable EtaInfo where
 
 pushCoercion :: Coercion -> [EtaInfo] -> [EtaInfo]
 pushCoercion co1 (EtaCo co2 : eis)
-  | isIdentityCoercion co = eis
-  | otherwise	       	  = EtaCo co : eis
+  | isReflCo co = eis
+  | otherwise	= EtaCo co : eis
   where
-    co = co1 `mkTransCoercion` co2
+    co = co1 `mkTransCo` co2
 
 pushCoercion co eis = EtaCo co : eis
 
@@ -503,7 +699,7 @@ pushCoercion co eis = EtaCo co : eis
 etaInfoAbs :: [EtaInfo] -> CoreExpr -> CoreExpr
 etaInfoAbs []               expr = expr
 etaInfoAbs (EtaVar v : eis) expr = Lam v (etaInfoAbs eis expr)
-etaInfoAbs (EtaCo co : eis) expr = Cast (etaInfoAbs eis expr) (mkSymCoercion co)
+etaInfoAbs (EtaCo co : eis) expr = Cast (etaInfoAbs eis expr) (mkSymCo co)
 
 --------------
 etaInfoApp :: Subst -> CoreExpr -> [EtaInfo] -> CoreExpr
@@ -511,15 +707,12 @@ etaInfoApp :: Subst -> CoreExpr -> [EtaInfo] -> CoreExpr
 -- 	       ((substExpr s e) `appliedto` eis)
 
 etaInfoApp subst (Lam v1 e) (EtaVar v2 : eis) 
-  = etaInfoApp subst' e eis
-  where
-    subst' | isTyVar v1 = CoreSubst.extendTvSubst subst v1 (mkTyVarTy v2) 
-    	   | otherwise  = CoreSubst.extendIdSubst subst v1 (Var v2)
+  = etaInfoApp (CoreSubst.extendSubstWithVar subst v1 v2) e eis
 
 etaInfoApp subst (Cast e co1) eis
   = etaInfoApp subst e (pushCoercion co' eis)
   where
-    co' = CoreSubst.substTy subst co1
+    co' = CoreSubst.substCo subst co1
 
 etaInfoApp subst (Case e b _ alts) eis 
   = Case (subst_expr subst e) b1 (coreAltsType alts') alts'
@@ -546,24 +739,24 @@ etaInfoApp subst e eis
     go e (EtaCo co    : eis) = go (Cast e co) eis
 
 --------------
-mkEtaWW :: Arity -> InScopeSet -> Type
+mkEtaWW :: Arity -> CoreExpr -> InScopeSet -> Type
 	-> (InScopeSet, [EtaInfo])
 	-- EtaInfo contains fresh variables,
 	--   not free in the incoming CoreExpr
 	-- Outgoing InScopeSet includes the EtaInfo vars
 	--   and the original free vars
 
-mkEtaWW n in_scope ty
-  = go n empty_subst ty []
+mkEtaWW orig_n orig_expr in_scope orig_ty
+  = go orig_n empty_subst orig_ty []
   where
-    empty_subst = mkTvSubst in_scope emptyTvSubstEnv
+    empty_subst = TvSubst in_scope emptyTvSubstEnv
 
-    go n subst ty eis
+    go n subst ty eis	    -- See Note [exprArity invariant]
        | n == 0
        = (getTvInScope subst, reverse eis)
 
        | Just (tv,ty') <- splitForAllTy_maybe ty
-       , let (subst', tv') = substTyVarBndr subst tv
+       , let (subst', tv') = Type.substTyVarBndr subst tv
            -- Avoid free vars of the original expression
        = go n subst' ty' (EtaVar tv' : eis)
 
@@ -579,10 +772,12 @@ mkEtaWW n in_scope ty
        		--  	eta_expand 1 e T
        		-- We want to get
        		--	coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
-         go n subst ty' (EtaCo (substTy subst co) : eis)
+         go n subst ty' (EtaCo co : eis)
 
-       | otherwise	       	           -- We have an expression of arity > 0, 
-       = (getTvInScope subst, reverse eis) -- but its type isn't a function. 
+       | otherwise	 -- We have an expression of arity > 0, 
+       	 		 -- but its type isn't a function. 		   
+       = WARN( True, (ppr orig_n <+> ppr orig_ty) $$ ppr orig_expr )
+         (getTvInScope subst, reverse eis)
     	-- This *can* legitmately happen:
     	-- e.g.  coerce Int (\x. x) Essentially the programmer is
 	-- playing fast and loose with types (Happy does this a lot).
@@ -591,22 +786,13 @@ mkEtaWW n in_scope ty
    
 
 --------------
--- Avoiding unnecessary substitution
+-- Avoiding unnecessary substitution; use short-cutting versions
 
 subst_expr :: Subst -> CoreExpr -> CoreExpr
-subst_expr s e | isEmptySubst s = e
-	       | otherwise      = substExpr s e
+subst_expr = substExprSC (text "CoreArity:substExpr")
 
 subst_bind :: Subst -> CoreBind -> (Subst, CoreBind)
-subst_bind subst (NonRec b r)
-  = (subst', NonRec b' (subst_expr subst r))
-  where
-    (subst', b') = substBndr subst b
-subst_bind subst (Rec prs)
-  = (subst', Rec (bs1 `zip` map (subst_expr subst') rhss))
-  where
-    (bs, rhss) = unzip prs
-    (subst', bs1) = substBndrs subst bs 
+subst_bind = substBindSC
 
 
 --------------
@@ -621,9 +807,9 @@ freshEtaId :: Int -> TvSubst -> Type -> (TvSubst, Id)
 freshEtaId n subst ty
       = (subst', eta_id')
       where
-        ty'     = substTy subst ty
+        ty'     = Type.substTy subst ty
 	eta_id' = uniqAway (getTvInScope subst) $
 		  mkSysLocal (fsLit "eta") (mkBuiltinUnique n) ty'
-	subst'  = extendTvInScope subst [eta_id']		  
+	subst'  = extendTvInScope subst eta_id'		  
 \end{code}