X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FcoreSyn%2FCoreArity.lhs;h=d0092b26c52d5fbcdeaa24a1a67c2726f51c3c00;hp=f39b6b92352ebe07a18c5851a536dc73423e9b1b;hb=66413c79385a5b30a668e91789b8a334f6977ca9;hpb=ac38ece1e717cb412e89354aa95fd11d44c1cefb

diff --git a/compiler/coreSyn/CoreArity.lhs b/compiler/coreSyn/CoreArity.lhs
index f39b6b9..d0092b2 100644
--- a/compiler/coreSyn/CoreArity.lhs
+++ b/compiler/coreSyn/CoreArity.lhs
@@ -8,8 +8,8 @@
 \begin{code}
 -- | Arit and eta expansion
 module CoreArity (
-	manifestArity, exprArity, 
-	exprEtaExpandArity, etaExpand 
+	manifestArity, exprArity, exprBotStrictness_maybe,
+	exprEtaExpandArity, etaExpand
     ) where
 
 #include "HsVersions.h"
@@ -17,13 +17,13 @@ module CoreArity (
 import CoreSyn
 import CoreFVs
 import CoreUtils
-import qualified CoreSubst
-import CoreSubst ( Subst, substBndr, substBndrs, substExpr
-       		 , mkEmptySubst, isEmptySubst )
+import CoreSubst
+import Demand
 import Var
 import VarEnv
 import Id
 import Type
+import TyCon	( isRecursiveTyCon, isClassTyCon )
 import TcType	( isDictLikeTy )
 import Coercion
 import BasicTypes
@@ -31,9 +31,6 @@ import Unique
 import Outputable
 import DynFlags
 import FastString
-import Maybes
-
-import GHC.Exts		-- For `xori` 
 \end{code}
 
 %************************************************************************
@@ -69,10 +66,19 @@ should have arity 3, regardless of f's arity.
 Note [exprArity invariant]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 exprArity has the following invariant:
-	(exprArity e) = n, then manifestArity (etaExpand e n) = n
 
-That is, if exprArity says "the arity is n" then etaExpand really can get
-"n" manifest lambdas to the top.
+  * If typeArity (exprType e) = n,
+    then manifestArity (etaExpand e n) = n
+ 
+    That is, etaExpand can always expand as much as typeArity says
+    So the case analysis in etaExpand and in typeArity must match
+ 
+  * exprArity e <= typeArity (exprType e)      
+
+  * Hence if (exprArity e) = n, then manifestArity (etaExpand e n) = n
+
+    That is, if exprArity says "the arity is n" then etaExpand really 
+    can get "n" manifest lambdas to the top.
 
 Why is this important?  Because 
   - In TidyPgm we use exprArity to fix the *final arity* of 
@@ -89,88 +95,161 @@ in exprArity.  That is a less local change, so I'm going to leave it for today!
 \begin{code}
 manifestArity :: CoreExpr -> Arity
 -- ^ manifestArity sees how many leading value lambdas there are
-manifestArity (Lam v e) | isId v    = 1 + manifestArity e
-			| otherwise = manifestArity e
-manifestArity (Note _ e)	    = manifestArity e
-manifestArity (Cast e _)            = manifestArity e
-manifestArity _                     = 0
+manifestArity (Lam v e) | isId v    	= 1 + manifestArity e
+			| otherwise 	= manifestArity e
+manifestArity (Note n e) | notSccNote n	= manifestArity e
+manifestArity (Cast e _)            	= manifestArity e
+manifestArity _                     	= 0
 
 exprArity :: CoreExpr -> Arity
 -- ^ An approximate, fast, version of 'exprEtaExpandArity'
 exprArity e = go e
   where
-    go (Var v) 	       	         = idArity v
-    go (Lam x e) | isId x    	 = go e + 1
-    		 | otherwise 	 = go e
-    go (Note _ e)                = go e
-    go (Cast e co)               = trim_arity (go e) 0 (snd (coercionKind co))
-    go (App e (Type _))          = go e
-    go (App f a) | exprIsCheap a = (go f - 1) `max` 0
-    	-- NB: exprIsCheap a!  
-    	--	f (fac x) does not have arity 2, 
-    	-- 	even if f has arity 3!
-    	-- NB: `max 0`!  (\x y -> f x) has arity 2, even if f is
-    	--		 unknown, hence arity 0
+    go (Var v) 	       	           = idArity v
+    go (Lam x e) | isId x    	   = go e + 1
+    		 | otherwise 	   = go e
+    go (Note n e) | notSccNote n   = go e
+    go (Cast e co)                 = go e `min` length (typeArity (snd (coercionKind co)))
+       	       			     	-- Note [exprArity invariant]
+    go (App e (Type _))            = go e
+    go (App f a) | exprIsTrivial a = (go f - 1) `max` 0
+        -- See Note [exprArity for applications]
     go _		       	   = 0
 
-	-- Note [exprArity invariant]
-    trim_arity n a ty
-	| n==a					      = a
-	| Just (_, ty') <- splitForAllTy_maybe ty     = trim_arity n a     ty'
-	| Just (_, ty') <- splitFunTy_maybe ty        = trim_arity n (a+1) ty'
-	| Just (ty',_)  <- splitNewTypeRepCo_maybe ty = trim_arity n a     ty'
-	| otherwise				      = a
+
+typeArity :: Type -> [OneShot]
+-- How many value arrows are visible in the type?
+-- We look through foralls, and newtypes
+-- See Note [exprArity invariant]
+typeArity ty 
+  | Just (_, ty')  <- splitForAllTy_maybe ty 
+  = typeArity ty'
+
+  | Just (arg,res) <- splitFunTy_maybe ty    
+  = isStateHackType arg : typeArity res
+
+  | Just (tc,tys) <- splitTyConApp_maybe ty 
+  , Just (ty', _) <- instNewTyCon_maybe tc tys
+  , not (isRecursiveTyCon tc)
+  , not (isClassTyCon tc)	-- Do not eta-expand through newtype classes
+    		      		-- See Note [Newtype classes and eta expansion]
+  = typeArity ty'
+  	-- Important to look through non-recursive newtypes, so that, eg 
+  	--	(f x)   where f has arity 2, f :: Int -> IO ()
+  	-- Here we want to get arity 1 for the result!
+
+  | otherwise
+  = []
 \end{code}
 
+Note [Newtype classes and eta expansion]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We have to be careful when eta-expanding through newtypes.  In general
+it's a good idea, but annoyingly it interacts badly with the class-op 
+rule mechanism.  Consider
+ 
+   class C a where { op :: a -> a }
+   instance C b => C [b] where
+     op x = ...
+
+These translate to
+
+   co :: forall a. (a->a) ~ C a
+
+   $copList :: C b -> [b] -> [b]
+   $copList d x = ...
+
+   $dfList :: C b -> C [b]
+   {-# DFunUnfolding = [$copList] #-}
+   $dfList d = $copList d |> co@[b]
+
+Now suppose we have:
+
+   dCInt :: C Int    
+
+   blah :: [Int] -> [Int]
+   blah = op ($dfList dCInt)
+
+Now we want the built-in op/$dfList rule will fire to give
+   blah = $copList dCInt
+
+But with eta-expansion 'blah' might (and in Trac #3772, which is
+slightly more complicated, does) turn into
+
+   blah = op (\eta. ($dfList dCInt |> sym co) eta)
+
+and now it is *much* harder for the op/$dfList rule to fire, becuase
+exprIsConApp_maybe won't hold of the argument to op.  I considered
+trying to *make* it hold, but it's tricky and I gave up.
+
+The test simplCore/should_compile/T3722 is an excellent example.
+
+
+Note [exprArity for applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we come to an application we check that the arg is trivial.
+   eg  f (fac x) does not have arity 2, 
+                 even if f has arity 3!
+
+* We require that is trivial rather merely cheap.  Suppose f has arity 2.
+  Then    f (Just y)
+  has arity 0, because if we gave it arity 1 and then inlined f we'd get
+          let v = Just y in \w. <f-body>
+  which has arity 0.  And we try to maintain the invariant that we don't
+  have arity decreases.
+
+*  The `max 0` is important!  (\x y -> f x) has arity 2, even if f is
+   unknown, hence arity 0
+
+
 %************************************************************************
 %*									*
-\subsection{Eta reduction and expansion}
+	   Eta expansion
 %*									*
 %************************************************************************
 
-exprEtaExpandArity is used when eta expanding
-	e  ==>  \xy -> e x y
-
-It returns 1 (or more) to:
-	case x of p -> \s -> ...
-because for I/O ish things we really want to get that \s to the top.
-We are prepared to evaluate x each time round the loop in order to get that
-
-It's all a bit more subtle than it looks:
+\begin{code}
+exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, StrictSig)
+-- A cheap and cheerful function that identifies bottoming functions
+-- and gives them a suitable strictness signatures.  It's used during
+-- float-out
+exprBotStrictness_maybe e
+  = case getBotArity (arityType False e) of
+	Nothing -> Nothing
+	Just ar -> Just (ar, mkStrictSig (mkTopDmdType (replicate ar topDmd) BotRes))
+\end{code}	
+
+Note [Definition of arity]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+The "arity" of an expression 'e' is n if
+   applying 'e' to *fewer* than n *value* arguments
+   converges rapidly
 
-1.  One-shot lambdas
+Or, to put it another way
 
-Consider one-shot lambdas
-		let x = expensive in \y z -> E
-We want this to have arity 2 if the \y-abstraction is a 1-shot lambda
-Hence the ArityType returned by arityType
+   there is no work lost in duplicating the partial
+   application (e x1 .. x(n-1))
 
-2.  The state-transformer hack
+In the divegent case, no work is lost by duplicating because if the thing
+is evaluated once, that's the end of the program.
 
-The one-shot lambda special cause is particularly important/useful for
-IO state transformers, where we often get
-	let x = E in \ s -> ...
+Or, to put it another way, in any context C
 
-and the \s is a real-world state token abstraction.  Such abstractions
-are almost invariably 1-shot, so we want to pull the \s out, past the
-let x=E, even if E is expensive.  So we treat state-token lambdas as 
-one-shot even if they aren't really.  The hack is in Id.isOneShotBndr.
+   C[ (\x1 .. xn. e x1 .. xn) ]
+         is as efficient as
+   C[ e ]
 
-3.  Dealing with bottom
+It's all a bit more subtle than it looks:
 
-Consider also 
-	f = \x -> error "foo"
-Here, arity 1 is fine.  But if it is
-	f = \x -> case x of 
-			True  -> error "foo"
-			False -> \y -> x+y
-then we want to get arity 2.  Tecnically, this isn't quite right, because
-	(f True) `seq` 1
-should diverge, but it'll converge if we eta-expand f.  Nevertheless, we
-do so; it improves some programs significantly, and increasing convergence
-isn't a bad thing.  Hence the ABot/ATop in ArityType.
+Note [Arity of case expressions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We treat the arity of 
+	case x of p -> \s -> ...
+as 1 (or more) because for I/O ish things we really want to get that
+\s to the top.  We are prepared to evaluate x each time round the loop
+in order to get that.
 
-Actually, the situation is worse.  Consider
+This isn't really right in the presence of seq.  Consider
 	f = \x -> case x of
 			True  -> \y -> x+y
 			False -> \y -> x-y
@@ -181,9 +260,28 @@ This should diverge!  But if we eta-expand, it won't.   Again, we ignore this
 "problem", because being scrupulous would lose an important transformation for
 many programs.
 
+1.  Note [One-shot lambdas]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider one-shot lambdas
+		let x = expensive in \y z -> E
+We want this to have arity 1 if the \y-abstraction is a 1-shot lambda.
 
-4. Newtypes
+3.  Note [Dealing with bottom]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+	f = \x -> error "foo"
+Here, arity 1 is fine.  But if it is
+	f = \x -> case x of 
+			True  -> error "foo"
+			False -> \y -> x+y
+then we want to get arity 2.  Technically, this isn't quite right, because
+	(f True) `seq` 1
+should diverge, but it'll converge if we eta-expand f.  Nevertheless, we
+do so; it improves some programs significantly, and increasing convergence
+isn't a bad thing.  Hence the ABot/ATop in ArityType.
 
+4. Note [Newtype arity]
+~~~~~~~~~~~~~~~~~~~~~~~~
 Non-recursive newtypes are transparent, and should not get in the way.
 We do (currently) eta-expand recursive newtypes too.  So if we have, say
 
@@ -197,82 +295,227 @@ that is, etaExpandArity looks through the coerce.
 When we eta-expand e to arity 1: eta_expand 1 e T
 we want to get: 		 coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
 
-HOWEVER, note that if you use coerce bogusly you can ge
-	coerce Int negate
-And since negate has arity 2, you might try to eta expand.  But you can't
-decopose Int to a function type.   Hence the final case in eta_expand.
+  HOWEVER, note that if you use coerce bogusly you can ge
+  	coerce Int negate
+  And since negate has arity 2, you might try to eta expand.  But you can't
+  decopose Int to a function type.   Hence the final case in eta_expand.
+  
+Note [The state-transformer hack]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we have 
+	f = e
+where e has arity n.  Then, if we know from the context that f has
+a usage type like
+	t1 -> ... -> tn -1-> t(n+1) -1-> ... -1-> tm -> ...
+then we can expand the arity to m.  This usage type says that
+any application (x e1 .. en) will be applied to uniquely to (m-n) more args
+Consider f = \x. let y = <expensive> 
+		 in case x of
+		      True  -> foo
+		      False -> \(s:RealWorld) -> e
+where foo has arity 1.  Then we want the state hack to
+apply to foo too, so we can eta expand the case.
+
+Then we expect that if f is applied to one arg, it'll be applied to two
+(that's the hack -- we don't really know, and sometimes it's false)
+See also Id.isOneShotBndr.
+
+Note [State hack and bottoming functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It's a terrible idea to use the state hack on a bottoming function.
+Here's what happens (Trac #2861):
+
+  f :: String -> IO T
+  f = \p. error "..."
+
+Eta-expand, using the state hack:
+
+  f = \p. (\s. ((error "...") |> g1) s) |> g2
+  g1 :: IO T ~ (S -> (S,T))
+  g2 :: (S -> (S,T)) ~ IO T
+
+Extrude the g2
+
+  f' = \p. \s. ((error "...") |> g1) s
+  f = f' |> (String -> g2)
+
+Discard args for bottomming function
+
+  f' = \p. \s. ((error "...") |> g1 |> g3
+  g3 :: (S -> (S,T)) ~ (S,T)
+
+Extrude g1.g3
+
+  f'' = \p. \s. (error "...")
+  f' = f'' |> (String -> S -> g1.g3)
+
+And now we can repeat the whole loop.  Aargh!  The bug is in applying the
+state hack to a function which then swallows the argument.
+
+This arose in another guise in Trac #3959.  Here we had
+
+     catch# (throw exn >> return ())
+
+Note that (throw :: forall a e. Exn e => e -> a) is called with [a = IO ()].
+After inlining (>>) we get 
+
+     catch# (\_. throw {IO ()} exn)
+
+We must *not* eta-expand to 
+
+     catch# (\_ _. throw {...} exn)
+
+because 'catch#' expects to get a (# _,_ #) after applying its argument to
+a State#, not another function!  
+
+In short, we use the state hack to allow us to push let inside a lambda,
+but not to introduce a new lambda.
+
+
+Note [ArityType]
+~~~~~~~~~~~~~~~~
+ArityType is the result of a compositional analysis on expressions,
+from which we can decide the real arity of the expression (extracted
+with function getArity).
+
+Here is what the fields mean. If e has ArityType 
+     (AT as r), where n = length as, 
+then
+
+ * If r is ABot then (e x1..xn) definitely diverges
+   Partial applications may or may not diverge
 
+ * If r is ACheap then (e x1..x(n-1)) is cheap,
+   including any nested sub-expressions inside e
+   (say e is (f e1 e2) then e1,e2 are cheap too)
 
+ * e, (e x1), ... (e x1 ... x(n-1)) are definitely really 
+   functions, or bottom, not casts from a data type
+   So eta expansion is dynamically ok; 
+    see Note [State hack and bottoming functions], 
+    the part about catch#
+
+We regard ABot as stronger than ACheap; ie if ABot holds
+we don't bother about ACheap
+
+Suppose f = \xy. x+y
+Then  f             :: AT [False,False] ACheap
+      f v           :: AT [False] 	ACheap
+      f <expensive> :: AT [False] 	ATop
+Note the ArityRes flag tells whether the whole expression is cheap.
+Note also that having a non-empty 'as' doesn't mean it has that
+arity; see (f <expensive>) which does not have arity 1!
+
+The key function getArity extracts the arity (which in turn guides
+eta-expansion) from ArityType. 
+  * If the term is cheap or diverges we can certainly eta expand it
+      e.g.   (f x)   where x has arity 2
+  
+  * If its a function whose first arg is one-shot (probably via the
+    state hack) we can eta expand it
+      e.g.   (getChar <expensive>)  
+
+-------------------- Main arity code ----------------------------
 \begin{code}
--- ^ The Arity returned is the number of value args the 
+-- See Note [ArityType]
+data ArityType = AT [OneShot] ArityRes
+     -- There is always an explicit lambda
+     -- to justify the [OneShot]
+
+type OneShot = Bool    -- False <=> Know nothing
+                       -- True  <=> Can definitely float inside this lambda
+	               -- The 'True' case can arise either because a binder
+		       -- is marked one-shot, or because it's a state lambda
+		       -- and we have the state hack on
+
+data ArityRes  = ATop | ACheap | ABot
+
+vanillaArityType :: ArityType
+vanillaArityType = AT [] ATop	-- Totally uninformative
+
+-- ^ The Arity returned is the number of value args the [_$_]
 -- expression can be applied to without doing much work
 exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity
-exprEtaExpandArity dflags e = arityDepth (arityType dflags e)
+-- exprEtaExpandArity is used when eta expanding
+-- 	e  ==>  \xy -> e x y
+exprEtaExpandArity dflags e
+  = case (arityType dicts_cheap e) of
+      AT (a:as) res | want_eta a res -> 1 + length as
+      _                              -> 0
+  where
+    want_eta one_shot ATop   = one_shot
+    want_eta _        _      = True
+
+    dicts_cheap = dopt Opt_DictsCheap dflags
 
--- A limited sort of function type
-data ArityType = AFun Bool ArityType	-- True <=> one-shot
-	       | ATop			-- Know nothing
-	       | ABot			-- Diverges
+getBotArity :: ArityType -> Maybe Arity
+-- Arity of a divergent function
+getBotArity (AT as ABot) = Just (length as)
+getBotArity _            = Nothing
 
-arityDepth :: ArityType -> Arity
-arityDepth (AFun _ ty) = 1 + arityDepth ty
-arityDepth _           = 0
+arityLam :: Id -> ArityType -> ArityType
+arityLam id (AT as r) = AT (isOneShotBndr id : as) r
 
-andArityType :: ArityType -> ArityType -> ArityType
-andArityType ABot           at2           = at2
-andArityType ATop           _             = ATop
-andArityType (AFun t1 at1)  (AFun t2 at2) = AFun (t1 && t2) (andArityType at1 at2)
-andArityType at1            at2           = andArityType at2 at1
+floatIn :: Bool -> ArityType -> ArityType
+-- We have something like (let x = E in b), 
+-- where b has the given arity type.  
+floatIn c (AT as r) = AT as (extendArityRes r c)
 
-arityType :: DynFlags -> CoreExpr -> ArityType
-	-- (go1 e) = [b1,..,bn]
-	-- means expression can be rewritten \x_b1 -> ... \x_bn -> body
-	-- where bi is True <=> the lambda is one-shot
+arityApp :: ArityType -> CoreExpr -> ArityType
+-- Processing (fun arg) where at is the ArityType of fun,
+arityApp (AT [] r)     arg = AT [] (extendArityRes r (exprIsCheap arg))
+arityApp (AT (_:as) r) arg = AT as (extendArityRes r (exprIsCheap arg))
 
-arityType dflags (Note _ e) = arityType dflags e
---	Not needed any more: etaExpand is cleverer
--- removed: | ok_note n = arityType dflags e
--- removed: | otherwise = ATop
+extendArityRes :: ArityRes -> Bool -> ArityRes
+extendArityRes ABot   _    = ABot
+extendArityRes ACheap True = ACheap
+extendArityRes _      _    = ATop
+
+andArityType :: ArityType -> ArityType -> ArityType   -- Used for branches of a 'case'
+andArityType (AT as1 r1) (AT as2 r2) 
+  = AT (go_as as1 as2) (go_r r1 r2)
+  where
+    go_r ABot ABot     = ABot
+    go_r ABot ACheap   = ACheap
+    go_r ACheap ABot   = ACheap
+    go_r ACheap ACheap = ACheap
+    go_r _    _        = ATop
+
+    go_as (os1:as1) (os2:as2) = (os1 || os2) : go_as as1 as2
+    go_as []        as2       = as2 
+    go_as as1       []        = as1
+\end{code}
 
-arityType dflags (Cast e _) = arityType dflags e
 
+\begin{code}
+---------------------------
+arityType :: Bool -> CoreExpr -> ArityType
 arityType _ (Var v)
-  = mk (idArity v) (arg_tys (idType v))
+  | Just strict_sig <- idStrictness_maybe v
+  , (ds, res) <- splitStrictSig strict_sig
+  = mk_arity (length ds) res
+  | otherwise
+  = mk_arity (idArity v) TopRes
+
   where
-    mk :: Arity -> [Type] -> ArityType
-	-- The argument types are only to steer the "state hack"
-	-- Consider case x of
-	--		True  -> foo
-	--		False -> \(s:RealWorld) -> e
-	-- where foo has arity 1.  Then we want the state hack to
-	-- apply to foo too, so we can eta expand the case.
-    mk 0 tys | isBottomingId v                   = ABot
-             | (ty:_) <- tys, isStateHackType ty = AFun True ATop
-             | otherwise                         = ATop
-    mk n (ty:tys) = AFun (isStateHackType ty) (mk (n-1) tys)
-    mk n []       = AFun False		      (mk (n-1) [])
-
-    arg_tys :: Type -> [Type]	-- Ignore for-alls
-    arg_tys ty 
-	| Just (_, ty')  <- splitForAllTy_maybe ty = arg_tys ty'
-	| Just (arg,res) <- splitFunTy_maybe ty    = arg : arg_tys res
-	| otherwise				   = []
+    mk_arity id_arity res 
+      | isBotRes res = AT (take id_arity one_shots) ABot
+      | id_arity>0   = AT (take id_arity one_shots) ACheap
+      | otherwise    = AT []                        ATop
+
+    one_shots = typeArity (idType v)
 
 	-- Lambdas; increase arity
-arityType dflags (Lam x e)
-  | isId x    = AFun (isOneShotBndr x) (arityType dflags e)
-  | otherwise = arityType dflags e
+arityType dicts_cheap (Lam x e)
+  | isId x    = arityLam x (arityType dicts_cheap e)
+  | otherwise = arityType dicts_cheap e
 
 	-- Applications; decrease arity
-arityType dflags (App f (Type _)) = arityType dflags f
-arityType dflags (App f a)
-   = case arityType dflags f of
-	ABot -> ABot	-- If function diverges, ignore argument
-	ATop -> ATop	-- No no info about function
-	AFun _ xs
-		| exprIsCheap a -> xs
-		| otherwise	-> ATop
-							   
+arityType dicts_cheap (App fun (Type _))
+   = arityType dicts_cheap fun
+arityType dicts_cheap (App fun arg )
+   = arityApp (arityType dicts_cheap fun) arg 
+
 	-- Case/Let; keep arity if either the expression is cheap
 	-- or it's a 1-shot lambda
 	-- The former is not really right for Haskell
@@ -280,26 +523,21 @@ arityType dflags (App f a)
 	--  ===>
 	--	f x y = case x of { (a,b) -> e }
 	-- The difference is observable using 'seq'
-arityType dflags (Case scrut _ _ alts)
-  = case foldr1 andArityType [arityType dflags rhs | (_,_,rhs) <- alts] of
-        xs | exprIsCheap scrut     -> xs
-        AFun one_shot _ | one_shot -> AFun True ATop
-        _                          -> ATop
-
-arityType dflags (Let b e) 
-  = case arityType dflags e of
-        xs              | cheap_bind b -> xs
-        AFun one_shot _ | one_shot     -> AFun True ATop
-        _                              -> ATop
+arityType dicts_cheap (Case scrut _ _ alts)
+  = floatIn (exprIsCheap scrut)
+	      (foldr1 andArityType [arityType dicts_cheap rhs | (_,_,rhs) <- alts])
+
+arityType dicts_cheap (Let b e) 
+  = floatIn (cheap_bind b) (arityType dicts_cheap e)
   where
     cheap_bind (NonRec b e) = is_cheap (b,e)
     cheap_bind (Rec prs)    = all is_cheap prs
-    is_cheap (b,e) = (dopt Opt_DictsCheap dflags && isDictLikeTy (idType b))
+    is_cheap (b,e) = (dicts_cheap && isDictLikeTy (idType b))
 		   || exprIsCheap e
 	-- If the experimental -fdicts-cheap flag is on, we eta-expand through
 	-- dictionary bindings.  This improves arities. Thereby, it also
 	-- means that full laziness is less prone to floating out the
-	-- application of a function to its dictionary arguments, which
+  	-- application of a function to its dictionary arguments, which
 	-- can thereby lose opportunities for fusion.  Example:
 	-- 	foo :: Ord a => a -> ...
 	--	foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
@@ -309,17 +547,20 @@ arityType dflags (Let b e)
 	--
 	-- The (foo DInt) is floated out, and makes ineffective a RULE 
 	--	foo (bar x) = ...
-	--
+  	--
 	-- One could go further and make exprIsCheap reply True to any
 	-- dictionary-typed expression, but that's more work.
 	-- 
 	-- See Note [Dictionary-like types] in TcType.lhs for why we use
 	-- isDictLikeTy here rather than isDictTy
 
-arityType _ _ = ATop
+arityType dicts_cheap (Note n e) 
+  | notSccNote n                 = arityType dicts_cheap e
+arityType dicts_cheap (Cast e _) = arityType dicts_cheap e
+arityType _           _          = vanillaArityType
 \end{code}
-
-
+  
+  
 %************************************************************************
 %*									*
               The main eta-expander								
@@ -336,6 +577,13 @@ simplification but it's not too hard.  The alernative, of relying on
 a subsequent clean-up phase of the Simplifier to de-crapify the result,
 means you can't really use it in CorePrep, which is painful.
 
+Note [Eta expansion and SCCs]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note that SCCs are not treated specially by etaExpand.  If we have
+	etaExpand 2 (\x -> scc "foo" e)
+	= (\xy -> (scc "foo" e) y)
+So the costs of evaluating 'e' (not 'e y') are attributed to "foo"
+
 \begin{code}
 -- | @etaExpand n us e ty@ returns an expression with
 -- the same meaning as @e@, but with arity @n@.
@@ -350,11 +598,6 @@ means you can't really use it in CorePrep, which is painful.
 etaExpand :: Arity	  	-- ^ Result should have this number of value args
 	  -> CoreExpr	        -- ^ Expression to expand
 	  -> CoreExpr
--- Note that SCCs are not treated specially.  If we have
---	etaExpand 2 (\x -> scc "foo" e)
---	= (\xy -> (scc "foo" e) y)
--- So the costs of evaluating 'e' (not 'e y') are attributed to "foo"
-
 -- etaExpand deals with for-alls. For example:
 --		etaExpand 1 E
 -- where  E :: forall a. a -> a
@@ -365,16 +608,13 @@ etaExpand :: Arity	  	-- ^ Result should have this number of value args
 -- so perhaps the extra code isn't worth it
 
 etaExpand n orig_expr
-  | manifestArity orig_expr >= n = orig_expr	-- The no-op case
-  | otherwise 		    
   = go n orig_expr
   where
-      -- Strip off existing lambdas
+      -- Strip off existing lambdas and casts
+      -- Note [Eta expansion and SCCs]
     go 0 expr = expr
-    go n (Lam v body) | isTyVar v = Lam v (go n     body)
+    go n (Lam v body) | isTyCoVar v = Lam v (go n     body)
        	              | otherwise = Lam v (go (n-1) body)
-    go n (Note InlineMe expr) = Note InlineMe (go n expr)
-        -- Note [Eta expansion and SCCs]
     go n (Cast expr co) = Cast (go n expr) co
     go n expr           = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, ppr etas]) $
        	 		  etaInfoAbs etas (etaInfoApp subst' expr etas)
@@ -416,7 +656,7 @@ etaInfoApp :: Subst -> CoreExpr -> [EtaInfo] -> CoreExpr
 etaInfoApp subst (Lam v1 e) (EtaVar v2 : eis) 
   = etaInfoApp subst' e eis
   where
-    subst' | isTyVar v1 = CoreSubst.extendTvSubst subst v1 (mkTyVarTy v2) 
+    subst' | isTyCoVar v1 = CoreSubst.extendTvSubst subst v1 (mkTyVarTy v2) 
     	   | otherwise  = CoreSubst.extendIdSubst subst v1 (Var v2)
 
 etaInfoApp subst (Cast e co1) eis
@@ -456,12 +696,12 @@ mkEtaWW :: Arity -> InScopeSet -> Type
 	-- Outgoing InScopeSet includes the EtaInfo vars
 	--   and the original free vars
 
-mkEtaWW n in_scope ty
-  = go n empty_subst ty []
+mkEtaWW orig_n in_scope orig_ty
+  = go orig_n empty_subst orig_ty []
   where
     empty_subst = mkTvSubst in_scope emptyTvSubstEnv
 
-    go n subst ty eis
+    go n subst ty eis	    -- See Note [exprArity invariant]
        | n == 0
        = (getTvInScope subst, reverse eis)
 
@@ -482,10 +722,12 @@ mkEtaWW n in_scope ty
        		--  	eta_expand 1 e T
        		-- We want to get
        		--	coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
-         go n subst ty' (EtaCo (substTy subst co) : eis)
+         go n subst ty' (EtaCo (Type.substTy subst co) : eis)
 
-       | otherwise	       	           -- We have an expression of arity > 0, 
-       = (getTvInScope subst, reverse eis) -- but its type isn't a function. 
+       | otherwise	 -- We have an expression of arity > 0, 
+       	 		 -- but its type isn't a function. 		   
+       = WARN( True, ppr orig_n <+> ppr orig_ty )
+         (getTvInScope subst, reverse eis)
     	-- This *can* legitmately happen:
     	-- e.g.  coerce Int (\x. x) Essentially the programmer is
 	-- playing fast and loose with types (Happy does this a lot).
@@ -494,22 +736,13 @@ mkEtaWW n in_scope ty
    
 
 --------------
--- Avoiding unnecessary substitution
+-- Avoiding unnecessary substitution; use short-cutting versions
 
 subst_expr :: Subst -> CoreExpr -> CoreExpr
-subst_expr s e | isEmptySubst s = e
-	       | otherwise      = substExpr s e
+subst_expr = substExprSC (text "CoreArity:substExpr")
 
 subst_bind :: Subst -> CoreBind -> (Subst, CoreBind)
-subst_bind subst (NonRec b r)
-  = (subst', NonRec b' (subst_expr subst r))
-  where
-    (subst', b') = substBndr subst b
-subst_bind subst (Rec prs)
-  = (subst', Rec (bs1 `zip` map (subst_expr subst') rhss))
-  where
-    (bs, rhss) = unzip prs
-    (subst', bs1) = substBndrs subst bs 
+subst_bind = substBindSC
 
 
 --------------
@@ -524,9 +757,9 @@ freshEtaId :: Int -> TvSubst -> Type -> (TvSubst, Id)
 freshEtaId n subst ty
       = (subst', eta_id')
       where
-        ty'     = substTy subst ty
+        ty'     = Type.substTy subst ty
 	eta_id' = uniqAway (getTvInScope subst) $
 		  mkSysLocal (fsLit "eta") (mkBuiltinUnique n) ty'
-	subst'  = extendTvInScope subst [eta_id']		  
+	subst'  = extendTvInScope subst eta_id'		  
 \end{code}