Move a bit of build system code

[ghc-hetmet.git] / compiler / coreSyn / CoreArity.lhs
diff --git a/compiler/coreSyn/CoreArity.lhs b/compiler/coreSyn/CoreArity.lhs

index 2dfed49..d5849cb 100644 (file)
--- a/compiler/coreSyn/CoreArity.lhs
+++ b/compiler/coreSyn/CoreArity.lhs
@@ -8,8 +8,8 @@
  \begin{code}
  -- | Arit and eta expansion
  module CoreArity (
-       manifestArity, exprArity, 
-       exprEtaExpandArity, etaExpand 
+       manifestArity, exprArity, exprBotStrictness_maybe,
+       exprEtaExpandArity, etaExpand
      ) where
  
  #include "HsVersions.h"
@@ -17,22 +17,21 @@ module CoreArity (
  import CoreSyn
  import CoreFVs
  import CoreUtils
-import qualified CoreSubst
-import CoreSubst ( Subst, substBndr, substBndrs, substExpr
-                        , mkEmptySubst, isEmptySubst )
+import CoreSubst
+import Demand
  import Var
  import VarEnv
  import Id
  import Type
+import TyCon   ( isRecursiveTyCon )
+import TcType  ( isDictLikeTy )
  import Coercion
  import BasicTypes
  import Unique
  import Outputable
  import DynFlags
+import StaticFlags     ( opt_NoStateHack )
  import FastString
-import Maybes
-
-import GHC.Exts                -- For `xori` 
  \end{code}
  
  %************************************************************************
@@ -98,78 +97,93 @@ exprArity :: CoreExpr -> Arity
  -- ^ An approximate, fast, version of 'exprEtaExpandArity'
  exprArity e = go e
    where
-    go (Var v)                          = idArity v
-    go (Lam x e) | isId x       = go e + 1
-                | otherwise     = go e
-    go (Note _ e)                = go e
-    go (Cast e co)               = trim_arity (go e) 0 (snd (coercionKind co))
-    go (App e (Type _))          = go e
-    go (App f a) | exprIsCheap a = (go f - 1) `max` 0
-       -- NB: exprIsCheap a!  
-       --      f (fac x) does not have arity 2, 
-       --      even if f has arity 3!
-       -- NB: `max 0`!  (\x y -> f x) has arity 2, even if f is
-       --               unknown, hence arity 0
+    go (Var v)                            = idArity v
+    go (Lam x e) | isId x         = go e + 1
+                | otherwise       = go e
+    go (Note _ e)                  = go e
+    go (Cast e co)                 = go e `min` typeArity (snd (coercionKind co))
+                                               -- Note [exprArity invariant]
+    go (App e (Type _))            = go e
+    go (App f a) | exprIsTrivial a = (go f - 1) `max` 0
+        -- See Note [exprArity for applications]
      go _                          = 0
-
-       -- Note [exprArity invariant]
-    trim_arity n a ty
-       | n==a                                        = a
-       | Just (_, ty') <- splitForAllTy_maybe ty     = trim_arity n a     ty'
-       | Just (_, ty') <- splitFunTy_maybe ty        = trim_arity n (a+1) ty'
-       | Just (ty',_)  <- splitNewTypeRepCo_maybe ty = trim_arity n a     ty'
-       | otherwise                                   = a
  \end{code}
  
+Note [exprArity for applications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we come to an application we check that the arg is trivial.
+   eg  f (fac x) does not have arity 2, 
+                 even if f has arity 3!
+
+* We require that is trivial rather merely cheap.  Suppose f has arity 2.
+  Then    f (Just y)
+  has arity 0, because if we gave it arity 1 and then inlined f we'd get
+          let v = Just y in \w. <f-body>
+  which has arity 0.  And we try to maintain the invariant that we don't
+  have arity decreases.
+
+*  The `max 0` is important!  (\x y -> f x) has arity 2, even if f is
+   unknown, hence arity 0
+
+
  %************************************************************************
  %*                                                                     *
-\subsection{Eta reduction and expansion}
+          Eta expansion
  %*                                                                     *
  %************************************************************************
  
-exprEtaExpandArity is used when eta expanding
-       e  ==>  \xy -> e x y
-
-It returns 1 (or more) to:
-       case x of p -> \s -> ...
-because for I/O ish things we really want to get that \s to the top.
-We are prepared to evaluate x each time round the loop in order to get that
-
-It's all a bit more subtle than it looks:
+\begin{code}
+-- ^ The Arity returned is the number of value args the 
+-- expression can be applied to without doing much work
+exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity
+-- exprEtaExpandArity is used when eta expanding
+--     e  ==>  \xy -> e x y
+exprEtaExpandArity dflags e
+    = applyStateHack e (arityType dicts_cheap e)
+  where
+    dicts_cheap = dopt Opt_DictsCheap dflags
+
+exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, StrictSig)
+-- A cheap and cheerful function that identifies bottoming functions
+-- and gives them a suitable strictness signatures.  It's used during
+-- float-out
+exprBotStrictness_maybe e
+  = case arityType False e of
+       AT _ ATop -> Nothing
+       AT a ABot -> Just (a, mkStrictSig (mkTopDmdType (replicate a topDmd) BotRes))
+\end{code}     
+
+Note [Definition of arity]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+The "arity" of an expression 'e' is n if
+   applying 'e' to *fewer* than n *value* arguments
+   converges rapidly
  
-1.  One-shot lambdas
+Or, to put it another way
  
-Consider one-shot lambdas
-               let x = expensive in \y z -> E
-We want this to have arity 2 if the \y-abstraction is a 1-shot lambda
-Hence the ArityType returned by arityType
+   there is no work lost in duplicating the partial
+   application (e x1 .. x(n-1))
  
-2.  The state-transformer hack
+In the divegent case, no work is lost by duplicating because if the thing
+is evaluated once, that's the end of the program.
  
-The one-shot lambda special cause is particularly important/useful for
-IO state transformers, where we often get
-       let x = E in \ s -> ...
+Or, to put it another way, in any context C
  
-and the \s is a real-world state token abstraction.  Such abstractions
-are almost invariably 1-shot, so we want to pull the \s out, past the
-let x=E, even if E is expensive.  So we treat state-token lambdas as 
-one-shot even if they aren't really.  The hack is in Id.isOneShotBndr.
+   C[ (\x1 .. xn. e x1 .. xn) ]
+         is as efficient as
+   C[ e ]
  
-3.  Dealing with bottom
+It's all a bit more subtle than it looks:
  
-Consider also 
-       f = \x -> error "foo"
-Here, arity 1 is fine.  But if it is
-       f = \x -> case x of 
-                       True  -> error "foo"
-                       False -> \y -> x+y
-then we want to get arity 2.  Tecnically, this isn't quite right, because
-       (f True) `seq` 1
-should diverge, but it'll converge if we eta-expand f.  Nevertheless, we
-do so; it improves some programs significantly, and increasing convergence
-isn't a bad thing.  Hence the ABot/ATop in ArityType.
+Note [Arity of case expressions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We treat the arity of 
+       case x of p -> \s -> ...
+as 1 (or more) because for I/O ish things we really want to get that
+\s to the top.  We are prepared to evaluate x each time round the loop
+in order to get that.
  
-Actually, the situation is worse.  Consider
+This isn't really right in the presence of seq.  Consider
         f = \x -> case x of
                         True  -> \y -> x+y
                         False -> \y -> x-y
@@ -180,9 +194,28 @@ This should diverge!  But if we eta-expand, it won't.   Again, we ignore this
  "problem", because being scrupulous would lose an important transformation for
  many programs.
  
+1.  Note [One-shot lambdas]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider one-shot lambdas
+               let x = expensive in \y z -> E
+We want this to have arity 1 if the \y-abstraction is a 1-shot lambda.
  
-4. Newtypes
+3.  Note [Dealing with bottom]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+       f = \x -> error "foo"
+Here, arity 1 is fine.  But if it is
+       f = \x -> case x of 
+                       True  -> error "foo"
+                       False -> \y -> x+y
+then we want to get arity 2.  Technically, this isn't quite right, because
+       (f True) `seq` 1
+should diverge, but it'll converge if we eta-expand f.  Nevertheless, we
+do so; it improves some programs significantly, and increasing convergence
+isn't a bad thing.  Hence the ABot/ATop in ArityType.
  
+4. Note [Newtype arity]
+~~~~~~~~~~~~~~~~~~~~~~~~
  Non-recursive newtypes are transparent, and should not get in the way.
  We do (currently) eta-expand recursive newtypes too.  So if we have, say
  
@@ -196,82 +229,182 @@ that is, etaExpandArity looks through the coerce.
  When we eta-expand e to arity 1: eta_expand 1 e T
  we want to get:                 coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
  
-HOWEVER, note that if you use coerce bogusly you can ge
-       coerce Int negate
-And since negate has arity 2, you might try to eta expand.  But you can't
-decopose Int to a function type.   Hence the final case in eta_expand.
+  HOWEVER, note that if you use coerce bogusly you can ge
+       coerce Int negate
+  And since negate has arity 2, you might try to eta expand.  But you can't
+  decopose Int to a function type.   Hence the final case in eta_expand.
+  
+\begin{code}
+applyStateHack :: CoreExpr -> ArityType -> Arity
+applyStateHack e (AT orig_arity is_bot)
+  | opt_NoStateHack = orig_arity
+  | ABot <- is_bot  = orig_arity   -- Note [State hack and bottoming functions]
+  | otherwise       = go orig_ty orig_arity
+  where                        -- Note [The state-transformer hack]
+    orig_ty = exprType e
+    go :: Type -> Arity -> Arity
+    go ty arity                -- This case analysis should match that in eta_expand
+       | Just (_, ty') <- splitForAllTy_maybe ty   = go ty' arity
+       | Just (arg,res) <- splitFunTy_maybe ty
+       , arity > 0 || isStateHackType arg = 1 + go res (arity-1)
+
+-- See Note [trimCast]
+       | Just (tc,tys) <- splitTyConApp_maybe ty 
+       , Just (ty', _) <- instNewTyCon_maybe tc tys
+       , not (isRecursiveTyCon tc)                 = go ty' arity
+               -- Important to look through non-recursive newtypes, so that, eg 
+               --      (f x)   where f has arity 2, f :: Int -> IO ()
+               -- Here we want to get arity 1 for the result!
+-------
+
+{-
+       = if arity > 0 then 1 + go res (arity-1)
+         else if isStateHackType arg then
+               pprTrace "applystatehack" (vcat [ppr orig_arity, ppr orig_ty,
+                                               ppr ty, ppr res, ppr e]) $
+               1 + go res (arity-1)
+          else WARN( arity > 0, ppr arity ) 0
+-}                                              
+       | otherwise = WARN( arity > 0, ppr arity <+> ppr ty) 0
+\end{code}
+
+Note [The state-transformer hack]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we have 
+       f = e
+where e has arity n.  Then, if we know from the context that f has
+a usage type like
+       t1 -> ... -> tn -1-> t(n+1) -1-> ... -1-> tm -> ...
+then we can expand the arity to m.  This usage type says that
+any application (x e1 .. en) will be applied to uniquely to (m-n) more args
+Consider f = \x. let y = <expensive> 
+                in case x of
+                     True  -> foo
+                     False -> \(s:RealWorld) -> e
+where foo has arity 1.  Then we want the state hack to
+apply to foo too, so we can eta expand the case.
  
+Then we expect that if f is applied to one arg, it'll be applied to two
+(that's the hack -- we don't really know, and sometimes it's false)
+See also Id.isOneShotBndr.
  
-\begin{code}
--- ^ The Arity returned is the number of value args the 
--- expression can be applied to without doing much work
-exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity
-exprEtaExpandArity dflags e = arityDepth (arityType dflags e)
+Note [State hack and bottoming functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It's a terrible idea to use the state hack on a bottoming function.
+Here's what happens (Trac #2861):
+
+  f :: String -> IO T
+  f = \p. error "..."
+
+Eta-expand, using the state hack:
+
+  f = \p. (\s. ((error "...") |> g1) s) |> g2
+  g1 :: IO T ~ (S -> (S,T))
+  g2 :: (S -> (S,T)) ~ IO T
+
+Extrude the g2
+
+  f' = \p. \s. ((error "...") |> g1) s
+  f = f' |> (String -> g2)
+
+Discard args for bottomming function
+
+  f' = \p. \s. ((error "...") |> g1 |> g3
+  g3 :: (S -> (S,T)) ~ (S,T)
+
+Extrude g1.g3
+
+  f'' = \p. \s. (error "...")
+  f' = f'' |> (String -> S -> g1.g3)
  
--- A limited sort of function type
-data ArityType = AFun Bool ArityType   -- True <=> one-shot
-              | ATop                   -- Know nothing
+And now we can repeat the whole loop.  Aargh!  The bug is in applying the
+state hack to a function which then swallows the argument.
+
+
+-------------------- Main arity code ----------------------------
+\begin{code}
+-- If e has ArityType (AT n r), then the term 'e'
+--  * Must be applied to at least n *value* args 
+--     before doing any significant work
+--  * It will not diverge before being applied to n
+--     value arguments
+--  * If 'r' is ABot, then it guarantees to diverge if 
+--     applied to n arguments (or more)
+
+data ArityType = AT Arity ArityRes
+data ArityRes  = ATop                  -- Know nothing
                | ABot                   -- Diverges
  
-arityDepth :: ArityType -> Arity
-arityDepth (AFun _ ty) = 1 + arityDepth ty
-arityDepth _           = 0
+vanillaArityType :: ArityType
+vanillaArityType = AT 0 ATop   -- Totally uninformative
  
-andArityType :: ArityType -> ArityType -> ArityType
-andArityType ABot           at2           = at2
-andArityType ATop           _             = ATop
-andArityType (AFun t1 at1)  (AFun t2 at2) = AFun (t1 && t2) (andArityType at1 at2)
-andArityType at1            at2           = andArityType at2 at1
+incArity :: ArityType -> ArityType
+incArity (AT a r) = AT (a+1) r
  
-arityType :: DynFlags -> CoreExpr -> ArityType
-       -- (go1 e) = [b1,..,bn]
-       -- means expression can be rewritten \x_b1 -> ... \x_bn -> body
-       -- where bi is True <=> the lambda is one-shot
+decArity :: ArityType -> ArityType
+decArity (AT 0 r) = AT 0     r
+decArity (AT a r) = AT (a-1) r
  
-arityType dflags (Note _ e) = arityType dflags e
---     Not needed any more: etaExpand is cleverer
--- removed: | ok_note n = arityType dflags e
--- removed: | otherwise = ATop
+andArityType :: ArityType -> ArityType -> ArityType   -- Used for branches of a 'case'
+andArityType (AT a1 ATop) (AT a2 ATop) = AT (a1 `min` a2) ATop
+andArityType (AT _  ABot) (AT a2 ATop) = AT a2           ATop
+andArityType (AT a1 ATop) (AT _  ABot) = AT a1           ATop
+andArityType (AT a1 ABot) (AT a2 ABot) = AT (a1 `max` a2) ABot
  
-arityType dflags (Cast e _) = arityType dflags e
+---------------------------
+trimCast :: Coercion -> ArityType -> ArityType
+-- Trim the arity to be no more than allowed by the
+-- arrows in ty2, where co :: ty1~ty2
+trimCast _ at = at
  
-arityType _ (Var v)
-  = mk (idArity v) (arg_tys (idType v))
+{-        Omitting for now Note [trimCast]
+trimCast co at@(AT ar _)
+  | ar > co_arity = AT co_arity ATop
+  | otherwise     = at
    where
-    mk :: Arity -> [Type] -> ArityType
-       -- The argument types are only to steer the "state hack"
-       -- Consider case x of
-       --              True  -> foo
-       --              False -> \(s:RealWorld) -> e
-       -- where foo has arity 1.  Then we want the state hack to
-       -- apply to foo too, so we can eta expand the case.
-    mk 0 tys | isBottomingId v                   = ABot
-             | (ty:_) <- tys, isStateHackType ty = AFun True ATop
-             | otherwise                         = ATop
-    mk n (ty:tys) = AFun (isStateHackType ty) (mk (n-1) tys)
-    mk n []       = AFun False               (mk (n-1) [])
-
-    arg_tys :: Type -> [Type]  -- Ignore for-alls
-    arg_tys ty 
-       | Just (_, ty')  <- splitForAllTy_maybe ty = arg_tys ty'
-       | Just (arg,res) <- splitFunTy_maybe ty    = arg : arg_tys res
-       | otherwise                                = []
+    (_, ty2) = coercionKind co
+    co_arity = typeArity ty2
+-}
+\end{code}
+
+Note [trimCast]
+~~~~~~~~~~~~~~~
+When you try putting trimCast back in, comment out the snippets
+flagged by the other references to Note [trimCast]
+
+\begin{code}
+---------------------------
+trimArity :: Bool -> ArityType -> ArityType
+-- We have something like (let x = E in b), where b has the given
+-- arity type.  Then
+--     * If E is cheap we can push it inside as far as we like
+--     * If b eventually diverges, we allow ourselves to push inside
+--       arbitrarily, even though that is not quite right
+trimArity _cheap (AT a ABot) = AT a ABot
+trimArity True   (AT a ATop) = AT a ATop
+trimArity False  (AT _ ATop) = AT 0 ATop       -- Bale out
+
+---------------------------
+arityType :: Bool -> CoreExpr -> ArityType
+arityType _ (Var v)
+  | Just strict_sig <- idStrictness_maybe v
+  , (ds, res) <- splitStrictSig strict_sig
+  , isBotRes res
+  = AT (length ds) ABot        -- Function diverges
+  | otherwise
+  = AT (idArity v) ATop
  
         -- Lambdas; increase arity
-arityType dflags (Lam x e)
-  | isId x    = AFun (isOneShotBndr x) (arityType dflags e)
-  | otherwise = arityType dflags e
+arityType dicts_cheap (Lam x e)
+  | isId x    = incArity (arityType dicts_cheap e)
+  | otherwise = arityType dicts_cheap e
  
         -- Applications; decrease arity
-arityType dflags (App f (Type _)) = arityType dflags f
-arityType dflags (App f a)
-   = case arityType dflags f of
-       ABot -> ABot    -- If function diverges, ignore argument
-       ATop -> ATop    -- No no info about function
-       AFun _ xs
-               | exprIsCheap a -> xs
-               | otherwise     -> ATop
-                                                          
+arityType dicts_cheap (App fun (Type _))
+   = arityType dicts_cheap fun
+arityType dicts_cheap (App fun arg )
+   = trimArity (exprIsCheap arg) (decArity (arityType dicts_cheap fun))
+
         -- Case/Let; keep arity if either the expression is cheap
         -- or it's a 1-shot lambda
         -- The former is not really right for Haskell
@@ -279,26 +412,21 @@ arityType dflags (App f a)
         --  ===>
         --      f x y = case x of { (a,b) -> e }
         -- The difference is observable using 'seq'
-arityType dflags (Case scrut _ _ alts)
-  = case foldr1 andArityType [arityType dflags rhs | (_,_,rhs) <- alts] of
-        xs | exprIsCheap scrut     -> xs
-        AFun one_shot _ | one_shot -> AFun True ATop
-        _                          -> ATop
-
-arityType dflags (Let b e) 
-  = case arityType dflags e of
-        xs              | cheap_bind b -> xs
-        AFun one_shot _ | one_shot     -> AFun True ATop
-        _                              -> ATop
+arityType dicts_cheap (Case scrut _ _ alts)
+  = trimArity (exprIsCheap scrut)
+             (foldr1 andArityType [arityType dicts_cheap rhs | (_,_,rhs) <- alts])
+
+arityType dicts_cheap (Let b e) 
+  = trimArity (cheap_bind b) (arityType dicts_cheap e)
    where
      cheap_bind (NonRec b e) = is_cheap (b,e)
      cheap_bind (Rec prs)    = all is_cheap prs
-    is_cheap (b,e) = (dopt Opt_DictsCheap dflags && isDictId b)
+    is_cheap (b,e) = (dicts_cheap && isDictLikeTy (idType b))
                    || exprIsCheap e
         -- If the experimental -fdicts-cheap flag is on, we eta-expand through
         -- dictionary bindings.  This improves arities. Thereby, it also
         -- means that full laziness is less prone to floating out the
-       -- application of a function to its dictionary arguments, which
+       -- application of a function to its dictionary arguments, which
         -- can thereby lose opportunities for fusion.  Example:
         --      foo :: Ord a => a -> ...
         --      foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
@@ -308,14 +436,19 @@ arityType dflags (Let b e)
         --
         -- The (foo DInt) is floated out, and makes ineffective a RULE 
         --      foo (bar x) = ...
-       --
+       --
         -- One could go further and make exprIsCheap reply True to any
         -- dictionary-typed expression, but that's more work.
+       -- 
+       -- See Note [Dictionary-like types] in TcType.lhs for why we use
+       -- isDictLikeTy here rather than isDictTy
  
-arityType _ _ = ATop
+arityType dicts_cheap (Note _ e)  = arityType dicts_cheap e
+arityType dicts_cheap (Cast e co) = trimCast co (arityType dicts_cheap e)
+arityType _           _           = vanillaArityType
  \end{code}
-
-
+  
+  
  %************************************************************************
  %*                                                                     *
                The main eta-expander                                                            
@@ -332,6 +465,13 @@ simplification but it's not too hard.  The alernative, of relying on
  a subsequent clean-up phase of the Simplifier to de-crapify the result,
  means you can't really use it in CorePrep, which is painful.
  
+Note [Eta expansion and SCCs]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note that SCCs are not treated specially by etaExpand.  If we have
+       etaExpand 2 (\x -> scc "foo" e)
+       = (\xy -> (scc "foo" e) y)
+So the costs of evaluating 'e' (not 'e y') are attributed to "foo"
+
  \begin{code}
  -- | @etaExpand n us e ty@ returns an expression with
  -- the same meaning as @e@, but with arity @n@.
@@ -346,11 +486,6 @@ means you can't really use it in CorePrep, which is painful.
  etaExpand :: Arity             -- ^ Result should have this number of value args
           -> CoreExpr           -- ^ Expression to expand
           -> CoreExpr
--- Note that SCCs are not treated specially.  If we have
---     etaExpand 2 (\x -> scc "foo" e)
---     = (\xy -> (scc "foo" e) y)
--- So the costs of evaluating 'e' (not 'e y') are attributed to "foo"
-
  -- etaExpand deals with for-alls. For example:
  --             etaExpand 1 E
  -- where  E :: forall a. a -> a
@@ -361,16 +496,13 @@ etaExpand :: Arity                -- ^ Result should have this number of value args
  -- so perhaps the extra code isn't worth it
  
  etaExpand n orig_expr
-  | manifestArity orig_expr >= n = orig_expr   -- The no-op case
-  | otherwise              
    = go n orig_expr
    where
-      -- Strip off existing lambdas
+      -- Strip off existing lambdas and casts
+      -- Note [Eta expansion and SCCs]
      go 0 expr = expr
      go n (Lam v body) | isTyVar v = Lam v (go n     body)
                               | otherwise = Lam v (go (n-1) body)
-    go n (Note InlineMe expr) = Note InlineMe (go n expr)
-        -- Note [Eta expansion and SCCs]
      go n (Cast expr co) = Cast (go n expr) co
      go n expr           = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, ppr etas]) $
                                   etaInfoAbs etas (etaInfoApp subst' expr etas)
@@ -452,8 +584,8 @@ mkEtaWW :: Arity -> InScopeSet -> Type
         -- Outgoing InScopeSet includes the EtaInfo vars
         --   and the original free vars
  
-mkEtaWW n in_scope ty
-  = go n empty_subst ty []
+mkEtaWW orig_n in_scope orig_ty
+  = go orig_n empty_subst orig_ty []
    where
      empty_subst = mkTvSubst in_scope emptyTvSubstEnv
  
@@ -471,6 +603,7 @@ mkEtaWW n in_scope ty
             -- Avoid free vars of the original expression
         = go (n-1) subst' res_ty (EtaVar eta_id' : eis)
                                            
+-- See Note [trimCast]
         | Just(ty',co) <- splitNewTypeRepCo_maybe ty
         =       -- Given this:
                         --      newtype T = MkT ([T] -> Int)
@@ -478,10 +611,12 @@ mkEtaWW n in_scope ty
                         --      eta_expand 1 e T
                         -- We want to get
                         --      coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
-         go n subst ty' (EtaCo (substTy subst co) : eis)
+         go n subst ty' (EtaCo (Type.substTy subst co) : eis)
+-------
  
         | otherwise                        -- We have an expression of arity > 0, 
-       = (getTvInScope subst, reverse eis) -- but its type isn't a function. 
+       = WARN( True, ppr orig_n <+> ppr orig_ty )
+         (getTvInScope subst, reverse eis) -- but its type isn't a function. 
         -- This *can* legitmately happen:
         -- e.g.  coerce Int (\x. x) Essentially the programmer is
         -- playing fast and loose with types (Happy does this a lot).
@@ -490,22 +625,13 @@ mkEtaWW n in_scope ty
     
  
  --------------
--- Avoiding unnecessary substitution
+-- Avoiding unnecessary substitution; use short-cutting versions
  
  subst_expr :: Subst -> CoreExpr -> CoreExpr
-subst_expr s e | isEmptySubst s = e
-              | otherwise      = substExpr s e
+subst_expr = substExprSC (text "CoreArity:substExpr")
  
  subst_bind :: Subst -> CoreBind -> (Subst, CoreBind)
-subst_bind subst (NonRec b r)
-  = (subst', NonRec b' (subst_expr subst r))
-  where
-    (subst', b') = substBndr subst b
-subst_bind subst (Rec prs)
-  = (subst', Rec (bs1 `zip` map (subst_expr subst') rhss))
-  where
-    (bs, rhss) = unzip prs
-    (subst', bs1) = substBndrs subst bs 
+subst_bind = substBindSC
  
  
  --------------
@@ -520,9 +646,9 @@ freshEtaId :: Int -> TvSubst -> Type -> (TvSubst, Id)
  freshEtaId n subst ty
        = (subst', eta_id')
        where
-        ty'     = substTy subst ty
+        ty'     = Type.substTy subst ty
         eta_id' = uniqAway (getTvInScope subst) $
                   mkSysLocal (fsLit "eta") (mkBuiltinUnique n) ty'
-       subst'  = extendTvInScope subst [eta_id']                 
+       subst'  = extendTvInScope subst eta_id'           
  \end{code}