Rewrite CorePrep and improve eta expansion

author simonpj@microsoft.com <unknown>

Tue, 13 Jan 2009 16:49:53 +0000 (16:49 +0000)

committer simonpj@microsoft.com <unknown>

Tue, 13 Jan 2009 16:49:53 +0000 (16:49 +0000)
author simonpj@microsoft.com <unknown>
Tue, 13 Jan 2009 16:49:53 +0000 (16:49 +0000)
committer simonpj@microsoft.com <unknown>
Tue, 13 Jan 2009 16:49:53 +0000 (16:49 +0000)
diff --git a/compiler/coreSyn/CoreArity.lhs b/compiler/coreSyn/CoreArity.lhs

new file mode 100644 (file)

index 0000000..0d1b394
--- /dev/null
+++ b/compiler/coreSyn/CoreArity.lhs
@@ -0,0 +1,531 @@
+%
+% (c) The University of Glasgow 2006
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+
+       Arity and ete expansion
+
+\begin{code}
+-- | Arit and eta expansion
+module CoreArity (
+       manifestArity, exprArity, 
+       exprEtaExpandArity, etaExpand 
+    ) where
+
+#include "HsVersions.h"
+
+import CoreSyn
+import CoreFVs
+import CoreUtils
+import qualified CoreSubst
+import CoreSubst ( Subst, substBndr, substBndrs, substExpr
+                        , mkEmptySubst, isEmptySubst )
+import Var
+import VarEnv
+#if mingw32_TARGET_OS
+import Packages
+#endif
+import Id
+import Type
+import Coercion
+import BasicTypes
+import Unique
+import Outputable
+import DynFlags
+import FastString
+import Maybes
+
+import GHC.Exts                -- For `xori` 
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+              manifestArity and exprArity
+%*                                                                     *
+%************************************************************************
+
+exprArity is a cheap-and-cheerful version of exprEtaExpandArity.
+It tells how many things the expression can be applied to before doing
+any work.  It doesn't look inside cases, lets, etc.  The idea is that
+exprEtaExpandArity will do the hard work, leaving something that's easy
+for exprArity to grapple with.  In particular, Simplify uses exprArity to
+compute the ArityInfo for the Id. 
+
+Originally I thought that it was enough just to look for top-level lambdas, but
+it isn't.  I've seen this
+
+       foo = PrelBase.timesInt
+
+We want foo to get arity 2 even though the eta-expander will leave it
+unchanged, in the expectation that it'll be inlined.  But occasionally it
+isn't, because foo is blacklisted (used in a rule).  
+
+Similarly, see the ok_note check in exprEtaExpandArity.  So 
+       f = __inline_me (\x -> e)
+won't be eta-expanded.
+
+And in any case it seems more robust to have exprArity be a bit more intelligent.
+But note that  (\x y z -> f x y z)
+should have arity 3, regardless of f's arity.
+
+Note [exprArity invariant]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+exprArity has the following invariant:
+       (exprArity e) = n, then manifestArity (etaExpand e n) = n
+
+That is, if exprArity says "the arity is n" then etaExpand really can get
+"n" manifest lambdas to the top.
+
+Why is this important?  Because 
+  - In TidyPgm we use exprArity to fix the *final arity* of 
+    each top-level Id, and in
+  - In CorePrep we use etaExpand on each rhs, so that the visible lambdas
+    actually match that arity, which in turn means
+    that the StgRhs has the right number of lambdas
+
+An alternative would be to do the eta-expansion in TidyPgm, at least
+for top-level bindings, in which case we would not need the trim_arity
+in exprArity.  That is a less local change, so I'm going to leave it for today!
+
+
+\begin{code}
+manifestArity :: CoreExpr -> Arity
+-- ^ manifestArity sees how many leading value lambdas there are
+manifestArity (Lam v e) | isId v    = 1 + manifestArity e
+                       | otherwise = manifestArity e
+manifestArity (Note _ e)           = manifestArity e
+manifestArity (Cast e _)            = manifestArity e
+manifestArity _                     = 0
+
+exprArity :: CoreExpr -> Arity
+-- ^ An approximate, fast, version of 'exprEtaExpandArity'
+exprArity e = go e
+  where
+    go (Var v)                          = idArity v
+    go (Lam x e) | isId x       = go e + 1
+                | otherwise     = go e
+    go (Note _ e)                = go e
+    go (Cast e co)               = trim_arity (go e) 0 (snd (coercionKind co))
+    go (App e (Type _))          = go e
+    go (App f a) | exprIsCheap a = (go f - 1) `max` 0
+       -- NB: exprIsCheap a!  
+       --      f (fac x) does not have arity 2, 
+       --      even if f has arity 3!
+       -- NB: `max 0`!  (\x y -> f x) has arity 2, even if f is
+       --               unknown, hence arity 0
+    go _                          = 0
+
+       -- Note [exprArity invariant]
+    trim_arity n a ty
+       | n==a                                        = a
+       | Just (_, ty') <- splitForAllTy_maybe ty     = trim_arity n a     ty'
+       | Just (_, ty') <- splitFunTy_maybe ty        = trim_arity n (a+1) ty'
+       | Just (ty',_)  <- splitNewTypeRepCo_maybe ty = trim_arity n a     ty'
+       | otherwise                                   = a
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+\subsection{Eta reduction and expansion}
+%*                                                                     *
+%************************************************************************
+
+exprEtaExpandArity is used when eta expanding
+       e  ==>  \xy -> e x y
+
+It returns 1 (or more) to:
+       case x of p -> \s -> ...
+because for I/O ish things we really want to get that \s to the top.
+We are prepared to evaluate x each time round the loop in order to get that
+
+It's all a bit more subtle than it looks:
+
+1.  One-shot lambdas
+
+Consider one-shot lambdas
+               let x = expensive in \y z -> E
+We want this to have arity 2 if the \y-abstraction is a 1-shot lambda
+Hence the ArityType returned by arityType
+
+2.  The state-transformer hack
+
+The one-shot lambda special cause is particularly important/useful for
+IO state transformers, where we often get
+       let x = E in \ s -> ...
+
+and the \s is a real-world state token abstraction.  Such abstractions
+are almost invariably 1-shot, so we want to pull the \s out, past the
+let x=E, even if E is expensive.  So we treat state-token lambdas as 
+one-shot even if they aren't really.  The hack is in Id.isOneShotBndr.
+
+3.  Dealing with bottom
+
+Consider also 
+       f = \x -> error "foo"
+Here, arity 1 is fine.  But if it is
+       f = \x -> case x of 
+                       True  -> error "foo"
+                       False -> \y -> x+y
+then we want to get arity 2.  Tecnically, this isn't quite right, because
+       (f True) `seq` 1
+should diverge, but it'll converge if we eta-expand f.  Nevertheless, we
+do so; it improves some programs significantly, and increasing convergence
+isn't a bad thing.  Hence the ABot/ATop in ArityType.
+
+Actually, the situation is worse.  Consider
+       f = \x -> case x of
+                       True  -> \y -> x+y
+                       False -> \y -> x-y
+Can we eta-expand here?  At first the answer looks like "yes of course", but
+consider
+       (f bot) `seq` 1
+This should diverge!  But if we eta-expand, it won't.   Again, we ignore this
+"problem", because being scrupulous would lose an important transformation for
+many programs.
+
+
+4. Newtypes
+
+Non-recursive newtypes are transparent, and should not get in the way.
+We do (currently) eta-expand recursive newtypes too.  So if we have, say
+
+       newtype T = MkT ([T] -> Int)
+
+Suppose we have
+       e = coerce T f
+where f has arity 1.  Then: etaExpandArity e = 1; 
+that is, etaExpandArity looks through the coerce.
+
+When we eta-expand e to arity 1: eta_expand 1 e T
+we want to get:                 coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
+
+HOWEVER, note that if you use coerce bogusly you can ge
+       coerce Int negate
+And since negate has arity 2, you might try to eta expand.  But you can't
+decopose Int to a function type.   Hence the final case in eta_expand.
+
+
+\begin{code}
+-- ^ The Arity returned is the number of value args the 
+-- expression can be applied to without doing much work
+exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity
+exprEtaExpandArity dflags e = arityDepth (arityType dflags e)
+
+-- A limited sort of function type
+data ArityType = AFun Bool ArityType   -- True <=> one-shot
+              | ATop                   -- Know nothing
+              | ABot                   -- Diverges
+
+arityDepth :: ArityType -> Arity
+arityDepth (AFun _ ty) = 1 + arityDepth ty
+arityDepth _           = 0
+
+andArityType :: ArityType -> ArityType -> ArityType
+andArityType ABot           at2           = at2
+andArityType ATop           _             = ATop
+andArityType (AFun t1 at1)  (AFun t2 at2) = AFun (t1 && t2) (andArityType at1 at2)
+andArityType at1            at2           = andArityType at2 at1
+
+arityType :: DynFlags -> CoreExpr -> ArityType
+       -- (go1 e) = [b1,..,bn]
+       -- means expression can be rewritten \x_b1 -> ... \x_bn -> body
+       -- where bi is True <=> the lambda is one-shot
+
+arityType dflags (Note _ e) = arityType dflags e
+--     Not needed any more: etaExpand is cleverer
+-- removed: | ok_note n = arityType dflags e
+-- removed: | otherwise = ATop
+
+arityType dflags (Cast e _) = arityType dflags e
+
+arityType _ (Var v)
+  = mk (idArity v) (arg_tys (idType v))
+  where
+    mk :: Arity -> [Type] -> ArityType
+       -- The argument types are only to steer the "state hack"
+       -- Consider case x of
+       --              True  -> foo
+       --              False -> \(s:RealWorld) -> e
+       -- where foo has arity 1.  Then we want the state hack to
+       -- apply to foo too, so we can eta expand the case.
+    mk 0 tys | isBottomingId v                   = ABot
+             | (ty:_) <- tys, isStateHackType ty = AFun True ATop
+             | otherwise                         = ATop
+    mk n (ty:tys) = AFun (isStateHackType ty) (mk (n-1) tys)
+    mk n []       = AFun False               (mk (n-1) [])
+
+    arg_tys :: Type -> [Type]  -- Ignore for-alls
+    arg_tys ty 
+       | Just (_, ty')  <- splitForAllTy_maybe ty = arg_tys ty'
+       | Just (arg,res) <- splitFunTy_maybe ty    = arg : arg_tys res
+       | otherwise                                = []
+
+       -- Lambdas; increase arity
+arityType dflags (Lam x e)
+  | isId x    = AFun (isOneShotBndr x) (arityType dflags e)
+  | otherwise = arityType dflags e
+
+       -- Applications; decrease arity
+arityType dflags (App f (Type _)) = arityType dflags f
+arityType dflags (App f a)
+   = case arityType dflags f of
+       ABot -> ABot    -- If function diverges, ignore argument
+       ATop -> ATop    -- No no info about function
+       AFun _ xs
+               | exprIsCheap a -> xs
+               | otherwise     -> ATop
+                                                          
+       -- Case/Let; keep arity if either the expression is cheap
+       -- or it's a 1-shot lambda
+       -- The former is not really right for Haskell
+       --      f x = case x of { (a,b) -> \y. e }
+       --  ===>
+       --      f x y = case x of { (a,b) -> e }
+       -- The difference is observable using 'seq'
+arityType dflags (Case scrut _ _ alts)
+  = case foldr1 andArityType [arityType dflags rhs | (_,_,rhs) <- alts] of
+        xs | exprIsCheap scrut     -> xs
+        AFun one_shot _ | one_shot -> AFun True ATop
+        _                          -> ATop
+
+arityType dflags (Let b e) 
+  = case arityType dflags e of
+        xs              | cheap_bind b -> xs
+        AFun one_shot _ | one_shot     -> AFun True ATop
+        _                              -> ATop
+  where
+    cheap_bind (NonRec b e) = is_cheap (b,e)
+    cheap_bind (Rec prs)    = all is_cheap prs
+    is_cheap (b,e) = (dopt Opt_DictsCheap dflags && isDictId b)
+                  || exprIsCheap e
+       -- If the experimental -fdicts-cheap flag is on, we eta-expand through
+       -- dictionary bindings.  This improves arities. Thereby, it also
+       -- means that full laziness is less prone to floating out the
+       -- application of a function to its dictionary arguments, which
+       -- can thereby lose opportunities for fusion.  Example:
+       --      foo :: Ord a => a -> ...
+       --      foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
+       --              -- So foo has arity 1
+       --
+       --      f = \x. foo dInt $ bar x
+       --
+       -- The (foo DInt) is floated out, and makes ineffective a RULE 
+       --      foo (bar x) = ...
+       --
+       -- One could go further and make exprIsCheap reply True to any
+       -- dictionary-typed expression, but that's more work.
+
+arityType _ _ = ATop
+\end{code}
+
+
+%************************************************************************
+%*                                                                     *
+              The main eta-expander                                                            
+%*                                                                     *
+%************************************************************************
+
+IMPORTANT NOTE: The eta expander is careful not to introduce "crap".
+In particular, given a CoreExpr satisfying the 'CpeRhs' invariant (in
+CorePrep), it returns a CoreExpr satisfying the same invariant. See
+Note [Eta expansion and the CorePrep invariants] in CorePrep.
+
+This means the eta-expander has to do a bit of on-the-fly
+simplification but it's not too hard.  The alernative, of relying on 
+a subsequent clean-up phase of the Simplifier to de-crapify the result,
+means you can't really use it in CorePrep, which is painful.
+
+\begin{code}
+-- | @etaExpand n us e ty@ returns an expression with
+-- the same meaning as @e@, but with arity @n@.
+--
+-- Given:
+--
+-- > e' = etaExpand n us e ty
+--
+-- We should have that:
+--
+-- > ty = exprType e = exprType e'
+etaExpand :: Arity             -- ^ Result should have this number of value args
+         -> CoreExpr           -- ^ Expression to expand
+         -> CoreExpr
+-- Note that SCCs are not treated specially.  If we have
+--     etaExpand 2 (\x -> scc "foo" e)
+--     = (\xy -> (scc "foo" e) y)
+-- So the costs of evaluating 'e' (not 'e y') are attributed to "foo"
+
+-- etaExpand deals with for-alls. For example:
+--             etaExpand 1 E
+-- where  E :: forall a. a -> a
+-- would return
+--     (/\b. \y::a -> E b y)
+--
+-- It deals with coerces too, though they are now rare
+-- so perhaps the extra code isn't worth it
+
+etaExpand n orig_expr
+  | manifestArity orig_expr >= n = orig_expr   -- The no-op case
+  | otherwise              
+  = go n orig_expr
+  where
+      -- Strip off existing lambdas
+    go 0 expr = expr
+    go n (Lam v body) | isTyVar v = Lam v (go n     body)
+                             | otherwise = Lam v (go (n-1) body)
+    go n (Note InlineMe expr) = Note InlineMe (go n expr)
+        -- Note [Eta expansion and SCCs]
+    go n (Cast expr co) = Cast (go n expr) co
+    go n expr           = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, ppr etas]) $
+                                 etaInfoAbs etas (etaInfoApp subst' expr etas)
+                       where
+                           in_scope = mkInScopeSet (exprFreeVars expr)
+                           (in_scope', etas) = mkEtaWW n in_scope (exprType expr)
+                           subst' = mkEmptySubst in_scope'
+
+                               -- Wrapper    Unwrapper
+--------------
+data EtaInfo = EtaVar Var      -- /\a. [],   [] a
+                               -- \x.  [],   [] x
+            | EtaCo Coercion   -- [] |> co,  [] |> (sym co)
+
+instance Outputable EtaInfo where
+   ppr (EtaVar v) = ptext (sLit "EtaVar") <+> ppr v
+   ppr (EtaCo co) = ptext (sLit "EtaCo")  <+> ppr co
+
+pushCoercion :: Coercion -> [EtaInfo] -> [EtaInfo]
+pushCoercion co1 (EtaCo co2 : eis)
+  | isIdentityCoercion co = eis
+  | otherwise            = EtaCo co : eis
+  where
+    co = co1 `mkTransCoercion` co2
+
+pushCoercion co eis = EtaCo co : eis
+
+--------------
+etaInfoAbs :: [EtaInfo] -> CoreExpr -> CoreExpr
+etaInfoAbs []               expr = expr
+etaInfoAbs (EtaVar v : eis) expr = Lam v (etaInfoAbs eis expr)
+etaInfoAbs (EtaCo co : eis) expr = Cast (etaInfoAbs eis expr) (mkSymCoercion co)
+
+--------------
+etaInfoApp :: Subst -> CoreExpr -> [EtaInfo] -> CoreExpr
+-- (etaInfoApp s e eis) returns something equivalent to 
+--            ((substExpr s e) `appliedto` eis)
+
+etaInfoApp subst (Lam v1 e) (EtaVar v2 : eis) 
+  = etaInfoApp subst' e eis
+  where
+    subst' | isTyVar v1 = CoreSubst.extendTvSubst subst v1 (mkTyVarTy v2) 
+          | otherwise  = CoreSubst.extendIdSubst subst v1 (Var v2)
+
+etaInfoApp subst (Cast e co1) eis
+  = etaInfoApp subst e (pushCoercion co' eis)
+  where
+    co' = CoreSubst.substTy subst co1
+
+etaInfoApp subst (Case e b _ alts) eis 
+  = Case (subst_expr subst e) b1 (coreAltsType alts') alts'
+  where
+    (subst1, b1) = substBndr subst b
+    alts' = map subst_alt alts
+    subst_alt (con, bs, rhs) = (con, bs', etaInfoApp subst2 rhs eis) 
+             where
+                (subst2,bs') = substBndrs subst1 bs
+    
+etaInfoApp subst (Let b e) eis 
+  = Let b' (etaInfoApp subst' e eis)
+  where
+    (subst', b') = subst_bind subst b
+
+etaInfoApp subst (Note note e) eis
+  = Note note (etaInfoApp subst e eis)
+
+etaInfoApp subst e eis
+  = go (subst_expr subst e) eis
+  where
+    go e []                  = e
+    go e (EtaVar v    : eis) = go (App e (varToCoreExpr v)) eis
+    go e (EtaCo co    : eis) = go (Cast e co) eis
+
+--------------
+mkEtaWW :: Arity -> InScopeSet -> Type
+       -> (InScopeSet, [EtaInfo])
+       -- EtaInfo contains fresh variables,
+       --   not free in the incoming CoreExpr
+       -- Outgoing InScopeSet includes the EtaInfo vars
+       --   and the original free vars
+
+mkEtaWW n in_scope ty
+  = go n empty_subst ty []
+  where
+    empty_subst = mkTvSubst in_scope emptyTvSubstEnv
+
+    go n subst ty eis
+       | n == 0
+       = (getTvInScope subst, reverse eis)
+
+       | Just (tv,ty') <- splitForAllTy_maybe ty
+       , let (subst', tv') = substTyVarBndr subst tv
+           -- Avoid free vars of the original expression
+       = go n subst' ty' (EtaVar tv' : eis)
+
+       | Just (arg_ty, res_ty) <- splitFunTy_maybe ty
+       , let (subst', eta_id') = freshEtaId n subst arg_ty 
+           -- Avoid free vars of the original expression
+       = go (n-1) subst' res_ty (EtaVar eta_id' : eis)
+                                          
+       | Just(ty',co) <- splitNewTypeRepCo_maybe ty
+       =       -- Given this:
+                       --      newtype T = MkT ([T] -> Int)
+                       -- Consider eta-expanding this
+                       --      eta_expand 1 e T
+                       -- We want to get
+                       --      coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
+         go n subst ty' (EtaCo (substTy subst co) : eis)
+
+       | otherwise                        -- We have an expression of arity > 0, 
+       = (getTvInScope subst, reverse eis) -- but its type isn't a function. 
+       -- This *can* legitmately happen:
+       -- e.g.  coerce Int (\x. x) Essentially the programmer is
+       -- playing fast and loose with types (Happy does this a lot).
+       -- So we simply decline to eta-expand.  Otherwise we'd end up
+       -- with an explicit lambda having a non-function type
+   
+
+--------------
+-- Avoiding unnecessary substitution
+
+subst_expr :: Subst -> CoreExpr -> CoreExpr
+subst_expr s e | isEmptySubst s = e
+              | otherwise      = substExpr s e
+
+subst_bind :: Subst -> CoreBind -> (Subst, CoreBind)
+subst_bind subst (NonRec b r)
+  = (subst', NonRec b' (subst_expr subst r))
+  where
+    (subst', b') = substBndr subst b
+subst_bind subst (Rec prs)
+  = (subst', Rec (bs1 `zip` map (subst_expr subst') rhss))
+  where
+    (bs, rhss) = unzip prs
+    (subst', bs1) = substBndrs subst bs 
+
+
+--------------
+freshEtaId :: Int -> TvSubst -> Type -> (TvSubst, Id)
+-- Make a fresh Id, with specified type (after applying substitution)
+-- It should be "fresh" in the sense that it's not in the in-scope set
+-- of the TvSubstEnv; and it should itself then be added to the in-scope
+-- set of the TvSubstEnv
+-- 
+-- The Int is just a reasonable starting point for generating a unique;
+-- it does not necessarily have to be unique itself.
+freshEtaId n subst ty
+      = (subst', eta_id')
+      where
+        ty'     = substTy subst ty
+       eta_id' = uniqAway (getTvInScope subst) $
+                 mkSysLocal (fsLit "eta") (mkBuiltinUnique n) ty'
+       subst'  = extendTvInScope subst [eta_id']                 
+\end{code}
+
diff --git a/compiler/coreSyn/CorePrep.lhs b/compiler/coreSyn/CorePrep.lhs

index db8bebc..b8dd80f 100644 (file)
--- a/compiler/coreSyn/CorePrep.lhs
+++ b/compiler/coreSyn/CorePrep.lhs
@@ -11,7 +11,8 @@ module CorePrep (
  
  #include "HsVersions.h"
  
-import CoreUtils hiding (exprIsTrivial)
+import CoreUtils
+import CoreArity
  import CoreFVs
  import CoreLint
  import CoreSyn
@@ -36,6 +37,7 @@ import Util
  import Outputable
  import MonadUtils
  import FastString
+import Control.Monad
  \end{code}
  
  -- ---------------------------------------------------------------------------
@@ -92,10 +94,41 @@ when type erasure is done for conversion to STG, we don't end up with
  any trivial or useless bindings.
  
    
+Invariants
+~~~~~~~~~~
+Here is the syntax of the Core produced by CorePrep:
  
--- -----------------------------------------------------------------------------
--- Top level stuff
--- -----------------------------------------------------------------------------
+    Trivial expressions 
+       triv ::= lit |  var  | triv ty  |  /\a. triv  |  triv |> co
+
+    Applications
+       app ::= lit  |  var  |  app triv  |  app ty  |  app |> co
+
+    Expressions
+       body ::= app  
+              | let(rec) x = rhs in body     -- Boxed only
+              | case body of pat -> body
+             | /\a. body
+              | body |> co
+
+    Right hand sides (only place where lambdas can occur)
+       rhs ::= /\a.rhs  |  \x.rhs  |  body
+
+We define a synonym for each of these non-terminals.  Functions
+with the corresponding name produce a result in that syntax.
+
+\begin{code}
+type CpeTriv = CoreExpr           -- Non-terminal 'triv'
+type CpeApp  = CoreExpr           -- Non-terminal 'app'
+type CpeBody = CoreExpr           -- Non-terminal 'body'
+type CpeRhs  = CoreExpr           -- Non-terminal 'rhs'
+\end{code}
+
+%************************************************************************
+%*                                                                     *
+               Top level stuff
+%*                                                                     *
+%************************************************************************
  
  \begin{code}
  corePrepPgm :: DynFlags -> [CoreBind] -> [TyCon] -> IO [CoreBind]
@@ -119,15 +152,68 @@ corePrepExpr :: DynFlags -> CoreExpr -> IO CoreExpr
  corePrepExpr dflags expr = do
      showPass dflags "CorePrep"
      us <- mkSplitUniqSupply 's'
-    let new_expr = initUs_ us (corePrepAnExpr emptyCorePrepEnv expr)
+    let new_expr = initUs_ us (cpeBodyNF emptyCorePrepEnv expr)
      dumpIfSet_dyn dflags Opt_D_dump_prep "CorePrep" (ppr new_expr)
      return new_expr
-\end{code}
  
--- -----------------------------------------------------------------------------
--- Implicit bindings
--- -----------------------------------------------------------------------------
+corePrepTopBinds :: [CoreBind] -> UniqSM Floats
+-- Note [Floating out of top level bindings]
+corePrepTopBinds binds 
+  = go emptyCorePrepEnv binds
+  where
+    go _   []             = return emptyFloats
+    go env (bind : binds) = do (env', bind') <- cpeBind TopLevel env bind
+                               binds' <- go env' binds
+                               return (bind' `appendFloats` binds')
  
+mkDataConWorkers :: [TyCon] -> [CoreBind]
+-- See Note [Data constructor workers]
+mkDataConWorkers data_tycons
+  = [ NonRec id (Var id)       -- The ice is thin here, but it works
+    | tycon <- data_tycons,    -- CorePrep will eta-expand it
+      data_con <- tyConDataCons tycon,
+      let id = dataConWorkId data_con ]
+\end{code}
+
+Note [Floating out of top level bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+NB: we do need to float out of top-level bindings
+Consider       x = length [True,False]
+We want to get
+               s1 = False : []
+               s2 = True  : s1
+               x  = length s2
+
+We return a *list* of bindings, because we may start with
+       x* = f (g y)
+where x is demanded, in which case we want to finish with
+       a = g y
+       x* = f a
+And then x will actually end up case-bound
+
+Note [CafInfo and floating]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+What happens to the CafInfo on the floated bindings?  By default, all
+the CafInfos will be set to MayHaveCafRefs, which is safe.
+
+This might be pessimistic, because the floated binding might not refer
+to any CAFs and the GC will end up doing more traversal than is
+necessary, but it's still better than not floating the bindings at
+all, because then the GC would have to traverse the structure in the
+heap instead.  Given this, we decided not to try to get the CafInfo on
+the floated bindings correct, because it looks difficult.
+
+But that means we can't float anything out of a NoCafRefs binding.
+Consider       f = g (h x)
+If f is NoCafRefs, we don't want to convert to
+              sat = h x
+               f = g sat
+where sat conservatively says HasCafRefs, because now f's info
+is wrong.  I don't think this is common, so we simply switch off
+floating in this case.
+
+Note [Data constructor workers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  Create any necessary "implicit" bindings for data con workers.  We
  create the rather strange (non-recursive!) binding
  
@@ -143,235 +229,84 @@ Hmm.  Should we create bindings for dictionary constructors?  They are
  always fully applied, and the bindings are just there to support
  partial applications. But it's easier to let them through.
  
-\begin{code}
-mkDataConWorkers :: [TyCon] -> [CoreBind]
-mkDataConWorkers data_tycons
-  = [ NonRec id (Var id)       -- The ice is thin here, but it works
-    | tycon <- data_tycons,    -- CorePrep will eta-expand it
-      data_con <- tyConDataCons tycon,
-      let id = dataConWorkId data_con ]
-\end{code}
-       
-
-\begin{code}
--- ---------------------------------------------------------------------------
--- Dealing with bindings
--- ---------------------------------------------------------------------------
-
-data FloatingBind = FloatLet CoreBind
-                 | FloatCase Id CoreExpr Bool
-                       -- Invariant: the expression is not a lambda
-                       -- The bool indicates "ok-for-speculation"
-
-data Floats = Floats OkToSpec (OrdList FloatingBind)
-
--- Can we float these binds out of the rhs of a let?  We cache this decision
--- to avoid having to recompute it in a non-linear way when there are
--- deeply nested lets.
-data OkToSpec
-   = NotOkToSpec       -- definitely not
-   | OkToSpec          -- yes
-   | IfUnboxedOk       -- only if floating an unboxed binding is ok
-
-emptyFloats :: Floats
-emptyFloats = Floats OkToSpec nilOL
-
-addFloat :: Floats -> FloatingBind -> Floats
-addFloat (Floats ok_to_spec floats) new_float
-  = Floats (combine ok_to_spec (check new_float)) (floats `snocOL` new_float)
-  where
-    check (FloatLet _) = OkToSpec
-    check (FloatCase _ _ ok_for_spec) 
-       | ok_for_spec  =  IfUnboxedOk
-       | otherwise    =  NotOkToSpec
-       -- The ok-for-speculation flag says that it's safe to
-       -- float this Case out of a let, and thereby do it more eagerly
-       -- We need the top-level flag because it's never ok to float
-       -- an unboxed binding to the top level
-
-unitFloat :: FloatingBind -> Floats
-unitFloat = addFloat emptyFloats
-
-appendFloats :: Floats -> Floats -> Floats
-appendFloats (Floats spec1 floats1) (Floats spec2 floats2)
-  = Floats (combine spec1 spec2) (floats1 `appOL` floats2)
-
-concatFloats :: [Floats] -> Floats
-concatFloats = foldr appendFloats emptyFloats
-
-combine :: OkToSpec -> OkToSpec -> OkToSpec
-combine NotOkToSpec _ = NotOkToSpec
-combine _ NotOkToSpec = NotOkToSpec
-combine IfUnboxedOk _ = IfUnboxedOk
-combine _ IfUnboxedOk = IfUnboxedOk
-combine _ _           = OkToSpec
-    
-instance Outputable FloatingBind where
-  ppr (FloatLet bind)        = text "FloatLet" <+> ppr bind
-  ppr (FloatCase b rhs spec) = text "FloatCase" <+> ppr b <+> ppr spec <+> equals <+> ppr rhs
-
-deFloatTop :: Floats -> [CoreBind]
--- For top level only; we don't expect any FloatCases
-deFloatTop (Floats _ floats)
-  = foldrOL get [] floats
-  where
-    get (FloatLet b) bs = b:bs
-    get b            _  = pprPanic "corePrepPgm" (ppr b)
-
-allLazy :: TopLevelFlag -> RecFlag -> Floats -> Bool
-allLazy top_lvl is_rec (Floats ok_to_spec _)
-  = case ok_to_spec of
-       OkToSpec    -> True
-       NotOkToSpec -> False
-       IfUnboxedOk -> isNotTopLevel top_lvl && isNonRec is_rec
  
--- ---------------------------------------------------------------------------
---                     Bindings
--- ---------------------------------------------------------------------------
+%************************************************************************
+%*                                                                     *
+               The main code
+%*                                                                     *
+%************************************************************************
  
-corePrepTopBinds :: [CoreBind] -> UniqSM Floats
-corePrepTopBinds binds 
-  = go emptyCorePrepEnv binds
-  where
-    go _   []             = return emptyFloats
-    go env (bind : binds) = do (env', bind') <- corePrepTopBind env bind
-                               binds' <- go env' binds
-                               return (bind' `appendFloats` binds')
+\begin{code}
+cpeBind :: TopLevelFlag
+       -> CorePrepEnv -> CoreBind
+       -> UniqSM (CorePrepEnv, Floats)
+cpeBind top_lvl env (NonRec bndr rhs)
+  = do { (_, bndr1) <- cloneBndr env bndr
+       ; let is_strict   = isStrictDmd (idNewDemandInfo bndr)
+             is_unlifted = isUnLiftedType (idType bndr)
+       ; (floats, bndr2, rhs2) <- cpePair top_lvl NonRecursive 
+                                                 (is_strict || is_unlifted) 
+                                         env bndr1 rhs
+       ; let new_float = mkFloat is_strict is_unlifted bndr2 rhs2
  
--- NB: we do need to float out of top-level bindings
--- Consider    x = length [True,False]
--- We want to get
---             s1 = False : []
---             s2 = True  : s1
---             x  = length s2
-
--- We return a *list* of bindings, because we may start with
---     x* = f (g y)
--- where x is demanded, in which case we want to finish with
---     a = g y
---     x* = f a
--- And then x will actually end up case-bound
---
--- What happens to the CafInfo on the floated bindings?  By
--- default, all the CafInfos will be set to MayHaveCafRefs,
--- which is safe.
---
--- This might be pessimistic, because eg. s1 & s2
--- might not refer to any CAFs and the GC will end up doing
--- more traversal than is necessary, but it's still better
--- than not floating the bindings at all, because then
--- the GC would have to traverse the structure in the heap
--- instead.  Given this, we decided not to try to get
--- the CafInfo on the floated bindings correct, because
--- it looks difficult.
-
---------------------------------
-corePrepTopBind :: CorePrepEnv -> CoreBind -> UniqSM (CorePrepEnv, Floats)
-corePrepTopBind env (NonRec bndr rhs) = do
-    (env', bndr') <- cloneBndr env bndr
-    (floats, rhs') <- corePrepRhs TopLevel NonRecursive env (bndr, rhs)
-    return (env', addFloat floats (FloatLet (NonRec bndr' rhs')))
-
-corePrepTopBind env (Rec pairs) = corePrepRecPairs TopLevel env pairs
-
---------------------------------
-corePrepBind ::  CorePrepEnv -> CoreBind -> UniqSM (CorePrepEnv, Floats)
-       -- This one is used for *local* bindings
-corePrepBind env (NonRec bndr rhs) = do
-    rhs1 <- etaExpandRhs bndr rhs
-    (floats, rhs2) <- corePrepExprFloat env rhs1
-    (_, bndr') <- cloneBndr env bndr
-    (floats', bndr'') <- mkLocalNonRec bndr' (bdrDem bndr) floats rhs2
          -- We want bndr'' in the envt, because it records
          -- the evaluated-ness of the binder
-    return (extendCorePrepEnv env bndr bndr'', floats')
-
-corePrepBind env (Rec pairs) = corePrepRecPairs NotTopLevel env pairs
-
---------------------------------
-corePrepRecPairs :: TopLevelFlag -> CorePrepEnv
-                -> [(Id,CoreExpr)]     -- Recursive bindings
-                -> UniqSM (CorePrepEnv, Floats)
--- Used for all recursive bindings, top level and otherwise
-corePrepRecPairs lvl env pairs = do
-    (env', bndrs') <- cloneBndrs env (map fst pairs)
-    (floats_s, rhss') <- mapAndUnzipM (corePrepRhs lvl Recursive env') pairs
-    return (env', unitFloat (FloatLet (Rec (flatten (concatFloats floats_s) bndrs' rhss'))))
+       ; return (extendCorePrepEnv env bndr bndr2, 
+                        addFloat floats new_float) }
+
+cpeBind top_lvl env (Rec pairs)
+  = do { let (bndrs,rhss) = unzip pairs
+       ; (env', bndrs1) <- cloneBndrs env (map fst pairs)
+       ; stuff <- zipWithM (cpePair top_lvl Recursive False env') bndrs1 rhss
+
+       ; let (floats_s, bndrs2, rhss2) = unzip3 stuff
+             all_pairs = foldrOL add_float (bndrs1 `zip` rhss2)
+                                          (concatFloats floats_s)
+       ; return (extendCorePrepEnvList env (bndrs `zip` bndrs2),
+                        unitFloat (FloatLet (Rec all_pairs))) }
    where
         -- Flatten all the floats, and the currrent
         -- group into a single giant Rec
-    flatten (Floats _ floats) bndrs rhss = foldrOL get (bndrs `zip` rhss) floats
-
-    get (FloatLet (NonRec b r)) prs2 = (b,r) : prs2
-    get (FloatLet (Rec prs1))   prs2 = prs1 ++ prs2
-    get b                       _    = pprPanic "corePrepRecPairs" (ppr b)
-
---------------------------------
-corePrepRhs :: TopLevelFlag -> RecFlag
-           -> CorePrepEnv -> (Id, CoreExpr)
-           -> UniqSM (Floats, CoreExpr)
--- Used for top-level bindings, and local recursive bindings
-corePrepRhs top_lvl is_rec env (bndr, rhs) = do
-    rhs' <- etaExpandRhs bndr rhs
-    floats_w_rhs <- corePrepExprFloat env rhs'
-    floatRhs top_lvl is_rec bndr floats_w_rhs
-
-
--- ---------------------------------------------------------------------------
--- Making arguments atomic (function args & constructor args)
--- ---------------------------------------------------------------------------
-
--- This is where we arrange that a non-trivial argument is let-bound
-corePrepArg :: CorePrepEnv -> CoreArg -> RhsDemand
-          -> UniqSM (Floats, CoreArg)
-corePrepArg env arg dem = do
-    (floats, arg') <- corePrepExprFloat env arg
-    if exprIsTrivial arg' && allLazy NotTopLevel NonRecursive floats
-       -- Note [Floating unlifted arguments]
-     then return (floats, arg')
-     else do v <- newVar (exprType arg')
-             (floats', v') <- mkLocalNonRec v dem floats arg'
-             return (floats', Var v')
-
--- version that doesn't consider an scc annotation to be trivial.
-exprIsTrivial :: CoreExpr -> Bool
-exprIsTrivial (Var _)                  = True
-exprIsTrivial (Type _)                 = True
-exprIsTrivial (Lit _)                  = True
-exprIsTrivial (App e arg)              = isTypeArg arg && exprIsTrivial e
-exprIsTrivial (Note (SCC _) _)         = False
-exprIsTrivial (Note _ e)               = exprIsTrivial e
-exprIsTrivial (Cast e _)               = exprIsTrivial e
-exprIsTrivial (Lam b body) | isTyVar b = exprIsTrivial body
-exprIsTrivial _                        = False
-\end{code}
-
-Note [Floating unlifted arguments]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider    C (let v* = expensive in v)
-
-where the "*" indicates "will be demanded".  Usually v will have been
-inlined by now, but let's suppose it hasn't (see Trac #2756).  Then we
-do *not* want to get
-
-     let v* = expensive in C v
+    add_float (FloatLet (NonRec b r)) prs2 = (b,r) : prs2
+    add_float (FloatLet (Rec prs1))   prs2 = prs1 ++ prs2
+    add_float b                       _    = pprPanic "cpeBind" (ppr b)
+
+---------------
+cpePair :: TopLevelFlag -> RecFlag -> RhsDemand
+       -> CorePrepEnv -> Id -> CoreExpr
+       -> UniqSM (Floats, Id, CoreExpr)
+-- Used for all bindings
+cpePair top_lvl is_rec is_strict_or_unlifted env bndr rhs
+  = do { (floats, rhs') <- cpeRhs want_float (idArity bndr) env rhs
+
+               -- Record if the binder is evaluated
+       ; let bndr' | exprIsHNF rhs' = bndr `setIdUnfolding` evaldUnfolding
+                          | otherwise      = bndr
+
+       ; return (floats, bndr', rhs') }
+  where
+    want_float floats rhs 
+     | isTopLevel top_lvl = wantFloatTop bndr floats
+     | otherwise          = wantFloatNested is_rec is_strict_or_unlifted floats rhs
  
-because that has different strictness.  Hence the use of 'allLazy'.
-(NB: the let v* turns into a FloatCase, in mkLocalNonRec.)
  
  
-\begin{code}
  -- ---------------------------------------------------------------------------
--- Dealing with expressions
+--             CpeRhs: produces a result satisfying CpeRhs
  -- ---------------------------------------------------------------------------
  
-corePrepAnExpr :: CorePrepEnv -> CoreExpr -> UniqSM CoreExpr
-corePrepAnExpr env expr = do
-    (floats, expr) <- corePrepExprFloat env expr
-    mkBinds floats expr
-
-
-corePrepExprFloat :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CoreExpr)
+cpeRhs :: (Floats -> CpeRhs -> Bool)   -- Float the floats out
+       -> Arity                -- Guarantees an Rhs with this manifest arity
+       -> CorePrepEnv
+       -> CoreExpr     -- Expression and its type
+       -> UniqSM (Floats, CpeRhs)
+cpeRhs want_float arity env expr
+  = do { (floats, rhs) <- cpeRhsE env expr
+       ; if want_float floats rhs
+                then return (floats,      cpeEtaExpand arity rhs)
+                else return (emptyFloats, cpeEtaExpand arity (wrapBinds floats rhs)) }
+
+cpeRhsE :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeRhs)
  -- If
  --     e  ===>  (bs, e')
  -- then        
@@ -380,76 +315,115 @@ corePrepExprFloat :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CoreExpr)
  -- For example
  --     f (g x)   ===>   ([v = g x], f v)
  
-corePrepExprFloat env (Var v) = do
-    v1 <- fiddleCCall v
-    let
-        v2 = lookupCorePrepEnv env v1
-    maybeSaturate v2 (Var v2) 0 emptyFloats (idType v2)
+cpeRhsE _env expr@(Type _) = return (emptyFloats, expr)
+cpeRhsE _env expr@(Lit _)  = return (emptyFloats, expr)
+cpeRhsE env expr@(App {})  = cpeApp env expr
+cpeRhsE env expr@(Var {})  = cpeApp env expr
+
+cpeRhsE env (Let bind expr)
+  = do { (env', new_binds) <- cpeBind NotTopLevel env bind
+       ; (floats, body) <- cpeRhsE env' expr
+       ; return (new_binds `appendFloats` floats, body) }
+
+cpeRhsE env (Note note expr)
+  | ignoreNote note
+  = cpeRhsE env expr
+  | otherwise        -- Just SCCs actually
+  = do { body <- cpeBodyNF env expr
+       ; return (emptyFloats, Note note body) }
+
+cpeRhsE env (Cast expr co)
+   = do { (floats, expr') <- cpeRhsE env expr
+        ; return (floats, Cast expr' co) }
+
+cpeRhsE env expr@(Lam {})
+   = do { let (bndrs,body) = collectBinders expr
+        ; (env', bndrs') <- cloneBndrs env bndrs
+       ; body' <- cpeBodyNF env' body
+       ; return (emptyFloats, mkLams bndrs' body') }
+
+cpeRhsE env (Case (Var id) bndr ty [(DEFAULT,[],expr)])
+  | Just (TickBox {}) <- isTickBoxOp_maybe id
+  = do { body <- cpeBodyNF env expr
+       ; return (emptyFloats, Case (Var id) bndr ty [(DEFAULT,[],body)]) }
+
+cpeRhsE env (Case scrut bndr ty alts)
+  = do { (floats, scrut') <- cpeBody env scrut
+       ; let bndr1 = bndr `setIdUnfolding` evaldUnfolding
+            -- Record that the case binder is evaluated in the alternatives
+       ; (env', bndr2) <- cloneBndr env bndr1
+       ; alts' <- mapM (sat_alt env') alts
+       ; return (floats, Case scrut' bndr2 ty alts') }
+  where
+    sat_alt env (con, bs, rhs)
+       = do { (env2, bs') <- cloneBndrs env bs
+            ; rhs' <- cpeBodyNF env2 rhs
+            ; return (con, bs', rhs') }
  
-corePrepExprFloat _env expr@(Type _)
-  = return (emptyFloats, expr)
+-- ---------------------------------------------------------------------------
+--             CpeBody: produces a result satisfying CpeBody
+-- ---------------------------------------------------------------------------
  
-corePrepExprFloat _env expr@(Lit _)
-  = return (emptyFloats, expr)
+cpeBodyNF :: CorePrepEnv -> CoreExpr -> UniqSM CpeBody
+cpeBodyNF env expr 
+  = do { (floats, body) <- cpeBody env expr
+       ; return (wrapBinds floats body) }
  
-corePrepExprFloat env (Let bind body) = do
-    (env', new_binds) <- corePrepBind env bind
-    (floats, new_body) <- corePrepExprFloat env' body
-    return (new_binds `appendFloats` floats, new_body)
+--------
+cpeBody :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeBody)
+cpeBody env expr
+  = do { (floats1, rhs) <- cpeRhsE env expr
+       ; (floats2, body) <- rhsToBody rhs
+       ; return (floats1 `appendFloats` floats2, body) }
  
-corePrepExprFloat env (Note n@(SCC _) expr) = do
-    expr1 <- corePrepAnExpr env expr
-    (floats, expr2) <- deLamFloat expr1
-    return (floats, Note n expr2)
+--------
+rhsToBody :: CpeRhs -> UniqSM (Floats, CpeBody)
+-- Remove top level lambdas by let-bindinig
  
-corePrepExprFloat env (Note other_note expr) = do
-    (floats, expr') <- corePrepExprFloat env expr
-    return (floats, Note other_note expr')
+rhsToBody (Note n expr)
+        -- You can get things like
+        --      case e of { p -> coerce t (\s -> ...) }
+  = do { (floats, expr') <- rhsToBody expr
+       ; return (floats, Note n expr') }
  
-corePrepExprFloat env (Cast expr co) = do
-    (floats, expr') <- corePrepExprFloat env expr
-    return (floats, Cast expr' co)
+rhsToBody (Cast e co)
+  = do { (floats, e') <- rhsToBody e
+       ; return (floats, Cast e' co) }
  
-corePrepExprFloat env expr@(Lam _ _) = do
-    (env', bndrs') <- cloneBndrs env bndrs
-    body' <- corePrepAnExpr env' body
-    return (emptyFloats, mkLams bndrs' body')
+rhsToBody expr@(Lam {})
+  | Just no_lam_result <- tryEtaReduce bndrs body
+  = return (emptyFloats, no_lam_result)
+  | all isTyVar bndrs          -- Type lambdas are ok
+  = return (emptyFloats, expr)
+  | otherwise                  -- Some value lambdas
+  = do { fn <- newVar (exprType expr)
+       ; let rhs   = cpeEtaExpand (exprArity expr) expr
+                    float = FloatLet (NonRec fn rhs)
+       ; return (unitFloat float, Var fn) }
    where
      (bndrs,body) = collectBinders expr
  
-corePrepExprFloat env (Case (Var id) bndr ty [(DEFAULT,[],expr)])
-  | Just (TickBox {}) <- isTickBoxOp_maybe id = do
-    expr1 <- corePrepAnExpr env expr
-    (floats, expr2) <- deLamFloat expr1
-    return (floats, Case (Var id) bndr ty [(DEFAULT,[],expr2)])
-
-corePrepExprFloat env (Case scrut bndr ty alts) = do
-    (floats1, scrut1) <- corePrepExprFloat env scrut
-    (floats2, scrut2) <- deLamFloat scrut1
-    let
-        bndr1 = bndr `setIdUnfolding` evaldUnfolding
-        -- Record that the case binder is evaluated in the alternatives
-    (env', bndr2) <- cloneBndr env bndr1
-    alts' <- mapM (sat_alt env') alts
-    return (floats1 `appendFloats` floats2 , Case scrut2 bndr2 ty alts')
-  where
-    sat_alt env (con, bs, rhs) = do
-            (env2, bs') <- cloneBndrs env bs
-            rhs1 <- corePrepAnExpr env2 rhs
-            rhs2 <- deLam rhs1
-            return (con, bs', rhs2)
+rhsToBody expr = return (emptyFloats, expr)
+
  
-corePrepExprFloat env expr@(App _ _) = do
-    (app, (head,depth), ty, floats, ss) <- collect_args expr 0
-    MASSERT(null ss)   -- make sure we used all the strictness info
+
+-- ---------------------------------------------------------------------------
+--             CpeApp: produces a result satisfying CpeApp
+-- ---------------------------------------------------------------------------
+
+cpeApp :: CorePrepEnv -> CoreExpr -> UniqSM (Floats, CpeRhs)
+-- May return a CpeRhs because of saturating primops
+cpeApp env expr 
+  = do { (app, (head,depth), _, floats, ss) <- collect_args expr 0
+       ; MASSERT(null ss)      -- make sure we used all the strictness info
  
         -- Now deal with the function
-    case head of
-      Var fn_id -> maybeSaturate fn_id app depth floats ty
-      _other    -> return (floats, app)
+       ; case head of
+           Var fn_id -> do { sat_app <- maybeSaturate fn_id app depth
+                          ; return (floats, sat_app) }
+           _other    -> return (floats, app) }
  
    where
-
      -- Deconstruct and rebuild the application, floating any non-atomic
      -- arguments to the outside.  We collect the type of the expression,
      -- the head of the application, and the number of actual value arguments,
@@ -458,34 +432,34 @@ corePrepExprFloat env expr@(App _ _) = do
  
      collect_args
         :: CoreExpr
-       -> Int                            -- current app depth
-       -> UniqSM (CoreExpr,              -- the rebuilt expression
-                  (CoreExpr,Int),        -- the head of the application,
-                                         -- and no. of args it was applied to
-                  Type,                  -- type of the whole expr
-                  Floats,                -- any floats we pulled out
-                  [Demand])              -- remaining argument demands
-
-    collect_args (App fun arg@(Type arg_ty)) depth = do
-          (fun',hd,fun_ty,floats,ss) <- collect_args fun depth
-          return (App fun' arg, hd, applyTy fun_ty arg_ty, floats, ss)
-
-    collect_args (App fun arg) depth = do
-          (fun',hd,fun_ty,floats,ss) <- collect_args fun (depth+1)
-          let
+       -> Int                     -- Current app depth
+       -> UniqSM (CpeApp,         -- The rebuilt expression
+                  (CoreExpr,Int), -- The head of the application,
+                                  -- and no. of args it was applied to
+                  Type,           -- Type of the whole expr
+                  Floats,         -- Any floats we pulled out
+                  [Demand])       -- Remaining argument demands
+
+    collect_args (App fun arg@(Type arg_ty)) depth
+      = do { (fun',hd,fun_ty,floats,ss) <- collect_args fun depth
+           ; return (App fun' arg, hd, applyTy fun_ty arg_ty, floats, ss) }
+
+    collect_args (App fun arg) depth
+      = do { (fun',hd,fun_ty,floats,ss) <- collect_args fun (depth+1)
+           ; let
                (ss1, ss_rest)   = case ss of
                                     (ss1:ss_rest) -> (ss1,     ss_rest)
                                     []            -> (lazyDmd, [])
-              (arg_ty, res_ty) = expectJust "corePrepExprFloat:collect_args" $
+              (arg_ty, res_ty) = expectJust "cpeBody:collect_args" $
                                   splitFunTy_maybe fun_ty
  
-          (fs, arg') <- corePrepArg env arg (mkDemTy ss1 arg_ty)
-          return (App fun' arg', hd, res_ty, fs `appendFloats` floats, ss_rest)
+           ; (fs, arg') <- cpeArg env (isStrictDmd ss1) arg arg_ty
+           ; return (App fun' arg', hd, res_ty, fs `appendFloats` floats, ss_rest) }
  
-    collect_args (Var v) depth = do
-          v1 <- fiddleCCall v
-          let v2 = lookupCorePrepEnv env v1
-          return (Var v2, (Var v2, depth), idType v2, emptyFloats, stricts)
+    collect_args (Var v) depth 
+      = do { v1 <- fiddleCCall v
+           ; let v2 = lookupCorePrepEnv env v1
+           ; return (Var v2, (Var v2, depth), idType v2, emptyFloats, stricts) }
         where
           stricts = case idNewStrictness v of
                         StrictSig (DmdType _ demands _)
@@ -498,254 +472,209 @@ corePrepExprFloat env expr@(App _ _) = do
                 -- Here, we can't evaluate the arg strictly, because this 
                 -- partial application might be seq'd
  
-    collect_args (Cast fun co) depth = do
-          let (_ty1,ty2) = coercionKind co
-          (fun', hd, _, floats, ss) <- collect_args fun depth
-          return (Cast fun' co, hd, ty2, floats, ss)
+    collect_args (Cast fun co) depth
+      = do { let (_ty1,ty2) = coercionKind co
+           ; (fun', hd, _, floats, ss) <- collect_args fun depth
+           ; return (Cast fun' co, hd, ty2, floats, ss) }
            
      collect_args (Note note fun) depth
-        | ignore_note note = do -- Drop these notes altogether
-                                -- They aren't used by the code generator
-          (fun', hd, fun_ty, floats, ss) <- collect_args fun depth
-         return (fun', hd, fun_ty, floats, ss)
+      | ignoreNote note         -- Drop these notes altogether
+      = collect_args fun depth  -- They aren't used by the code generator
  
         -- N-variable fun, better let-bind it
         -- ToDo: perhaps we can case-bind rather than let-bind this closure,
         -- since it is sure to be evaluated.
-    collect_args fun depth = do
-          (fun_floats, fun') <- corePrepExprFloat env fun
-          fn_id <- newVar ty
-          (floats, fn_id') <- mkLocalNonRec fn_id onceDem fun_floats fun'
-          return (Var fn_id', (Var fn_id', depth), ty, floats, [])
+    collect_args fun depth
+      = do { (fun_floats, fun') <- cpeArg env True fun ty
+           ; return (fun', (fun', depth), ty, fun_floats, []) }
          where
           ty = exprType fun
  
-    ignore_note        (CoreNote _) = True 
-    ignore_note        InlineMe     = True
-    ignore_note        _other       = False
-       -- We don't ignore SCCs, since they require some code generation
+-- ---------------------------------------------------------------------------
+--     CpeArg: produces a result satisfying CpeArg
+-- ---------------------------------------------------------------------------
+
+-- This is where we arrange that a non-trivial argument is let-bound
+cpeArg :: CorePrepEnv -> RhsDemand -> CoreArg -> Type
+       -> UniqSM (Floats, CpeTriv)
+cpeArg env is_strict arg arg_ty
+  | cpe_ExprIsTrivial arg      -- Do not eta expand etc a trivial argument
+  = cpeBody env arg    -- Must still do substitution though
+  | otherwise
+  = do { (floats, arg') <- cpeRhs want_float
+                                         (exprArity arg) env arg
+       ; v <- newVar arg_ty
+       ; let arg_float = mkFloat is_strict is_unlifted v arg'
+       ; return (addFloat floats arg_float, Var v) }
+  where
+    is_unlifted = isUnLiftedType arg_ty
+    want_float = wantFloatNested NonRecursive (is_strict || is_unlifted)
+\end{code}
+
+Note [Floating unlifted arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider    C (let v* = expensive in v)
+
+where the "*" indicates "will be demanded".  Usually v will have been
+inlined by now, but let's suppose it hasn't (see Trac #2756).  Then we
+do *not* want to get
+
+     let v* = expensive in C v
+
+because that has different strictness.  Hence the use of 'allLazy'.
+(NB: the let v* turns into a FloatCase, in mkLocalNonRec.)
+
  
  ------------------------------------------------------------------------------
  -- Building the saturated syntax
  -- ---------------------------------------------------------------------------
  
--- maybeSaturate deals with saturating primops and constructors
--- The type is the type of the entire application
-maybeSaturate :: Id -> CoreExpr -> Int -> Floats -> Type -> UniqSM (Floats, CoreExpr)
-maybeSaturate fn expr n_args floats ty
+maybeSaturate deals with saturating primops and constructors
+The type is the type of the entire application
+
+\begin{code}
+maybeSaturate :: Id -> CpeApp -> Int -> UniqSM CpeRhs
+maybeSaturate fn expr n_args
    | Just DataToTagOp <- isPrimOpId_maybe fn     -- DataToTag must have an evaluated arg
                                                  -- A gruesome special case
-  = do sat_expr <- saturate_it
+  = saturateDataToTag sat_expr
  
-        -- OK, now ensure that the arg is evaluated.
-        -- But (sigh) take into account the lambdas we've now introduced
-       let (eta_bndrs, eta_body) = collectBinders sat_expr
-       (eta_floats, eta_body') <- eval_data2tag_arg eta_body
-       if null eta_bndrs then
-           return (floats `appendFloats` eta_floats, eta_body')
-        else do
-           eta_body'' <- mkBinds eta_floats eta_body'
-           return (floats, mkLams eta_bndrs eta_body'')
-
-  | hasNoBinding fn = do sat_expr <- saturate_it
-                         return (floats, sat_expr)
-
-  | otherwise       = return (floats, expr)
+  | hasNoBinding fn       -- There's no binding
+  = return sat_expr
  
+  | otherwise 
+  = return expr
    where
      fn_arity    = idArity fn
      excess_arity = fn_arity - n_args
-
-    saturate_it :: UniqSM CoreExpr
-    saturate_it | excess_arity == 0 = return expr
-                | otherwise         = do us <- getUniquesM
-                                         return (etaExpand excess_arity us expr ty)
-
-       -- Ensure that the argument of DataToTagOp is evaluated
-    eval_data2tag_arg :: CoreExpr -> UniqSM (Floats, CoreExpr)
+    sat_expr     = cpeEtaExpand excess_arity expr
+
+-------------
+saturateDataToTag :: CpeApp -> UniqSM CpeApp
+-- Horrid: ensure that the arg of data2TagOp is evaluated
+--   (data2tag x) -->  (case x of y -> data2tag y)
+-- (yuk yuk) take into account the lambdas we've now introduced
+saturateDataToTag sat_expr
+  = do { let (eta_bndrs, eta_body) = collectBinders sat_expr
+       ; eta_body' <- eval_data2tag_arg eta_body
+       ; return (mkLams eta_bndrs eta_body') }
+  where
+    eval_data2tag_arg :: CpeApp -> UniqSM CpeBody
      eval_data2tag_arg app@(fun `App` arg)
          | exprIsHNF arg         -- Includes nullary constructors
-        = return (emptyFloats, app)   -- The arg is evaluated
+        = return app           -- The arg is evaluated
          | otherwise                     -- Arg not evaluated, so evaluate it
-        = do arg_id <- newVar (exprType arg)
-             let
-                arg_id1 = setIdUnfolding arg_id evaldUnfolding
-             return (unitFloat (FloatCase arg_id1 arg False ),
-                     fun `App` Var arg_id1)
+        = do { arg_id <- newVar (exprType arg)
+             ; let arg_id1 = setIdUnfolding arg_id evaldUnfolding
+             ; return (Case arg arg_id1 (exprType app)
+                            [(DEFAULT, [], fun `App` Var arg_id1)]) }
  
      eval_data2tag_arg (Note note app)  -- Scc notes can appear
-        = do (floats, app') <- eval_data2tag_arg app
-             return (floats, Note note app')
+        = do { app' <- eval_data2tag_arg app
+             ; return (Note note app') }
  
      eval_data2tag_arg other    -- Should not happen
         = pprPanic "eval_data2tag" (ppr other)
+\end{code}
  
  
--- ---------------------------------------------------------------------------
--- Precipitating the floating bindings
--- ---------------------------------------------------------------------------
-
-floatRhs :: TopLevelFlag -> RecFlag
-        -> Id
-        -> (Floats, CoreExpr)  -- Rhs: let binds in body
-        -> UniqSM (Floats,     -- Floats out of this bind
-                   CoreExpr)   -- Final Rhs
-
-floatRhs top_lvl is_rec _bndr (floats, rhs)
-  | isTopLevel top_lvl || exprIsHNF rhs,       -- Float to expose value or 
-    allLazy top_lvl is_rec floats              -- at top level
-  =    -- Why the test for allLazy? 
-       --      v = f (x `divInt#` y)
-       -- we don't want to float the case, even if f has arity 2,
-       -- because floating the case would make it evaluated too early
-    return (floats, rhs)
-    
-  | otherwise = do
-       -- Don't float; the RHS isn't a value
-    rhs' <- mkBinds floats rhs
-    return (emptyFloats, rhs')
-
--- mkLocalNonRec is used only for *nested*, *non-recursive* bindings
-mkLocalNonRec :: Id  -> RhsDemand      -- Lhs: id with demand
-             -> Floats -> CoreExpr     -- Rhs: let binds in body
-             -> UniqSM (Floats, Id)    -- The new Id may have an evaldUnfolding, 
-                                       -- to record that it's been evaluated
-
-mkLocalNonRec bndr dem floats rhs
-  | isUnLiftedType (idType bndr)
-       -- If this is an unlifted binding, we always make a case for it.
-  = ASSERT( not (isUnboxedTupleType (idType bndr)) )
-    let
-       float = FloatCase bndr rhs (exprOkForSpeculation rhs)
-    in
-    return (addFloat floats float, evald_bndr)
-
-  | isStrict dem 
-       -- It's a strict let so we definitely float all the bindings
-  = let                -- Don't make a case for a value binding,
-               -- even if it's strict.  Otherwise we get
-               --      case (\x -> e) of ...!
-       float | exprIsHNF rhs = FloatLet (NonRec bndr rhs)
-             | otherwise     = FloatCase bndr rhs (exprOkForSpeculation rhs)
-    in
-    return (addFloat floats float, evald_bndr)
-
-  | otherwise
-  = do (floats', rhs') <- floatRhs NotTopLevel NonRecursive bndr (floats, rhs)
-       return (addFloat floats' (FloatLet (NonRec bndr rhs')),
-               if exprIsHNF rhs' then evald_bndr else bndr)
-
-  where
-    evald_bndr = bndr `setIdUnfolding` evaldUnfolding
-       -- Record if the binder is evaluated
-
-
-mkBinds :: Floats -> CoreExpr -> UniqSM CoreExpr
--- Lambdas are not allowed as the body of a 'let'
-mkBinds (Floats _ binds) body 
-  | isNilOL binds = return body
-  | otherwise    = do { body' <- deLam body
-                       ; return (wrapBinds binds body') }
-
-wrapBinds :: OrdList FloatingBind -> CoreExpr -> CoreExpr
-wrapBinds binds body
-  = foldrOL mk_bind body binds
-  where
-    mk_bind (FloatCase bndr rhs _) body = Case rhs bndr (exprType body) [(DEFAULT, [], body)]
-    mk_bind (FloatLet bind)        body = Let bind body
-
----------------------
-etaExpandRhs :: CoreBndr -> CoreExpr -> UniqSM CoreExpr
-etaExpandRhs bndr rhs = do
-       -- Eta expand to match the arity claimed by the binder
-       -- Remember, CorePrep must not change arity
-       --
-       -- Eta expansion might not have happened already, 
-       -- because it is done by the simplifier only when 
-       -- there at least one lambda already.
-       -- 
-       -- NB1:we could refrain when the RHS is trivial (which can happen
-       --     for exported things).  This would reduce the amount of code
-       --     generated (a little) and make things a little words for
-       --     code compiled without -O.  The case in point is data constructor
-       --     wrappers.
-       --
-       -- NB2: we have to be careful that the result of etaExpand doesn't
-       --    invalidate any of the assumptions that CorePrep is attempting
-       --    to establish.  One possible cause is eta expanding inside of
-       --    an SCC note - we're now careful in etaExpand to make sure the
-       --    SCC is pushed inside any new lambdas that are generated.
-       --
-       -- NB3: It's important to do eta expansion, and *then* ANF-ising
-       --              f = /\a -> g (h 3)      -- h has arity 2
-       -- If we ANF first we get
-       --              f = /\a -> let s = h 3 in g s
-       -- and now eta expansion gives
-       --              f = /\a -> \ y -> (let s = h 3 in g s) y
-       -- which is horrible.
-       -- Eta expanding first gives
-       --              f = /\a -> \y -> let s = h 3 in g s y
-       --
-    us <- getUniquesM
-    let eta_rhs = etaExpand arity us rhs (idType bndr)
-
-    ASSERT2( manifestArity eta_rhs == arity, (ppr bndr <+> ppr arity <+> ppr (exprArity rhs)) 
-                                             $$ ppr rhs $$ ppr eta_rhs )
-       -- Assertion checks that eta expansion was successful
-      return eta_rhs
-  where
-       -- For a GlobalId, take the Arity from the Id.
-       -- It was set in CoreTidy and must not change
-       -- For all others, just expand at will
-    arity | isGlobalId bndr = idArity bndr
-         | otherwise       = exprArity rhs
  
--- ---------------------------------------------------------------------------
--- Eliminate Lam as a non-rhs (STG doesn't have such a thing)
--- We arrange that they only show up as the RHS of a let(rec)
--- ---------------------------------------------------------------------------
  
-deLam :: CoreExpr -> UniqSM CoreExpr
--- Takes an expression that may be a lambda, 
--- and returns one that definitely isn't:
---     (\x.e) ==>  let f = \x.e in f
-deLam expr = do
-    (Floats _ binds, expr) <- deLamFloat expr
-    return (wrapBinds binds expr)
+%************************************************************************
+%*                                                                     *
+               Simple CoreSyn operations
+%*                                                                     *
+%************************************************************************
  
+\begin{code}
+       -- We don't ignore SCCs, since they require some code generation
+ignoreNote :: Note -> Bool
+-- Tells which notes to drop altogether; they are ignored by code generation
+-- Do not ignore SCCs!
+-- It's important that we do drop InlineMe notes; for example
+--    unzip = __inline_me__ (/\ab. foldr (..) (..))
+-- Here unzip gets arity 1 so we'll eta-expand it. But we don't
+-- want to get this:
+--     unzip = /\ab \xs. (__inline_me__ ...) a b xs
+ignoreNote (CoreNote _) = True 
+ignoreNote InlineMe     = True
+ignoreNote _other       = False
+
+
+cpe_ExprIsTrivial :: CoreExpr -> Bool
+-- Version that doesn't consider an scc annotation to be trivial.
+cpe_ExprIsTrivial (Var _)                  = True
+cpe_ExprIsTrivial (Type _)                 = True
+cpe_ExprIsTrivial (Lit _)                  = True
+cpe_ExprIsTrivial (App e arg)              = isTypeArg arg && cpe_ExprIsTrivial e
+cpe_ExprIsTrivial (Note (SCC _) _)         = False
+cpe_ExprIsTrivial (Note _ e)               = cpe_ExprIsTrivial e
+cpe_ExprIsTrivial (Cast e _)               = cpe_ExprIsTrivial e
+cpe_ExprIsTrivial (Lam b body) | isTyVar b = cpe_ExprIsTrivial body
+cpe_ExprIsTrivial _                        = False
+\end{code}
  
-deLamFloat :: CoreExpr -> UniqSM (Floats, CoreExpr)
--- Remove top level lambdas by let-bindinig
+-- -----------------------------------------------------------------------------
+--     Eta reduction
+-- -----------------------------------------------------------------------------
  
-deLamFloat (Note n expr) = do
-        -- You can get things like
-        --      case e of { p -> coerce t (\s -> ...) }
-    (floats, expr') <- deLamFloat expr
-    return (floats, Note n expr')
+Note [Eta expansion]
+~~~~~~~~~~~~~~~~~~~~~
+Eta expand to match the arity claimed by the binder Remember,
+CorePrep must not change arity
+
+Eta expansion might not have happened already, because it is done by
+the simplifier only when there at least one lambda already.
+
+NB1:we could refrain when the RHS is trivial (which can happen
+    for exported things).  This would reduce the amount of code
+    generated (a little) and make things a little words for
+    code compiled without -O.  The case in point is data constructor
+    wrappers.
+
+NB2: we have to be careful that the result of etaExpand doesn't
+   invalidate any of the assumptions that CorePrep is attempting
+   to establish.  One possible cause is eta expanding inside of
+   an SCC note - we're now careful in etaExpand to make sure the
+   SCC is pushed inside any new lambdas that are generated.
+
+Note [Eta expansion and the CorePrep invariants]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It turns out to be much much easier to do eta expansion
+*after* the main CorePrep stuff.  But that places constraints
+on the eta expander: given a CpeRhs, it must return a CpeRhs.
+
+For example here is what we do not want:
+               f = /\a -> g (h 3)      -- h has arity 2
+After ANFing we get
+               f = /\a -> let s = h 3 in g s
+and now we do NOT want eta expansion to give
+               f = /\a -> \ y -> (let s = h 3 in g s) y
+
+Instead CoreArity.etaExpand gives
+               f = /\a -> \y -> let s = h 3 in g s y
  
-deLamFloat (Cast e co) = do
-    (floats, e') <- deLamFloat e
-    return (floats, Cast e' co)
+\begin{code}
+cpeEtaExpand :: Arity -> CoreExpr -> CoreExpr
+cpeEtaExpand arity expr
+  | arity == 0 = expr
+  | otherwise  = etaExpand arity expr
+\end{code}
  
-deLamFloat expr 
-  | null bndrs = return (emptyFloats, expr)
-  | otherwise 
-  = case tryEta bndrs body of
-      Just no_lam_result -> return (emptyFloats, no_lam_result)
-      Nothing            -> do fn <- newVar (exprType expr)
-                               return (unitFloat (FloatLet (NonRec fn expr)), 
-                                         Var fn)
-  where
-    (bndrs,body) = collectBinders expr
+-- -----------------------------------------------------------------------------
+--     Eta reduction
+-- -----------------------------------------------------------------------------
  
--- Why try eta reduction?  Hasn't the simplifier already done eta?
--- But the simplifier only eta reduces if that leaves something
--- trivial (like f, or f Int).  But for deLam it would be enough to
--- get to a partial application:
---     \xs. map f xs ==> map f
+Why try eta reduction?  Hasn't the simplifier already done eta?
+But the simplifier only eta reduces if that leaves something
+trivial (like f, or f Int).  But for deLam it would be enough to
+get to a partial application:
+       case x of { p -> \xs. map f xs }
+    ==> case x of { p -> map f }
  
-tryEta :: [CoreBndr] -> CoreExpr -> Maybe CoreExpr
-tryEta bndrs expr@(App _ _)
+\begin{code}
+tryEtaReduce :: [CoreBndr] -> CoreExpr -> Maybe CoreExpr
+tryEtaReduce bndrs expr@(App _ _)
    | ok_to_eta_reduce f &&
      n_remaining >= 0 &&
      and (zipWith ok bndrs last_args) &&
@@ -765,15 +694,15 @@ tryEta bndrs expr@(App _ _)
      ok_to_eta_reduce (Var f) = not (hasNoBinding f)
      ok_to_eta_reduce _       = False --safe. ToDo: generalise
  
-tryEta bndrs (Let bind@(NonRec _ r) body)
+tryEtaReduce bndrs (Let bind@(NonRec _ r) body)
    | not (any (`elemVarSet` fvs) bndrs)
-  = case tryEta bndrs body of
+  = case tryEtaReduce bndrs body of
         Just e -> Just (Let bind e)
         Nothing -> Nothing
    where
      fvs = exprFreeVars r
  
-tryEta _ _ = Nothing
+tryEtaReduce _ _ = Nothing
  \end{code}
  
  
@@ -782,35 +711,121 @@ tryEta _ _ = Nothing
  -- -----------------------------------------------------------------------------
  
  \begin{code}
-data RhsDemand
-     = RhsDemand { isStrict  :: Bool,  -- True => used at least once
-                  _isOnceDem :: Bool   -- True => used at most once
-                 }
+type RhsDemand = Bool  -- True => used strictly; hence not top-level, non-recursive
+\end{code}
  
-mkDem :: Demand -> Bool -> RhsDemand
-mkDem strict once = RhsDemand (isStrictDmd strict) once
+%************************************************************************
+%*                                                                     *
+               Floats
+%*                                                                     *
+%************************************************************************
  
-mkDemTy :: Demand -> Type -> RhsDemand
-mkDemTy strict _ty = RhsDemand (isStrictDmd strict)
-                               False {- For now -}
+\begin{code}
+data FloatingBind 
+  = FloatLet CoreBind          -- Rhs of bindings are CpeRhss
+  | FloatCase Id CpeBody Bool   -- The bool indicates "ok-for-speculation"
+
+data Floats = Floats OkToSpec (OrdList FloatingBind)
+
+-- Can we float these binds out of the rhs of a let?  We cache this decision
+-- to avoid having to recompute it in a non-linear way when there are
+-- deeply nested lets.
+data OkToSpec
+   = NotOkToSpec       -- definitely not
+   | OkToSpec          -- yes
+   | IfUnboxedOk       -- only if floating an unboxed binding is ok
  
-bdrDem :: Id -> RhsDemand
-bdrDem id = mkDem (idNewDemandInfo id)
-                 False {- For now -}
+mkFloat :: Bool -> Bool -> Id -> CpeRhs -> FloatingBind
+mkFloat is_strict is_unlifted bndr rhs
+  | use_case  = FloatCase bndr rhs (exprOkForSpeculation rhs)
+  | otherwise = FloatLet (NonRec bndr rhs)
+  where
+    use_case = is_unlifted || is_strict && not (exprIsHNF rhs)
+               -- Don't make a case for a value binding,
+               -- even if it's strict.  Otherwise we get
+               --      case (\x -> e) of ...!
+             
+emptyFloats :: Floats
+emptyFloats = Floats OkToSpec nilOL
  
--- safeDem :: RhsDemand
--- safeDem = RhsDemand False False  -- always safe to use this
+wrapBinds :: Floats -> CoreExpr -> CoreExpr
+wrapBinds (Floats _ binds) body
+  = foldrOL mk_bind body binds
+  where
+    mk_bind (FloatCase bndr rhs _) body = Case rhs bndr (exprType body) [(DEFAULT, [], body)]
+    mk_bind (FloatLet bind)        body = Let bind body
  
-onceDem :: RhsDemand
-onceDem = RhsDemand False True   -- used at most once
-\end{code}
+addFloat :: Floats -> FloatingBind -> Floats
+addFloat (Floats ok_to_spec floats) new_float
+  = Floats (combine ok_to_spec (check new_float)) (floats `snocOL` new_float)
+  where
+    check (FloatLet _) = OkToSpec
+    check (FloatCase _ _ ok_for_spec) 
+       | ok_for_spec  =  IfUnboxedOk
+       | otherwise    =  NotOkToSpec
+       -- The ok-for-speculation flag says that it's safe to
+       -- float this Case out of a let, and thereby do it more eagerly
+       -- We need the top-level flag because it's never ok to float
+       -- an unboxed binding to the top level
+
+unitFloat :: FloatingBind -> Floats
+unitFloat = addFloat emptyFloats
+
+appendFloats :: Floats -> Floats -> Floats
+appendFloats (Floats spec1 floats1) (Floats spec2 floats2)
+  = Floats (combine spec1 spec2) (floats1 `appOL` floats2)
+
+concatFloats :: [Floats] -> OrdList FloatingBind
+concatFloats = foldr (\ (Floats _ bs1) bs2 -> appOL bs1 bs2) nilOL
+
+combine :: OkToSpec -> OkToSpec -> OkToSpec
+combine NotOkToSpec _ = NotOkToSpec
+combine _ NotOkToSpec = NotOkToSpec
+combine IfUnboxedOk _ = IfUnboxedOk
+combine _ IfUnboxedOk = IfUnboxedOk
+combine _ _           = OkToSpec
+    
+instance Outputable FloatingBind where
+  ppr (FloatLet bind)        = text "FloatLet" <+> ppr bind
+  ppr (FloatCase b rhs spec) = text "FloatCase" <+> ppr b <+> ppr spec <+> equals <+> ppr rhs
  
+deFloatTop :: Floats -> [CoreBind]
+-- For top level only; we don't expect any FloatCases
+deFloatTop (Floats _ floats)
+  = foldrOL get [] floats
+  where
+    get (FloatLet b) bs = b:bs
+    get b            _  = pprPanic "corePrepPgm" (ppr b)
  
+-------------------------------------------
+wantFloatTop :: Id -> Floats -> Bool
+       -- Note [CafInfo and floating]
+wantFloatTop bndr floats = mayHaveCafRefs (idCafInfo bndr)
+                          && allLazyTop floats
+
+wantFloatNested :: RecFlag -> Bool -> Floats -> CpeRhs -> Bool
+wantFloatNested is_rec strict_or_unlifted floats rhs
+  = strict_or_unlifted
+  || (allLazyNested is_rec floats && exprIsHNF rhs)
+       -- Why the test for allLazyNested? 
+       --      v = f (x `divInt#` y)
+       -- we don't want to float the case, even if f has arity 2,
+       -- because floating the case would make it evaluated too early
+
+allLazyTop :: Floats -> Bool
+allLazyTop (Floats OkToSpec _) = True
+allLazyTop _                  = False
+
+allLazyNested :: RecFlag -> Floats -> Bool
+allLazyNested _      (Floats OkToSpec    _) = True
+allLazyNested _      (Floats NotOkToSpec _) = False
+allLazyNested is_rec (Floats IfUnboxedOk _) = isNonRec is_rec
+\end{code}
  
  
  %************************************************************************
  %*                                                                     *
-\subsection{Cloning}
+               Cloning
  %*                                                                     *
  %************************************************************************
  
@@ -827,6 +842,9 @@ emptyCorePrepEnv = CPE emptyVarEnv
  extendCorePrepEnv :: CorePrepEnv -> Id -> Id -> CorePrepEnv
  extendCorePrepEnv (CPE env) id id' = CPE (extendVarEnv env id id')
  
+extendCorePrepEnvList :: CorePrepEnv -> [(Id,Id)] -> CorePrepEnv
+extendCorePrepEnvList (CPE env) prs = CPE (extendVarEnvList env prs)
+
  lookupCorePrepEnv :: CorePrepEnv -> Id -> Id
  lookupCorePrepEnv (CPE env) id
    = case lookupVarEnv env id of
diff --git a/compiler/coreSyn/CoreSubst.lhs b/compiler/coreSyn/CoreSubst.lhs

index e08cdb8..f63968e 100644 (file)
--- a/compiler/coreSyn/CoreSubst.lhs
+++ b/compiler/coreSyn/CoreSubst.lhs
@@ -12,7 +12,7 @@ module CoreSubst (
  
          -- ** Substituting into expressions and related types
         deShadowBinds,
-       substTy, substExpr, substSpec, substWorker,
+       substTy, substExpr, substBind, substSpec, substWorker,
         lookupIdSubst, lookupTvSubst, 
  
          -- ** Operations on substitutions
diff --git a/compiler/coreSyn/CoreTidy.lhs b/compiler/coreSyn/CoreTidy.lhs

index 586f032..11e45b1 100644 (file)
--- a/compiler/coreSyn/CoreTidy.lhs
+++ b/compiler/coreSyn/CoreTidy.lhs
@@ -14,7 +14,7 @@ module CoreTidy (
  #include "HsVersions.h"
  
  import CoreSyn
-import CoreUtils
+import CoreArity
  import Id
  import IdInfo
  import Type
diff --git a/compiler/coreSyn/CoreUtils.lhs b/compiler/coreSyn/CoreUtils.lhs

index 25224a6..374c344 100644 (file)
--- a/compiler/coreSyn/CoreUtils.lhs
+++ b/compiler/coreSyn/CoreUtils.lhs
@@ -30,10 +30,6 @@ module CoreUtils (
         exprIsConApp_maybe, exprIsBottom,
         rhsIsStatic,
  
-       -- * Arity and eta expansion
-       manifestArity, exprArity, 
-       exprEtaExpandArity, etaExpand, 
-
         -- * Expression and bindings size
         coreBindsSize, exprSize,
  
@@ -72,10 +68,8 @@ import Type
  import Coercion
  import TyCon
  import CostCentre
-import BasicTypes
  import Unique
  import Outputable
-import DynFlags
  import TysPrim
  import FastString
  import Maybes
@@ -915,408 +909,6 @@ exprIsConApp_maybe expr = analyse (collectArgs expr)
  
  %************************************************************************
  %*                                                                     *
-\subsection{Eta reduction and expansion}
-%*                                                                     *
-%************************************************************************
-
-\begin{code}
--- ^ The Arity returned is the number of value args the 
--- expression can be applied to without doing much work
-exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity
-{- 
-exprEtaExpandArity is used when eta expanding
-       e  ==>  \xy -> e x y
-
-It returns 1 (or more) to:
-       case x of p -> \s -> ...
-because for I/O ish things we really want to get that \s to the top.
-We are prepared to evaluate x each time round the loop in order to get that
-
-It's all a bit more subtle than it looks:
-
-1.  One-shot lambdas
-
-Consider one-shot lambdas
-               let x = expensive in \y z -> E
-We want this to have arity 2 if the \y-abstraction is a 1-shot lambda
-Hence the ArityType returned by arityType
-
-2.  The state-transformer hack
-
-The one-shot lambda special cause is particularly important/useful for
-IO state transformers, where we often get
-       let x = E in \ s -> ...
-
-and the \s is a real-world state token abstraction.  Such abstractions
-are almost invariably 1-shot, so we want to pull the \s out, past the
-let x=E, even if E is expensive.  So we treat state-token lambdas as 
-one-shot even if they aren't really.  The hack is in Id.isOneShotBndr.
-
-3.  Dealing with bottom
-
-Consider also 
-       f = \x -> error "foo"
-Here, arity 1 is fine.  But if it is
-       f = \x -> case x of 
-                       True  -> error "foo"
-                       False -> \y -> x+y
-then we want to get arity 2.  Tecnically, this isn't quite right, because
-       (f True) `seq` 1
-should diverge, but it'll converge if we eta-expand f.  Nevertheless, we
-do so; it improves some programs significantly, and increasing convergence
-isn't a bad thing.  Hence the ABot/ATop in ArityType.
-
-Actually, the situation is worse.  Consider
-       f = \x -> case x of
-                       True  -> \y -> x+y
-                       False -> \y -> x-y
-Can we eta-expand here?  At first the answer looks like "yes of course", but
-consider
-       (f bot) `seq` 1
-This should diverge!  But if we eta-expand, it won't.   Again, we ignore this
-"problem", because being scrupulous would lose an important transformation for
-many programs.
-
-
-4. Newtypes
-
-Non-recursive newtypes are transparent, and should not get in the way.
-We do (currently) eta-expand recursive newtypes too.  So if we have, say
-
-       newtype T = MkT ([T] -> Int)
-
-Suppose we have
-       e = coerce T f
-where f has arity 1.  Then: etaExpandArity e = 1; 
-that is, etaExpandArity looks through the coerce.
-
-When we eta-expand e to arity 1: eta_expand 1 e T
-we want to get:                 coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
-
-HOWEVER, note that if you use coerce bogusly you can ge
-       coerce Int negate
-And since negate has arity 2, you might try to eta expand.  But you can't
-decopose Int to a function type.   Hence the final case in eta_expand.
--}
-
-
-exprEtaExpandArity dflags e = arityDepth (arityType dflags e)
-
--- A limited sort of function type
-data ArityType = AFun Bool ArityType   -- True <=> one-shot
-              | ATop                   -- Know nothing
-              | ABot                   -- Diverges
-
-arityDepth :: ArityType -> Arity
-arityDepth (AFun _ ty) = 1 + arityDepth ty
-arityDepth _           = 0
-
-andArityType :: ArityType -> ArityType -> ArityType
-andArityType ABot           at2           = at2
-andArityType ATop           _             = ATop
-andArityType (AFun t1 at1)  (AFun t2 at2) = AFun (t1 && t2) (andArityType at1 at2)
-andArityType at1            at2           = andArityType at2 at1
-
-arityType :: DynFlags -> CoreExpr -> ArityType
-       -- (go1 e) = [b1,..,bn]
-       -- means expression can be rewritten \x_b1 -> ... \x_bn -> body
-       -- where bi is True <=> the lambda is one-shot
-
-arityType dflags (Note _ e) = arityType dflags e
---     Not needed any more: etaExpand is cleverer
--- removed: | ok_note n = arityType dflags e
--- removed: | otherwise = ATop
-
-arityType dflags (Cast e _) = arityType dflags e
-
-arityType _ (Var v)
-  = mk (idArity v) (arg_tys (idType v))
-  where
-    mk :: Arity -> [Type] -> ArityType
-       -- The argument types are only to steer the "state hack"
-       -- Consider case x of
-       --              True  -> foo
-       --              False -> \(s:RealWorld) -> e
-       -- where foo has arity 1.  Then we want the state hack to
-       -- apply to foo too, so we can eta expand the case.
-    mk 0 tys | isBottomingId v                   = ABot
-             | (ty:_) <- tys, isStateHackType ty = AFun True ATop
-             | otherwise                         = ATop
-    mk n (ty:tys) = AFun (isStateHackType ty) (mk (n-1) tys)
-    mk n []       = AFun False               (mk (n-1) [])
-
-    arg_tys :: Type -> [Type]  -- Ignore for-alls
-    arg_tys ty 
-       | Just (_, ty')  <- splitForAllTy_maybe ty = arg_tys ty'
-       | Just (arg,res) <- splitFunTy_maybe ty    = arg : arg_tys res
-       | otherwise                                = []
-
-       -- Lambdas; increase arity
-arityType dflags (Lam x e)
-  | isId x    = AFun (isOneShotBndr x) (arityType dflags e)
-  | otherwise = arityType dflags e
-
-       -- Applications; decrease arity
-arityType dflags (App f (Type _)) = arityType dflags f
-arityType dflags (App f a)
-   = case arityType dflags f of
-       ABot -> ABot    -- If function diverges, ignore argument
-       ATop -> ATop    -- No no info about function
-       AFun _ xs
-               | exprIsCheap a -> xs
-               | otherwise     -> ATop
-                                                          
-       -- Case/Let; keep arity if either the expression is cheap
-       -- or it's a 1-shot lambda
-       -- The former is not really right for Haskell
-       --      f x = case x of { (a,b) -> \y. e }
-       --  ===>
-       --      f x y = case x of { (a,b) -> e }
-       -- The difference is observable using 'seq'
-arityType dflags (Case scrut _ _ alts)
-  = case foldr1 andArityType [arityType dflags rhs | (_,_,rhs) <- alts] of
-        xs | exprIsCheap scrut     -> xs
-        AFun one_shot _ | one_shot -> AFun True ATop
-        _                          -> ATop
-
-arityType dflags (Let b e) 
-  = case arityType dflags e of
-        xs              | cheap_bind b -> xs
-        AFun one_shot _ | one_shot     -> AFun True ATop
-        _                              -> ATop
-  where
-    cheap_bind (NonRec b e) = is_cheap (b,e)
-    cheap_bind (Rec prs)    = all is_cheap prs
-    is_cheap (b,e) = (dopt Opt_DictsCheap dflags && isDictId b)
-                  || exprIsCheap e
-       -- If the experimental -fdicts-cheap flag is on, we eta-expand through
-       -- dictionary bindings.  This improves arities. Thereby, it also
-       -- means that full laziness is less prone to floating out the
-       -- application of a function to its dictionary arguments, which
-       -- can thereby lose opportunities for fusion.  Example:
-       --      foo :: Ord a => a -> ...
-       --      foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
-       --              -- So foo has arity 1
-       --
-       --      f = \x. foo dInt $ bar x
-       --
-       -- The (foo DInt) is floated out, and makes ineffective a RULE 
-       --      foo (bar x) = ...
-       --
-       -- One could go further and make exprIsCheap reply True to any
-       -- dictionary-typed expression, but that's more work.
-
-arityType _ _ = ATop
-
-{- NOT NEEDED ANY MORE: etaExpand is cleverer
-ok_note InlineMe = False
-ok_note other    = True
-    -- Notice that we do not look through __inline_me__
-    -- This may seem surprising, but consider
-    --         f = _inline_me (\x -> e)
-    -- We DO NOT want to eta expand this to
-    --         f = \x -> (_inline_me (\x -> e)) x
-    -- because the _inline_me gets dropped now it is applied, 
-    -- giving just
-    --         f = \x -> e
-    -- A Bad Idea
--}
-\end{code}
-
-
-\begin{code}
--- | @etaExpand n us e ty@ returns an expression with
--- the same meaning as @e@, but with arity @n@.
---
--- Given:
---
--- > e' = etaExpand n us e ty
---
--- We should have that:
---
--- > ty = exprType e = exprType e'
-etaExpand :: Arity             -- ^ Result should have this number of value args
-         -> [Unique]           -- ^ Uniques to assign to the new binders
-         -> CoreExpr           -- ^ Expression to expand
-         -> Type               -- ^ Type of expression to expand
-         -> CoreExpr
--- Note that SCCs are not treated specially.  If we have
---     etaExpand 2 (\x -> scc "foo" e)
---     = (\xy -> (scc "foo" e) y)
--- So the costs of evaluating 'e' (not 'e y') are attributed to "foo"
-
-etaExpand n us expr ty
-  | manifestArity expr >= n = expr             -- The no-op case
-  | otherwise              
-  = eta_expand n us expr ty
-
--- manifestArity sees how many leading value lambdas there are
-manifestArity :: CoreExpr -> Arity
-manifestArity (Lam v e) | isId v    = 1 + manifestArity e
-                       | otherwise = manifestArity e
-manifestArity (Note _ e)           = manifestArity e
-manifestArity (Cast e _)            = manifestArity e
-manifestArity _                     = 0
-
--- etaExpand deals with for-alls. For example:
---             etaExpand 1 E
--- where  E :: forall a. a -> a
--- would return
---     (/\b. \y::a -> E b y)
---
--- It deals with coerces too, though they are now rare
--- so perhaps the extra code isn't worth it
-eta_expand :: Int -> [Unique] -> CoreExpr -> Type -> CoreExpr
-
-eta_expand n _ expr ty
-  | n == 0 && 
-    -- The ILX code generator requires eta expansion for type arguments
-    -- too, but alas the 'n' doesn't tell us how many of them there 
-    -- may be.  So we eagerly eta expand any big lambdas, and just
-    -- cross our fingers about possible loss of sharing in the ILX case. 
-    -- The Right Thing is probably to make 'arity' include
-    -- type variables throughout the compiler.  (ToDo.)
-    not (isForAllTy ty)        
-    -- Saturated, so nothing to do
-  = expr
-
-       -- Short cut for the case where there already
-       -- is a lambda; no point in gratuitously adding more
-eta_expand n us (Lam v body) ty
-  | isTyVar v
-  = Lam v (eta_expand n us body (applyTy ty (mkTyVarTy v)))
-
-  | otherwise
-  = Lam v (eta_expand (n-1) us body (funResultTy ty))
-
--- We used to have a special case that stepped inside Coerces here,
--- thus:  eta_expand n us (Note note@(Coerce _ ty) e) _  
---             = Note note (eta_expand n us e ty)
--- BUT this led to an infinite loop
--- Example:    newtype T = MkT (Int -> Int)
---     eta_expand 1 (coerce (Int->Int) e)
---     --> coerce (Int->Int) (eta_expand 1 T e)
---             by the bogus eqn
---     --> coerce (Int->Int) (coerce T 
---             (\x::Int -> eta_expand 1 (coerce (Int->Int) e)))
---             by the splitNewType_maybe case below
---     and round we go
-
-eta_expand n us expr ty
-  = ASSERT2 (exprType expr `coreEqType` ty, ppr (exprType expr) $$ ppr ty)
-    case splitForAllTy_maybe ty of { 
-         Just (tv,ty') -> 
-
-              Lam lam_tv (eta_expand n us2 (App expr (Type (mkTyVarTy lam_tv))) (substTyWith [tv] [mkTyVarTy lam_tv] ty'))
-                  where 
-                    lam_tv = setVarName tv (mkSysTvName uniq (fsLit "etaT"))
-                       -- Using tv as a base retains its tyvar/covar-ness
-                    (uniq:us2) = us 
-       ; Nothing ->
-  
-       case splitFunTy_maybe ty of {
-         Just (arg_ty, res_ty) -> Lam arg1 (eta_expand (n-1) us2 (App expr (Var arg1)) res_ty)
-                               where
-                                  arg1       = mkSysLocal (fsLit "eta") uniq arg_ty
-                                  (uniq:us2) = us
-                                  
-       ; Nothing ->
-
-               -- Given this:
-               --      newtype T = MkT ([T] -> Int)
-               -- Consider eta-expanding this
-               --      eta_expand 1 e T
-               -- We want to get
-               --      coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
-
-       case splitNewTypeRepCo_maybe ty of {
-         Just(ty1,co) -> mkCoerce (mkSymCoercion co) 
-                                  (eta_expand n us (mkCoerce co expr) ty1) ;
-         Nothing  -> 
-
-       -- We have an expression of arity > 0, but its type isn't a function
-       -- This *can* legitmately happen: e.g.  coerce Int (\x. x)
-       -- Essentially the programmer is playing fast and loose with types
-       -- (Happy does this a lot).  So we simply decline to eta-expand.
-       -- Otherwise we'd end up with an explicit lambda having a non-function type
-       expr
-       }}}
-\end{code}
-
-exprArity is a cheap-and-cheerful version of exprEtaExpandArity.
-It tells how many things the expression can be applied to before doing
-any work.  It doesn't look inside cases, lets, etc.  The idea is that
-exprEtaExpandArity will do the hard work, leaving something that's easy
-for exprArity to grapple with.  In particular, Simplify uses exprArity to
-compute the ArityInfo for the Id. 
-
-Originally I thought that it was enough just to look for top-level lambdas, but
-it isn't.  I've seen this
-
-       foo = PrelBase.timesInt
-
-We want foo to get arity 2 even though the eta-expander will leave it
-unchanged, in the expectation that it'll be inlined.  But occasionally it
-isn't, because foo is blacklisted (used in a rule).  
-
-Similarly, see the ok_note check in exprEtaExpandArity.  So 
-       f = __inline_me (\x -> e)
-won't be eta-expanded.
-
-And in any case it seems more robust to have exprArity be a bit more intelligent.
-But note that  (\x y z -> f x y z)
-should have arity 3, regardless of f's arity.
-
-Note [exprArity invariant]
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-exprArity has the following invariant:
-       (exprArity e) = n, then manifestArity (etaExpand e n) = n
-
-That is, if exprArity says "the arity is n" then etaExpand really can get
-"n" manifest lambdas to the top.
-
-Why is this important?  Because 
-  - In TidyPgm we use exprArity to fix the *final arity* of 
-    each top-level Id, and in
-  - In CorePrep we use etaExpand on each rhs, so that the visible lambdas
-    actually match that arity, which in turn means
-    that the StgRhs has the right number of lambdas
-
-An alternative would be to do the eta-expansion in TidyPgm, at least
-for top-level bindings, in which case we would not need the trim_arity
-in exprArity.  That is a less local change, so I'm going to leave it for today!
-
-
-\begin{code}
--- | An approximate, fast, version of 'exprEtaExpandArity'
-exprArity :: CoreExpr -> Arity
-exprArity e = go e
-  where
-    go (Var v)                          = idArity v
-    go (Lam x e) | isId x       = go e + 1
-                | otherwise     = go e
-    go (Note _ e)                = go e
-    go (Cast e co)               = trim_arity (go e) 0 (snd (coercionKind co))
-    go (App e (Type _))          = go e
-    go (App f a) | exprIsCheap a = (go f - 1) `max` 0
-       -- NB: exprIsCheap a!  
-       --      f (fac x) does not have arity 2, 
-       --      even if f has arity 3!
-       -- NB: `max 0`!  (\x y -> f x) has arity 2, even if f is
-       --               unknown, hence arity 0
-    go _                          = 0
-
-       -- Note [exprArity invariant]
-    trim_arity n a ty
-       | n==a                                        = a
-       | Just (_, ty') <- splitForAllTy_maybe ty     = trim_arity n a     ty'
-       | Just (_, ty') <- splitFunTy_maybe ty        = trim_arity n (a+1) ty'
-       | Just (ty',_)  <- splitNewTypeRepCo_maybe ty = trim_arity n a     ty'
-       | otherwise                                   = a
-\end{code}
-
-%************************************************************************
-%*                                                                     *
  \subsection{Equality}
  %*                                                                     *
  %************************************************************************
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in

index 22181fd..80132a6 100644 (file)
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -241,6 +241,7 @@ Library
          ClosureInfo
          CodeGen
          SMRep
+        CoreArity
          CoreFVs
          CoreLint
          CorePrep
diff --git a/compiler/main/TidyPgm.lhs b/compiler/main/TidyPgm.lhs

index f7644f6..869cc8f 100644 (file)
--- a/compiler/main/TidyPgm.lhs
+++ b/compiler/main/TidyPgm.lhs
@@ -19,7 +19,8 @@ import CoreTidy
  import PprCore
  import CoreLint
  import CoreUtils
-import Class   ( classSelIds )
+import CoreArity       ( exprArity )
+import Class           ( classSelIds )
  import VarEnv
  import VarSet
  import Var
diff --git a/compiler/simplCore/OccurAnal.lhs b/compiler/simplCore/OccurAnal.lhs

index 6af776a..83fbad1 100644 (file)
--- a/compiler/simplCore/OccurAnal.lhs
+++ b/compiler/simplCore/OccurAnal.lhs
@@ -980,6 +980,16 @@ us to adjust the OccInfo for 'x' and 'b' as we go.
  The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
  {x=b}; it's Nothing if the binder-swap doesn't happen.
  
+There is a danger though.  Consider
+      let v = x +# y
+      in case (f v) of w -> ...v...v...
+And suppose that (f v) expands to just v.  Then we'd like to
+use 'w' instead of 'v' in the alternative.  But it may be too
+late; we may have substituted the (cheap) x+#y for v in the 
+same simplifier pass that reduced (f v) to v.
+
+I think this is just too bad.  CSE will recover some of it.
+
  Note [Binder swap on GlobalId scrutinees]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  When the scrutinee is a GlobalId we must take care in two ways
@@ -993,6 +1003,67 @@ When the scrutinee is a GlobalId we must take care in two ways
       has an External Name. See, for example, SimplEnv Note [Global Ids in
       the substitution].
  
+Historical note [no-case-of-case]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We *used* to suppress the binder-swap in case expressoins when 
+-fno-case-of-case is on.  Old remarks:
+    "This happens in the first simplifier pass,
+    and enhances full laziness.  Here's the bad case:
+            f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
+    If we eliminate the inner case, we trap it inside the I# v -> arm,
+    which might prevent some full laziness happening.  I've seen this
+    in action in spectral/cichelli/Prog.hs:
+             [(m,n) | m <- [1..max], n <- [1..max]]
+    Hence the check for NoCaseOfCase."
+However, now the full-laziness pass itself reverses the binder-swap, so this
+check is no longer necessary.
+
+Historical note [Suppressing the case binder-swap]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+This old note describes a problem that is also fixed by doing the
+binder-swap in OccAnal:
+
+    There is another situation when it might make sense to suppress the
+    case-expression binde-swap. If we have
+
+        case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
+                       ...other cases .... }
+
+    We'll perform the binder-swap for the outer case, giving
+
+        case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
+                       ...other cases .... }
+
+    But there is no point in doing it for the inner case, because w1 can't
+    be inlined anyway.  Furthermore, doing the case-swapping involves
+    zapping w2's occurrence info (see paragraphs that follow), and that
+    forces us to bind w2 when doing case merging.  So we get
+
+        case x of w1 { A -> let w2 = w1 in e1
+                       B -> let w2 = w1 in e2
+                       ...other cases .... }
+
+    This is plain silly in the common case where w2 is dead.
+
+    Even so, I can't see a good way to implement this idea.  I tried
+    not doing the binder-swap if the scrutinee was already evaluated
+    but that failed big-time:
+
+            data T = MkT !Int
+
+            case v of w  { MkT x ->
+            case x of x1 { I# y1 ->
+            case x of x2 { I# y2 -> ...
+
+    Notice that because MkT is strict, x is marked "evaluated".  But to
+    eliminate the last case, we must either make sure that x (as well as
+    x1) has unfolding MkT y1.  THe straightforward thing to do is to do
+    the binder-swap.  So this whole note is a no-op.
+
+It's fixed by doing the binder-swap in OccAnal because we can do the
+binder-swap unconditionally and still get occurrence analysis
+information right.
+
  Note [Case of cast]
  ~~~~~~~~~~~~~~~~~~~
  Consider        case (x `cast` co) of b { I# ->
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs

index 0f6cf73..88abf4a 100644 (file)
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -34,6 +34,7 @@ import qualified CoreSubst
  import PprCore
  import CoreFVs
  import CoreUtils
+import CoreArity       ( etaExpand, exprEtaExpandArity )
  import CoreUnfold
  import Name
  import Id
@@ -230,8 +231,18 @@ splitInlineCont (ApplyTo dup (Type ty) se c)
    | Just (c1, c2) <- splitInlineCont c = Just (ApplyTo dup (Type ty) se c1, c2)
  splitInlineCont cont@(Stop {})         = Just (mkBoringStop, cont)
  splitInlineCont cont@(StrictBind {})   = Just (mkBoringStop, cont)
-splitInlineCont cont@(StrictArg  {})   = Just (mkBoringStop, cont)
  splitInlineCont _                      = Nothing
+       -- NB: we dissolve an InlineMe in any strict context, 
+       --     not just function aplication.  
+       -- E.g.  foldr k z (__inline_me (case x of p -> build ...))
+       --     Here we want to get rid of the __inline_me__ so we
+       --     can float the case, and see foldr/build
+       --
+       -- However *not* in a strict RHS, else we get
+       --         let f = __inline_me__ (\x. e) in ...f...
+       -- Now if f is guaranteed to be called, hence a strict binding
+       -- we don't thereby want to dissolve the __inline_me__; for
+       -- example, 'f' might be a  wrapper, so we'd inline the worker
  \end{code}
  
  
@@ -818,14 +829,14 @@ activeRule dflags env
  %************************************************************************
  
  \begin{code}
-mkLam :: [OutBndr] -> OutExpr -> SimplM OutExpr
+mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
  -- mkLam tries three things
  --     a) eta reduction, if that gives a trivial expression
  --     b) eta expansion [only if there are some value lambdas]
  
-mkLam [] body 
+mkLam _b [] body 
    = return body
-mkLam bndrs body
+mkLam _env bndrs body
    = do { dflags <- getDOptsSmpl
         ; mkLam' dflags bndrs body }
    where
@@ -847,7 +858,7 @@ mkLam bndrs body
  
        | dopt Opt_DoLambdaEtaExpansion dflags,
         any isRuntimeVar bndrs
-      = do { body' <- tryEtaExpansion dflags body
+      = do { let body' = tryEtaExpansion dflags body
            ; return (mkLams bndrs body') }
     
        | otherwise 
@@ -1032,11 +1043,10 @@ when computing arity; and etaExpand adds the coerces as necessary when
  actually computing the expansion.
  
  \begin{code}
-tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr
+tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr
  -- There is at least one runtime binder in the binders
-tryEtaExpansion dflags body = do
-    us <- getUniquesM
-    return (etaExpand fun_arity us body (exprType body))
+tryEtaExpansion dflags body
+  = etaExpand fun_arity body
    where
      fun_arity = exprEtaExpandArity dflags body
  \end{code}
diff --git a/compiler/simplCore/Simplify.lhs b/compiler/simplCore/Simplify.lhs

index eba2728..10965a1 100644 (file)
--- a/compiler/simplCore/Simplify.lhs
+++ b/compiler/simplCore/Simplify.lhs
@@ -26,6 +26,7 @@ import NewDemand        ( isStrictDmd, splitStrictSig )
  import PprCore          ( pprParendExpr, pprCoreExpr )
  import CoreUnfold       ( mkUnfolding, callSiteInline, CallCtxt(..) )
  import CoreUtils
+import CoreArity       ( exprArity )
  import Rules            ( lookupRule, getRules )
  import BasicTypes       ( isMarkedStrict )
  import CostCentre       ( currentCCS )
@@ -339,7 +340,7 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se
          ; (env', rhs')
              <-  if not (doFloatFromRhs top_lvl is_rec False body2 body_env2)
                  then                            -- No floating, just wrap up!
-                     do { rhs' <- mkLam tvs' (wrapFloats body_env2 body2)
+                     do { rhs' <- mkLam env tvs' (wrapFloats body_env2 body2)
                          ; return (env, rhs') }
  
                  else if null tvs then           -- Simple floating
@@ -349,7 +350,7 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se
                  else                            -- Do type-abstraction first
                       do { tick LetFloatFromLet
                          ; (poly_binds, body3) <- abstractFloats tvs' body_env2 body2
-                        ; rhs' <- mkLam tvs' body3
+                        ; rhs' <- mkLam env tvs' body3
                          ; let env' = foldl (addPolyBind top_lvl) env poly_binds
                          ; return (env', rhs') }
  
@@ -918,7 +919,7 @@ simplLam env (bndr:bndrs) body (ApplyTo _ arg arg_se cont)
  simplLam env bndrs body cont
    = do  { (env', bndrs') <- simplLamBndrs env bndrs
          ; body' <- simplExpr env' body
-        ; new_lam <- mkLam bndrs' body'
+        ; new_lam <- mkLam env' bndrs' body'
          ; rebuild env' new_lam cont }
  
  ------------------
@@ -1093,7 +1094,7 @@ completeCall env var cont
              Just unfolding      -- There is an inlining!
                ->  do { tick (UnfoldingDone var)
                       ; (if dopt Opt_D_dump_inlinings dflags then
-                           pprTrace ("Inlining done" ++ showSDoc (ppr var)) (vcat [
+                           pprTrace ("Inlining done: " ++ showSDoc (ppr var)) (vcat [
                                  text "Before:" <+> ppr var <+> sep (map pprParendExpr args),
                                  text "Inlined fn: " <+> nest 2 (ppr unfolding),
                                  text "Cont:  " <+> ppr call_cont])
@@ -1437,59 +1438,6 @@ At one point I did transformation in LiberateCase, but it's more robust here.
  LiberateCase gets to see it.)
  
  
-Historical note [no-case-of-case]
-~~~~~~~~~~~~~~~~~~~~~~
-We *used* to suppress the binder-swap in case expressoins when 
--fno-case-of-case is on.  Old remarks:
-    "This happens in the first simplifier pass,
-    and enhances full laziness.  Here's the bad case:
-            f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
-    If we eliminate the inner case, we trap it inside the I# v -> arm,
-    which might prevent some full laziness happening.  I've seen this
-    in action in spectral/cichelli/Prog.hs:
-             [(m,n) | m <- [1..max], n <- [1..max]]
-    Hence the check for NoCaseOfCase."
-However, now the full-laziness pass itself reverses the binder-swap, so this
-check is no longer necessary.
-
-Historical note [Suppressing the case binder-swap]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-There is another situation when it might make sense to suppress the
-case-expression binde-swap. If we have
-
-    case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
-                   ...other cases .... }
-
-We'll perform the binder-swap for the outer case, giving
-
-    case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
-                   ...other cases .... }
-
-But there is no point in doing it for the inner case, because w1 can't
-be inlined anyway.  Furthermore, doing the case-swapping involves
-zapping w2's occurrence info (see paragraphs that follow), and that
-forces us to bind w2 when doing case merging.  So we get
-
-    case x of w1 { A -> let w2 = w1 in e1
-                   B -> let w2 = w1 in e2
-                   ...other cases .... }
-
-This is plain silly in the common case where w2 is dead.
-
-Even so, I can't see a good way to implement this idea.  I tried
-not doing the binder-swap if the scrutinee was already evaluated
-but that failed big-time:
-
-        data T = MkT !Int
-
-        case v of w  { MkT x ->
-        case x of x1 { I# y1 ->
-        case x of x2 { I# y2 -> ...
-
-Notice that because MkT is strict, x is marked "evaluated".  But to
-eliminate the last case, we must either make sure that x (as well as
-x1) has unfolding MkT y1.  THe straightforward thing to do is to do
-the binder-swap.  So this whole note is a no-op.
  
  
  \begin{code}
diff --git a/compiler/stgSyn/CoreToStg.lhs b/compiler/stgSyn/CoreToStg.lhs

index d8c63b6..6dd0255 100644 (file)
--- a/compiler/stgSyn/CoreToStg.lhs
+++ b/compiler/stgSyn/CoreToStg.lhs
@@ -12,7 +12,8 @@ module CoreToStg ( coreToStg, coreExprToStg ) where
  #include "HsVersions.h"
  
  import CoreSyn
-import CoreUtils       ( rhsIsStatic, manifestArity, exprType, findDefault )
+import CoreUtils       ( rhsIsStatic, exprType, findDefault )
+import CoreArity       ( manifestArity )
  import StgSyn
  
  import Type
diff --git a/compiler/stranal/DmdAnal.lhs b/compiler/stranal/DmdAnal.lhs

index 917c624..41f574a 100644 (file)
--- a/compiler/stranal/DmdAnal.lhs
+++ b/compiler/stranal/DmdAnal.lhs
@@ -25,7 +25,8 @@ import StaticFlags    ( opt_MaxWorkerArgs )
  import NewDemand       -- All of it
  import CoreSyn
  import PprCore 
-import CoreUtils       ( exprIsHNF, exprIsTrivial, exprArity )
+import CoreUtils       ( exprIsHNF, exprIsTrivial )
+import CoreArity       ( exprArity )
  import DataCon         ( dataConTyCon )
  import TyCon           ( isProductTyCon, isRecursiveTyCon )
  import Id              ( Id, idType, idInlinePragma,
diff --git a/compiler/stranal/WorkWrap.lhs b/compiler/stranal/WorkWrap.lhs

index 9f19dc3..71f9ef8 100644 (file)
--- a/compiler/stranal/WorkWrap.lhs
+++ b/compiler/stranal/WorkWrap.lhs
@@ -10,7 +10,8 @@ module WorkWrap ( wwTopBinds, mkWrapper ) where
  
  import CoreSyn
  import CoreUnfold      ( certainlyWillInline )
-import CoreUtils       ( exprType, exprIsHNF, exprArity )
+import CoreUtils       ( exprType, exprIsHNF )
+import CoreArity       ( exprArity )
  import Var
  import Id              ( Id, idType, isOneShotLambda, 
                           setIdNewStrictness, mkWorkerId,
author	simonpj@microsoft.com <unknown>
	Tue, 13 Jan 2009 16:49:53 +0000 (16:49 +0000)
committer	simonpj@microsoft.com <unknown>
	Tue, 13 Jan 2009 16:49:53 +0000 (16:49 +0000)
compiler/coreSyn/CoreArity.lhs	[new file with mode: 0644]	patch \| blob
compiler/coreSyn/CorePrep.lhs		patch \| blob \| history
compiler/coreSyn/CoreSubst.lhs		patch \| blob \| history
compiler/coreSyn/CoreTidy.lhs		patch \| blob \| history
compiler/coreSyn/CoreUtils.lhs		patch \| blob \| history
compiler/ghc.cabal.in		patch \| blob \| history
compiler/main/TidyPgm.lhs		patch \| blob \| history
compiler/simplCore/OccurAnal.lhs		patch \| blob \| history
compiler/simplCore/SimplUtils.lhs		patch \| blob \| history
compiler/simplCore/Simplify.lhs		patch \| blob \| history
compiler/stgSyn/CoreToStg.lhs		patch \| blob \| history
compiler/stranal/DmdAnal.lhs		patch \| blob \| history
compiler/stranal/WorkWrap.lhs		patch \| blob \| history