Bottom extraction: float out bottoming expressions to top level

[ghc-hetmet.git] / compiler / simplCore / SimplUtils.lhs
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs

index 1c6768d..56d2795 100644 (file)
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -6,18 +6,19 @@
  \begin{code}
  module SimplUtils (
         -- Rebuilding
-       mkLam, mkCase, prepareAlts, bindCaseBndr,
+       mkLam, mkCase, prepareAlts, 
  
         -- Inlining,
         preInlineUnconditionally, postInlineUnconditionally, 
-       activeInline, activeRule, inlineMode,
+       activeUnfolding, activeUnfInRule, activeRule, 
+        simplEnvForGHCi, simplEnvForRules, updModeForInlineRules,
  
         -- The continuation type
         SimplCont(..), DupFlag(..), ArgInfo(..),
         contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, 
-       countValArgs, countArgs, splitInlineCont,
-       mkBoringStop, mkLazyArgStop, contIsRhsOrArg,
-       interestingCallContext, interestingArgContext,
+       pushArgs, countValArgs, countArgs, addArgTo,
+       mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
+       interestingCallContext, 
  
         interestingArg, mkArgInfo,
         
@@ -39,7 +40,7 @@ import CoreUnfold
  import Name
  import Id
  import Var     ( isCoVar )
-import NewDemand
+import Demand
  import SimplMonad
  import Type    hiding( substTy )
  import Coercion ( coercionKind )
@@ -52,7 +53,7 @@ import MonadUtils
  import Outputable
  import FastString
  
-import List( nub )
+import Data.List
  \end{code}
  
  
@@ -98,44 +99,53 @@ data SimplCont
  
    | ApplyTo            -- C arg
         DupFlag 
-       InExpr SimplEnv         -- The argument and its static env
+       InExpr StaticEnv                -- The argument and its static env
         SimplCont
  
    | Select             -- case C of alts
         DupFlag 
-       InId [InAlt] SimplEnv   -- The case binder, alts, and subst-env
+       InId [InAlt] StaticEnv  -- The case binder, alts, and subst-env
         SimplCont
  
    -- The two strict forms have no DupFlag, because we never duplicate them
    | StrictBind                 -- (\x* \xs. e) C
         InId [InBndr]           -- let x* = [] in e     
-       InExpr SimplEnv         --      is a special case 
+       InExpr StaticEnv        --      is a special case 
         SimplCont       
  
-  | StrictArg          -- e C
-       OutExpr                 -- e 
-       CallCtxt                -- Whether *this* argument position is interesting
-       ArgInfo                 -- Whether the function at the head of e has rules, etc
-       SimplCont               --     plus strictness flags for *further* args
+  | StrictArg          -- f e1 ..en C
+       ArgInfo         -- Specifies f, e1..en, Whether f has rules, etc
+                       --     plus strictness flags for *further* args
+        CallCtxt        -- Whether *this* argument position is interesting
+       SimplCont               
  
  data ArgInfo 
    = ArgInfo {
-       ai_rules :: Bool,       -- Function has rules (recursively)
-                               --      => be keener to inline in all args
-       ai_strs :: [Bool],      -- Strictness of arguments
+        ai_fun   :: Id,                -- The function
+       ai_args  :: [OutExpr],  -- ...applied to these args (which are in *reverse* order)
+       ai_rules :: [CoreRule], -- Rules for this function
+
+       ai_encl :: Bool,        -- Flag saying whether this function 
+                               -- or an enclosing one has rules (recursively)
+                               --      True => be keener to inline in all args
+       
+       ai_strs :: [Bool],      -- Strictness of remaining arguments
                                 --   Usually infinite, but if it is finite it guarantees
                                 --   that the function diverges after being given
                                 --   that number of args
-       ai_discs :: [Int]       -- Discounts for arguments; non-zero => be keener to inline
+       ai_discs :: [Int]       -- Discounts for remaining arguments; non-zero => be keener to inline
                                 --   Always infinite
      }
  
+addArgTo :: ArgInfo -> OutExpr -> ArgInfo
+addArgTo ai arg = ai { ai_args = arg : ai_args ai }
+
  instance Outputable SimplCont where
    ppr (Stop interesting)            = ptext (sLit "Stop") <> brackets (ppr interesting)
    ppr (ApplyTo dup arg _ cont)       = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg)
                                           {-  $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
    ppr (StrictBind b _ _ _ cont)      = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
-  ppr (StrictArg f _ _ cont)         = (ptext (sLit "StrictArg") <+> ppr f) $$ ppr cont
+  ppr (StrictArg ai _ cont)          = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
    ppr (Select dup bndr alts _ cont)  = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$ 
                                        (nest 4 (ppr alts)) $$ ppr cont 
    ppr (CoerceIt co cont)            = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
@@ -152,6 +162,9 @@ instance Outputable DupFlag where
  mkBoringStop :: SimplCont
  mkBoringStop = Stop BoringCtxt
  
+mkRhsStop :: SimplCont -- See Note [RHS of lets] in CoreUnfold
+mkRhsStop = Stop (ArgCtxt False)
+
  mkLazyArgStop :: CallCtxt -> SimplCont
  mkLazyArgStop cci = Stop cci
  
@@ -187,13 +200,17 @@ contResultType env ty cont
      go (Stop {})                      ty = ty
      go (CoerceIt co cont)             _  = go cont (snd (coercionKind co))
      go (StrictBind _ bs body se cont) _  = go cont (subst_ty se (exprType (mkLams bs body)))
-    go (StrictArg fn _ _ cont)        _  = go cont (funResultTy (exprType fn))
+    go (StrictArg ai _ cont)          _  = go cont (funResultTy (argInfoResultTy ai))
      go (Select _ _ alts se cont)      _  = go cont (subst_ty se (coreAltsType alts))
      go (ApplyTo _ arg se cont)        ty = go cont (apply_to_arg ty arg se)
  
      apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
      apply_to_arg ty _             _  = funResultTy ty
  
+argInfoResultTy :: ArgInfo -> OutType
+argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args })
+  = foldr (\arg fn_ty -> applyTypeToArg fn_ty arg) (idType fun) args
+
  -------------------
  countValArgs :: SimplCont -> Int
  countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
@@ -211,77 +228,19 @@ contArgs cont = go [] cont
      go args (ApplyTo _ arg se cont) = go (substExpr se arg : args) cont
      go args cont                   = (reverse args, cont)
  
+pushArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont
+pushArgs _env []         cont = cont
+pushArgs env  (arg:args) cont = ApplyTo NoDup arg env (pushArgs env args cont)
+
  dropArgs :: Int -> SimplCont -> SimplCont
  dropArgs 0 cont = cont
  dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
  dropArgs n other               = pprPanic "dropArgs" (ppr n <+> ppr other)
-
---------------------
-splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont)
--- Returns Nothing if the continuation should dissolve an InlineMe Note
--- Return Just (c1,c2) otherwise, 
---     where c1 is the continuation to put inside the InlineMe 
---     and   c2 outside
-
--- Example: (__inline_me__ (/\a. e)) ty
---     Here we want to do the beta-redex without dissolving the InlineMe
--- See test simpl017 (and Trac #1627) for a good example of why this is important
-
-splitInlineCont (ApplyTo dup (Type ty) se c)
-  | Just (c1, c2) <- splitInlineCont c = Just (ApplyTo dup (Type ty) se c1, c2)
-splitInlineCont cont@(Stop {})         = Just (mkBoringStop, cont)
-splitInlineCont cont@(StrictBind {})   = Just (mkBoringStop, cont)
-splitInlineCont _                      = Nothing
-       -- NB: we dissolve an InlineMe in any strict context, 
-       --     not just function aplication.  
-       -- E.g.  foldr k z (__inline_me (case x of p -> build ...))
-       --     Here we want to get rid of the __inline_me__ so we
-       --     can float the case, and see foldr/build
-       --
-       -- However *not* in a strict RHS, else we get
-       --         let f = __inline_me__ (\x. e) in ...f...
-       -- Now if f is guaranteed to be called, hence a strict binding
-       -- we don't thereby want to dissolve the __inline_me__; for
-       -- example, 'f' might be a  wrapper, so we'd inline the worker
-\end{code}
-
-
-\begin{code}
-interestingArg :: OutExpr -> Bool
-       -- An argument is interesting if it has *some* structure
-       -- We are here trying to avoid unfolding a function that
-       -- is applied only to variables that have no unfolding
-       -- (i.e. they are probably lambda bound): f x y z
-       -- There is little point in inlining f here.
-interestingArg (Var v)          = hasSomeUnfolding (idUnfolding v)
-                                       -- Was: isValueUnfolding (idUnfolding v')
-                                       -- But that seems over-pessimistic
-                                || isDataConWorkId v
-                                       -- This accounts for an argument like
-                                       -- () or [], which is definitely interesting
-interestingArg (Type _)                 = False
-interestingArg (App fn (Type _)) = interestingArg fn
-interestingArg (Note _ a)       = interestingArg a
-
--- Idea (from Sam B); I'm not sure if it's a good idea, so commented out for now
--- interestingArg expr | isUnLiftedType (exprType expr)
---        -- Unlifted args are only ever interesting if we know what they are
---  =                  case expr of
---                        Lit lit -> True
---                        _       -> False
-
-interestingArg _                 = True
-       -- Consider     let x = 3 in f x
-       -- The substitution will contain (x -> ContEx 3), and we want to
-       -- to say that x is an interesting argument.
-       -- But consider also (\x. f x y) y
-       -- The substitution will contain (x -> ContEx y), and we want to say
-       -- that x is not interesting (assuming y has no unfolding)
  \end{code}
  
  
-Comment about interestingCallContext
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [Interesting call context]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  We want to avoid inlining an expression where there can't possibly be
  any gain, such as in an argument position.  Hence, if the continuation
  is interesting (eg. a case scrutinee, application etc.) then we
@@ -316,13 +275,15 @@ default case.
  
  \begin{code}
  interestingCallContext :: SimplCont -> CallCtxt
+-- See Note [Interesting call context]
  interestingCallContext cont
    = interesting cont
    where
      interesting (Select _ bndr _ _ _)
         | isDeadBinder bndr = CaseCtxt
-       | otherwise         = ArgCtxt False 2   -- If the binder is used, this
+       | otherwise         = ArgCtxt False     -- If the binder is used, this
                                                 -- is like a strict let
+                                               -- See Note [RHS of lets] in CoreUnfold
                 
      interesting (ApplyTo _ arg _ cont)
         | isTypeArg arg = interesting cont
@@ -331,10 +292,10 @@ interestingCallContext cont
                                         -- motivation to inline. See Note [Cast then apply]
                                         -- in CoreUnfold
  
-    interesting (StrictArg _ cci _ _)  = cci
-    interesting (StrictBind {})                = BoringCtxt
-    interesting (Stop cci)             = cci
-    interesting (CoerceIt _ cont)      = interesting cont
+    interesting (StrictArg _ cci _) = cci
+    interesting (StrictBind {})            = BoringCtxt
+    interesting (Stop cci)         = cci
+    interesting (CoerceIt _ cont)   = interesting cont
         -- If this call is the arg of a strict function, the context
         -- is a bit interesting.  If we inline here, we may get useful
         -- evaluation information to avoid repeated evals: e.g.
@@ -353,24 +314,27 @@ interestingCallContext cont
  
  -------------------
  mkArgInfo :: Id
+         -> [CoreRule] -- Rules for function
           -> Int        -- Number of value args
-         -> SimplCont  -- Context of the cal
+         -> SimplCont  -- Context of the call
           -> ArgInfo
  
-mkArgInfo fun n_val_args call_cont
+mkArgInfo fun rules n_val_args call_cont
    | n_val_args < idArity fun           -- Note [Unsaturated functions]
-  = ArgInfo { ai_rules = False
+  = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules
+            , ai_encl = False
             , ai_strs = vanilla_stricts 
             , ai_discs = vanilla_discounts }
    | otherwise
-  = ArgInfo { ai_rules = interestingArgContext fun call_cont
+  = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules
+            , ai_encl = interestingArgContext rules call_cont
             , ai_strs  = add_type_str (idType fun) arg_stricts
             , ai_discs = arg_discounts }
    where
      vanilla_discounts, arg_discounts :: [Int]
      vanilla_discounts = repeat 0
      arg_discounts = case idUnfolding fun of
-                       CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _)
+                       CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
                               -> discounts ++ vanilla_discounts
                         _     -> vanilla_discounts
  
@@ -378,7 +342,7 @@ mkArgInfo fun n_val_args call_cont
      vanilla_stricts  = repeat False
  
      arg_stricts
-      = case splitStrictSig (idNewStrictness fun) of
+      = case splitStrictSig (idStrictness fun) of
           (demands, result_info)
                 | not (demands `lengthExceeds` n_val_args)
                 ->      -- Enough args, use the strictness given.
@@ -424,7 +388,7 @@ it'll just be floated out again.  Even if f has lots of discounts
  on its first argument -- it must be saturated for these to kick in
  -}
  
-interestingArgContext :: Id -> SimplCont -> Bool
+interestingArgContext :: [CoreRule] -> SimplCont -> Bool
  -- If the argument has form (f x y), where x,y are boring,
  -- and f is marked INLINE, then we don't want to inline f.
  -- But if the context of the argument is
@@ -435,25 +399,27 @@ interestingArgContext :: Id -> SimplCont -> Bool
  -- where h has rules, then we do want to inline f; hence the
  -- call_cont argument to interestingArgContext
  --
--- The interesting_arg_ctxt flag makes this happen; if it's
+-- The ai-rules flag makes this happen; if it's
  -- set, the inliner gets just enough keener to inline f 
  -- regardless of how boring f's arguments are, if it's marked INLINE
  --
  -- The alternative would be to *always* inline an INLINE function,
  -- regardless of how boring its context is; but that seems overkill
  -- For example, it'd mean that wrapper functions were always inlined
-interestingArgContext fn call_cont
-  = idHasRules fn || go call_cont
+interestingArgContext rules call_cont
+  = notNull rules || enclosing_fn_has_rules
    where
-    go (Select {})          = False
-    go (ApplyTo {})         = False
-    go (StrictArg _ cci _ _) = interesting cci
-    go (StrictBind {})      = False    -- ??
-    go (CoerceIt _ c)       = go c
-    go (Stop cci)            = interesting cci
-
-    interesting (ArgCtxt rules _) = rules
-    interesting _                 = False
+    enclosing_fn_has_rules = go call_cont
+
+    go (Select {})        = False
+    go (ApplyTo {})       = False
+    go (StrictArg _ cci _) = interesting cci
+    go (StrictBind {})    = False      -- ??
+    go (CoerceIt _ c)     = go c
+    go (Stop cci)          = interesting cci
+
+    interesting (ArgCtxt rules) = rules
+    interesting _               = False
  \end{code}
  
  
@@ -464,18 +430,61 @@ interestingArgContext fn call_cont
  %*                                                                     *
  %************************************************************************
  
-Inlining is controlled partly by the SimplifierMode switch.  This has two
-settings:
+\begin{code}
+simplEnvForGHCi :: SimplEnv
+simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $
+                  SimplGently { sm_rules = False, sm_inline = False }
+   -- Do not do any inlining, in case we expose some unboxed
+   -- tuple stuff that confuses the bytecode interpreter
+
+simplEnvForRules :: SimplEnv
+simplEnvForRules = mkSimplEnv allOffSwitchChecker $
+                   SimplGently { sm_rules = True, sm_inline = False }
+
+updModeForInlineRules :: SimplifierMode -> SimplifierMode
+updModeForInlineRules mode
+  = case mode of      
+      SimplGently {} -> mode   -- Don't modify mode if we already gentle
+      SimplPhase  {} -> SimplGently { sm_rules = True, sm_inline = True }
+       -- Simplify as much as possible, subject to the usual "gentle" rules
+\end{code}
  
+Inlining is controlled partly by the SimplifierMode switch.  This has two
+settings
+       
         SimplGently     (a) Simplifying before specialiser/full laziness
-                       (b) Simplifiying inside INLINE pragma
+                       (b) Simplifiying inside InlineRules
                         (c) Simplifying the LHS of a rule
                         (d) Simplifying a GHCi expression or Template 
                                 Haskell splice
  
         SimplPhase n _   Used at all other times
  
-The key thing about SimplGently is that it does no call-site inlining.
+Note [Gentle mode]
+~~~~~~~~~~~~~~~~~~
+Gentle mode has a separate boolean flag to control
+       a) inlining (sm_inline flag)
+       b) rules    (sm_rules  flag)
+A key invariant about Gentle mode is that it is treated as the EARLIEST
+phase.  Something is inlined if the sm_inline flag is on AND the thing
+is inlinable in the earliest phase.  This is important. Example
+
+  {-# INLINE [~1] g #-}
+  g = ...
+  
+  {-# INLINE f #-}
+  f x = g (g x)
+
+If we were to inline g into f's inlining, then an importing module would
+never be able to do
+       f e --> g (g e) ---> RULE fires
+because the InlineRule for f has had g inlined into it.
+
+On the other hand, it is bad not to do ANY inlining into an
+InlineRule, because then recursive knots in instance declarations
+don't get unravelled.
+
+However, *sometimes* SimplGently must do no call-site inlining at all.
  Before full laziness we must be careful not to inline wrappers,
  because doing so inhibits floating
      e.g. ...(case f x of ...)...
@@ -489,17 +498,24 @@ running it, we don't want to use -O2.  Indeed, we don't want to inline
  anything, because the byte-code interpreter might get confused about 
  unboxed tuples and suchlike.
  
-INLINE pragmas
-~~~~~~~~~~~~~~
-SimplGently is also used as the mode to simplify inside an InlineMe note.
+Note [RULEs enabled in SimplGently]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+RULES are enabled when doing "gentle" simplification.  Two reasons:
  
-\begin{code}
-inlineMode :: SimplifierMode
-inlineMode = SimplGently
-\end{code}
+  * We really want the class-op cancellation to happen:
+        op (df d1 d2) --> $cop3 d1 d2
+    because this breaks the mutual recursion between 'op' and 'df'
+
+  * I wanted the RULE
+        lift String ===> ...
+    to work in Template Haskell when simplifying
+    splices, so we get simpler code for literal strings
  
-It really is important to switch off inlinings inside such
-expressions.  Consider the following example 
+Note [Simplifying gently inside InlineRules]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do much simplification inside InlineRules (which come from
+INLINE pragmas).  It really is important to switch off inlinings
+inside such expressions.  Consider the following example
  
         let f = \pq -> BIG
         in
@@ -508,16 +524,14 @@ expressions.  Consider the following example
         in ...g...g...g...g...g...
  
  Now, if that's the ONLY occurrence of f, it will be inlined inside g,
-and thence copied multiple times when g is inlined.
-
+and thence copied multiple times when g is inlined.  
  
-This function may be inlinined in other modules, so we
-don't want to remove (by inlining) calls to functions that have
-specialisations, or that may have transformation rules in an importing
-scope.
+This function may be inlinined in other modules, so we don't want to
+remove (by inlining) calls to functions that have specialisations, or
+that may have transformation rules in an importing scope.
  
  E.g.   {-# INLINE f #-}
-               f x = ...g...
+       f x = ...g...
  
  and suppose that g is strict *and* has specialisations.  If we inline
  g's wrapper, we deny f the chance of getting the specialised version
@@ -535,15 +549,14 @@ continuation.  That's why the keep_inline predicate returns True for
  ArgOf continuations.  It shouldn't do any harm not to dissolve the
  inline-me note under these circumstances.
  
-Note that the result is that we do very little simplification
-inside an InlineMe.  
+Although we do very little simplification inside an InlineRule,
+the RHS is simplified as normal.  For example:
  
         all xs = foldr (&&) True xs
         any p = all . map p  {-# INLINE any #-}
  
-Problem: any won't get deforested, and so if it's exported and the
-importer doesn't use the inlining, (eg passes it as an arg) then we
-won't get deforestation at all.  We havn't solved this problem yet!
+The RHS of 'any' will get optimised and deforested; but the InlineRule
+will still mention the original RHS.
  
  
  preInlineUnconditionally
@@ -610,11 +623,30 @@ seems a bit fragile.
  Conclusion: inline top level things gaily until Phase 0 (the last
  phase), at which point don't.
  
+Note [pre/postInlineUnconditionally in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in gentle mode we want to do preInlineUnconditionally.  The
+reason is that too little clean-up happens if you don't inline
+use-once things.  Also a bit of inlining is *good* for full laziness;
+it can expose constant sub-expressions.  Example in
+spectral/mandel/Mandel.hs, where the mandelset function gets a useful
+let-float if you inline windowToViewport
+
+However, as usual for Gentle mode, do not inline things that are
+inactive in the intial stages.  See Note [Gentle mode].
+
+Note [Top-level botomming Ids]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Don't inline top-level Ids that are bottoming, even if they are used just
+once, because FloatOut has gone to some trouble to extract them out.
+Inlining them won't make the program run faster!
+
  \begin{code}
  preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
  preInlineUnconditionally env top_lvl bndr rhs
-  | not active                    = False
-  | opt_SimplNoPreInlining = False
+  | not active                                      = False
+  | isTopLevel top_lvl && isBottomingId bndr = False   -- Note [Top-level bottoming Ids]
+  | opt_SimplNoPreInlining                   = False
    | otherwise = case idOccInfo bndr of
                   IAmDead                    -> True    -- Happens in ((\x.1) v)
                   OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
@@ -622,15 +654,15 @@ preInlineUnconditionally env top_lvl bndr rhs
    where
      phase = getMode env
      active = case phase of
-                  SimplGently    -> isAlwaysActive prag
-                  SimplPhase n _ -> isActive n prag
-    prag = idInlinePragma bndr
-
+                  SimplGently {} -> isEarlyActive act
+                       -- See Note [pre/postInlineUnconditionally in gentle mode]
+                  SimplPhase n _ -> isActive n act
+    act = idInlineActivation bndr
      try_once in_lam int_cxt    -- There's one textual occurrence
         | not in_lam = isNotTopLevel top_lvl || early_phase
         | otherwise  = int_cxt && canInlineInLam rhs
  
--- Be very careful before inlining inside a lambda, becuase (a) we must not 
+-- Be very careful before inlining inside a lambda, because (a) we must not 
  -- invalidate occurrence information, and (b) we want to avoid pushing a
  -- single allocation (here) into multiple allocations (inside lambda).  
  -- Inlining a *function* with a single *saturated* call would be ok, mind you.
@@ -707,17 +739,19 @@ story for now.
  \begin{code}
  postInlineUnconditionally 
      :: SimplEnv -> TopLevelFlag
-    -> InId            -- The binder (an OutId would be fine too)
+    -> OutId           -- The binder (an InId would be fine too)
      -> OccInfo                 -- From the InId
      -> OutExpr
      -> Unfolding
      -> Bool
  postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
-  | not active            = False
-  | isLoopBreaker occ_info = False     -- If it's a loop-breaker of any kind, don't inline
+  | not active                 = False
+  | isLoopBreaker occ_info      = False        -- If it's a loop-breaker of any kind, don't inline
                                         -- because it might be referred to "earlier"
-  | isExportedId bndr      = False
-  | exprIsTrivial rhs     = True
+  | isExportedId bndr           = False
+  | isStableUnfolding unfolding = False        -- Note [InlineRule and postInlineUnconditionally]
+  | exprIsTrivial rhs          = True
+  | isTopLevel top_lvl          = False        -- Note [Top level and postInlineUnconditionally]
    | otherwise
    = case occ_info of
         -- The point of examining occ_info here is that for *non-values* 
@@ -730,7 +764,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
         --      case v of
         --         True  -> case x of ...
         --         False -> case x of ...
-       -- I'm not sure how important this is in practice
+       -- This is very important in practice; e.g. wheel-seive1 doubles 
+       -- in allocation if you miss this out
        OneOcc in_lam _one_br int_cxt    -- OneOcc => no code-duplication issue
         ->     smallEnoughToInline unfolding    -- Small enough to dup
                         -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
@@ -743,8 +778,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
                         -- PRINCIPLE: when we've already simplified an expression once, 
                         -- make sure that we only inline it if it's reasonably small.
  
-          &&  ((isNotTopLevel top_lvl && not in_lam) || 
-                       -- But outside a lambda, we want to be reasonably aggressive
+           && (not in_lam || 
+                       -- Outside a lambda, we want to be reasonably aggressive
                         -- about inlining into multiple branches of case
                         -- e.g. let x = <non-value> 
                         --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... } 
@@ -778,32 +813,61 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
  
    where
      active = case getMode env of
-                  SimplGently    -> isAlwaysActive prag
-                  SimplPhase n _ -> isActive n prag
-    prag = idInlinePragma bndr
+                  SimplGently {} -> isEarlyActive act
+                       -- See Note [pre/postInlineUnconditionally in gentle mode]
+                  SimplPhase n _ -> isActive n act
+    act = idInlineActivation bndr
  
-activeInline :: SimplEnv -> OutId -> Bool
-activeInline env id
+activeUnfolding :: SimplEnv -> IdUnfoldingFun
+activeUnfolding env
+  = case getMode env of
+      SimplGently { sm_inline = False } -> active_unfolding_minimal
+      SimplGently { sm_inline = True  } -> active_unfolding_gentle
+      SimplPhase n _                    -> active_unfolding n
+
+activeUnfInRule :: SimplEnv -> IdUnfoldingFun
+-- When matching in RULE, we want to "look through" an unfolding
+-- if *rules* are on, even if *inlinings* are not.  A notable example
+-- is DFuns, which really we want to match in rules like (op dfun)
+-- in gentle mode.
+activeUnfInRule env
    = case getMode env of
-      SimplGently -> False
-       -- No inlining at all when doing gentle stuff,
-       -- except for local things that occur once (pre/postInlineUnconditionally)
-       -- The reason is that too little clean-up happens if you 
-       -- don't inline use-once things.   Also a bit of inlining is *good* for
-       -- full laziness; it can expose constant sub-expressions.
-       -- Example in spectral/mandel/Mandel.hs, where the mandelset 
-       -- function gets a useful let-float if you inline windowToViewport
-
-       -- NB: we used to have a second exception, for data con wrappers.
-       -- On the grounds that we use gentle mode for rule LHSs, and 
-       -- they match better when data con wrappers are inlined.
-       -- But that only really applies to the trivial wrappers (like (:)),
-       -- and they are now constructed as Compulsory unfoldings (in MkId)
-       -- so they'll happen anyway.
-
-      SimplPhase n _ -> isActive n prag
+      SimplGently { sm_rules = False } -> active_unfolding_minimal
+      SimplGently { sm_rules = True  } -> active_unfolding_gentle
+      SimplPhase n _                   -> active_unfolding n
+
+active_unfolding_minimal :: IdUnfoldingFun
+-- Compuslory unfoldings only
+-- Ignore SimplGently, because we want to inline regardless;
+-- the Id has no top-level binding at all
+--
+-- NB: we used to have a second exception, for data con wrappers.
+-- On the grounds that we use gentle mode for rule LHSs, and 
+-- they match better when data con wrappers are inlined.
+-- But that only really applies to the trivial wrappers (like (:)),
+-- and they are now constructed as Compulsory unfoldings (in MkId)
+-- so they'll happen anyway.
+active_unfolding_minimal id
+  | isCompulsoryUnfolding unf = unf
+  | otherwise                 = NoUnfolding
    where
-    prag = idInlinePragma id
+    unf = realIdUnfolding id   -- Never a loop breaker
+
+active_unfolding_gentle :: IdUnfoldingFun
+-- Anything that is early-active
+-- See Note [Gentle mode]
+active_unfolding_gentle id
+  | isEarlyActive (idInlineActivation id) = idUnfolding id
+  | otherwise                             = NoUnfolding
+      -- idUnfolding checks for loop-breakers
+      -- Things with an INLINE pragma may have 
+      -- an unfolding *and* be a loop breaker  
+      -- (maybe the knot is not yet untied)
+
+active_unfolding :: CompilerPhase -> IdUnfoldingFun
+active_unfolding n id
+  | isActive n (idInlineActivation id) = idUnfolding id
+  | otherwise                          = NoUnfolding
  
  activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
  -- Nothing => No rules at all
@@ -812,15 +876,37 @@ activeRule dflags env
    = Nothing    -- Rewriting is off
    | otherwise
    = case getMode env of
-       SimplGently    -> Just isAlwaysActive
-                       -- Used to be Nothing (no rules in gentle mode)
-                       -- Main motivation for changing is that I wanted
-                       --      lift String ===> ...
-                       -- to work in Template Haskell when simplifying
-                       -- splices, so we get simpler code for literal strings
-       SimplPhase n _ -> Just (isActive n)
+      SimplGently { sm_rules = rules_on } 
+        | rules_on  -> Just isEarlyActive      -- Note [RULEs enabled in SimplGently]
+        | otherwise -> Nothing
+      SimplPhase n _ -> Just (isActive n)
  \end{code}
  
+Note [Top level and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do postInlineUnconditionally for top-level things (except
+ones that are trivial).  There is no point, because the main goal is
+to get rid of local bindings used in multiple case branches. And
+doing so risks replacing a single global allocation with local allocations.
+
+
+Note [InlineRule and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
+we lose the unfolding.  Example
+
+     -- f has InlineRule with rhs (e |> co)
+     --   where 'e' is big
+     f = e |> co
+
+Then there's a danger we'll optimise to
+
+     f' = e
+     f = f' |> co
+
+and now postInlineUnconditionally, losing the InlineRule on f.  Now f'
+won't inline because 'e' is too big.
+
  
  %************************************************************************
  %*                                                                     *
@@ -836,7 +922,7 @@ mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
  
  mkLam _b [] body 
    = return body
-mkLam _env bndrs body
+mkLam env bndrs body
    = do { dflags <- getDOptsSmpl
         ; mkLam' dflags bndrs body }
    where
@@ -857,7 +943,9 @@ mkLam _env bndrs body
            ; return etad_lam }
  
        | dopt Opt_DoLambdaEtaExpansion dflags,
-       any isRuntimeVar bndrs
+        not (inGentleMode env),              -- In gentle mode don't eta-expansion
+       any isRuntimeVar bndrs        -- because it can clutter up the code
+                                     -- with casts etc that may not be removed
        = do { let body' = tryEtaExpansion dflags body
            ; return (mkLams bndrs body') }
     
@@ -1253,83 +1341,61 @@ Historical note: if you use let-bindings instead of a substitution, beware of th
  
  prepareAlts tries these things:
  
-1.  If several alternatives are identical, merge them into
-    a single DEFAULT alternative.  I've occasionally seen this 
-    making a big difference:
+1.  Eliminate alternatives that cannot match, including the
+    DEFAULT alternative.
  
-       case e of               =====>     case e of
-         C _ -> f x                         D v -> ....v....
-         D v -> ....v....                   DEFAULT -> f x
-         DEFAULT -> f x
+2.  If the DEFAULT alternative can match only one possible constructor,
+    then make that constructor explicit.
+    e.g.
+        case e of x { DEFAULT -> rhs }
+     ===>
+        case e of x { (a,b) -> rhs }
+    where the type is a single constructor type.  This gives better code
+    when rhs also scrutinises x or e.
  
-   The point is that we merge common RHSs, at least for the DEFAULT case.
-   [One could do something more elaborate but I've never seen it needed.]
-   To avoid an expensive test, we just merge branches equal to the *first*
-   alternative; this picks up the common cases
-       a) all branches equal
-       b) some branches equal to the DEFAULT (which occurs first)
+3. Returns a list of the constructors that cannot holds in the
+   DEFAULT alternative (if there is one)
  
-2.  Case merging:
-       case e of b {             ==>   case e of b {
-        p1 -> rhs1                      p1 -> rhs1
-        ...                             ...
-        pm -> rhsm                      pm -> rhsm
-        _  -> case b of b' {            pn -> let b'=b in rhsn
-                    pn -> rhsn          ...
-                    ...                 po -> let b'=b in rhso
-                    po -> rhso          _  -> let b'=b in rhsd
-                    _  -> rhsd
-       }  
-    
-    which merges two cases in one case when -- the default alternative of
-    the outer case scrutises the same variable as the outer case This
-    transformation is called Case Merging.  It avoids that the same
-    variable is scrutinised multiple times.
+Here "cannot match" includes knowledge from GADTs
  
+It's a good idea do do this stuff before simplifying the alternatives, to
+avoid simplifying alternatives we know can't happen, and to come up with
+the list of constructors that are handled, to put into the IdInfo of the
+case binder, for use when simplifying the alternatives.
  
-The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs):
+Eliminating the default alternative in (1) isn't so obvious, but it can
+happen:
  
-       x | p `is` 1 -> e1
-         | p `is` 2 -> e2
-       ...etc...
+data Colour = Red | Green | Blue
  
-where @is@ was something like
-       
-       p `is` n = p /= (-1) && p == n
+f x = case x of
+        Red -> ..
+        Green -> ..
+        DEFAULT -> h x
  
-This gave rise to a horrible sequence of cases
+h y = case y of
+        Blue -> ..
+        DEFAULT -> [ case y of ... ]
  
-       case p of
-         (-1) -> $j p
-         1    -> e1
-         DEFAULT -> $j p
-
-and similarly in cascade for all the join points!
-
-Note [Dead binders]
-~~~~~~~~~~~~~~~~~~~~
-We do this *here*, looking at un-simplified alternatives, because we
-have to check that r doesn't mention the variables bound by the
-pattern in each alternative, so the binder-info is rather useful.
+If we inline h into f, the default case of the inlined h can't happen.
+If we don't notice this, we may end up filtering out *all* the cases
+of the inner case y, which give us nowhere to go!
  
  \begin{code}
-prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
-prepareAlts env scrut case_bndr' alts
-  = do { dflags <- getDOptsSmpl
-       ; alts <- combineIdenticalAlts case_bndr' alts
-
-       ; let (alts_wo_default, maybe_deflt) = findDefault alts
+prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
+prepareAlts scrut case_bndr' alts
+  = do { let (alts_wo_default, maybe_deflt) = findDefault alts
               alt_cons = [con | (con,_,_) <- alts_wo_default]
               imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
                 -- "imposs_deflt_cons" are handled 
                 --   EITHER by the context, 
                 --   OR by a non-DEFAULT branch in this case expression.
  
-       ; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app 
+       ; default_alts <- prepareDefault case_bndr' mb_tc_app 
                                          imposs_deflt_cons maybe_deflt
  
         ; let trimmed_alts = filterOut impossible_alt alts_wo_default
-             merged_alts = mergeAlts trimmed_alts default_alts
+             merged_alts  = mergeAlts trimmed_alts default_alts
                 -- We need the mergeAlts in case the new default_alt 
                 -- has turned into a constructor alternative.
                 -- The merge keeps the inner DEFAULT at the front, if there is one
@@ -1350,29 +1416,7 @@ prepareAlts env scrut case_bndr' alts
      impossible_alt _                   = False
  
  
---------------------------------------------------
---     1. Merge identical branches
---------------------------------------------------
-combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
-
-combineIdenticalAlts case_bndr ((_con1,bndrs1,rhs1) : con_alts)
-  | all isDeadBinder bndrs1,                   -- Remember the default 
-    length filtered_alts < length con_alts     -- alternative comes first
-       -- Also Note [Dead binders]
-  = do { tick (AltMerge case_bndr)
-       ; return ((DEFAULT, [], rhs1) : filtered_alts) }
-  where
-    filtered_alts       = filter keep con_alts
-    keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
-
-combineIdenticalAlts _ alts = return alts
-
--------------------------------------------------------------------------
---                     Prepare the default alternative
--------------------------------------------------------------------------
-prepareDefault :: DynFlags
-              -> SimplEnv
-              -> OutId         -- Case binder; need just for its type. Note that as an
+prepareDefault :: OutId                -- Case binder; need just for its type. Note that as an
                                 --   OutId, it has maximum information; this is important.
                                 --   Test simpl013 is an example
                -> Maybe (TyCon, [Type]) -- Type of scrutinee, decomposed
@@ -1380,42 +1424,9 @@ prepareDefault :: DynFlags
                -> Maybe InExpr  -- Rhs
                -> SimplM [InAlt]        -- Still unsimplified
                                         -- We use a list because it's what mergeAlts expects,
-                                       -- And becuase case-merging can cause many to show up
-
--------        Merge nested cases ----------
-prepareDefault dflags env outer_bndr _bndr_ty imposs_cons (Just deflt_rhs)
-  | dopt Opt_CaseMerge dflags
-  , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
-  , DoneId inner_scrut_var' <- substId env inner_scrut_var
-       -- Remember, inner_scrut_var is an InId, but outer_bndr is an OutId
-  , inner_scrut_var' == outer_bndr
-       -- NB: the substId means that if the outer scrutinee was a 
-       --     variable, and inner scrutinee is the same variable, 
-       --     then inner_scrut_var' will be outer_bndr
-       --     via the magic of simplCaseBinder
-  = do { tick (CaseMerge outer_bndr)
-
-       ; let munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
-       ; return [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts,
-                                              not (con `elem` imposs_cons) ]
-               -- NB: filter out any imposs_cons.  Example:
-               --      case x of 
-               --        A -> e1
-               --        DEFAULT -> case x of 
-               --                      A -> e2
-               --                      B -> e3
-               -- When we merge, we must ensure that e1 takes 
-               -- precedence over e2 as the value for A!  
-       }
-       -- Warning: don't call prepareAlts recursively!
-       -- Firstly, there's no point, because inner alts have already had
-       -- mkCase applied to them, so they won't have a case in their default
-       -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
-       -- in munge_rhs may put a case into the DEFAULT branch!
-
  
  --------- Fill in known constructor -----------
-prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
+prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
    |    -- This branch handles the case where we are 
         -- scrutinisng an algebraic data type
      isAlgTyCon tycon           -- It's a data type, tuple, or unboxed tuples.  
@@ -1434,7 +1445,7 @@ prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rh
                                 -- which would be quite legitmate.  But it's a really obscure corner, and
                                 -- not worth wasting code on.
    , let imposs_data_cons = [con | DataAlt con <- imposs_cons]  -- We now know it's a data type 
-       impossible con  = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
+       impossible con   = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
    = case filterOut impossible all_cons of
         []    -> return []      -- Eliminate the default alternative
                                 -- altogether if it can't match
@@ -1449,27 +1460,48 @@ prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rh
         _ -> return [(DEFAULT, [], deflt_rhs)]
  
    | debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon)
-       -- This can legitimately happen for type families, so don't report that
+       -- Check for no data constructors
+        -- This can legitimately happen for type families, so don't report that
    = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
          $ return [(DEFAULT, [], deflt_rhs)]
  
  --------- Catch-all cases -----------
-prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
+prepareDefault _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
    = return [(DEFAULT, [], deflt_rhs)]
  
-prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons Nothing
+prepareDefault _case_bndr _bndr_ty _imposs_cons Nothing
    = return []  -- No default branch
  \end{code}
  
  
  
-=================================================================================
+%************************************************************************
+%*                                                                     *
+               mkCase
+%*                                                                     *
+%************************************************************************
  
  mkCase tries these things
  
-1.  Eliminate the case altogether if possible
+1.  Merge Nested Cases
+
+       case e of b {             ==>   case e of b {
+        p1 -> rhs1                      p1 -> rhs1
+        ...                             ...
+        pm -> rhsm                      pm -> rhsm
+        _  -> case b of b' {            pn -> let b'=b in rhsn
+                    pn -> rhsn          ...
+                    ...                 po -> let b'=b in rhso
+                    po -> rhso          _  -> let b'=b in rhsd
+                    _  -> rhsd
+       }  
+    
+    which merges two cases in one case when -- the default alternative of
+    the outer case scrutises the same variable as the outer case. This
+    transformation is called Case Merging.  It avoids that the same
+    variable is scrutinised multiple times.
  
-2.  Case-identity:
+2.  Eliminate Identity Case
  
         case e of               ===> e
                 True  -> True;
@@ -1477,19 +1509,99 @@ mkCase tries these things
  
      and similar friends.
  
+3.  Merge identical alternatives.
+    If several alternatives are identical, merge them into
+    a single DEFAULT alternative.  I've occasionally seen this 
+    making a big difference:
+
+       case e of               =====>     case e of
+         C _ -> f x                         D v -> ....v....
+         D v -> ....v....                   DEFAULT -> f x
+         DEFAULT -> f x
+
+   The point is that we merge common RHSs, at least for the DEFAULT case.
+   [One could do something more elaborate but I've never seen it needed.]
+   To avoid an expensive test, we just merge branches equal to the *first*
+   alternative; this picks up the common cases
+       a) all branches equal
+       b) some branches equal to the DEFAULT (which occurs first)
+
+The case where Merge Identical Alternatives transformation showed up
+was like this (base/Foreign/C/Err/Error.lhs):
+
+       x | p `is` 1 -> e1
+         | p `is` 2 -> e2
+       ...etc...
+
+where @is@ was something like
+       
+       p `is` n = p /= (-1) && p == n
+
+This gave rise to a horrible sequence of cases
+
+       case p of
+         (-1) -> $j p
+         1    -> e1
+         DEFAULT -> $j p
+
+and similarly in cascade for all the join points!
+
  
  \begin{code}
-mkCase :: OutExpr -> OutId -> [OutAlt] -- Increasing order
-       -> SimplM OutExpr
+mkCase, mkCase1, mkCase2 
+   :: DynFlags 
+   -> OutExpr -> OutId
+   -> [OutAlt]         -- Alternatives in standard (increasing) order
+   -> SimplM OutExpr
  
  --------------------------------------------------
---     2. Identity case
+--     1. Merge Nested Cases
  --------------------------------------------------
  
-mkCase scrut case_bndr alts    -- Identity case
+mkCase dflags scrut outer_bndr ((DEFAULT, _, deflt_rhs) : outer_alts)
+  | dopt Opt_CaseMerge dflags
+  , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
+  , inner_scrut_var == outer_bndr
+  = do { tick (CaseMerge outer_bndr)
+
+       ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
+                                          (con, args, wrap_rhs rhs)
+               -- Simplifier's no-shadowing invariant should ensure
+               -- that outer_bndr is not shadowed by the inner patterns
+              wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
+               -- The let is OK even for unboxed binders, 
+
+             wrapped_alts | isDeadBinder inner_bndr = inner_alts
+                           | otherwise               = map wrap_alt inner_alts
+
+             merged_alts = mergeAlts outer_alts wrapped_alts
+               -- NB: mergeAlts gives priority to the left
+               --      case x of 
+               --        A -> e1
+               --        DEFAULT -> case x of 
+               --                      A -> e2
+               --                      B -> e3
+               -- When we merge, we must ensure that e1 takes 
+               -- precedence over e2 as the value for A!  
+
+       ; mkCase1 dflags scrut outer_bndr merged_alts
+       }
+       -- Warning: don't call mkCase recursively!
+       -- Firstly, there's no point, because inner alts have already had
+       -- mkCase applied to them, so they won't have a case in their default
+       -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
+       -- in munge_rhs may put a case into the DEFAULT branch!
+
+mkCase dflags scrut bndr alts = mkCase1 dflags scrut bndr alts
+
+--------------------------------------------------
+--     2. Eliminate Identity Case
+--------------------------------------------------
+
+mkCase1 _dflags scrut case_bndr alts   -- Identity case
    | all identity_alt alts
-  = do tick (CaseIdentity case_bndr)
-       return (re_cast scrut)
+  = do { tick (CaseIdentity case_bndr)
+       ; return (re_cast scrut) }
    where
      identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
  
@@ -1517,22 +1629,93 @@ mkCase scrut case_bndr alts     -- Identity case
                         (_,_,Cast _ co) -> Cast scrut co
                         _               -> scrut
  
+--------------------------------------------------
+--     3. Merge Identical Alternatives
+--------------------------------------------------
+mkCase1 dflags scrut case_bndr ((_con1,bndrs1,rhs1) : con_alts)
+  | all isDeadBinder bndrs1                    -- Remember the default 
+  , length filtered_alts < length con_alts     -- alternative comes first
+       -- Also Note [Dead binders]
+  = do { tick (AltMerge case_bndr)
+       ; mkCase2 dflags scrut case_bndr alts' }
+  where
+    alts' = (DEFAULT, [], rhs1) : filtered_alts
+    filtered_alts        = filter keep con_alts
+    keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
  
+mkCase1 dflags scrut bndr alts = mkCase2 dflags scrut bndr alts
  
  --------------------------------------------------
  --     Catch-all
  --------------------------------------------------
-mkCase scrut bndr alts = return (Case scrut bndr (coreAltsType alts) alts)
+mkCase2 _dflags scrut bndr alts 
+  = return (Case scrut bndr (coreAltsType alts) alts)
  \end{code}
  
-
-When adding auxiliary bindings for the case binder, it's worth checking if
-its dead, because it often is, and occasionally these mkCase transformations
-cascade rather nicely.
-
-\begin{code}
-bindCaseBndr :: Id -> CoreExpr -> CoreExpr -> CoreExpr
-bindCaseBndr bndr rhs body
-  | isDeadBinder bndr = body
-  | otherwise         = bindNonRec bndr rhs body
-\end{code}
+Note [Dead binders]
+~~~~~~~~~~~~~~~~~~~~
+Note that dead-ness is maintained by the simplifier, so that it is
+accurate after simplification as well as before.
+
+
+Note [Cascading case merge]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Case merging should cascade in one sweep, because it
+happens bottom-up
+
+      case e of a {
+        DEFAULT -> case a of b 
+                      DEFAULT -> case b of c {
+                                     DEFAULT -> e
+                                     A -> ea
+                      B -> eb
+        C -> ec
+==>
+      case e of a {
+        DEFAULT -> case a of b 
+                      DEFAULT -> let c = b in e
+                      A -> let c = b in ea
+                      B -> eb
+        C -> ec
+==>
+      case e of a {
+        DEFAULT -> let b = a in let c = b in e
+        A -> let b = a in let c = b in ea
+        B -> let b = a in eb
+        C -> ec
+
+
+However here's a tricky case that we still don't catch, and I don't
+see how to catch it in one pass:
+
+  case x of c1 { I# a1 ->
+  case a1 of c2 ->
+    0 -> ...
+    DEFAULT -> case x of c3 { I# a2 ->
+               case a2 of ...
+
+After occurrence analysis (and its binder-swap) we get this
+ 
+  case x of c1 { I# a1 -> 
+  let x = c1 in                -- Binder-swap addition
+  case a1 of c2 -> 
+    0 -> ...
+    DEFAULT -> case x of c3 { I# a2 ->
+               case a2 of ...
+
+When we simplify the inner case x, we'll see that
+x=c1=I# a1.  So we'll bind a2 to a1, and get
+
+  case x of c1 { I# a1 -> 
+  case a1 of c2 -> 
+    0 -> ...
+    DEFAULT -> case a1 of ...
+
+This is corect, but we can't do a case merge in this sweep
+because c2 /= a1.  Reason: the binding c1=I# a1 went inwards
+without getting changed to c1=I# c2.  
+
+I don't think this is worth fixing, even if I knew how. It'll
+all come out in the next pass anyway.
+
+  
\ No newline at end of file