Completely new treatment of INLINE pragmas (big patch)

[ghc-hetmet.git] / compiler / simplCore / Simplify.lhs
diff --git a/compiler/simplCore/Simplify.lhs b/compiler/simplCore/Simplify.lhs

index 249dae2..5bcda0c 100644 (file)
--- a/compiler/simplCore/Simplify.lhs
+++ b/compiler/simplCore/Simplify.lhs
@@ -13,8 +13,8 @@ import SimplMonad
  import Type hiding      ( substTy, extendTvSubst )
  import SimplEnv
  import SimplUtils
-import Literal         ( mkStringLit )
  import MkId            ( rUNTIME_ERROR_ID )
+import FamInstEnv      ( FamInstEnv )
  import Id
  import Var
  import IdInfo
@@ -22,11 +22,11 @@ import Coercion
  import FamInstEnv       ( topNormaliseType )
  import DataCon          ( dataConRepStrictness, dataConUnivTyVars )
  import CoreSyn
-import NewDemand        ( isStrictDmd )
+import NewDemand        ( isStrictDmd, splitStrictSig )
  import PprCore          ( pprParendExpr, pprCoreExpr )
  import CoreUnfold       ( mkUnfolding, callSiteInline, CallCtxt(..) )
  import CoreUtils
-import Rules            ( lookupRule )
+import Rules            ( lookupRule, getRules )
  import BasicTypes       ( isMarkedStrict )
  import CostCentre       ( currentCCS )
  import TysPrim          ( realWorldStatePrimTy )
@@ -35,8 +35,9 @@ import BasicTypes       ( TopLevelFlag(..), isTopLevel,
                            RecFlag(..), isNonRuleLoopBreaker )
  import Maybes           ( orElse )
  import Data.List        ( mapAccumL )
+import MonadUtils      ( foldlM )
+import StaticFlags     ( opt_PassCaseBndrToJoinPoints )
  import Outputable
-import MonadUtils
  import FastString
  \end{code}
  
@@ -256,7 +257,7 @@ simplRecBind env0 top_lvl pairs0
          ; env1 <- go (zapFloats env_with_info) triples
          ; return (env0 `addRecFloats` env1) }
          -- addFloats adds the floats from env1,
-        -- *and* updates env0 with the in-scope set from env1
+        -- _and_ updates env0 with the in-scope set from env1
    where
      add_rules :: SimplEnv -> (InBndr,InExpr) -> (SimplEnv, (InBndr, OutBndr, InExpr))
          -- Add the (substituted) rules to the binder
@@ -351,21 +352,10 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se
                       do { tick LetFloatFromLet
                          ; (poly_binds, body3) <- abstractFloats tvs' body_env2 body2
                          ; rhs' <- mkLam tvs' body3
-                        ; env' <- foldlM add_poly_bind env poly_binds
+                        ; env' <- foldlM (addPolyBind top_lvl) env poly_binds
                          ; return (env', rhs') }
  
          ; completeBind env' top_lvl bndr bndr1 rhs' }
-  where
-    add_poly_bind env (NonRec poly_id rhs)
-       = completeBind env top_lvl poly_id poly_id rhs
-               -- completeBind adds the new binding in the
-               -- proper way (ie complete with unfolding etc),
-               -- and extends the in-scope set
-    add_poly_bind env bind@(Rec _)
-       = return (extendFloats env bind)
-               -- Hack: letrecs are more awkward, so we extend "by steam"
-               -- without adding unfoldings etc.  At worst this leads to
-               -- more simplifier iterations
  \end{code}
  
  A specialised variant of simplNonRec used when the RHS is already simplified,
@@ -378,6 +368,9 @@ simplNonRecX :: SimplEnv
               -> SimplM SimplEnv
  
  simplNonRecX env bndr new_rhs
+  | isDeadBinder bndr  -- Not uncommon; e.g. case (a,b) of b { (p,q) -> p }
+  = return env         --               Here b is dead, and we avoid creating
+  | otherwise          --               the binding b = (a,b)
    = do  { (env', bndr') <- simplBinder env bndr
          ; completeNonRecX env' (isStrictId bndr) bndr bndr' new_rhs }
  
@@ -523,7 +516,18 @@ makeTrivial env expr
    | otherwise           -- See Note [Take care] below
    = do  { var <- newId (fsLit "a") (exprType expr)
          ; env' <- completeNonRecX env False var var expr
-        ; return (env', substExpr env' (Var var)) }
+--       pprTrace "makeTrivial" (vcat [ppr var <+> ppr (exprArity (substExpr env' (Var var)))
+--                                    , ppr expr
+--                                    , ppr (substExpr env' (Var var))
+--                                    , ppr (idArity (fromJust (lookupInScope (seInScope env') var))) ]) $
+       ; return (env', substExpr env' (Var var)) }
+       -- The substitution is needed because we're constructing a new binding
+       --     a = rhs
+       -- And if rhs is of form (rhs1 |> co), then we might get
+       --     a1 = rhs1
+       --     a = a1 |> co
+       -- and now a's RHS is trivial and can be substituted out, and that
+       -- is what completeNonRecX will do
  \end{code}
  
  
@@ -563,64 +567,124 @@ completeBind :: SimplEnv
  --      * or by adding to the floats in the envt
  
  completeBind env top_lvl old_bndr new_bndr new_rhs
-  | postInlineUnconditionally env top_lvl new_bndr occ_info new_rhs unfolding
-                -- Inline and discard the binding
-  = do  { tick (PostInlineUnconditionally old_bndr)
-        ; -- pprTrace "postInlineUnconditionally" (ppr old_bndr <+> ppr new_bndr <+> ppr new_rhs) $
-          return (extendIdSubst env old_bndr (DoneEx new_rhs)) }
-        -- Use the substitution to make quite, quite sure that the
-        -- substitution will happen, since we are going to discard the binding
-
-  |  otherwise
-  = let
-        --      Arity info
-        new_bndr_info = idInfo new_bndr `setArityInfo` exprArity new_rhs
-
-        --      Unfolding info
-        -- Add the unfolding *only* for non-loop-breakers
-        -- Making loop breakers not have an unfolding at all
-        -- means that we can avoid tests in exprIsConApp, for example.
-        -- This is important: if exprIsConApp says 'yes' for a recursive
-        -- thing, then we can get into an infinite loop
-
-        --      Demand info
-        -- If the unfolding is a value, the demand info may
-        -- go pear-shaped, so we nuke it.  Example:
-        --      let x = (a,b) in
-        --      case x of (p,q) -> h p q x
-        -- Here x is certainly demanded. But after we've nuked
-        -- the case, we'll get just
-        --      let x = (a,b) in h a b x
-        -- and now x is not demanded (I'm assuming h is lazy)
-        -- This really happens.  Similarly
-        --      let f = \x -> e in ...f..f...
-        -- After inlining f at some of its call sites the original binding may
-        -- (for example) be no longer strictly demanded.
-        -- The solution here is a bit ad hoc...
-        info_w_unf = new_bndr_info `setUnfoldingInfo` unfolding
-                                   `setWorkerInfo`    worker_info
-
-        final_info | omit_unfolding             = new_bndr_info
-                   | isEvaldUnfolding unfolding = zapDemandInfo info_w_unf `orElse` info_w_unf
-                   | otherwise                  = info_w_unf
-
-        final_id = new_bndr `setIdInfo` final_info
+  = do { let old_info = idInfo old_bndr
+             old_unf  = unfoldingInfo old_info
+             occ_info = occInfo old_info
+
+       ; new_unfolding <- simplUnfolding env top_lvl old_bndr occ_info old_unf new_rhs
+
+       ; if postInlineUnconditionally env top_lvl new_bndr occ_info new_rhs new_unfolding
+                       -- Inline and discard the binding
+         then do  { tick (PostInlineUnconditionally old_bndr)
+                   ; return (extendIdSubst env old_bndr (DoneEx new_rhs)) }
+               -- Use the substitution to make quite, quite sure that the
+               -- substitution will happen, since we are going to discard the binding
+
+         else return (addNonRecWithUnf env new_bndr new_rhs new_unfolding) }
+
+------------------------------
+addPolyBind :: TopLevelFlag -> SimplEnv -> OutBind -> SimplM SimplEnv
+-- Add a new binding to the environment, complete with its unfolding
+-- but *do not* do postInlineUnconditionally, because we have already
+-- processed some of the scope of the binding
+-- We still want the unfolding though.  Consider
+--     let 
+--           x = /\a. let y = ... in Just y
+--     in body
+-- Then we float the y-binding out (via abstractFloats and addPolyBind)
+-- but 'x' may well then be inlined in 'body' in which case we'd like the 
+-- opportunity to inline 'y' too.
+
+addPolyBind top_lvl env (NonRec poly_id rhs)
+  = do { unfolding <- simplUnfolding env top_lvl poly_id NoOccInfo noUnfolding rhs
+                       -- Assumes that poly_id did not have an INLINE prag
+                       -- which is perhaps wrong.  ToDo: think about this
+        ; return (addNonRecWithUnf env poly_id rhs unfolding) }
+
+addPolyBind _ env bind@(Rec _) = return (extendFloats env bind)
+               -- Hack: letrecs are more awkward, so we extend "by steam"
+               -- without adding unfoldings etc.  At worst this leads to
+               -- more simplifier iterations
+
+------------------------------
+addNonRecWithUnf :: SimplEnv
+                -> OutId -> OutExpr    -- New binder and RHS
+                -> Unfolding           -- New unfolding
+                -> SimplEnv
+addNonRecWithUnf env new_bndr new_rhs new_unfolding
+  = let new_arity = exprArity new_rhs
+       old_arity = idArity new_bndr
+        info1 = idInfo new_bndr `setArityInfo` new_arity
+       
+              -- Unfolding info: Note [Setting the new unfolding]
+       info2 = info1 `setUnfoldingInfo` new_unfolding
+
+        -- Demand info: Note [Setting the demand info]
+        info3 | isEvaldUnfolding new_unfolding = zapDemandInfo info2 `orElse` info2
+              | otherwise                      = info2
+
+        final_id = new_bndr `setIdInfo` info3
+       dmd_arity = length $ fst $ splitStrictSig $ idNewStrictness new_bndr
      in
-                -- These seqs forces the Id, and hence its IdInfo,
-                -- and hence any inner substitutions
-    final_id                                    `seq`
-    -- pprTrace "Binding" (ppr final_id <+> ppr unfolding) $
-    return (addNonRec env final_id new_rhs)
-       -- The addNonRec adds it to the in-scope set too
+    ASSERT( isId new_bndr )
+    WARN( new_arity < old_arity || new_arity < dmd_arity, 
+          (ppr final_id <+> ppr old_arity <+> ppr new_arity <+> ppr dmd_arity) $$ ppr new_rhs )
+
+    final_id `seq`   -- This seq forces the Id, and hence its IdInfo,
+                    -- and hence any inner substitutions
+           -- pprTrace "Binding" (ppr final_id <+> ppr unfolding) $
+    addNonRec env final_id new_rhs
+               -- The addNonRec adds it to the in-scope set too
+
+
+------------------------------
+simplUnfolding :: SimplEnv-> TopLevelFlag
+              -> Id    -- Debug output only
+              -> OccInfo -> Unfolding -> OutExpr
+              -> SimplM Unfolding
+simplUnfolding env top_lvl bndr occ_info old_unf new_rhs       -- Note [Setting the new unfolding]
+  | omit_unfolding = WARN( is_inline_rule, ppr bndr ) return NoUnfolding       
+  | is_inline_rule = return (substUnfolding env is_top_lvl old_unf)
+  | otherwise     = return (mkUnfolding is_top_lvl new_rhs)
    where
-    unfolding      = mkUnfolding (isTopLevel top_lvl) new_rhs
-    worker_info    = substWorker env (workerInfo old_info)
-    omit_unfolding = isNonRuleLoopBreaker occ_info || not (activeInline env old_bndr)
-    old_info       = idInfo old_bndr
-    occ_info       = occInfo old_info
+    is_top_lvl     = isTopLevel top_lvl
+    is_inline_rule = isInlineRule old_unf
+    omit_unfolding = isNonRuleLoopBreaker occ_info
  \end{code}
  
  
+Note [Setting the new unfolding]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+* If there's an INLINE pragma, we use substUnfolding to retain the 
+  supplied inlining
+
+* If not, we make an unfolding from the new RHS.  But *only* for
+  non-loop-breakers. Making loop breakers not have an unfolding at all
+  means that we can avoid tests in exprIsConApp, for example.  This is
+  important: if exprIsConApp says 'yes' for a recursive thing, then we
+  can get into an infinite loop
+
+If there's an INLINE pragma on a loop breaker, we simply discard it 
+(with a DEBUG warning).  The desugarer complains about binding groups
+that look likely to trigger this behaviour.
+
+
+Note [Setting the demand info]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If the unfolding is a value, the demand info may
+go pear-shaped, so we nuke it.  Example:
+     let x = (a,b) in
+     case x of (p,q) -> h p q x
+Here x is certainly demanded. But after we've nuked
+the case, we'll get just
+     let x = (a,b) in h a b x
+and now x is not demanded (I'm assuming h is lazy)
+This really happens.  Similarly
+     let f = \x -> e in ...f..f...
+After inlining f at some of its call sites the original binding may
+(for example) be no longer strictly demanded.
+The solution here is a bit ad hoc...
+
  
  %************************************************************************
  %*                                                                      *
@@ -799,10 +863,10 @@ simplCast env body co0 cont0
  
         add_coerce co1 (s1, _k2) (CoerceIt co2 cont)
           | (_l1, t1) <- coercionKind co2
-                --      coerce T1 S1 (coerce S1 K1 e)
+               --      e |> (g1 :: S1~L) |> (g2 :: L~T1)
                  -- ==>
-                --      e,                      if T1=K1
-                --      coerce T1 K1 e,         otherwise
+                --      e,                       if S1=T1
+                --      e |> (g1 . g2 :: S1~T1)  otherwise
                  --
                  -- For example, in the initial form of a worker
                  -- we may find  (coerce T (coerce S (\x.e))) y
@@ -812,7 +876,7 @@ simplCast env body co0 cont0
           | otherwise           = CoerceIt (mkTransCoercion co1 co2) cont
  
         add_coerce co (s1s2, _t1t2) (ApplyTo dup (Type arg_ty) arg_se cont)
-                -- (f `cast` g) ty  --->   (f ty) `cast` (g @ ty)
+                -- (f |> g) ty  --->   (f ty) |> (g @ ty)
                  -- This implements the PushT rule from the paper
           | Just (tyvar,_) <- splitForAllTy_maybe s1s2
           , not (isCoVar tyvar)
@@ -825,12 +889,12 @@ simplCast env body co0 cont0
         add_coerce co (s1s2, _t1t2) (ApplyTo dup arg arg_se cont)
           | not (isTypeArg arg)  -- This implements the Push rule from the paper
           , isFunTy s1s2   -- t1t2 must be a function type, becuase it's applied
-                -- co : s1s2 :=: t1t2
-                --      (coerce (T1->T2) (S1->S2) F) E
+                --      (e |> (g :: s1s2 ~ t1->t2)) f
                  -- ===>
-                --      coerce T2 S2 (F (coerce S1 T1 E))
+                --      (e (f |> (arg g :: t1~s1)))
+               --      |> (res g :: s2~t2)
                  --
-                -- t1t2 must be a function type, T1->T2, because it's applied
+                -- t1t2 must be a function type, t1->t2, because it's applied
                  -- to something but s1s2 might conceivably not be
                  --
                  -- When we build the ApplyTo we can't mix the out-types
@@ -841,9 +905,9 @@ simplCast env body co0 cont0
                  -- Example of use: Trac #995
           = ApplyTo dup new_arg (zapSubstEnv env) (addCoerce co2 cont)
           where
-           -- we split coercion t1->t2 :=: s1->s2 into t1 :=: s1 and
-           -- t2 :=: s2 with left and right on the curried form:
-           --    (->) t1 t2 :=: (->) s1 s2
+           -- we split coercion t1->t2 ~ s1->s2 into t1 ~ s1 and
+           -- t2 ~ s2 with left and right on the curried form:
+           --    (->) t1 t2 ~ (->) s1 s2
             [co1, co2] = decomposeCo 2 co
             new_arg    = mkCoerce (mkSymCoercion co1) arg'
             arg'       = substExpr (arg_se `setInScope` env) arg
@@ -864,14 +928,7 @@ simplLam :: SimplEnv -> [InId] -> InExpr -> SimplCont
  
  simplLam env [] body cont = simplExprF env body cont
  
-        -- Type-beta reduction
-simplLam env (bndr:bndrs) body (ApplyTo _ (Type ty_arg) arg_se cont)
-  = ASSERT( isTyVar bndr )
-    do  { tick (BetaReduction bndr)
-        ; ty_arg' <- simplType (arg_se `setInScope` env) ty_arg
-        ; simplLam (extendTvSubst env bndr ty_arg') bndrs body cont }
-
-        -- Ordinary beta reduction
+        -- Beta reduction
  simplLam env (bndr:bndrs) body (ApplyTo _ arg arg_se cont)
    = do  { tick (BetaReduction bndr)
          ; simplNonRecE env bndr (arg, arg_se) (bndrs, body) cont }
@@ -885,7 +942,7 @@ simplLam env bndrs body cont
  
  ------------------
  simplNonRecE :: SimplEnv
-             -> InId                    -- The binder
+             -> InBndr                  -- The binder
               -> (InExpr, SimplEnv)      -- Rhs of binding (or arg of lambda)
               -> ([InBndr], InExpr)      -- Body of the let/lambda
                                          --      \xs.e
@@ -904,9 +961,11 @@ simplNonRecE :: SimplEnv
  -- Why?  Because of the binder-occ-info-zapping done before
  --       the call to simplLam in simplExprF (Lam ...)
  
-       -- First deal with type lets: let a = Type ty in b
+       -- First deal with type applications and type lets
+       --   (/\a. e) (Type ty)   and   (let a = Type ty in e)
  simplNonRecE env bndr (Type ty_arg, rhs_se) (bndrs, body) cont
-  = do { ty_arg' <- simplType (rhs_se `setInScope` env) ty_arg
+  = ASSERT( isTyVar bndr )
+    do { ty_arg' <- simplType (rhs_se `setInScope` env) ty_arg
         ; simplLam (extendTvSubst env bndr ty_arg') bndrs body cont }
  
  simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont
@@ -919,7 +978,8 @@ simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont
                       (StrictBind bndr bndrs body env cont) }
  
    | otherwise
-  = do  { (env1, bndr1) <- simplNonRecBndr env bndr
+  = ASSERT( not (isTyVar bndr) )
+    do  { (env1, bndr1) <- simplNonRecBndr env bndr
          ; let (env2, bndr2) = addBndrRules env1 bndr bndr1
          ; env3 <- simplLazyBind env2 NotTopLevel NonRecursive bndr bndr2 rhs rhs_se
          ; simplLam env3 bndrs body cont }
@@ -941,21 +1001,9 @@ simplNote env (SCC cc) e cont
    = do  { e' <- simplExpr (setEnclosingCC env currentCCS) e
          ; rebuild env (mkSCC cc e') cont }
  
--- See notes with SimplMonad.inlineMode
-simplNote env InlineMe e cont
-  | Just (inside, outside) <- splitInlineCont cont  -- Boring boring continuation; see notes above
-  = do  {                       -- Don't inline inside an INLINE expression
-          e' <- simplExprC (setMode inlineMode env) e inside
-        ; rebuild env (mkInlineMe e') outside }
-
-  | otherwise   -- Dissolve the InlineMe note if there's
-                -- an interesting context of any kind to combine with
-                -- (even a type application -- anything except Stop)
-  = simplExprF env e cont
-
-simplNote env (CoreNote s) e cont = do
-    e' <- simplExpr env e
-    rebuild env (Note (CoreNote s) e') cont
+simplNote env (CoreNote s) e cont
+  = do { e' <- simplExpr env e
+       ; rebuild env (Note (CoreNote s) e') cont }
  \end{code}
  
  
@@ -1018,12 +1066,13 @@ completeCall env var cont
          -- is recursive, and hence a loop breaker:
          --      foldr k z (build g) = g k z
          -- So it's up to the programmer: rules can cause divergence
-        ; rules <- getRules
+        ; rule_base <- getSimplRules
          ; let   in_scope   = getInScope env
+               rules      = getRules rule_base var
                  maybe_rule = case activeRule dflags env of
                                  Nothing     -> Nothing  -- No rules apply
                                  Just act_fn -> lookupRule act_fn in_scope
-                                                          rules var args
+                                                          var args rules 
          ; case maybe_rule of {
              Just (rule, rule_rhs) -> do
                  tick (RuleFired (ru_name rule))
@@ -1051,7 +1100,7 @@ completeCall env var cont
              Just unfolding      -- There is an inlining!
                ->  do { tick (UnfoldingDone var)
                       ; (if dopt Opt_D_dump_inlinings dflags then
-                           pprTrace ("Inlining done" ++ showSDoc (ppr var)) (vcat [
+                           pprTrace ("Inlining done: " ++ showSDoc (ppr var)) (vcat [
                                  text "Before:" <+> ppr var <+> sep (map pprParendExpr args),
                                  text "Inlined fn: " <+> nest 2 (ppr unfolding),
                                  text "Cont:  " <+> ppr call_cont])
@@ -1153,7 +1202,91 @@ all this at once is TOO HARD!
  %*                                                                      *
  %************************************************************************
  
-Blob of helper functions for the "case-of-something-else" situation.
+Note [Case elimination]
+~~~~~~~~~~~~~~~~~~~~~~~
+The case-elimination transformation discards redundant case expressions.
+Start with a simple situation:
+
+        case x# of      ===>   e[x#/y#]
+          y# -> e
+
+(when x#, y# are of primitive type, of course).  We can't (in general)
+do this for algebraic cases, because we might turn bottom into
+non-bottom!
+
+The code in SimplUtils.prepareAlts has the effect of generalising this
+idea to look for a case where we're scrutinising a variable, and we
+know that only the default case can match.  For example:
+
+        case x of
+          0#      -> ...
+          DEFAULT -> ...(case x of
+                         0#      -> ...
+                         DEFAULT -> ...) ...
+
+Here the inner case is first trimmed to have only one alternative, the
+DEFAULT, after which it's an instance of the previous case.  This
+really only shows up in eliminating error-checking code.
+
+We also make sure that we deal with this very common case:
+
+        case e of
+          x -> ...x...
+
+Here we are using the case as a strict let; if x is used only once
+then we want to inline it.  We have to be careful that this doesn't
+make the program terminate when it would have diverged before, so we
+check that
+        - e is already evaluated (it may be so if e is a variable), or
+        - x is used strictly
+
+Lastly, the code in SimplUtils.mkCase combines identical RHSs.  So
+
+        case e of       ===> case e of DEFAULT -> r
+           True  -> r
+           False -> r
+
+Now again the case may be eliminated by the CaseElim transformation.
+
+
+Further notes about case elimination
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider:       test :: Integer -> IO ()
+                test = print
+
+Turns out that this compiles to:
+    Print.test
+      = \ eta :: Integer
+          eta1 :: State# RealWorld ->
+          case PrelNum.< eta PrelNum.zeroInteger of wild { __DEFAULT ->
+          case hPutStr stdout
+                 (PrelNum.jtos eta ($w[] @ Char))
+                 eta1
+          of wild1 { (# new_s, a4 #) -> PrelIO.lvl23 new_s  }}
+
+Notice the strange '<' which has no effect at all. This is a funny one.
+It started like this:
+
+f x y = if x < 0 then jtos x
+          else if y==0 then "" else jtos x
+
+At a particular call site we have (f v 1).  So we inline to get
+
+        if v < 0 then jtos v
+        else if 1==0 then "" else jtos v
+
+Now simplify the 1==0 conditional:
+
+        if v<0 then jtos v else jtos v
+
+Now common-up the two branches of the case:
+
+        case (v<0) of DEFAULT -> jtos v
+
+Why don't we drop the case?  Because it's strict in v.  It's technically
+wrong to drop even unnecessary evaluations, and in practice they
+may be a result of 'seq' so we *definitely* don't want to drop those.
+I don't really know how to improve this situation.
  
  \begin{code}
  ---------------------------------------------------------
@@ -1187,7 +1320,7 @@ rebuildCase env scrut case_bndr alts cont
  
  rebuildCase env scrut case_bndr [(_, bndrs, rhs)] cont
    -- See if we can get rid of the case altogether
-  -- See the extensive notes on case-elimination above
+  -- See Note [Case elimination]
    -- mkCase made sure that if all the alternatives are equal,
    -- then there is now only one (DEFAULT) rhs
   | all isDeadBinder bndrs       -- bndrs are [InId]
@@ -1247,7 +1380,7 @@ rebuildCase env scrut case_bndr alts cont
                 -- inaccessible.  So we simply put an error case here instead.
             pprTrace "mkCase: null alts" (ppr case_bndr <+> ppr scrut) $
             let res_ty' = contResultType env' (substTy env' (coreAltsType alts)) dup_cont
-               lit = Lit (mkStringLit "Impossible alternative")
+               lit = mkStringLit "Impossible alternative"
             in return (env', mkApps (Var rUNTIME_ERROR_ID) [Type res_ty', lit])
  
           else do
@@ -1263,75 +1396,15 @@ try to eliminate uses of v in the RHSs in favour of case_bndr; that
  way, there's a chance that v will now only be used once, and hence
  inlined.
  
-Note [no-case-of-case]
-~~~~~~~~~~~~~~~~~~~~~~
-There is a time we *don't* want to do that, namely when
--fno-case-of-case is on.  This happens in the first simplifier pass,
-and enhances full laziness.  Here's the bad case:
-        f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
-If we eliminate the inner case, we trap it inside the I# v -> arm,
-which might prevent some full laziness happening.  I've seen this
-in action in spectral/cichelli/Prog.hs:
-         [(m,n) | m <- [1..max], n <- [1..max]]
-Hence the check for NoCaseOfCase.
-
-Note [Suppressing the case binder-swap]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-There is another situation when it might make sense to suppress the
-case-expression binde-swap. If we have
-
-    case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
-                   ...other cases .... }
-
-We'll perform the binder-swap for the outer case, giving
-
-    case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
-                   ...other cases .... }
-
-But there is no point in doing it for the inner case, because w1 can't
-be inlined anyway.  Furthermore, doing the case-swapping involves
-zapping w2's occurrence info (see paragraphs that follow), and that
-forces us to bind w2 when doing case merging.  So we get
-
-    case x of w1 { A -> let w2 = w1 in e1
-                   B -> let w2 = w1 in e2
-                   ...other cases .... }
-
-This is plain silly in the common case where w2 is dead.
-
-Even so, I can't see a good way to implement this idea.  I tried
-not doing the binder-swap if the scrutinee was already evaluated
-but that failed big-time:
-
-        data T = MkT !Int
-
-        case v of w  { MkT x ->
-        case x of x1 { I# y1 ->
-        case x of x2 { I# y2 -> ...
-
-Notice that because MkT is strict, x is marked "evaluated".  But to
-eliminate the last case, we must either make sure that x (as well as
-x1) has unfolding MkT y1.  THe straightforward thing to do is to do
-the binder-swap.  So this whole note is a no-op.
+Historical note: we used to do the "case binder swap" in the Simplifier
+so there were additional complications if the scrutinee was a variable.
+Now the binder-swap stuff is done in the occurrence analyser; see
+OccurAnal Note [Binder swap].
  
  Note [zapOccInfo]
  ~~~~~~~~~~~~~~~~~
-If we replace the scrutinee, v, by tbe case binder, then we have to nuke
-any occurrence info (eg IAmDead) in the case binder, because the
-case-binder now effectively occurs whenever v does.  AND we have to do
-the same for the pattern-bound variables!  Example:
-
-        (case x of { (a,b) -> a }) (case x of { (p,q) -> q })
-
-Here, b and p are dead.  But when we move the argment inside the first
-case RHS, and eliminate the second case, we get
-
-        case x of { (a,b) -> a b }
-
-Urk! b is alive!  Reason: the scrutinee was a variable, and case elimination
-happened.
-
-Indeed, this can happen anytime the case binder isn't dead:
+If the case binder is not dead, then neither are the pattern bound
+variables:  
          case <any> of x { (a,b) ->
          case x of { (p,q) -> p } }
  Here (a,b) both look dead, but come alive after the inner case is eliminated.
@@ -1340,6 +1413,10 @@ The point is that we bring into the envt a binding
  after the outer case, and that makes (a,b) alive.  At least we do unless
  the case binder is guaranteed dead.
  
+In practice, the scrutinee is almost always a variable, so we pretty
+much always zap the OccInfo of the binders.  It doesn't matter much though.
+
+
  Note [Case of cast]
  ~~~~~~~~~~~~~~~~~~~
  Consider        case (v `cast` co) of x { I# ->
@@ -1379,125 +1456,82 @@ At one point I did transformation in LiberateCase, but it's more robust here.
  (Otherwise, there's a danger that we'll simply drop the 'seq' altogether, before
  LiberateCase gets to see it.)
  
-Note [Case elimination]
-~~~~~~~~~~~~~~~~~~~~~~~
-The case-elimination transformation discards redundant case expressions.
-Start with a simple situation:
-
-        case x# of      ===>   e[x#/y#]
-          y# -> e
-
-(when x#, y# are of primitive type, of course).  We can't (in general)
-do this for algebraic cases, because we might turn bottom into
-non-bottom!
-
-The code in SimplUtils.prepareAlts has the effect of generalise this
-idea to look for a case where we're scrutinising a variable, and we
-know that only the default case can match.  For example:
-
-        case x of
-          0#      -> ...
-          DEFAULT -> ...(case x of
-                         0#      -> ...
-                         DEFAULT -> ...) ...
-
-Here the inner case is first trimmed to have only one alternative, the
-DEFAULT, after which it's an instance of the previous case.  This
-really only shows up in eliminating error-checking code.
  
-We also make sure that we deal with this very common case:
-
-        case e of
-          x -> ...x...
-
-Here we are using the case as a strict let; if x is used only once
-then we want to inline it.  We have to be careful that this doesn't
-make the program terminate when it would have diverged before, so we
-check that
-        - e is already evaluated (it may so if e is a variable)
-        - x is used strictly, or
-
-Lastly, the code in SimplUtils.mkCase combines identical RHSs.  So
-
-        case e of       ===> case e of DEFAULT -> r
-           True  -> r
-           False -> r
-
-Now again the case may be elminated by the CaseElim transformation.
-
-
-Further notes about case elimination
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider:       test :: Integer -> IO ()
-                test = print
+Historical note [no-case-of-case]
+~~~~~~~~~~~~~~~~~~~~~~
+We *used* to suppress the binder-swap in case expressions when
+-fno-case-of-case is on.  Old remarks:
+    "This happens in the first simplifier pass,
+    and enhances full laziness.  Here's the bad case:
+            f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
+    If we eliminate the inner case, we trap it inside the I# v -> arm,
+    which might prevent some full laziness happening.  I've seen this
+    in action in spectral/cichelli/Prog.hs:
+             [(m,n) | m <- [1..max], n <- [1..max]]
+    Hence the check for NoCaseOfCase."
+However, now the full-laziness pass itself reverses the binder-swap, so this
+check is no longer necessary.
+
+Historical note [Suppressing the case binder-swap]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+There is another situation when it might make sense to suppress the
+case-expression binder-swap. If we have
  
-Turns out that this compiles to:
-    Print.test
-      = \ eta :: Integer
-          eta1 :: State# RealWorld ->
-          case PrelNum.< eta PrelNum.zeroInteger of wild { __DEFAULT ->
-          case hPutStr stdout
-                 (PrelNum.jtos eta ($w[] @ Char))
-                 eta1
-          of wild1 { (# new_s, a4 #) -> PrelIO.lvl23 new_s  }}
+    case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
+                   ...other cases .... }
  
-Notice the strange '<' which has no effect at all. This is a funny one.
-It started like this:
+We'll perform the binder-swap for the outer case, giving
  
-f x y = if x < 0 then jtos x
-          else if y==0 then "" else jtos x
+    case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
+                   ...other cases .... }
  
-At a particular call site we have (f v 1).  So we inline to get
+But there is no point in doing it for the inner case, because w1 can't
+be inlined anyway.  Furthermore, doing the case-swapping involves
+zapping w2's occurrence info (see paragraphs that follow), and that
+forces us to bind w2 when doing case merging.  So we get
  
-        if v < 0 then jtos x
-        else if 1==0 then "" else jtos x
+    case x of w1 { A -> let w2 = w1 in e1
+                   B -> let w2 = w1 in e2
+                   ...other cases .... }
  
-Now simplify the 1==0 conditional:
+This is plain silly in the common case where w2 is dead.
  
-        if v<0 then jtos v else jtos v
+Even so, I can't see a good way to implement this idea.  I tried
+not doing the binder-swap if the scrutinee was already evaluated
+but that failed big-time:
  
-Now common-up the two branches of the case:
+        data T = MkT !Int
  
-        case (v<0) of DEFAULT -> jtos v
+        case v of w  { MkT x ->
+        case x of x1 { I# y1 ->
+        case x of x2 { I# y2 -> ...
  
-Why don't we drop the case?  Because it's strict in v.  It's technically
-wrong to drop even unnecessary evaluations, and in practice they
-may be a result of 'seq' so we *definitely* don't want to drop those.
-I don't really know how to improve this situation.
+Notice that because MkT is strict, x is marked "evaluated".  But to
+eliminate the last case, we must make sure that x (as well as
+x1) has unfolding MkT y1.  The straightforward thing to do is to do
+the binder-swap.  So this whole note is a no-op.
  
  
  \begin{code}
-simplCaseBinder :: SimplEnv -> OutExpr -> OutId -> [InAlt]
-                -> SimplM (SimplEnv, OutExpr, OutId)
-simplCaseBinder env0 scrut0 case_bndr0 alts
-  = do  { (env1, case_bndr1) <- simplBinder env0 case_bndr0
-
-        ; fam_envs <- getFamEnvs
-        ; (env2, scrut2, case_bndr2) <- improve_seq fam_envs env1 scrut0
-                                                case_bndr0 case_bndr1 alts
-                        -- Note [Improving seq]
-
-        ; let (env3, case_bndr3) = improve_case_bndr env2 scrut2 case_bndr2
-                        -- Note [Case of cast]
-
-        ; return (env3, scrut2, case_bndr3) }
-  where
-
-    improve_seq fam_envs env scrut case_bndr case_bndr1 [(DEFAULT,_,_)]
-        | Just (co, ty2) <- topNormaliseType fam_envs (idType case_bndr1)
-        =  do { case_bndr2 <- newId (fsLit "nt") ty2
-              ; let rhs  = DoneEx (Var case_bndr2 `Cast` mkSymCoercion co)
-                    env2 = extendIdSubst env case_bndr rhs
-              ; return (env2, scrut `Cast` co, case_bndr2) }
-
-    improve_seq _ env scrut _ case_bndr1 _
-        = return (env, scrut, case_bndr1)
-
-
+improveSeq :: (FamInstEnv, FamInstEnv) -> SimplEnv
+          -> OutExpr -> InId -> OutId -> [InAlt]
+          -> SimplM (SimplEnv, OutExpr, OutId)
+-- Note [Improving seq]
+improveSeq fam_envs env scrut case_bndr case_bndr1 [(DEFAULT,_,_)]
+  | Just (co, ty2) <- topNormaliseType fam_envs (idType case_bndr1)
+  =  do { case_bndr2 <- newId (fsLit "nt") ty2
+        ; let rhs  = DoneEx (Var case_bndr2 `Cast` mkSymCoercion co)
+              env2 = extendIdSubst env case_bndr rhs
+        ; return (env2, scrut `Cast` co, case_bndr2) }
+
+improveSeq _ env scrut _ case_bndr1 _
+  = return (env, scrut, case_bndr1)
+
+{-
      improve_case_bndr env scrut case_bndr
-        | switchIsOn (getSwitchChecker env) NoCaseOfCase
-                -- See Note [no-case-of-case]
-        = (env, case_bndr)
+        -- See Note [no-case-of-case]
+       --  | switchIsOn (getSwitchChecker env) NoCaseOfCase
+       --  = (env, case_bndr)
  
          | otherwise     -- Failed try; see Note [Suppressing the case binder-swap]
                          --     not (isEvaldUnfolding (idUnfolding v))
@@ -1514,12 +1548,9 @@ simplCaseBinder env0 scrut0 case_bndr0 alts
  
              _ -> (env, case_bndr)
          where
-          case_bndr' = zapOccInfo case_bndr
+          case_bndr' = zapIdOccInfo case_bndr
            env1       = modifyInScope env case_bndr case_bndr'
-
-
-zapOccInfo :: InId -> InId      -- See Note [zapOccInfo]
-zapOccInfo b = b `setIdOccInfo` NoOccInfo
+-}
  \end{code}
  
  
@@ -1575,10 +1606,15 @@ simplAlts :: SimplEnv
  
  simplAlts env scrut case_bndr alts cont'
    = -- pprTrace "simplAlts" (ppr alts $$ ppr (seIdSubst env)) $
-    do  { let alt_env = zapFloats env
-        ; (alt_env', scrut', case_bndr') <- simplCaseBinder alt_env scrut case_bndr alts
+    do  { let env0 = zapFloats env
  
-        ; (imposs_deflt_cons, in_alts) <- prepareAlts alt_env' scrut case_bndr' alts
+        ; (env1, case_bndr1) <- simplBinder env0 case_bndr
+
+        ; fam_envs <- getFamEnvs
+       ; (alt_env', scrut', case_bndr') <- improveSeq fam_envs env1 scrut 
+                                                      case_bndr case_bndr1 alts
+
+        ; (imposs_deflt_cons, in_alts) <- prepareAlts alt_env' scrut' case_bndr' alts
  
          ; alts' <- mapM (simplAlt alt_env' imposs_deflt_cons case_bndr' cont') in_alts
          ; return (scrut', case_bndr', alts') }
@@ -1644,6 +1680,7 @@ simplAlt env _ case_bndr' cont' (DataAlt con, vs, rhs)
                evald_v  = zapped_v `setIdUnfolding` evaldUnfolding
            go _ _ = pprPanic "cat_evals" (ppr con $$ ppr vs $$ ppr the_strs)
  
+       -- See Note [zapOccInfo]
          -- zap_occ_info: if the case binder is alive, then we add the unfolding
          --      case_bndr = C vs
          -- to the envt; so vs are now very much alive
@@ -1651,16 +1688,23 @@ simplAlt env _ case_bndr' cont' (DataAlt con, vs, rhs)
          --        case e of t { (a,b) -> ...(case t of (p,q) -> p)... }
          --   ==>  case e of t { (a,b) -> ...(a)... }
          -- Look, Ma, a is alive now.
-    zap_occ_info | isDeadBinder case_bndr' = \ident -> ident
-                 | otherwise               = zapOccInfo
+    zap_occ_info = zapCasePatIdOcc case_bndr'
  
  addBinderUnfolding :: SimplEnv -> Id -> CoreExpr -> SimplEnv
  addBinderUnfolding env bndr rhs
-  = modifyInScope env bndr (bndr `setIdUnfolding` mkUnfolding False rhs)
+  = modifyInScope env (bndr `setIdUnfolding` mkUnfolding False rhs)
  
  addBinderOtherCon :: SimplEnv -> Id -> [AltCon] -> SimplEnv
  addBinderOtherCon env bndr cons
-  = modifyInScope env bndr (bndr `setIdUnfolding` mkOtherCon cons)
+  = modifyInScope env (bndr `setIdUnfolding` mkOtherCon cons)
+
+zapCasePatIdOcc :: Id -> Id -> Id
+-- Consider  case e of b { (a,b) -> ... }
+-- Then if we bind b to (a,b) in "...", and b is not dead,
+-- then we must zap the deadness info on a,b
+zapCasePatIdOcc case_bndr
+  | isDeadBinder case_bndr = \ pat_id -> pat_id
+  | otherwise             = \ pat_id -> zapIdOccInfo pat_id
  \end{code}
  
  
@@ -1710,9 +1754,8 @@ knownAlt env scrut _ bndr (LitAlt _, bs, rhs) cont
          ; simplExprF env' rhs cont }
  
  knownAlt env scrut the_args bndr (DataAlt dc, bs, rhs) cont
-  = do  { let dead_bndr  = isDeadBinder bndr    -- bndr is an InId
-              n_drop_tys = length (dataConUnivTyVars dc)
-        ; env' <- bind_args env dead_bndr bs (drop n_drop_tys the_args)
+  = do  { let n_drop_tys = length (dataConUnivTyVars dc)
+        ; env' <- bind_args env bs (drop n_drop_tys the_args)
          ; let
                  -- It's useful to bind bndr to scrut, rather than to a fresh
                  -- binding      x = Con arg1 .. argn
@@ -1729,28 +1772,29 @@ knownAlt env scrut the_args bndr (DataAlt dc, bs, rhs) cont
                                  -- args are aready OutExprs, but bs are InIds
  
          ; env'' <- simplNonRecX env' bndr bndr_rhs
-        ; -- pprTrace "knownCon2" (ppr bs $$ ppr rhs $$ ppr (seIdSubst env'')) $
-          simplExprF env'' rhs cont }
+        ; simplExprF env'' rhs cont }
    where
-    -- Ugh!
-    bind_args env' _ [] _  = return env'
+    zap_occ = zapCasePatIdOcc bndr    -- bndr is an InId
  
-    bind_args env' dead_bndr (b:bs') (Type ty : args)
+                  -- Ugh!
+    bind_args env' [] _  = return env'
+
+    bind_args env' (b:bs') (Type ty : args)
        = ASSERT( isTyVar b )
-        bind_args (extendTvSubst env' b ty) dead_bndr bs' args
+        bind_args (extendTvSubst env' b ty) bs' args
  
-    bind_args env' dead_bndr (b:bs') (arg : args)
+    bind_args env' (b:bs') (arg : args)
        = ASSERT( isId b )
-        do { let b' = if dead_bndr then b else zapOccInfo b
+        do { let b' = zap_occ b
               -- Note that the binder might be "dead", because it doesn't
               -- occur in the RHS; and simplNonRecX may therefore discard
               -- it via postInlineUnconditionally.
               -- Nevertheless we must keep it if the case-binder is alive,
               -- because it may be used in the con_app.  See Note [zapOccInfo]
             ; env'' <- simplNonRecX env' b' arg
-           ; bind_args env'' dead_bndr bs' args }
+           ; bind_args env'' bs' args }
  
-    bind_args _ _ _ _ =
+    bind_args _ _ _ =
        pprPanic "bind_args" $ ppr dc $$ ppr bs $$ ppr the_args $$
                               text "scrut:" <+> ppr scrut
  \end{code}
@@ -1805,14 +1849,16 @@ mkDupableCont env (ApplyTo _ arg se cont)
      do  { (env', dup_cont, nodup_cont) <- mkDupableCont env cont
          ; arg' <- simplExpr (se `setInScope` env') arg
          ; (env'', arg'') <- makeTrivial env' arg'
-        ; let app_cont = ApplyTo OkToDup arg'' (zapSubstEnv env') dup_cont
+        ; let app_cont = ApplyTo OkToDup arg'' (zapSubstEnv env'') dup_cont
          ; return (env'', app_cont, nodup_cont) }
  
-mkDupableCont env cont@(Select _ _ [(_, bs, _rhs)] _ _)
+mkDupableCont env cont@(Select _ case_bndr [(_, bs, _rhs)] _ _)
  --  See Note [Single-alternative case]
  --  | not (exprIsDupable rhs && contIsDupable case_cont)
  --  | not (isDeadBinder case_bndr)
-  | all isDeadBinder bs         -- InIds
+  | all isDeadBinder bs  -- InIds
+    && not (isUnLiftedType (idType case_bndr))
+    -- Note [Single-alternative-unlifted]
    = return (env, mkBoringStop, cont)
  
  mkDupableCont env (Select _ case_bndr alts se cont)
@@ -1862,40 +1908,97 @@ mkDupableAlts env case_bndr' the_alts
  
  mkDupableAlt :: SimplEnv -> OutId -> (AltCon, [CoreBndr], CoreExpr)
                -> SimplM (SimplEnv, (AltCon, [CoreBndr], CoreExpr))
-mkDupableAlt env case_bndr' (con, bndrs', rhs')
-  | exprIsDupable rhs'  -- Note [Small alternative rhs]
-  = return (env, (con, bndrs', rhs'))
+mkDupableAlt env case_bndr1 (con, bndrs1, rhs1)
+  | exprIsDupable rhs1  -- Note [Small alternative rhs]
+  = return (env, (con, bndrs1, rhs1))
    | otherwise
-  = do  { let rhs_ty'     = exprType rhs'
-              used_bndrs' = filter abstract_over (case_bndr' : bndrs')
-              abstract_over bndr
+  = do  { let abstract_over bndr
                    | isTyVar bndr = True -- Abstract over all type variables just in case
                    | otherwise    = not (isDeadBinder bndr)
                          -- The deadness info on the new Ids is preserved by simplBinders
  
-        ; (final_bndrs', final_args)    -- Note [Join point abstraction]
-                <- if (any isId used_bndrs')
-                   then return (used_bndrs', varsToCoreExprs used_bndrs')
+              inst_tys1 = tyConAppArgs (idType case_bndr1)
+              con_app dc = mkConApp dc (map Type inst_tys1 ++ varsToCoreExprs bndrs1)
+
+             (rhs2, final_bndrs)   -- See Note [Passing the case binder to join points]
+                | isDeadBinder case_bndr1
+                = (rhs1, filter abstract_over bndrs1)
+                | opt_PassCaseBndrToJoinPoints, not (null bndrs1)
+                = (rhs1, (case_bndr1 : filter abstract_over bndrs1))
+                | otherwise 
+                 = case con of
+                    DataAlt dc -> (Let (NonRec case_bndr1 (con_app dc)) rhs1, bndrs1)
+                    LitAlt lit -> ASSERT( null bndrs1 ) (Let (NonRec case_bndr1 (Lit lit)) rhs1, [])
+                    DEFAULT    -> ASSERT( null bndrs1 ) (rhs1, [case_bndr1])
+
+        ; (final_bndrs1, final_args)    -- Note [Join point abstraction]
+                <- if (any isId final_bndrs)
+                   then return (final_bndrs, varsToCoreExprs final_bndrs)
                      else do { rw_id <- newId (fsLit "w") realWorldStatePrimTy
-                            ; return ([rw_id], [Var realWorldPrimId]) }
+                            ; return (rw_id : final_bndrs,  
+                                     Var realWorldPrimId : varsToCoreExprs final_bndrs) }
  
-        ; join_bndr <- newId (fsLit "$j") (mkPiTypes final_bndrs' rhs_ty')
+        ; let rhs_ty1 = exprType rhs1
+        ; join_bndr <- newId (fsLit "$j") (mkPiTypes final_bndrs1 rhs_ty1)
                  -- Note [Funky mkPiTypes]
  
          ; let   -- We make the lambdas into one-shot-lambdas.  The
                  -- join point is sure to be applied at most once, and doing so
                  -- prevents the body of the join point being floated out by
                  -- the full laziness pass
-                really_final_bndrs     = map one_shot final_bndrs'
+                really_final_bndrs     = map one_shot final_bndrs1
                  one_shot v | isId v    = setOneShotLambda v
                             | otherwise = v
-                join_rhs  = mkLams really_final_bndrs rhs'
+                join_rhs  = mkLams really_final_bndrs rhs2
                  join_call = mkApps (Var join_bndr) final_args
  
-        ; return (addNonRec env join_bndr join_rhs, (con, bndrs', join_call)) }
+       ; env1 <- addPolyBind NotTopLevel env (NonRec join_bndr join_rhs)
+        ; return (env1, (con, bndrs1, join_call)) }
                  -- See Note [Duplicated env]
  \end{code}
  
+Note [Passing the case binder to join points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we have
+   case e of cb { C1 -> r1[cb]; C2 x y z -> r2[cb,x] }
+and we want to make join points for the two alternatives, 
+which mention the case binder 'cb'.  Should we pass 'cb' to
+the join point, or reconstruct it? Here are the two alternatives 
+for the C2 alternative:
+
+  Plan A(pass cb):         j2 cb x = r2[cb,x]
+
+  Plan B(reconstruct cb):  j2 x y z = let cb = C2 x y z in r2[cb,x]
+
+The advantage of Plan B is that we can "see" the definition of cb
+in r2, and that may be important when we inline stuff in r2.  The
+disadvantage is that if this optimisation doesn't happen, we end up
+re-allocating C2, when it already exists.  This does happen occasionally;
+an example is the function nofib/spectral/cichelli/Auxil.$whinsert.
+
+Plan B is always better if the constructor is nullary.
+
+In both cases we don't have liveness info for cb on a branch-by-branch
+basis, and it's possible that 'cb' is used in some branches but not
+others.  Well, the absence analyser will find that out later, so it's
+not too bad.
+
+Sadly, at the time of writing, neither choice seems an unequivocal
+win. Here are nofib results, for adding -fpass-case-bndr-to-join-points
+(all others are zero effect):
+
+        Program           Size    Allocs   Runtime   Elapsed
+--------------------------------------------------------------------------------
+       cichelli          +0.0%     -4.4%      0.13      0.13
+            pic          +0.0%     -0.7%      0.01      0.04
+      transform          -0.0%     +2.8%     -0.4%     -9.1%
+      wave4main          +0.0%    +10.5%     +3.1%     +3.4%
+--------------------------------------------------------------------------------
+            Min          -0.0%     -4.4%     -7.0%    -31.9%
+            Max          +0.1%    +10.5%     +3.1%    +15.0%
+ Geometric Mean          +0.0%     +0.1%     -1.7%     -6.1%
+
+
  Note [Duplicated env]
  ~~~~~~~~~~~~~~~~~~~~~
  Some of the alternatives are simplified, but have not been turned into a join point
@@ -2061,3 +2164,37 @@ Other choices:
       When x is inlined into its full context, we find that it was a bad
       idea to have pushed the outer case inside the (...) case.
  
+Note [Single-alternative-unlifted]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Here's another single-alternative where we really want to do case-of-case:
+
+data Mk1 = Mk1 Int# | Mk2 Int#
+
+M1.f =
+    \r [x_s74 y_s6X]
+        case
+            case y_s6X of tpl_s7m {
+              M1.Mk1 ipv_s70 -> ipv_s70;
+              M1.Mk2 ipv_s72 -> ipv_s72;
+            }
+        of
+        wild_s7c
+        { __DEFAULT ->
+              case
+                  case x_s74 of tpl_s7n {
+                    M1.Mk1 ipv_s77 -> ipv_s77;
+                    M1.Mk2 ipv_s79 -> ipv_s79;
+                  }
+              of
+              wild1_s7b
+              { __DEFAULT -> ==# [wild1_s7b wild_s7c];
+              };
+        };
+
+So the outer case is doing *nothing at all*, other than serving as a
+join-point.  In this case we really want to do case-of-case and decide
+whether to use a real join point or just duplicate the continuation.
+
+Hence: check whether the case binder's type is unlifted, because then
+the outer case is *not* a seq.