Preserve strictness when floating coercions

[ghc-hetmet.git] / compiler / simplCore / Simplify.lhs
diff --git a/compiler/simplCore/Simplify.lhs b/compiler/simplCore/Simplify.lhs

index 5bcda0c..f8462be 100644 (file)
--- a/compiler/simplCore/Simplify.lhs
+++ b/compiler/simplCore/Simplify.lhs
@@ -13,30 +13,33 @@ import SimplMonad
  import Type hiding      ( substTy, extendTvSubst )
  import SimplEnv
  import SimplUtils
  import Type hiding      ( substTy, extendTvSubst )
  import SimplEnv
  import SimplUtils
-import MkId            ( rUNTIME_ERROR_ID )
  import FamInstEnv      ( FamInstEnv )
  import Id
  import FamInstEnv      ( FamInstEnv )
  import Id
+import MkId            ( mkImpossibleExpr, seqId )
  import Var
  import IdInfo
  import Var
  import IdInfo
+import Name            ( mkSystemVarName )
  import Coercion
  import FamInstEnv       ( topNormaliseType )
  import Coercion
  import FamInstEnv       ( topNormaliseType )
-import DataCon          ( dataConRepStrictness, dataConUnivTyVars )
+import DataCon          ( DataCon, dataConWorkId, dataConRepStrictness )
  import CoreSyn
  import NewDemand        ( isStrictDmd, splitStrictSig )
  import PprCore          ( pprParendExpr, pprCoreExpr )
  import CoreSyn
  import NewDemand        ( isStrictDmd, splitStrictSig )
  import PprCore          ( pprParendExpr, pprCoreExpr )
-import CoreUnfold       ( mkUnfolding, callSiteInline, CallCtxt(..) )
+import CoreUnfold       ( mkUnfolding, mkCoreUnfolding, mkInlineRule, 
+                          exprIsConApp_maybe, callSiteInline, CallCtxt(..) )
  import CoreUtils
  import CoreUtils
+import qualified CoreSubst
+import CoreArity       ( exprArity )
  import Rules            ( lookupRule, getRules )
  import Rules            ( lookupRule, getRules )
-import BasicTypes       ( isMarkedStrict )
-import CostCentre       ( currentCCS )
+import BasicTypes       ( isMarkedStrict, Arity )
+import CostCentre       ( currentCCS, pushCCisNop )
  import TysPrim          ( realWorldStatePrimTy )
  import PrelInfo         ( realWorldPrimId )
  import BasicTypes       ( TopLevelFlag(..), isTopLevel,
                            RecFlag(..), isNonRuleLoopBreaker )
  import TysPrim          ( realWorldStatePrimTy )
  import PrelInfo         ( realWorldPrimId )
  import BasicTypes       ( TopLevelFlag(..), isTopLevel,
                            RecFlag(..), isNonRuleLoopBreaker )
+import MonadUtils      ( foldlM )
  import Maybes           ( orElse )
  import Data.List        ( mapAccumL )
  import Maybes           ( orElse )
  import Data.List        ( mapAccumL )
-import MonadUtils      ( foldlM )
-import StaticFlags     ( opt_PassCaseBndrToJoinPoints )
  import Outputable
  import FastString
  \end{code}
  import Outputable
  import FastString
  \end{code}
@@ -202,7 +205,7 @@ expansion at a let RHS can concentrate solely on the PAP case.
  %************************************************************************
  
  \begin{code}
  %************************************************************************
  
  \begin{code}
-simplTopBinds :: SimplEnv -> [InBind] -> SimplM [OutBind]
+simplTopBinds :: SimplEnv -> [InBind] -> SimplM SimplEnv
  
  simplTopBinds env0 binds0
    = do  {       -- Put all the top-level binders into scope at the start
  
  simplTopBinds env0 binds0
    = do  {       -- Put all the top-level binders into scope at the start
@@ -215,7 +218,7 @@ simplTopBinds env0 binds0
                            dopt Opt_D_dump_rule_firings dflags
          ; env2 <- simpl_binds dump_flag env1 binds0
          ; freeTick SimplifierDone
                            dopt Opt_D_dump_rule_firings dflags
          ; env2 <- simpl_binds dump_flag env1 binds0
          ; freeTick SimplifierDone
-        ; return (getFloats env2) }
+        ; return env2 }
    where
          -- We need to track the zapped top-level binders, because
          -- they should have their fragile IdInfo zapped (notably occurrence info)
    where
          -- We need to track the zapped top-level binders, because
          -- they should have their fragile IdInfo zapped (notably occurrence info)
@@ -333,15 +336,14 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se
                  -- See Note [Floating and type abstraction] in SimplUtils
  
          -- Simplify the RHS
                  -- See Note [Floating and type abstraction] in SimplUtils
  
          -- Simplify the RHS
-        ; (body_env1, body1) <- simplExprF body_env body mkBoringStop
-
+        ; (body_env1, body1) <- simplExprF body_env body mkRhsStop
          -- ANF-ise a constructor or PAP rhs
          -- ANF-ise a constructor or PAP rhs
-        ; (body_env2, body2) <- prepareRhs body_env1 body1
+        ; (body_env2, body2) <- prepareRhs body_env1 bndr1 body1
  
          ; (env', rhs')
              <-  if not (doFloatFromRhs top_lvl is_rec False body2 body_env2)
                  then                            -- No floating, just wrap up!
  
          ; (env', rhs')
              <-  if not (doFloatFromRhs top_lvl is_rec False body2 body_env2)
                  then                            -- No floating, just wrap up!
-                     do { rhs' <- mkLam tvs' (wrapFloats body_env2 body2)
+                     do { rhs' <- mkLam env tvs' (wrapFloats body_env2 body2)
                          ; return (env, rhs') }
  
                  else if null tvs then           -- Simple floating
                          ; return (env, rhs') }
  
                  else if null tvs then           -- Simple floating
@@ -351,7 +353,7 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se
                  else                            -- Do type-abstraction first
                       do { tick LetFloatFromLet
                          ; (poly_binds, body3) <- abstractFloats tvs' body_env2 body2
                  else                            -- Do type-abstraction first
                       do { tick LetFloatFromLet
                          ; (poly_binds, body3) <- abstractFloats tvs' body_env2 body2
-                        ; rhs' <- mkLam tvs' body3
+                        ; rhs' <- mkLam env tvs' body3
                          ; env' <- foldlM (addPolyBind top_lvl) env poly_binds
                          ; return (env', rhs') }
  
                          ; env' <- foldlM (addPolyBind top_lvl) env poly_binds
                          ; return (env', rhs') }
  
@@ -382,7 +384,7 @@ completeNonRecX :: SimplEnv
                  -> SimplM SimplEnv
  
  completeNonRecX env is_strict old_bndr new_bndr new_rhs
                  -> SimplM SimplEnv
  
  completeNonRecX env is_strict old_bndr new_bndr new_rhs
-  = do  { (env1, rhs1) <- prepareRhs (zapFloats env) new_rhs
+  = do  { (env1, rhs1) <- prepareRhs (zapFloats env) new_bndr new_rhs
          ; (env2, rhs2) <-
                  if doFloatFromRhs NotTopLevel NonRecursive is_strict rhs1 env1
                  then do { tick LetFloatFromLet
          ; (env2, rhs2) <-
                  if doFloatFromRhs NotTopLevel NonRecursive is_strict rhs1 env1
                  then do { tick LetFloatFromLet
@@ -433,15 +435,19 @@ Here we want to make e1,e2 trivial and get
  That's what the 'go' loop in prepareRhs does
  
  \begin{code}
  That's what the 'go' loop in prepareRhs does
  
  \begin{code}
-prepareRhs :: SimplEnv -> OutExpr -> SimplM (SimplEnv, OutExpr)
+prepareRhs :: SimplEnv -> OutId -> OutExpr -> SimplM (SimplEnv, OutExpr)
  -- Adds new floats to the env iff that allows us to return a good RHS
  -- Adds new floats to the env iff that allows us to return a good RHS
-prepareRhs env (Cast rhs co)    -- Note [Float coercions]
+prepareRhs env id (Cast rhs co)    -- Note [Float coercions]
    | (ty1, _ty2) <- coercionKind co       -- Do *not* do this if rhs has an unlifted type
    , not (isUnLiftedType ty1)            -- see Note [Float coercions (unlifted)]
    | (ty1, _ty2) <- coercionKind co       -- Do *not* do this if rhs has an unlifted type
    , not (isUnLiftedType ty1)            -- see Note [Float coercions (unlifted)]
-  = do  { (env', rhs') <- makeTrivial env rhs
+  = do  { (env', rhs') <- makeTrivialWithInfo env sanitised_info rhs
          ; return (env', Cast rhs' co) }
          ; return (env', Cast rhs' co) }
+  where
+    sanitised_info = vanillaIdInfo `setNewStrictnessInfo` newStrictnessInfo info
+                                   `setNewDemandInfo`     newDemandInfo info
+    info = idInfo id
  
  
-prepareRhs env0 rhs0
+prepareRhs env0 _ rhs0
    = do  { (_is_val, env1, rhs1) <- go 0 env0 rhs0
          ; return (env1, rhs1) }
    where
    = do  { (_is_val, env1, rhs1) <- go 0 env0 rhs0
          ; return (env1, rhs1) }
    where
@@ -462,7 +468,8 @@ prepareRhs env0 rhs0
          where
            is_val = n_val_args > 0       -- There is at least one arg
                                          -- ...and the fun a constructor or PAP
          where
            is_val = n_val_args > 0       -- There is at least one arg
                                          -- ...and the fun a constructor or PAP
-                 && (isDataConWorkId fun || n_val_args < idArity fun)
+                 && (isConLikeId fun || n_val_args < idArity fun)
+                                  -- See Note [CONLIKE pragma] in BasicTypes
      go _ env other
          = return (False, env, other)
  \end{code}
      go _ env other
          = return (False, env, other)
  \end{code}
@@ -490,6 +497,17 @@ and lead to further optimisation.  Example:
            go n = case x of { T m -> go (n-m) }
                  -- This case should optimise
  
            go n = case x of { T m -> go (n-m) }
                  -- This case should optimise
  
+Note [Preserve strictness when floating coercions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In the Note [Float coercions] transformation, keep the strictness info.
+Eg
+       f = e `cast` co    -- f has strictness SSL
+When we transform to
+        f' = e            -- f' also has strictness SSL
+        f = f' `cast` co   -- f still has strictness SSL
+
+Its not wrong to drop it on the floor, but better to keep it.
+
  Note [Float coercions (unlifted)]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  BUT don't do [Float coercions] if 'e' has an unlifted type.
  Note [Float coercions (unlifted)]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  BUT don't do [Float coercions] if 'e' has an unlifted type.
@@ -510,16 +528,19 @@ These strange casts can happen as a result of case-of-case
  \begin{code}
  makeTrivial :: SimplEnv -> OutExpr -> SimplM (SimplEnv, OutExpr)
  -- Binds the expression to a variable, if it's not trivial, returning the variable
  \begin{code}
  makeTrivial :: SimplEnv -> OutExpr -> SimplM (SimplEnv, OutExpr)
  -- Binds the expression to a variable, if it's not trivial, returning the variable
-makeTrivial env expr
+makeTrivial env expr = makeTrivialWithInfo env vanillaIdInfo expr
+
+makeTrivialWithInfo :: SimplEnv -> IdInfo -> OutExpr -> SimplM (SimplEnv, OutExpr)
+-- Propagate strictness and demand info to the new binder
+-- Note [Preserve strictness when floating coercions]
+makeTrivialWithInfo env info expr
    | exprIsTrivial expr
    = return (env, expr)
    | otherwise           -- See Note [Take care] below
    | exprIsTrivial expr
    = return (env, expr)
    | otherwise           -- See Note [Take care] below
-  = do  { var <- newId (fsLit "a") (exprType expr)
+  = do  { uniq <- getUniqueM
+        ; let name = mkSystemVarName uniq (fsLit "a")
+              var = mkLocalIdWithInfo name (exprType expr) info
          ; env' <- completeNonRecX env False var var expr
          ; env' <- completeNonRecX env False var var expr
---       pprTrace "makeTrivial" (vcat [ppr var <+> ppr (exprArity (substExpr env' (Var var)))
---                                    , ppr expr
---                                    , ppr (substExpr env' (Var var))
---                                    , ppr (idArity (fromJust (lookupInScope (seInScope env') var))) ]) $
         ; return (env', substExpr env' (Var var)) }
         -- The substitution is needed becase we're constructing a new binding
         --     a = rhs
         ; return (env', substExpr env' (Var var)) }
         -- The substitution is needed becase we're constructing a new binding
         --     a = rhs
@@ -571,7 +592,7 @@ completeBind env top_lvl old_bndr new_bndr new_rhs
               old_unf  = unfoldingInfo old_info
               occ_info = occInfo old_info
  
               old_unf  = unfoldingInfo old_info
               occ_info = occInfo old_info
  
-       ; new_unfolding <- simplUnfolding env top_lvl old_bndr occ_info old_unf new_rhs
+       ; new_unfolding <- simplUnfolding env top_lvl old_bndr occ_info new_rhs old_unf
  
         ; if postInlineUnconditionally env top_lvl new_bndr occ_info new_rhs new_unfolding
                         -- Inline and discard the binding
  
         ; if postInlineUnconditionally env top_lvl new_bndr occ_info new_rhs new_unfolding
                         -- Inline and discard the binding
@@ -596,7 +617,7 @@ addPolyBind :: TopLevelFlag -> SimplEnv -> OutBind -> SimplM SimplEnv
  -- opportunity to inline 'y' too.
  
  addPolyBind top_lvl env (NonRec poly_id rhs)
  -- opportunity to inline 'y' too.
  
  addPolyBind top_lvl env (NonRec poly_id rhs)
-  = do { unfolding <- simplUnfolding env top_lvl poly_id NoOccInfo noUnfolding rhs
+  = do { unfolding <- simplUnfolding env top_lvl poly_id NoOccInfo rhs noUnfolding
                         -- Assumes that poly_id did not have an INLINE prag
                         -- which is perhaps wrong.  ToDo: think about this
          ; return (addNonRecWithUnf env poly_id rhs unfolding) }
                         -- Assumes that poly_id did not have an INLINE prag
                         -- which is perhaps wrong.  ToDo: think about this
          ; return (addNonRecWithUnf env poly_id rhs unfolding) }
@@ -628,7 +649,9 @@ addNonRecWithUnf env new_bndr new_rhs new_unfolding
      in
      ASSERT( isId new_bndr )
      WARN( new_arity < old_arity || new_arity < dmd_arity, 
      in
      ASSERT( isId new_bndr )
      WARN( new_arity < old_arity || new_arity < dmd_arity, 
-          (ppr final_id <+> ppr old_arity <+> ppr new_arity <+> ppr dmd_arity) $$ ppr new_rhs )
+          (ptext (sLit "Arity decrease:") <+> ppr final_id <+> ppr old_arity
+               <+> ppr new_arity <+> ppr dmd_arity) )
+       -- Note [Arity decrease]
  
      final_id `seq`   -- This seq forces the Id, and hence its IdInfo,
                      -- and hence any inner substitutions
  
      final_id `seq`   -- This seq forces the Id, and hence its IdInfo,
                      -- and hence any inner substitutions
@@ -636,27 +659,62 @@ addNonRecWithUnf env new_bndr new_rhs new_unfolding
      addNonRec env final_id new_rhs
                 -- The addNonRec adds it to the in-scope set too
  
      addNonRec env final_id new_rhs
                 -- The addNonRec adds it to the in-scope set too
  
-
  ------------------------------
  simplUnfolding :: SimplEnv-> TopLevelFlag
                -> Id    -- Debug output only
  ------------------------------
  simplUnfolding :: SimplEnv-> TopLevelFlag
                -> Id    -- Debug output only
-              -> OccInfo -> Unfolding -> OutExpr
-              -> SimplM Unfolding
-simplUnfolding env top_lvl bndr occ_info old_unf new_rhs       -- Note [Setting the new unfolding]
-  | omit_unfolding = WARN( is_inline_rule, ppr bndr ) return NoUnfolding       
-  | is_inline_rule = return (substUnfolding env is_top_lvl old_unf)
-  | otherwise     = return (mkUnfolding is_top_lvl new_rhs)
+              -> OccInfo -> OutExpr
+              -> Unfolding -> SimplM Unfolding
+-- Note [Setting the new unfolding]
+simplUnfolding env _ _ _ _ (DFunUnfolding con ops)
+  = return (DFunUnfolding con ops')
+  where
+    ops' = map (CoreSubst.substExpr (mkCoreSubst env)) ops
+
+simplUnfolding env top_lvl _ _ _ 
+    (CoreUnfolding { uf_tmpl = expr, uf_arity = arity
+                   , uf_guidance = guide@(InlineRule {}) })
+  = do { expr' <- simplExpr (setMode simplGentlyForInlineRules env) expr
+                      -- See Note [Simplifying gently inside InlineRules] in SimplUtils
+       ; let mb_wkr' = CoreSubst.substInlineRuleInfo (mkCoreSubst env) (ir_info guide)
+       ; return (mkCoreUnfolding (isTopLevel top_lvl) expr' arity 
+                                 (guide { ir_info = mb_wkr' })) }
+               -- See Note [Top-level flag on inline rules] in CoreUnfold
+
+simplUnfolding _ top_lvl _ occ_info new_rhs _
+  | omit_unfolding = return NoUnfolding        
+  | otherwise     = return (mkUnfolding (isTopLevel top_lvl) new_rhs)
    where
    where
-    is_top_lvl     = isTopLevel top_lvl
-    is_inline_rule = isInlineRule old_unf
      omit_unfolding = isNonRuleLoopBreaker occ_info
  \end{code}
  
      omit_unfolding = isNonRuleLoopBreaker occ_info
  \end{code}
  
+Note [Arity decrease]
+~~~~~~~~~~~~~~~~~~~~~
+Generally speaking the arity of a binding should not decrease.  But it *can* 
+legitimately happen becuase of RULES.  Eg
+       f = g Int
+where g has arity 2, will have arity 2.  But if there's a rewrite rule
+       g Int --> h
+where h has arity 1, then f's arity will decrease.  Here's a real-life example,
+which is in the output of Specialise:
+
+     Rec {
+       $dm {Arity 2} = \d.\x. op d
+       {-# RULES forall d. $dm Int d = $s$dm #-}
+       
+       dInt = MkD .... opInt ...
+       opInt {Arity 1} = $dm dInt
+
+       $s$dm {Arity 0} = \x. op dInt }
+
+Here opInt has arity 1; but when we apply the rule its arity drops to 0.
+That's why Specialise goes to a little trouble to pin the right arity
+on specialised functions too.
  
  Note [Setting the new unfolding]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  
  Note [Setting the new unfolding]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-* If there's an INLINE pragma, we use substUnfolding to retain the 
-  supplied inlining
+* If there's an INLINE pragma, we simplify the RHS gently.  Maybe we
+  should do nothing at all, but simplifying gently might get rid of 
+  more crap.
  
  * If not, we make an unfolding from the new RHS.  But *only* for
    non-loop-breakers. Making loop breakers not have an unfolding at all
  
  * If not, we make an unfolding from the new RHS.  But *only* for
    non-loop-breakers. Making loop breakers not have an unfolding at all
@@ -664,10 +722,9 @@ Note [Setting the new unfolding]
    important: if exprIsConApp says 'yes' for a recursive thing, then we
    can get into an infinite loop
  
    important: if exprIsConApp says 'yes' for a recursive thing, then we
    can get into an infinite loop
  
-If there's an INLINE pragma on a loop breaker, we simply discard it 
-(with a DEBUG warning).  The desugarer complains about binding groups
-that look likely to trigger this behaviour.
-
+If there's an InlineRule on a loop breaker, we hang on to the inlining.
+It's pretty dodgy, but the user did say 'INLINE'.  May need to revisit
+this choice.
  
  Note [Setting the demand info]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  
  Note [Setting the demand info]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -781,7 +838,7 @@ simplExprF' env expr@(Lam _ _) cont
  
  simplExprF' env (Type ty) cont
    = ASSERT( contIsRhsOrArg cont )
  
  simplExprF' env (Type ty) cont
    = ASSERT( contIsRhsOrArg cont )
-    do  { ty' <- simplType env ty
+    do  { ty' <- simplCoercion env ty
          ; rebuild env (Type ty') cont }
  
  simplExprF' env (Case scrut bndr _ alts) cont
          ; rebuild env (Type ty') cont }
  
  simplExprF' env (Case scrut bndr _ alts) cont
@@ -816,6 +873,14 @@ simplType env ty
      seqType new_ty   `seq`   return new_ty
    where
      new_ty = substTy env ty
      seqType new_ty   `seq`   return new_ty
    where
      new_ty = substTy env ty
+
+---------------------------------
+simplCoercion :: SimplEnv -> InType -> SimplM OutType
+-- The InType isn't *necessarily* a coercion, but it might be
+-- (in a type application, say) and optCoercion is a no-op on types
+simplCoercion env co
+  = do { co' <- simplType env co
+       ; return (optCoercion co') }
  \end{code}
  
  
  \end{code}
  
  
@@ -853,7 +918,7 @@ rebuild env expr cont0
  simplCast :: SimplEnv -> InExpr -> Coercion -> SimplCont
            -> SimplM (SimplEnv, OutExpr)
  simplCast env body co0 cont0
  simplCast :: SimplEnv -> InExpr -> Coercion -> SimplCont
            -> SimplM (SimplEnv, OutExpr)
  simplCast env body co0 cont0
-  = do  { co1 <- simplType env co0
+  = do  { co1 <- simplCoercion env co0
          ; simplExprF env body (addCoerce co1 cont0) }
    where
         addCoerce co cont = add_coerce co (coercionKind co) cont
          ; simplExprF env body (addCoerce co1 cont0) }
    where
         addCoerce co cont = add_coerce co (coercionKind co) cont
@@ -865,8 +930,8 @@ simplCast env body co0 cont0
           | (_l1, t1) <- coercionKind co2
                 --      e |> (g1 :: S1~L) |> (g2 :: L~T1)
                  -- ==>
           | (_l1, t1) <- coercionKind co2
                 --      e |> (g1 :: S1~L) |> (g2 :: L~T1)
                  -- ==>
-                --      e,                       if T1=T2
-                --      e |> (g1 . g2 :: T1~T2)  otherwise
+                --      e,                       if S1=T1
+                --      e |> (g1 . g2 :: S1~T1)  otherwise
                  --
                  -- For example, in the initial form of a worker
                  -- we may find  (coerce T (coerce S (\x.e))) y
                  --
                  -- For example, in the initial form of a worker
                  -- we may find  (coerce T (coerce S (\x.e))) y
@@ -937,7 +1002,7 @@ simplLam env (bndr:bndrs) body (ApplyTo _ arg arg_se cont)
  simplLam env bndrs body cont
    = do  { (env', bndrs') <- simplLamBndrs env bndrs
          ; body' <- simplExpr env' body
  simplLam env bndrs body cont
    = do  { (env', bndrs') <- simplLamBndrs env bndrs
          ; body' <- simplExpr env' body
-        ; new_lam <- mkLam bndrs' body'
+        ; new_lam <- mkLam env' bndrs' body'
          ; rebuild env' new_lam cont }
  
  ------------------
          ; rebuild env' new_lam cont }
  
  ------------------
@@ -998,6 +1063,9 @@ simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont
  simplNote :: SimplEnv -> Note -> CoreExpr -> SimplCont
            -> SimplM (SimplEnv, OutExpr)
  simplNote env (SCC cc) e cont
  simplNote :: SimplEnv -> Note -> CoreExpr -> SimplCont
            -> SimplM (SimplEnv, OutExpr)
  simplNote env (SCC cc) e cont
+  | pushCCisNop cc (getEnclosingCC env)  -- scc "f" (...(scc "f" e)...) 
+  = simplExprF env e cont               -- ==>  scc "f" (...e...)
+  | otherwise
    = do  { e' <- simplExpr (setEnclosingCC env currentCCS) e
          ; rebuild env (mkSCC cc e') cont }
  
    = do  { e' <- simplExpr (setEnclosingCC env currentCCS) e
          ; rebuild env (mkSCC cc e') cont }
  
@@ -1035,8 +1103,7 @@ simplVar env var cont
  
  completeCall :: SimplEnv -> Id -> SimplCont -> SimplM (SimplEnv, OutExpr)
  completeCall env var cont
  
  completeCall :: SimplEnv -> Id -> SimplCont -> SimplM (SimplEnv, OutExpr)
  completeCall env var cont
-  = do  { dflags <- getDOptsSmpl
-        ; let   (args,call_cont) = contArgs cont
+  = do  { let   (args,call_cont) = contArgs cont
                  -- The args are OutExprs, obtained by *lazily* substituting
                  -- in the args found in cont.  These args are only examined
                  -- to limited depth (unless a rule fires).  But we must do
                  -- The args are OutExprs, obtained by *lazily* substituting
                  -- in the args found in cont.  These args are only examined
                  -- to limited depth (unless a rule fires).  But we must do
@@ -1052,45 +1119,20 @@ completeCall env var cont
          -- We used to use the black-listing mechanism to ensure that inlining of
          -- the wrapper didn't occur for things that have specialisations till a
          -- later phase, so but now we just try RULES first
          -- We used to use the black-listing mechanism to ensure that inlining of
          -- the wrapper didn't occur for things that have specialisations till a
          -- later phase, so but now we just try RULES first
-        --
-        -- Note [Rules for recursive functions]
-        -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-        -- You might think that we shouldn't apply rules for a loop breaker:
-        -- doing so might give rise to an infinite loop, because a RULE is
-        -- rather like an extra equation for the function:
-        --      RULE:           f (g x) y = x+y
-        --      Eqn:            f a     y = a-y
-        --
-        -- But it's too drastic to disable rules for loop breakers.
-        -- Even the foldr/build rule would be disabled, because foldr
-        -- is recursive, and hence a loop breaker:
-        --      foldr k z (build g) = g k z
-        -- So it's up to the programmer: rules can cause divergence
+       -- 
+       -- See also Note [Rules for recursive functions]
          ; rule_base <- getSimplRules
          ; rule_base <- getSimplRules
-        ; let   in_scope   = getInScope env
-               rules      = getRules rule_base var
-                maybe_rule = case activeRule dflags env of
-                                Nothing     -> Nothing  -- No rules apply
-                                Just act_fn -> lookupRule act_fn in_scope
-                                                          var args rules 
-        ; case maybe_rule of {
-            Just (rule, rule_rhs) -> do
-                tick (RuleFired (ru_name rule))
-                (if dopt Opt_D_dump_rule_firings dflags then
-                   pprTrace "Rule fired" (vcat [
-                        text "Rule:" <+> ftext (ru_name rule),
-                        text "Before:" <+> ppr var <+> sep (map pprParendExpr args),
-                        text "After: " <+> pprCoreExpr rule_rhs,
-                        text "Cont:  " <+> ppr call_cont])
-                 else
-                        id)             $
-                 simplExprF env rule_rhs (dropArgs (ruleArity rule) cont)
+       ; let rules = getRules rule_base var
+       ; mb_rule <- tryRules env var rules args call_cont
+       ; case mb_rule of {
+            Just (n_args, rule_rhs) -> simplExprF env rule_rhs (dropArgs n_args cont) ;
                   -- The ruleArity says how many args the rule consumed
                   -- The ruleArity says how many args the rule consumed
+           ; Nothing -> do       -- No rules
  
  
-          ; Nothing -> do       -- No rules
  
          ------------- Next try inlining ----------------
  
          ------------- Next try inlining ----------------
-        { let   arg_infos = [interestingArg arg | arg <- args, isValArg arg]
+        { dflags <- getDOptsSmpl
+        ; let   arg_infos = [interestingArg arg | arg <- args, isValArg arg]
                  n_val_args = length arg_infos
                  interesting_cont = interestingCallContext call_cont
                  active_inline = activeInline env var
                  n_val_args = length arg_infos
                  interesting_cont = interestingCallContext call_cont
                  active_inline = activeInline env var
@@ -1114,7 +1156,8 @@ completeCall env var cont
          -- Next, look for rules or specialisations that match
          --
          rebuildCall env (Var var)
          -- Next, look for rules or specialisations that match
          --
          rebuildCall env (Var var)
-                    (mkArgInfo var n_val_args call_cont) cont
+                    (mkArgInfo var rules n_val_args call_cont) 
+                    cont
      }}}}
  
  rebuildCall :: SimplEnv
      }}}}
  
  rebuildCall :: SimplEnv
@@ -1143,7 +1186,7 @@ rebuildCall env fun (ArgInfo { ai_strs = [] }) cont
                     | otherwise = mkCoerce co expr
  
  rebuildCall env fun info (ApplyTo _ (Type arg_ty) se cont)
                     | otherwise = mkCoerce co expr
  
  rebuildCall env fun info (ApplyTo _ (Type arg_ty) se cont)
-  = do  { ty' <- simplType (se `setInScope` env) arg_ty
+  = do  { ty' <- simplCoercion (se `setInScope` env) arg_ty
          ; rebuildCall env (fun `App` Type ty') info cont }
  
  rebuildCall env fun 
          ; rebuildCall env (fun `App` Type ty') info cont }
  
  rebuildCall env fun 
@@ -1165,8 +1208,8 @@ rebuildCall env fun
          ; rebuildCall env (fun `App` arg') arg_info' cont }
    where
      arg_info' = ArgInfo { ai_rules = has_rules, ai_strs = strs, ai_discs = discs }
          ; rebuildCall env (fun `App` arg') arg_info' cont }
    where
      arg_info' = ArgInfo { ai_rules = has_rules, ai_strs = strs, ai_discs = discs }
-    cci | has_rules || disc > 0 = ArgCtxt has_rules disc  -- Be keener here
-        | otherwise             = BoringCtxt              -- Nothing interesting
+    cci | has_rules || disc > 0 = ArgCtxt has_rules  -- Be keener here
+        | otherwise             = BoringCtxt         -- Nothing interesting
  
  rebuildCall env fun _ cont
    = rebuild env fun cont
  
  rebuildCall env fun _ cont
    = rebuild env fun cont
@@ -1196,6 +1239,58 @@ to get the effect that finding (error "foo") in a strict arg position will
  discard the entire application and replace it with (error "foo").  Getting
  all this at once is TOO HARD!
  
  discard the entire application and replace it with (error "foo").  Getting
  all this at once is TOO HARD!
  
+
+%************************************************************************
+%*                                                                      *
+                Rewrite rules
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+tryRules :: SimplEnv
+         -> Id -> [CoreRule] -> [OutExpr] -> SimplCont 
+        -> SimplM (Maybe (Arity, CoreExpr))         -- The arity is the number of
+                                                    -- args consumed by the rule
+tryRules env fn rules args call_cont
+  | null rules
+  = return Nothing
+  | otherwise
+  = do { dflags <- getDOptsSmpl
+       ; case activeRule dflags env of {
+           Nothing     -> return Nothing  ; -- No rules apply
+           Just act_fn -> 
+
+         case lookupRule act_fn (getInScope env) fn args rules of {
+           Nothing               -> return Nothing ;   -- No rule matches
+           Just (rule, rule_rhs) ->
+
+             do { tick (RuleFired (ru_name rule))
+                ; (if dopt Opt_D_dump_rule_firings dflags then
+                     pprTrace "Rule fired" (vcat [
+                        text "Rule:" <+> ftext (ru_name rule),
+                        text "Before:" <+> ppr fn <+> sep (map pprParendExpr args),
+                        text "After: " <+> pprCoreExpr rule_rhs,
+                        text "Cont:  " <+> ppr call_cont])
+                   else
+                        id)             $
+                   return (Just (ruleArity rule, rule_rhs)) }}}}
+\end{code}
+
+Note [Rules for recursive functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+You might think that we shouldn't apply rules for a loop breaker:
+doing so might give rise to an infinite loop, because a RULE is
+rather like an extra equation for the function:
+     RULE:           f (g x) y = x+y
+     Eqn:            f a     y = a-y
+
+But it's too drastic to disable rules for loop breakers.
+Even the foldr/build rule would be disabled, because foldr
+is recursive, and hence a loop breaker:
+     foldr k z (build g) = g k z
+So it's up to the programmer: rules can cause divergence
+
+
  %************************************************************************
  %*                                                                      *
                  Rebuilding a cse expression
  %************************************************************************
  %*                                                                      *
                  Rebuilding a cse expression
@@ -1292,26 +1387,40 @@ I don't really know how to improve this situation.
  ---------------------------------------------------------
  --      Eliminate the case if possible
  
  ---------------------------------------------------------
  --      Eliminate the case if possible
  
-rebuildCase :: SimplEnv
-            -> OutExpr          -- Scrutinee
-            -> InId             -- Case binder
-            -> [InAlt]          -- Alternatives (inceasing order)
-            -> SimplCont
-            -> SimplM (SimplEnv, OutExpr)
+rebuildCase, reallyRebuildCase
+   :: SimplEnv
+   -> OutExpr          -- Scrutinee
+   -> InId             -- Case binder
+   -> [InAlt]          -- Alternatives (inceasing order)
+   -> SimplCont
+   -> SimplM (SimplEnv, OutExpr)
  
  --------------------------------------------------
  --      1. Eliminate the case if there's a known constructor
  --------------------------------------------------
  
  rebuildCase env scrut case_bndr alts cont
  
  --------------------------------------------------
  --      1. Eliminate the case if there's a known constructor
  --------------------------------------------------
  
  rebuildCase env scrut case_bndr alts cont
-  | Just (con,args) <- exprIsConApp_maybe scrut
-        -- Works when the scrutinee is a variable with a known unfolding
-        -- as well as when it's an explicit constructor application
-  = knownCon env scrut (DataAlt con) args case_bndr alts cont
-
    | Lit lit <- scrut    -- No need for same treatment as constructors
                          -- because literals are inlined more vigorously
    | Lit lit <- scrut    -- No need for same treatment as constructors
                          -- because literals are inlined more vigorously
-  = knownCon env scrut (LitAlt lit) [] case_bndr alts cont
+  = do  { tick (KnownBranch case_bndr)
+        ; case findAlt (LitAlt lit) alts of
+           Nothing           -> missingAlt env case_bndr alts cont
+           Just (_, bs, rhs) -> simple_rhs bs rhs }
+
+  | Just (con, ty_args, other_args) <- exprIsConApp_maybe scrut
+        -- Works when the scrutinee is a variable with a known unfolding
+        -- as well as when it's an explicit constructor application
+  = do  { tick (KnownBranch case_bndr)
+        ; case findAlt (DataAlt con) alts of
+           Nothing  -> missingAlt env case_bndr alts cont
+            Just (DEFAULT, bs, rhs) -> simple_rhs bs rhs
+           Just (_, bs, rhs)       -> knownCon env scrut con ty_args other_args 
+                                                case_bndr bs rhs cont
+       }
+  where
+    simple_rhs bs rhs = ASSERT( null bs ) 
+                        do { env' <- simplNonRecX env case_bndr scrut
+                          ; simplExprF env' rhs cont }
  
  
  --------------------------------------------------
  
  
  --------------------------------------------------
@@ -1358,12 +1467,31 @@ rebuildCase env scrut case_bndr [(_, bndrs, rhs)] cont
                                      -- exprOkForSpeculation was intended for.
      var_demanded_later _       = False
  
                                      -- exprOkForSpeculation was intended for.
      var_demanded_later _       = False
  
+rebuildCase env scrut case_bndr alts@[(_, bndrs, rhs)] cont
+  | all isDeadBinder (case_bndr : bndrs)  -- So this is just 'seq'
+  =    -- For this case, see Note [User-defined RULES for seq] in MkId
+    do { let rhs' = substExpr env rhs
+             out_args = [Type (substTy env (idType case_bndr)), 
+                        Type (exprType rhs'), scrut, rhs']
+                     -- Lazily evaluated, so we don't do most of this
+
+       ; rule_base <- getSimplRules
+       ; let rules = getRules rule_base seqId
+       ; mb_rule <- tryRules env seqId rules out_args cont
+       ; case mb_rule of 
+           Just (n_args, res) -> simplExprF (zapSubstEnv env) 
+                                           (mkApps res (drop n_args out_args))
+                                            cont
+          Nothing -> reallyRebuildCase env scrut case_bndr alts cont }
+
+rebuildCase env scrut case_bndr alts cont
+  = reallyRebuildCase env scrut case_bndr alts cont
  
  --------------------------------------------------
  --      3. Catch-all case
  --------------------------------------------------
  
  
  --------------------------------------------------
  --      3. Catch-all case
  --------------------------------------------------
  
-rebuildCase env scrut case_bndr alts cont
+reallyRebuildCase env scrut case_bndr alts cont
    = do  {       -- Prepare the continuation;
                  -- The new subst_env is in place
            (env', dup_cont, nodup_cont) <- prepareCaseCont env alts cont
    = do  {       -- Prepare the continuation;
                  -- The new subst_env is in place
            (env', dup_cont, nodup_cont) <- prepareCaseCont env alts cont
@@ -1372,17 +1500,7 @@ rebuildCase env scrut case_bndr alts cont
          ; (scrut', case_bndr', alts') <- simplAlts env' scrut case_bndr alts dup_cont
  
         -- Check for empty alternatives
          ; (scrut', case_bndr', alts') <- simplAlts env' scrut case_bndr alts dup_cont
  
         -- Check for empty alternatives
-       ; if null alts' then
-               -- This isn't strictly an error, although it is unusual. 
-               -- It's possible that the simplifer might "see" that 
-               -- an inner case has no accessible alternatives before 
-               -- it "sees" that the entire branch of an outer case is 
-               -- inaccessible.  So we simply put an error case here instead.
-           pprTrace "mkCase: null alts" (ppr case_bndr <+> ppr scrut) $
-           let res_ty' = contResultType env' (substTy env' (coreAltsType alts)) dup_cont
-               lit = mkStringLit "Impossible alternative"
-           in return (env', mkApps (Var rUNTIME_ERROR_ID) [Type res_ty', lit])
-
+       ; if null alts' then missingAlt env case_bndr alts cont
           else do
         { case_expr <- mkCase scrut' case_bndr' alts'
  
           else do
         { case_expr <- mkCase scrut' case_bndr' alts'
  
@@ -1419,7 +1537,7 @@ much always zap the OccInfo of the binders.  It doesn't matter much though.
  
  Note [Case of cast]
  ~~~~~~~~~~~~~~~~~~~
  
  Note [Case of cast]
  ~~~~~~~~~~~~~~~~~~~
-Consider        case (v `cast` co) of x { I# ->
+Consider        case (v `cast` co) of x { I# y ->
                  ... (case (v `cast` co) of {...}) ...
  We'd like to eliminate the inner case.  We can get this neatly by
  arranging that inside the outer case we add the unfolding
                  ... (case (v `cast` co) of {...}) ...
  We'd like to eliminate the inner case.  We can get this neatly by
  arranging that inside the outer case we add the unfolding
@@ -1440,10 +1558,31 @@ where x::F Int.  Then we'd like to rewrite (F Int) to Int, getting
             I# x# -> let x = x' `cast` sym co
                      in rhs
  
             I# x# -> let x = x' `cast` sym co
                      in rhs
  
-so that 'rhs' can take advantage of the form of x'.  Notice that Note
-[Case of cast] may then apply to the result.
-
-This showed up in Roman's experiments.  Example:
+so that 'rhs' can take advantage of the form of x'.  
+
+Notice that Note [Case of cast] may then apply to the result. 
+
+Nota Bene: We only do the [Improving seq] transformation if the 
+case binder 'x' is actually used in the rhs; that is, if the case 
+is *not* a *pure* seq.  
+  a) There is no point in adding the cast to a pure seq.
+  b) There is a good reason not to: doing so would interfere 
+     with seq rules (Note [Built-in RULES for seq] in MkId).
+     In particular, this [Improving seq] thing *adds* a cast
+     while [Built-in RULES for seq] *removes* one, so they
+     just flip-flop.
+
+You might worry about 
+   case v of x { __DEFAULT ->
+      ... case (v `cast` co) of y { I# -> ... }}
+This is a pure seq (since x is unused), so [Improving seq] won't happen.
+But it's ok: the simplifier will replace 'v' by 'x' in the rhs to get
+   case v of x { __DEFAULT ->
+      ... case (x `cast` co) of y { I# -> ... }}
+Now the outer case is not a pure seq, so [Improving seq] will happen,
+and then the inner case will disappear.
+
+The need for [Improving seq] showed up in Roman's experiments.  Example:
    foo :: F Int -> Int -> Int
    foo t n = t `seq` bar n
       where
    foo :: F Int -> Int -> Int
    foo t n = t `seq` bar n
       where
@@ -1452,64 +1591,9 @@ This showed up in Roman's experiments.  Example:
  Here we'd like to avoid repeated evaluating t inside the loop, by
  taking advantage of the `seq`.
  
  Here we'd like to avoid repeated evaluating t inside the loop, by
  taking advantage of the `seq`.
  
-At one point I did transformation in LiberateCase, but it's more robust here.
-(Otherwise, there's a danger that we'll simply drop the 'seq' altogether, before
-LiberateCase gets to see it.)
-
-
-Historical note [no-case-of-case]
-~~~~~~~~~~~~~~~~~~~~~~
-We *used* to suppress the binder-swap in case expressoins when 
--fno-case-of-case is on.  Old remarks:
-    "This happens in the first simplifier pass,
-    and enhances full laziness.  Here's the bad case:
-            f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
-    If we eliminate the inner case, we trap it inside the I# v -> arm,
-    which might prevent some full laziness happening.  I've seen this
-    in action in spectral/cichelli/Prog.hs:
-             [(m,n) | m <- [1..max], n <- [1..max]]
-    Hence the check for NoCaseOfCase."
-However, now the full-laziness pass itself reverses the binder-swap, so this
-check is no longer necessary.
-
-Historical note [Suppressing the case binder-swap]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-There is another situation when it might make sense to suppress the
-case-expression binde-swap. If we have
-
-    case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
-                   ...other cases .... }
-
-We'll perform the binder-swap for the outer case, giving
-
-    case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
-                   ...other cases .... }
-
-But there is no point in doing it for the inner case, because w1 can't
-be inlined anyway.  Furthermore, doing the case-swapping involves
-zapping w2's occurrence info (see paragraphs that follow), and that
-forces us to bind w2 when doing case merging.  So we get
-
-    case x of w1 { A -> let w2 = w1 in e1
-                   B -> let w2 = w1 in e2
-                   ...other cases .... }
-
-This is plain silly in the common case where w2 is dead.
-
-Even so, I can't see a good way to implement this idea.  I tried
-not doing the binder-swap if the scrutinee was already evaluated
-but that failed big-time:
-
-        data T = MkT !Int
-
-        case v of w  { MkT x ->
-        case x of x1 { I# y1 ->
-        case x of x2 { I# y2 -> ...
-
-Notice that because MkT is strict, x is marked "evaluated".  But to
-eliminate the last case, we must either make sure that x (as well as
-x1) has unfolding MkT y1.  THe straightforward thing to do is to do
-the binder-swap.  So this whole note is a no-op.
+At one point I did transformation in LiberateCase, but it's more
+robust here.  (Otherwise, there's a danger that we'll simply drop the
+'seq' altogether, before LiberateCase gets to see it.)
  
  
  \begin{code}
  
  
  \begin{code}
@@ -1518,39 +1602,15 @@ improveSeq :: (FamInstEnv, FamInstEnv) -> SimplEnv
            -> SimplM (SimplEnv, OutExpr, OutId)
  -- Note [Improving seq]
  improveSeq fam_envs env scrut case_bndr case_bndr1 [(DEFAULT,_,_)]
            -> SimplM (SimplEnv, OutExpr, OutId)
  -- Note [Improving seq]
  improveSeq fam_envs env scrut case_bndr case_bndr1 [(DEFAULT,_,_)]
-  | Just (co, ty2) <- topNormaliseType fam_envs (idType case_bndr1)
-  =  do { case_bndr2 <- newId (fsLit "nt") ty2
+  | not (isDeadBinder case_bndr)       -- Not a pure seq!  See the Note!
+  , Just (co, ty2) <- topNormaliseType fam_envs (idType case_bndr1)
+  = do { case_bndr2 <- newId (fsLit "nt") ty2
          ; let rhs  = DoneEx (Var case_bndr2 `Cast` mkSymCoercion co)
                env2 = extendIdSubst env case_bndr rhs
          ; return (env2, scrut `Cast` co, case_bndr2) }
  
  improveSeq _ env scrut _ case_bndr1 _
    = return (env, scrut, case_bndr1)
          ; let rhs  = DoneEx (Var case_bndr2 `Cast` mkSymCoercion co)
                env2 = extendIdSubst env case_bndr rhs
          ; return (env2, scrut `Cast` co, case_bndr2) }
  
  improveSeq _ env scrut _ case_bndr1 _
    = return (env, scrut, case_bndr1)
-
-{-
-    improve_case_bndr env scrut case_bndr
-        -- See Note [no-case-of-case]
-       --  | switchIsOn (getSwitchChecker env) NoCaseOfCase
-       --  = (env, case_bndr)
-
-        | otherwise     -- Failed try; see Note [Suppressing the case binder-swap]
-                        --     not (isEvaldUnfolding (idUnfolding v))
-        = case scrut of
-            Var v -> (modifyInScope env1 v case_bndr', case_bndr')
-                -- Note about using modifyInScope for v here
-                -- We could extend the substitution instead, but it would be
-                -- a hack because then the substitution wouldn't be idempotent
-                -- any more (v is an OutId).  And this does just as well.
-
-            Cast (Var v) co -> (addBinderUnfolding env1 v rhs, case_bndr')
-                            where
-                                rhs = Cast (Var case_bndr') (mkSymCoercion co)
-
-            _ -> (env, case_bndr)
-        where
-          case_bndr' = zapIdOccInfo case_bndr
-          env1       = modifyInScope env case_bndr case_bndr'
--}
  \end{code}
  
  
  \end{code}
  
  
@@ -1728,34 +1788,15 @@ and then
  All this should happen in one sweep.
  
  \begin{code}
  All this should happen in one sweep.
  
  \begin{code}
-knownCon :: SimplEnv -> OutExpr -> AltCon
-        -> [OutExpr]           -- Args *including* the universal args
-         -> InId -> [InAlt] -> SimplCont
+knownCon :: SimplEnv           
+         -> OutExpr                            -- The scrutinee
+         -> DataCon -> [OutType] -> [OutExpr]  -- The scrutinee (in pieces)
+         -> InId -> [InBndr] -> InExpr         -- The alternative
+         -> SimplCont
           -> SimplM (SimplEnv, OutExpr)
  
           -> SimplM (SimplEnv, OutExpr)
  
-knownCon env scrut con args bndr alts cont
-  = do  { tick (KnownBranch bndr)
-        ; knownAlt env scrut args bndr (findAlt con alts) cont }
-
-knownAlt :: SimplEnv -> OutExpr -> [OutExpr]
-         -> InId -> (AltCon, [CoreBndr], InExpr) -> SimplCont
-         -> SimplM (SimplEnv, OutExpr)
-knownAlt env scrut _ bndr (DEFAULT, bs, rhs) cont
-  = ASSERT( null bs )
-    do  { env' <- simplNonRecX env bndr scrut
-                -- This might give rise to a binding with non-atomic args
-                -- like x = Node (f x) (g x)
-                -- but simplNonRecX will atomic-ify it
-        ; simplExprF env' rhs cont }
-
-knownAlt env scrut _ bndr (LitAlt _, bs, rhs) cont
-  = ASSERT( null bs )
-    do  { env' <- simplNonRecX env bndr scrut
-        ; simplExprF env' rhs cont }
-
-knownAlt env scrut the_args bndr (DataAlt dc, bs, rhs) cont
-  = do  { let n_drop_tys = length (dataConUnivTyVars dc)
-        ; env' <- bind_args env bs (drop n_drop_tys the_args)
+knownCon env scrut dc dc_ty_args dc_args bndr bs rhs cont
+  = do  { env' <- bind_args env bs dc_args
          ; let
                  -- It's useful to bind bndr to scrut, rather than to a fresh
                  -- binding      x = Con arg1 .. argn
          ; let
                  -- It's useful to bind bndr to scrut, rather than to a fresh
                  -- binding      x = Con arg1 .. argn
@@ -1764,12 +1805,12 @@ knownAlt env scrut the_args bndr (DataAlt dc, bs, rhs) cont
                  -- BUT, if scrut is a not a variable, we must be careful
                  -- about duplicating the arg redexes; in that case, make
                  -- a new con-app from the args
                  -- BUT, if scrut is a not a variable, we must be careful
                  -- about duplicating the arg redexes; in that case, make
                  -- a new con-app from the args
-                bndr_rhs  = case scrut of
-                                Var _ -> scrut
-                                _     -> con_app
-                con_app = mkConApp dc (take n_drop_tys the_args ++ con_args)
-                con_args = [substExpr env' (varToCoreExpr b) | b <- bs]
-                                -- args are aready OutExprs, but bs are InIds
+                bndr_rhs | exprIsTrivial scrut = scrut
+                        | otherwise           = con_app
+                con_app = Var (dataConWorkId dc) 
+                          `mkTyApps` dc_ty_args
+                          `mkApps`   [substExpr env' (varToCoreExpr b) | b <- bs]
+                         -- dc_ty_args are aready OutTypes, but bs are InBndrs
  
          ; env'' <- simplNonRecX env' bndr bndr_rhs
          ; simplExprF env'' rhs cont }
  
          ; env'' <- simplNonRecX env' bndr bndr_rhs
          ; simplExprF env'' rhs cont }
@@ -1795,8 +1836,21 @@ knownAlt env scrut the_args bndr (DataAlt dc, bs, rhs) cont
             ; bind_args env'' bs' args }
  
      bind_args _ _ _ =
             ; bind_args env'' bs' args }
  
      bind_args _ _ _ =
-      pprPanic "bind_args" $ ppr dc $$ ppr bs $$ ppr the_args $$
+      pprPanic "bind_args" $ ppr dc $$ ppr bs $$ ppr dc_args $$
                               text "scrut:" <+> ppr scrut
                               text "scrut:" <+> ppr scrut
+
+-------------------
+missingAlt :: SimplEnv -> Id -> [InAlt] -> SimplCont -> SimplM (SimplEnv, OutExpr)
+               -- This isn't strictly an error, although it is unusual. 
+               -- It's possible that the simplifer might "see" that 
+               -- an inner case has no accessible alternatives before 
+               -- it "sees" that the entire branch of an outer case is 
+               -- inaccessible.  So we simply put an error case here instead.
+missingAlt env case_bndr alts cont
+  = WARN( True, ptext (sLit "missingAlt") <+> ppr case_bndr )
+    return (env, mkImpossibleExpr res_ty)
+  where
+    res_ty = contResultType env (substTy env (coreAltsType alts)) cont
  \end{code}
  
  
  \end{code}
  
  
@@ -1835,11 +1889,20 @@ mkDupableCont env (CoerceIt ty cont)
  
  mkDupableCont env cont@(StrictBind {})
    =  return (env, mkBoringStop, cont)
  
  mkDupableCont env cont@(StrictBind {})
    =  return (env, mkBoringStop, cont)
-        -- See Note [Duplicating strict continuations]
+        -- See Note [Duplicating StrictBind]
  
  
-mkDupableCont env cont@(StrictArg {})
-  =  return (env, mkBoringStop, cont)
-        -- See Note [Duplicating strict continuations]
+mkDupableCont env (StrictArg fun cci ai cont)
+        -- See Note [Duplicating StrictArg]
+  = do { (env', dup, nodup) <- mkDupableCont env cont
+       ; (env'', fun') <- mk_dupable_call env' fun
+       ; return (env'', StrictArg fun' cci ai dup, nodup) }
+  where
+    mk_dupable_call env (Var v)       = return (env, Var v)
+    mk_dupable_call env (App fun arg) = do { (env', fun') <- mk_dupable_call env fun
+                                           ; (env'', arg') <- makeTrivial env' arg
+                                           ; return (env'', fun' `App` arg') }
+    mk_dupable_call _ other = pprPanic "mk_dupable_call" (ppr other)
+       -- The invariant of StrictArg is that the first arg is always an App chain
  
  mkDupableCont env (ApplyTo _ arg se cont)
    =     -- e.g.         [...hole...] (...arg...)
  
  mkDupableCont env (ApplyTo _ arg se cont)
    =     -- e.g.         [...hole...] (...arg...)
@@ -1908,97 +1971,91 @@ mkDupableAlts env case_bndr' the_alts
  
  mkDupableAlt :: SimplEnv -> OutId -> (AltCon, [CoreBndr], CoreExpr)
                -> SimplM (SimplEnv, (AltCon, [CoreBndr], CoreExpr))
  
  mkDupableAlt :: SimplEnv -> OutId -> (AltCon, [CoreBndr], CoreExpr)
                -> SimplM (SimplEnv, (AltCon, [CoreBndr], CoreExpr))
-mkDupableAlt env case_bndr1 (con, bndrs1, rhs1)
-  | exprIsDupable rhs1  -- Note [Small alternative rhs]
-  = return (env, (con, bndrs1, rhs1))
+mkDupableAlt env case_bndr (con, bndrs', rhs')
+  | exprIsDupable rhs'  -- Note [Small alternative rhs]
+  = return (env, (con, bndrs', rhs'))
    | otherwise
    | otherwise
-  = do  { let abstract_over bndr
+  = do  { let rhs_ty'  = exprType rhs'
+             scrut_ty = idType case_bndr
+             case_bndr_w_unf   
+                = case con of 
+                     DEFAULT    -> case_bndr                                   
+                     DataAlt dc -> setIdUnfolding case_bndr unf
+                         where
+                                -- See Note [Case binders and join points]
+                            unf = mkInlineRule InlSat rhs 0
+                            rhs = mkConApp dc (map Type (tyConAppArgs scrut_ty)
+                                               ++ varsToCoreExprs bndrs')
+
+                     LitAlt {} -> WARN( True, ptext (sLit "mkDupableAlt")
+                                               <+> ppr case_bndr <+> ppr con )
+                                  case_bndr
+                          -- The case binder is alive but trivial, so why has 
+                          -- it not been substituted away?
+
+              used_bndrs' | isDeadBinder case_bndr = filter abstract_over bndrs'
+                         | otherwise              = bndrs' ++ [case_bndr_w_unf]
+             
+              abstract_over bndr
                    | isTyVar bndr = True -- Abstract over all type variables just in case
                    | otherwise    = not (isDeadBinder bndr)
                          -- The deadness info on the new Ids is preserved by simplBinders
  
                    | isTyVar bndr = True -- Abstract over all type variables just in case
                    | otherwise    = not (isDeadBinder bndr)
                          -- The deadness info on the new Ids is preserved by simplBinders
  
-              inst_tys1 = tyConAppArgs (idType case_bndr1)
-              con_app dc = mkConApp dc (map Type inst_tys1 ++ varsToCoreExprs bndrs1)
-
-             (rhs2, final_bndrs)   -- See Note [Passing the case binder to join points]
-                | isDeadBinder case_bndr1
-                = (rhs1, filter abstract_over bndrs1)
-                | opt_PassCaseBndrToJoinPoints, not (null bndrs1)
-                = (rhs1, (case_bndr1 : filter abstract_over bndrs1))
-                | otherwise 
-                 = case con of
-                    DataAlt dc -> (Let (NonRec case_bndr1 (con_app dc)) rhs1, bndrs1)
-                    LitAlt lit -> ASSERT( null bndrs1 ) (Let (NonRec case_bndr1 (Lit lit)) rhs1, [])
-                    DEFAULT    -> ASSERT( null bndrs1 ) (rhs1, [case_bndr1])
-
-        ; (final_bndrs1, final_args)    -- Note [Join point abstraction]
-                <- if (any isId final_bndrs)
-                   then return (final_bndrs, varsToCoreExprs final_bndrs)
+        ; (final_bndrs', final_args)    -- Note [Join point abstraction]
+                <- if (any isId used_bndrs')
+                   then return (used_bndrs', varsToCoreExprs used_bndrs')
                      else do { rw_id <- newId (fsLit "w") realWorldStatePrimTy
                      else do { rw_id <- newId (fsLit "w") realWorldStatePrimTy
-                            ; return (rw_id : final_bndrs,  
-                                     Var realWorldPrimId : varsToCoreExprs final_bndrs) }
+                            ; return ([rw_id], [Var realWorldPrimId]) }
  
  
-        ; let rhs_ty1 = exprType rhs1
-        ; join_bndr <- newId (fsLit "$j") (mkPiTypes final_bndrs1 rhs_ty1)
+        ; join_bndr <- newId (fsLit "$j") (mkPiTypes final_bndrs' rhs_ty')
                  -- Note [Funky mkPiTypes]
  
          ; let   -- We make the lambdas into one-shot-lambdas.  The
                  -- join point is sure to be applied at most once, and doing so
                  -- prevents the body of the join point being floated out by
                  -- the full laziness pass
                  -- Note [Funky mkPiTypes]
  
          ; let   -- We make the lambdas into one-shot-lambdas.  The
                  -- join point is sure to be applied at most once, and doing so
                  -- prevents the body of the join point being floated out by
                  -- the full laziness pass
-                really_final_bndrs     = map one_shot final_bndrs1
+                really_final_bndrs     = map one_shot final_bndrs'
                  one_shot v | isId v    = setOneShotLambda v
                             | otherwise = v
                  one_shot v | isId v    = setOneShotLambda v
                             | otherwise = v
-                join_rhs  = mkLams really_final_bndrs rhs2
+                join_rhs  = mkLams really_final_bndrs rhs'
                  join_call = mkApps (Var join_bndr) final_args
  
                  join_call = mkApps (Var join_bndr) final_args
  
-       ; env1 <- addPolyBind NotTopLevel env (NonRec join_bndr join_rhs)
-        ; return (env1, (con, bndrs1, join_call)) }
+       ; env' <- addPolyBind NotTopLevel env (NonRec join_bndr join_rhs)
+        ; return (env', (con, bndrs', join_call)) }
                  -- See Note [Duplicated env]
  \end{code}
  
                  -- See Note [Duplicated env]
  \end{code}
  
-Note [Passing the case binder to join points]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Suppose we have
-   case e of cb { C1 -> r1[cb]; C2 x y z -> r2[cb,x] }
-and we want to make join points for the two alternatives, 
-which mention the case binder 'cb'.  Should we pass 'cb' to
-the join point, or reconstruct it? Here are the two alternatives 
-for the C2 alternative:
+Note [Case binders and join points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this 
+   case (case .. ) of c {
+     I# c# -> ....c....
+
+If we make a join point with c but not c# we get
+  $j = \c -> ....c....
  
  
-  Plan A(pass cb):         j2 cb x = r2[cb,x]
+But if later inlining scrutines the c, thus
  
  
-  Plan B(reconstruct cb):  j2 x y z = let cb = C2 x y z in r2[cb,x]
+  $j = \c -> ... case c of { I# y -> ... } ...
  
  
-The advantge of Plan B is that we can "see" the definition of cb
-in r2, and that may be important when we inline stuff in r2.  The
-disadvantage is that if this optimisation doesn't happen, we end up
-re-allocating C2, when it already exists.  This does happen occasionally;
-an example is the function nofib/spectral/cichelli/Auxil.$whinsert.
+we won't see that 'c' has already been scrutinised.  This actually
+happens in the 'tabulate' function in wave4main, and makes a significant
+difference to allocation.
  
  
-Plan B is always better if the constructor is nullary.
+An alternative plan is this:
  
  
-In both cases we don't have liveness info for cb on a branch-by-branch
-basis, and it's possible that 'cb' is used in some branches but not
-others.  Well, the absence analyser will find that out later, so it's
-not too bad.
+   $j = \c# -> let c = I# c# in ...c....
  
  
-Sadly, at the time of writing, neither choice seems an unequivocal
-win. Here are nofib results, for adding -fpass-case-bndr-to-join-points
-(all others are zero effect):
+but that is bad if 'c' is *not* later scrutinised.  
  
  
-        Program           Size    Allocs   Runtime   Elapsed
---------------------------------------------------------------------------------
-       cichelli          +0.0%     -4.4%      0.13      0.13
-            pic          +0.0%     -0.7%      0.01      0.04
-      transform          -0.0%     +2.8%     -0.4%     -9.1%
-      wave4main          +0.0%    +10.5%     +3.1%     +3.4%
---------------------------------------------------------------------------------
-            Min          -0.0%     -4.4%     -7.0%    -31.9%
-            Max          +0.1%    +10.5%     +3.1%    +15.0%
- Geometric Mean          +0.0%     +0.1%     -1.7%     -6.1%
+So instead we do both: we pass 'c' and 'c#' , and record in c's inlining
+that it's really I# c#, thus
+   
+   $j = \c# -> \c[=I# c#] -> ...c....
  
  
+Absence analysis may later discard 'c'.
  
  
+   
  Note [Duplicated env]
  ~~~~~~~~~~~~~~~~~~~~~
  Some of the alternatives are simplified, but have not been turned into a join point
  Note [Duplicated env]
  ~~~~~~~~~~~~~~~~~~~~~
  Some of the alternatives are simplified, but have not been turned into a join point
@@ -2008,7 +2065,7 @@ we'd lose that when zapping the subst-env.  We could have a per-alt subst-env,
  but zapping it (as we do in mkDupableCont, the Select case) is safe, and
  at worst delays the join-point inlining.
  
  but zapping it (as we do in mkDupableCont, the Select case) is safe, and
  at worst delays the join-point inlining.
  
-Note [Small alterantive rhs]
+Note [Small alternative rhs]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  It is worth checking for a small RHS because otherwise we
  get extra let bindings that may cause an extra iteration of the simplifier to
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  It is worth checking for a small RHS because otherwise we
  get extra let bindings that may cause an extra iteration of the simplifier to
@@ -2077,32 +2134,71 @@ It's a bit silly to add the realWorld dummy arg in this case, making
             True -> $j s
  (the \v alone is enough to make CPR happy) but I think it's rare
  
             True -> $j s
  (the \v alone is enough to make CPR happy) but I think it's rare
  
-Note [Duplicating strict continuations]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Do *not* duplicate StrictBind and StritArg continuations.  We gain
-nothing by propagating them into the expressions, and we do lose a
-lot.  Here's an example:
-        && (case x of { T -> F; F -> T }) E
+Note [Duplicating StrictArg]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The original plan had (where E is a big argument)
+e.g.    f E [..hole..]
+        ==>     let $j = \a -> f E a
+                in $j [..hole..]
+
+But this is terrible! Here's an example:
+        && E (case x of { T -> F; F -> T })
  Now, && is strict so we end up simplifying the case with
  an ArgOf continuation.  If we let-bind it, we get
  Now, && is strict so we end up simplifying the case with
  an ArgOf continuation.  If we let-bind it, we get
-
-        let $j = \v -> && v E
+        let $j = \v -> && E v
          in simplExpr (case x of { T -> F; F -> T })
                       (ArgOf (\r -> $j r)
  And after simplifying more we get
          in simplExpr (case x of { T -> F; F -> T })
                       (ArgOf (\r -> $j r)
  And after simplifying more we get
-
-        let $j = \v -> && v E
+        let $j = \v -> && E v
          in case x of { T -> $j F; F -> $j T }
  Which is a Very Bad Thing
  
          in case x of { T -> $j F; F -> $j T }
  Which is a Very Bad Thing
  
+What we do now is this
+       f E [..hole..]
+       ==>     let a = E
+               in f a [..hole..]
+Now if the thing in the hole is a case expression (which is when
+we'll call mkDupableCont), we'll push the function call into the
+branches, which is what we want.  Now RULES for f may fire, and
+call-pattern specialisation.  Here's an example from Trac #3116
+     go (n+1) (case l of
+                1  -> bs'
+                _  -> Chunk p fpc (o+1) (l-1) bs')
+If we can push the call for 'go' inside the case, we get
+call-pattern specialisation for 'go', which is *crucial* for 
+this program.
+
+Here is the (&&) example: 
+        && E (case x of { T -> F; F -> T })
+  ==>   let a = E in 
+        case x of { T -> && a F; F -> && a T }
+Much better!
+
+Notice that 
+  * Arguments to f *after* the strict one are handled by 
+    the ApplyTo case of mkDupableCont.  Eg
+       f [..hole..] E
+
+  * We can only do the let-binding of E because the function
+    part of a StrictArg continuation is an explicit syntax
+    tree.  In earlier versions we represented it as a function
+    (CoreExpr -> CoreEpxr) which we couldn't take apart.
+
+Do *not* duplicate StrictBind and StritArg continuations.  We gain
+nothing by propagating them into the expressions, and we do lose a
+lot.  
+
+The desire not to duplicate is the entire reason that
+mkDupableCont returns a pair of continuations.
+
+Note [Duplicating StrictBind]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Unlike StrictArg, there doesn't seem anything to gain from
+duplicating a StrictBind continuation, so we don't.
+
  The desire not to duplicate is the entire reason that
  mkDupableCont returns a pair of continuations.
  
  The desire not to duplicate is the entire reason that
  mkDupableCont returns a pair of continuations.
  
-The original plan had:
-e.g.    (...strict-fn...) [...hole...]
-        ==>
-                let $j = \a -> ...strict-fn...
-                in $j [...hole...]
  
  Note [Single-alternative cases]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  
  Note [Single-alternative cases]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~