X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FsimplCore%2FSimplUtils.lhs;h=56d2795e310d9db0f200e5e3432ee0d2abde31bc;hp=c87e1fcd01cd2b4c86f5cb79fe6711c48230f955;hb=b84ba676034763b3082bbd9405794a4fde499d14;hpb=01b453a5c3608f52707ee55374ca50cb592f567d

diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs
index c87e1fc..56d2795 100644
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -6,17 +6,17 @@
 \begin{code}
 module SimplUtils (
 	-- Rebuilding
-	mkLam, mkCase, prepareAlts, bindCaseBndr,
+	mkLam, mkCase, prepareAlts, 
 
 	-- Inlining,
 	preInlineUnconditionally, postInlineUnconditionally, 
-	activeInline, activeRule, 
+	activeUnfolding, activeUnfInRule, activeRule, 
         simplEnvForGHCi, simplEnvForRules, updModeForInlineRules,
 
 	-- The continuation type
 	SimplCont(..), DupFlag(..), ArgInfo(..),
 	contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, 
-	countValArgs, countArgs, 
+	pushArgs, countValArgs, countArgs, addArgTo,
 	mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
 	interestingCallContext, 
 
@@ -40,7 +40,7 @@ import CoreUnfold
 import Name
 import Id
 import Var	( isCoVar )
-import NewDemand
+import Demand
 import SimplMonad
 import Type	hiding( substTy )
 import Coercion ( coercionKind )
@@ -99,44 +99,53 @@ data SimplCont
 
   | ApplyTo  		-- C arg
 	DupFlag 
-	InExpr SimplEnv		-- The argument and its static env
+	InExpr StaticEnv		-- The argument and its static env
 	SimplCont
 
   | Select   		-- case C of alts
 	DupFlag 
-	InId [InAlt] SimplEnv	-- The case binder, alts, and subst-env
+	InId [InAlt] StaticEnv	-- The case binder, alts, and subst-env
 	SimplCont
 
   -- The two strict forms have no DupFlag, because we never duplicate them
   | StrictBind 		-- (\x* \xs. e) C
 	InId [InBndr]		-- let x* = [] in e 	
-	InExpr SimplEnv		--	is a special case 
+	InExpr StaticEnv	--	is a special case 
 	SimplCont	
 
-  | StrictArg 		-- e C
-	OutExpr			-- e; *always* of form (Var v `App1` e1 .. `App` en)
-	CallCtxt		-- Whether *this* argument position is interesting
- 	ArgInfo			-- Whether the function at the head of e has rules, etc
-	SimplCont		--     plus strictness flags for *further* args
+  | StrictArg 		-- f e1 ..en C
+ 	ArgInfo		-- Specifies f, e1..en, Whether f has rules, etc
+			--     plus strictness flags for *further* args
+        CallCtxt        -- Whether *this* argument position is interesting
+	SimplCont		
 
 data ArgInfo 
   = ArgInfo {
-	ai_rules :: Bool,	-- Function has rules (recursively)
-				--	=> be keener to inline in all args
-	ai_strs :: [Bool],	-- Strictness of arguments
+        ai_fun   :: Id,		-- The function
+	ai_args  :: [OutExpr],	-- ...applied to these args (which are in *reverse* order)
+	ai_rules :: [CoreRule],	-- Rules for this function
+
+	ai_encl :: Bool,	-- Flag saying whether this function 
+				-- or an enclosing one has rules (recursively)
+				--	True => be keener to inline in all args
+	
+	ai_strs :: [Bool],	-- Strictness of remaining arguments
 				--   Usually infinite, but if it is finite it guarantees
 				--   that the function diverges after being given
 				--   that number of args
-	ai_discs :: [Int]	-- Discounts for arguments; non-zero => be keener to inline
+	ai_discs :: [Int]	-- Discounts for remaining arguments; non-zero => be keener to inline
 				--   Always infinite
     }
 
+addArgTo :: ArgInfo -> OutExpr -> ArgInfo
+addArgTo ai arg = ai { ai_args = arg : ai_args ai }
+
 instance Outputable SimplCont where
   ppr (Stop interesting)    	     = ptext (sLit "Stop") <> brackets (ppr interesting)
   ppr (ApplyTo dup arg _ cont)       = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg)
 					  {-  $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
   ppr (StrictBind b _ _ _ cont)      = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
-  ppr (StrictArg f _ _ cont)         = (ptext (sLit "StrictArg") <+> ppr f) $$ ppr cont
+  ppr (StrictArg ai _ cont)          = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
   ppr (Select dup bndr alts _ cont)  = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$ 
 				       (nest 4 (ppr alts)) $$ ppr cont 
   ppr (CoerceIt co cont)	     = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
@@ -191,13 +200,17 @@ contResultType env ty cont
     go (Stop {})                      ty = ty
     go (CoerceIt co cont)             _  = go cont (snd (coercionKind co))
     go (StrictBind _ bs body se cont) _  = go cont (subst_ty se (exprType (mkLams bs body)))
-    go (StrictArg fn _ _ cont)        _  = go cont (funResultTy (exprType fn))
+    go (StrictArg ai _ cont)          _  = go cont (funResultTy (argInfoResultTy ai))
     go (Select _ _ alts se cont)      _  = go cont (subst_ty se (coreAltsType alts))
     go (ApplyTo _ arg se cont)        ty = go cont (apply_to_arg ty arg se)
 
     apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
     apply_to_arg ty _             _  = funResultTy ty
 
+argInfoResultTy :: ArgInfo -> OutType
+argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args })
+  = foldr (\arg fn_ty -> applyTypeToArg fn_ty arg) (idType fun) args
+
 -------------------
 countValArgs :: SimplCont -> Int
 countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
@@ -215,6 +228,10 @@ contArgs cont = go [] cont
     go args (ApplyTo _ arg se cont) = go (substExpr se arg : args) cont
     go args cont		    = (reverse args, cont)
 
+pushArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont
+pushArgs _env []         cont = cont
+pushArgs env  (arg:args) cont = ApplyTo NoDup arg env (pushArgs env args cont)
+
 dropArgs :: Int -> SimplCont -> SimplCont
 dropArgs 0 cont = cont
 dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
@@ -275,10 +292,10 @@ interestingCallContext cont
 					-- motivation to inline. See Note [Cast then apply]
 					-- in CoreUnfold
 
-    interesting (StrictArg _ cci _ _)	= cci
-    interesting (StrictBind {})	  	= BoringCtxt
-    interesting (Stop cci)   		= cci
-    interesting (CoerceIt _ cont) 	= interesting cont
+    interesting (StrictArg _ cci _) = cci
+    interesting (StrictBind {})	    = BoringCtxt
+    interesting (Stop cci)   	    = cci
+    interesting (CoerceIt _ cont)   = interesting cont
 	-- If this call is the arg of a strict function, the context
 	-- is a bit interesting.  If we inline here, we may get useful
 	-- evaluation information to avoid repeated evals: e.g.
@@ -304,18 +321,20 @@ mkArgInfo :: Id
 
 mkArgInfo fun rules n_val_args call_cont
   | n_val_args < idArity fun		-- Note [Unsaturated functions]
-  = ArgInfo { ai_rules = False
+  = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules
+            , ai_encl = False
 	    , ai_strs = vanilla_stricts 
 	    , ai_discs = vanilla_discounts }
   | otherwise
-  = ArgInfo { ai_rules = interestingArgContext rules call_cont
+  = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules
+            , ai_encl = interestingArgContext rules call_cont
 	    , ai_strs  = add_type_str (idType fun) arg_stricts
 	    , ai_discs = arg_discounts }
   where
     vanilla_discounts, arg_discounts :: [Int]
     vanilla_discounts = repeat 0
     arg_discounts = case idUnfolding fun of
-			CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_args = discounts}}
+			CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
 			      -> discounts ++ vanilla_discounts
 			_     -> vanilla_discounts
 
@@ -323,7 +342,7 @@ mkArgInfo fun rules n_val_args call_cont
     vanilla_stricts  = repeat False
 
     arg_stricts
-      = case splitStrictSig (idNewStrictness fun) of
+      = case splitStrictSig (idStrictness fun) of
 	  (demands, result_info)
 		| not (demands `lengthExceeds` n_val_args)
 		-> 	-- Enough args, use the strictness given.
@@ -392,12 +411,12 @@ interestingArgContext rules call_cont
   where
     enclosing_fn_has_rules = go call_cont
 
-    go (Select {})	     = False
-    go (ApplyTo {})	     = False
-    go (StrictArg _ cci _ _) = interesting cci
-    go (StrictBind {})	     = False	-- ??
-    go (CoerceIt _ c)	     = go c
-    go (Stop cci)            = interesting cci
+    go (Select {})	   = False
+    go (ApplyTo {})	   = False
+    go (StrictArg _ cci _) = interesting cci
+    go (StrictBind {})	   = False	-- ??
+    go (CoerceIt _ c)	   = go c
+    go (Stop cci)          = interesting cci
 
     interesting (ArgCtxt rules) = rules
     interesting _               = False
@@ -616,11 +635,18 @@ let-float if you inline windowToViewport
 However, as usual for Gentle mode, do not inline things that are
 inactive in the intial stages.  See Note [Gentle mode].
 
+Note [Top-level botomming Ids]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Don't inline top-level Ids that are bottoming, even if they are used just
+once, because FloatOut has gone to some trouble to extract them out.
+Inlining them won't make the program run faster!
+
 \begin{code}
 preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
 preInlineUnconditionally env top_lvl bndr rhs
-  | not active 		   = False
-  | opt_SimplNoPreInlining = False
+  | not active 		                     = False
+  | isTopLevel top_lvl && isBottomingId bndr = False	-- Note [Top-level bottoming Ids]
+  | opt_SimplNoPreInlining                   = False
   | otherwise = case idOccInfo bndr of
 		  IAmDead	     	     -> True	-- Happens in ((\x.1) v)
 	  	  OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
@@ -632,12 +658,11 @@ preInlineUnconditionally env top_lvl bndr rhs
 			-- See Note [pre/postInlineUnconditionally in gentle mode]
 		   SimplPhase n _ -> isActive n act
     act = idInlineActivation bndr
-
     try_once in_lam int_cxt	-- There's one textual occurrence
 	| not in_lam = isNotTopLevel top_lvl || early_phase
 	| otherwise  = int_cxt && canInlineInLam rhs
 
--- Be very careful before inlining inside a lambda, becuase (a) we must not 
+-- Be very careful before inlining inside a lambda, because (a) we must not 
 -- invalidate occurrence information, and (b) we want to avoid pushing a
 -- single allocation (here) into multiple allocations (inside lambda).  
 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
@@ -720,12 +745,13 @@ postInlineUnconditionally
     -> Unfolding
     -> Bool
 postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
-  | not active		   = False
-  | isLoopBreaker occ_info = False	-- If it's a loop-breaker of any kind, don't inline
+  | not active		        = False
+  | isLoopBreaker occ_info      = False	-- If it's a loop-breaker of any kind, don't inline
 					-- because it might be referred to "earlier"
-  | isExportedId bndr      = False
-  | isInlineRule unfolding = False	-- Note [InlineRule and postInlineUnconditionally]
-  | exprIsTrivial rhs 	   = True
+  | isExportedId bndr           = False
+  | isStableUnfolding unfolding = False	-- Note [InlineRule and postInlineUnconditionally]
+  | exprIsTrivial rhs 	        = True
+  | isTopLevel top_lvl          = False	-- Note [Top level and postInlineUnconditionally]
   | otherwise
   = case occ_info of
 	-- The point of examining occ_info here is that for *non-values* 
@@ -738,7 +764,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 	--	case v of
 	--	   True  -> case x of ...
 	--	   False -> case x of ...
-	-- I'm not sure how important this is in practice
+	-- This is very important in practice; e.g. wheel-seive1 doubles 
+	-- in allocation if you miss this out
       OneOcc in_lam _one_br int_cxt	-- OneOcc => no code-duplication issue
 	->     smallEnoughToInline unfolding	-- Small enough to dup
 			-- ToDo: consider discount on smallEnoughToInline if int_cxt is true
@@ -751,8 +778,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 			-- PRINCIPLE: when we've already simplified an expression once, 
 			-- make sure that we only inline it if it's reasonably small.
 
-	   &&  ((isNotTopLevel top_lvl && not in_lam) || 
-			-- But outside a lambda, we want to be reasonably aggressive
+           && (not in_lam || 
+			-- Outside a lambda, we want to be reasonably aggressive
 			-- about inlining into multiple branches of case
 			-- e.g. let x = <non-value> 
 			--	in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... } 
@@ -791,27 +818,56 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 		   SimplPhase n _ -> isActive n act
     act = idInlineActivation bndr
 
-activeInline :: SimplEnv -> OutId -> Bool
-activeInline env id
-  | isNonRuleLoopBreaker (idOccInfo id)	  -- Things with an INLINE pragma may have 
-    			 	          -- an unfolding *and* be a loop breaker
-  = False				  -- (maybe the knot is not yet untied)
-  | otherwise
+activeUnfolding :: SimplEnv -> IdUnfoldingFun
+activeUnfolding env
   = case getMode env of
-      SimplGently { sm_inline = inlining_on } 
-         -> inlining_on && isEarlyActive act
-	-- See Note [Gentle mode]
-
-	-- NB: we used to have a second exception, for data con wrappers.
-	-- On the grounds that we use gentle mode for rule LHSs, and 
-	-- they match better when data con wrappers are inlined.
-	-- But that only really applies to the trivial wrappers (like (:)),
-	-- and they are now constructed as Compulsory unfoldings (in MkId)
-	-- so they'll happen anyway.
-
-      SimplPhase n _ -> isActive n act
+      SimplGently { sm_inline = False } -> active_unfolding_minimal
+      SimplGently { sm_inline = True  } -> active_unfolding_gentle
+      SimplPhase n _                    -> active_unfolding n
+
+activeUnfInRule :: SimplEnv -> IdUnfoldingFun
+-- When matching in RULE, we want to "look through" an unfolding
+-- if *rules* are on, even if *inlinings* are not.  A notable example
+-- is DFuns, which really we want to match in rules like (op dfun)
+-- in gentle mode.
+activeUnfInRule env
+  = case getMode env of
+      SimplGently { sm_rules = False } -> active_unfolding_minimal
+      SimplGently { sm_rules = True  } -> active_unfolding_gentle
+      SimplPhase n _                   -> active_unfolding n
+
+active_unfolding_minimal :: IdUnfoldingFun
+-- Compuslory unfoldings only
+-- Ignore SimplGently, because we want to inline regardless;
+-- the Id has no top-level binding at all
+--
+-- NB: we used to have a second exception, for data con wrappers.
+-- On the grounds that we use gentle mode for rule LHSs, and 
+-- they match better when data con wrappers are inlined.
+-- But that only really applies to the trivial wrappers (like (:)),
+-- and they are now constructed as Compulsory unfoldings (in MkId)
+-- so they'll happen anyway.
+active_unfolding_minimal id
+  | isCompulsoryUnfolding unf = unf
+  | otherwise                 = NoUnfolding
   where
-    act = idInlineActivation id
+    unf = realIdUnfolding id	-- Never a loop breaker
+
+active_unfolding_gentle :: IdUnfoldingFun
+-- Anything that is early-active
+-- See Note [Gentle mode]
+active_unfolding_gentle id
+  | isEarlyActive (idInlineActivation id) = idUnfolding id
+  | otherwise                             = NoUnfolding
+      -- idUnfolding checks for loop-breakers
+      -- Things with an INLINE pragma may have 
+      -- an unfolding *and* be a loop breaker  
+      -- (maybe the knot is not yet untied)
+
+active_unfolding :: CompilerPhase -> IdUnfoldingFun
+active_unfolding n id
+  | isActive n (idInlineActivation id) = idUnfolding id
+  | otherwise                          = NoUnfolding
 
 activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
 -- Nothing => No rules at all
@@ -826,6 +882,14 @@ activeRule dflags env
       SimplPhase n _ -> Just (isActive n)
 \end{code}
 
+Note [Top level and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do postInlineUnconditionally for top-level things (except
+ones that are trivial).  There is no point, because the main goal is
+to get rid of local bindings used in multiple case branches. And
+doing so risks replacing a single global allocation with local allocations.
+
+
 Note [InlineRule and postInlineUnconditionally]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
@@ -1277,83 +1341,61 @@ Historical note: if you use let-bindings instead of a substitution, beware of th
 
 prepareAlts tries these things:
 
-1.  If several alternatives are identical, merge them into
-    a single DEFAULT alternative.  I've occasionally seen this 
-    making a big difference:
+1.  Eliminate alternatives that cannot match, including the
+    DEFAULT alternative.
 
-	case e of		=====>     case e of
-	  C _ -> f x			     D v -> ....v....
-	  D v -> ....v....		     DEFAULT -> f x
-	  DEFAULT -> f x
+2.  If the DEFAULT alternative can match only one possible constructor,
+    then make that constructor explicit.
+    e.g.
+        case e of x { DEFAULT -> rhs }
+     ===>
+        case e of x { (a,b) -> rhs }
+    where the type is a single constructor type.  This gives better code
+    when rhs also scrutinises x or e.
 
-   The point is that we merge common RHSs, at least for the DEFAULT case.
-   [One could do something more elaborate but I've never seen it needed.]
-   To avoid an expensive test, we just merge branches equal to the *first*
-   alternative; this picks up the common cases
-	a) all branches equal
-	b) some branches equal to the DEFAULT (which occurs first)
+3. Returns a list of the constructors that cannot holds in the
+   DEFAULT alternative (if there is one)
 
-2.  Case merging:
-       case e of b {             ==>   case e of b {
-    	 p1 -> rhs1	                 p1 -> rhs1
-    	 ...	                         ...
-    	 pm -> rhsm                      pm -> rhsm
-    	 _  -> case b of b' {            pn -> let b'=b in rhsn
- 		     pn -> rhsn          ...
- 		     ...                 po -> let b'=b in rhso
- 		     po -> rhso          _  -> let b'=b in rhsd
- 		     _  -> rhsd
-       }  
-    
-    which merges two cases in one case when -- the default alternative of
-    the outer case scrutises the same variable as the outer case This
-    transformation is called Case Merging.  It avoids that the same
-    variable is scrutinised multiple times.
-
-
-The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs):
+Here "cannot match" includes knowledge from GADTs
 
-	x | p `is` 1 -> e1
-	  | p `is` 2 -> e2
-	...etc...
+It's a good idea do do this stuff before simplifying the alternatives, to
+avoid simplifying alternatives we know can't happen, and to come up with
+the list of constructors that are handled, to put into the IdInfo of the
+case binder, for use when simplifying the alternatives.
 
-where @is@ was something like
-	
-	p `is` n = p /= (-1) && p == n
+Eliminating the default alternative in (1) isn't so obvious, but it can
+happen:
 
-This gave rise to a horrible sequence of cases
+data Colour = Red | Green | Blue
 
-	case p of
-	  (-1) -> $j p
-	  1    -> e1
-	  DEFAULT -> $j p
+f x = case x of
+        Red -> ..
+        Green -> ..
+        DEFAULT -> h x
 
-and similarly in cascade for all the join points!
+h y = case y of
+        Blue -> ..
+        DEFAULT -> [ case y of ... ]
 
-Note [Dead binders]
-~~~~~~~~~~~~~~~~~~~~
-We do this *here*, looking at un-simplified alternatives, because we
-have to check that r doesn't mention the variables bound by the
-pattern in each alternative, so the binder-info is rather useful.
+If we inline h into f, the default case of the inlined h can't happen.
+If we don't notice this, we may end up filtering out *all* the cases
+of the inner case y, which give us nowhere to go!
 
 \begin{code}
-prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
-prepareAlts env scrut case_bndr' alts
-  = do	{ dflags <- getDOptsSmpl
-	; alts <- combineIdenticalAlts case_bndr' alts
-
-	; let (alts_wo_default, maybe_deflt) = findDefault alts
+prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
+prepareAlts scrut case_bndr' alts
+  = do	{ let (alts_wo_default, maybe_deflt) = findDefault alts
 	      alt_cons = [con | (con,_,_) <- alts_wo_default]
 	      imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
 		-- "imposs_deflt_cons" are handled 
 		--   EITHER by the context, 
 		--   OR by a non-DEFAULT branch in this case expression.
 
-	; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app 
+	; default_alts <- prepareDefault case_bndr' mb_tc_app 
 					 imposs_deflt_cons maybe_deflt
 
 	; let trimmed_alts = filterOut impossible_alt alts_wo_default
-	      merged_alts = mergeAlts trimmed_alts default_alts
+	      merged_alts  = mergeAlts trimmed_alts default_alts
 		-- We need the mergeAlts in case the new default_alt 
 		-- has turned into a constructor alternative.
 		-- The merge keeps the inner DEFAULT at the front, if there is one
@@ -1374,29 +1416,7 @@ prepareAlts env scrut case_bndr' alts
     impossible_alt _                   = False
 
 
---------------------------------------------------
---	1. Merge identical branches
---------------------------------------------------
-combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
-
-combineIdenticalAlts case_bndr ((_con1,bndrs1,rhs1) : con_alts)
-  | all isDeadBinder bndrs1,			-- Remember the default 
-    length filtered_alts < length con_alts	-- alternative comes first
-	-- Also Note [Dead binders]
-  = do	{ tick (AltMerge case_bndr)
-	; return ((DEFAULT, [], rhs1) : filtered_alts) }
-  where
-    filtered_alts	 = filter keep con_alts
-    keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
-
-combineIdenticalAlts _ alts = return alts
-
--------------------------------------------------------------------------
---			Prepare the default alternative
--------------------------------------------------------------------------
-prepareDefault :: DynFlags
-	       -> SimplEnv
-	       -> OutId		-- Case binder; need just for its type. Note that as an
+prepareDefault :: OutId		-- Case binder; need just for its type. Note that as an
 				--   OutId, it has maximum information; this is important.
 				--   Test simpl013 is an example
 	       -> Maybe (TyCon, [Type])	-- Type of scrutinee, decomposed
@@ -1404,42 +1424,9 @@ prepareDefault :: DynFlags
 	       -> Maybe InExpr	-- Rhs
 	       -> SimplM [InAlt]	-- Still unsimplified
 					-- We use a list because it's what mergeAlts expects,
-					-- And becuase case-merging can cause many to show up
-
--------	Merge nested cases ----------
-prepareDefault dflags env outer_bndr _bndr_ty imposs_cons (Just deflt_rhs)
-  | dopt Opt_CaseMerge dflags
-  , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
-  , DoneId inner_scrut_var' <- substId env inner_scrut_var
-	-- Remember, inner_scrut_var is an InId, but outer_bndr is an OutId
-  , inner_scrut_var' == outer_bndr
-	-- NB: the substId means that if the outer scrutinee was a 
-	--     variable, and inner scrutinee is the same variable, 
-	--     then inner_scrut_var' will be outer_bndr
-	--     via the magic of simplCaseBinder
-  = do	{ tick (CaseMerge outer_bndr)
-
-	; let munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
-	; return [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts,
-					       not (con `elem` imposs_cons) ]
-		-- NB: filter out any imposs_cons.  Example:
-		--	case x of 
-		--	  A -> e1
-		--	  DEFAULT -> case x of 
-		--			A -> e2
-		--			B -> e3
-		-- When we merge, we must ensure that e1 takes 
-		-- precedence over e2 as the value for A!  
-	}
-    	-- Warning: don't call prepareAlts recursively!
-    	-- Firstly, there's no point, because inner alts have already had
-    	-- mkCase applied to them, so they won't have a case in their default
-    	-- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
-    	-- in munge_rhs may put a case into the DEFAULT branch!
-
 
 --------- Fill in known constructor -----------
-prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
+prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
   | 	-- This branch handles the case where we are 
 	-- scrutinisng an algebraic data type
     isAlgTyCon tycon		-- It's a data type, tuple, or unboxed tuples.  
@@ -1458,7 +1445,7 @@ prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rh
 				-- which would be quite legitmate.  But it's a really obscure corner, and
 				-- not worth wasting code on.
   , let imposs_data_cons = [con | DataAlt con <- imposs_cons]	-- We now know it's a data type 
-	impossible con  = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
+	impossible con   = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
   = case filterOut impossible all_cons of
 	[]    -> return []	-- Eliminate the default alternative
 				-- altogether if it can't match
@@ -1473,27 +1460,48 @@ prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rh
 	_ -> return [(DEFAULT, [], deflt_rhs)]
 
   | debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon)
-	-- This can legitimately happen for type families, so don't report that
+	-- Check for no data constructors
+        -- This can legitimately happen for type families, so don't report that
   = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
         $ return [(DEFAULT, [], deflt_rhs)]
 
 --------- Catch-all cases -----------
-prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
+prepareDefault _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
   = return [(DEFAULT, [], deflt_rhs)]
 
-prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons Nothing
+prepareDefault _case_bndr _bndr_ty _imposs_cons Nothing
   = return []	-- No default branch
 \end{code}
 
 
 
-=================================================================================
+%************************************************************************
+%*									*
+		mkCase
+%*									*
+%************************************************************************
 
 mkCase tries these things
 
-1.  Eliminate the case altogether if possible
+1.  Merge Nested Cases
+
+       case e of b {             ==>   case e of b {
+    	 p1 -> rhs1	                 p1 -> rhs1
+    	 ...	                         ...
+    	 pm -> rhsm                      pm -> rhsm
+    	 _  -> case b of b' {            pn -> let b'=b in rhsn
+ 		     pn -> rhsn          ...
+ 		     ...                 po -> let b'=b in rhso
+ 		     po -> rhso          _  -> let b'=b in rhsd
+ 		     _  -> rhsd
+       }  
+    
+    which merges two cases in one case when -- the default alternative of
+    the outer case scrutises the same variable as the outer case. This
+    transformation is called Case Merging.  It avoids that the same
+    variable is scrutinised multiple times.
 
-2.  Case-identity:
+2.  Eliminate Identity Case
 
 	case e of 		===> e
 		True  -> True;
@@ -1501,19 +1509,99 @@ mkCase tries these things
 
     and similar friends.
 
+3.  Merge identical alternatives.
+    If several alternatives are identical, merge them into
+    a single DEFAULT alternative.  I've occasionally seen this 
+    making a big difference:
+
+	case e of		=====>     case e of
+	  C _ -> f x			     D v -> ....v....
+	  D v -> ....v....		     DEFAULT -> f x
+	  DEFAULT -> f x
+
+   The point is that we merge common RHSs, at least for the DEFAULT case.
+   [One could do something more elaborate but I've never seen it needed.]
+   To avoid an expensive test, we just merge branches equal to the *first*
+   alternative; this picks up the common cases
+	a) all branches equal
+	b) some branches equal to the DEFAULT (which occurs first)
+
+The case where Merge Identical Alternatives transformation showed up
+was like this (base/Foreign/C/Err/Error.lhs):
+
+	x | p `is` 1 -> e1
+	  | p `is` 2 -> e2
+	...etc...
+
+where @is@ was something like
+	
+	p `is` n = p /= (-1) && p == n
+
+This gave rise to a horrible sequence of cases
+
+	case p of
+	  (-1) -> $j p
+	  1    -> e1
+	  DEFAULT -> $j p
+
+and similarly in cascade for all the join points!
+
 
 \begin{code}
-mkCase :: OutExpr -> OutId -> [OutAlt]	-- Increasing order
-       -> SimplM OutExpr
+mkCase, mkCase1, mkCase2 
+   :: DynFlags 
+   -> OutExpr -> OutId
+   -> [OutAlt]		-- Alternatives in standard (increasing) order
+   -> SimplM OutExpr
 
 --------------------------------------------------
---	2. Identity case
+--	1. Merge Nested Cases
 --------------------------------------------------
 
-mkCase scrut case_bndr alts	-- Identity case
+mkCase dflags scrut outer_bndr ((DEFAULT, _, deflt_rhs) : outer_alts)
+  | dopt Opt_CaseMerge dflags
+  , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
+  , inner_scrut_var == outer_bndr
+  = do	{ tick (CaseMerge outer_bndr)
+
+	; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
+                                          (con, args, wrap_rhs rhs)
+		-- Simplifier's no-shadowing invariant should ensure
+		-- that outer_bndr is not shadowed by the inner patterns
+              wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
+		-- The let is OK even for unboxed binders, 
+
+	      wrapped_alts | isDeadBinder inner_bndr = inner_alts
+                           | otherwise               = map wrap_alt inner_alts
+
+	      merged_alts = mergeAlts outer_alts wrapped_alts
+		-- NB: mergeAlts gives priority to the left
+		--	case x of 
+		--	  A -> e1
+		--	  DEFAULT -> case x of 
+		--			A -> e2
+		--			B -> e3
+		-- When we merge, we must ensure that e1 takes 
+		-- precedence over e2 as the value for A!  
+
+	; mkCase1 dflags scrut outer_bndr merged_alts
+	}
+    	-- Warning: don't call mkCase recursively!
+    	-- Firstly, there's no point, because inner alts have already had
+    	-- mkCase applied to them, so they won't have a case in their default
+    	-- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
+    	-- in munge_rhs may put a case into the DEFAULT branch!
+
+mkCase dflags scrut bndr alts = mkCase1 dflags scrut bndr alts
+
+--------------------------------------------------
+--	2. Eliminate Identity Case
+--------------------------------------------------
+
+mkCase1 _dflags scrut case_bndr alts	-- Identity case
   | all identity_alt alts
-  = do tick (CaseIdentity case_bndr)
-       return (re_cast scrut)
+  = do { tick (CaseIdentity case_bndr)
+       ; return (re_cast scrut) }
   where
     identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
 
@@ -1541,22 +1629,93 @@ mkCase scrut case_bndr alts	-- Identity case
 			(_,_,Cast _ co) -> Cast scrut co
 			_    	        -> scrut
 
+--------------------------------------------------
+--	3. Merge Identical Alternatives
+--------------------------------------------------
+mkCase1 dflags scrut case_bndr ((_con1,bndrs1,rhs1) : con_alts)
+  | all isDeadBinder bndrs1			-- Remember the default 
+  , length filtered_alts < length con_alts	-- alternative comes first
+	-- Also Note [Dead binders]
+  = do	{ tick (AltMerge case_bndr)
+	; mkCase2 dflags scrut case_bndr alts' }
+  where
+    alts' = (DEFAULT, [], rhs1) : filtered_alts
+    filtered_alts	  = filter keep con_alts
+    keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
 
+mkCase1 dflags scrut bndr alts = mkCase2 dflags scrut bndr alts
 
 --------------------------------------------------
 --	Catch-all
 --------------------------------------------------
-mkCase scrut bndr alts = return (Case scrut bndr (coreAltsType alts) alts)
+mkCase2 _dflags scrut bndr alts 
+  = return (Case scrut bndr (coreAltsType alts) alts)
 \end{code}
 
-
-When adding auxiliary bindings for the case binder, it's worth checking if
-its dead, because it often is, and occasionally these mkCase transformations
-cascade rather nicely.
-
-\begin{code}
-bindCaseBndr :: Id -> CoreExpr -> CoreExpr -> CoreExpr
-bindCaseBndr bndr rhs body
-  | isDeadBinder bndr = body
-  | otherwise         = bindNonRec bndr rhs body
-\end{code}
+Note [Dead binders]
+~~~~~~~~~~~~~~~~~~~~
+Note that dead-ness is maintained by the simplifier, so that it is
+accurate after simplification as well as before.
+
+
+Note [Cascading case merge]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Case merging should cascade in one sweep, because it
+happens bottom-up
+
+      case e of a {
+        DEFAULT -> case a of b 
+                      DEFAULT -> case b of c {
+                                     DEFAULT -> e
+                                     A -> ea
+                      B -> eb
+        C -> ec
+==>
+      case e of a {
+        DEFAULT -> case a of b 
+                      DEFAULT -> let c = b in e
+                      A -> let c = b in ea
+                      B -> eb
+        C -> ec
+==>
+      case e of a {
+        DEFAULT -> let b = a in let c = b in e
+        A -> let b = a in let c = b in ea
+        B -> let b = a in eb
+        C -> ec
+
+
+However here's a tricky case that we still don't catch, and I don't
+see how to catch it in one pass:
+
+  case x of c1 { I# a1 ->
+  case a1 of c2 ->
+    0 -> ...
+    DEFAULT -> case x of c3 { I# a2 ->
+               case a2 of ...
+
+After occurrence analysis (and its binder-swap) we get this
+ 
+  case x of c1 { I# a1 -> 
+  let x = c1 in		-- Binder-swap addition
+  case a1 of c2 -> 
+    0 -> ...
+    DEFAULT -> case x of c3 { I# a2 ->
+               case a2 of ...
+
+When we simplify the inner case x, we'll see that
+x=c1=I# a1.  So we'll bind a2 to a1, and get
+
+  case x of c1 { I# a1 -> 
+  case a1 of c2 -> 
+    0 -> ...
+    DEFAULT -> case a1 of ...
+
+This is corect, but we can't do a case merge in this sweep
+because c2 /= a1.  Reason: the binding c1=I# a1 went inwards
+without getting changed to c1=I# c2.  
+
+I don't think this is worth fixing, even if I knew how. It'll
+all come out in the next pass anyway.
+
+  
\ No newline at end of file