X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FsimplCore%2FSimplUtils.lhs;h=56d2795e310d9db0f200e5e3432ee0d2abde31bc;hp=1e510421e72a1c94632f5c100e6faadc5557f97f;hb=b84ba676034763b3082bbd9405794a4fde499d14;hpb=f2dcf256399e9a2de6343c625630b51f8abf4863

diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs
index 1e51042..56d2795 100644
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -5,297 +5,242 @@
 
 \begin{code}
 module SimplUtils (
-	mkLam, mkCase,
+	-- Rebuilding
+	mkLam, mkCase, prepareAlts, 
 
 	-- Inlining,
-	preInlineUnconditionally, postInlineUnconditionally, activeInline, activeRule,
-	inlineMode,
+	preInlineUnconditionally, postInlineUnconditionally, 
+	activeUnfolding, activeUnfInRule, activeRule, 
+        simplEnvForGHCi, simplEnvForRules, updModeForInlineRules,
 
 	-- The continuation type
-	SimplCont(..), DupFlag(..), LetRhsFlag(..), 
-	contIsDupable, contResultType,
-	countValArgs, countArgs, pushContArgs,
-	mkBoringStop, mkLazyArgStop, mkRhsStop, contIsRhs, contIsRhsOrArg,
-	getContArgs, interestingCallContext, interestingArgContext,
-	interestingArg, isStrictType
+	SimplCont(..), DupFlag(..), ArgInfo(..),
+	contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, 
+	pushArgs, countValArgs, countArgs, addArgTo,
+	mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
+	interestingCallContext, 
 
+	interestingArg, mkArgInfo,
+	
+	abstractFloats
     ) where
 
 #include "HsVersions.h"
 
 import SimplEnv
-import DynFlags		( SimplifierSwitch(..), SimplifierMode(..),
-			  DynFlag(..), dopt )
-import StaticFlags	( opt_UF_UpdateInPlace, opt_SimplNoPreInlining,
-			  opt_RulesOff )
+import DynFlags
+import StaticFlags
 import CoreSyn
-import CoreFVs		( exprFreeVars )
-import CoreUtils	( cheapEqExpr, exprType, exprIsTrivial, 
-			  etaExpand, exprEtaExpandArity, bindNonRec, mkCoerce2,
-			  findDefault, exprOkForSpeculation, exprIsHNF, mergeAlts
-			)
-import Literal		( mkStringLit )
-import CoreUnfold	( smallEnoughToInline )
-import MkId		( eRROR_ID )
-import Id		( Id, idType, isDataConWorkId, idOccInfo, isDictId, 
-			  isDeadBinder, idNewDemandInfo, isExportedId,
-			  idUnfolding, idNewStrictness, idInlinePragma, idHasRules
-			)
-import NewDemand	( isStrictDmd, isBotRes, splitStrictSig )
+import qualified CoreSubst
+import PprCore
+import CoreFVs
+import CoreUtils
+import CoreArity	( etaExpand, exprEtaExpandArity )
+import CoreUnfold
+import Name
+import Id
+import Var	( isCoVar )
+import Demand
 import SimplMonad
-import Type		( Type, splitFunTys, dropForAlls, isStrictType,
-			  splitTyConApp_maybe, tyConAppArgs 
-			)
-import TyCon		( tyConDataCons_maybe )
-import DataCon		( dataConRepArity )
+import Type	hiding( substTy )
+import Coercion ( coercionKind )
+import TyCon
+import Unify	( dataConCannotMatch )
 import VarSet
-import BasicTypes	( TopLevelFlag(..), isNotTopLevel, OccInfo(..), isLoopBreaker, isOneOcc,
-			  Activation, isAlwaysActive, isActive )
-import Util		( lengthExceeds )
+import BasicTypes
+import Util
+import MonadUtils
 import Outputable
+import FastString
+
+import Data.List
 \end{code}
 
 
 %************************************************************************
 %*									*
-\subsection{The continuation data type}
+		The SimplCont type
 %*									*
 %************************************************************************
 
-\begin{code}
-data SimplCont		-- Strict contexts
-  = Stop     OutType	-- Type of the result
-	     LetRhsFlag
-	     Bool	-- True <=> There is something interesting about
-			--          the context, and hence the inliner
-			--	    should be a bit keener (see interestingCallContext)
-			-- Two cases:
-			-- (a) This is the RHS of a thunk whose type suggests
-			--     that update-in-place would be possible
-			-- (b) This is an argument of a function that has RULES
-			--     Inlining the call might allow the rule to fire
+A SimplCont allows the simplifier to traverse the expression in a 
+zipper-like fashion.  The SimplCont represents the rest of the expression,
+"above" the point of interest.
 
-  | CoerceIt OutType			-- The To-type, simplified
-	     SimplCont
+You can also think of a SimplCont as an "evaluation context", using
+that term in the way it is used for operational semantics. This is the
+way I usually think of it, For example you'll often see a syntax for
+evaluation context looking like
+	C ::= []  |  C e   |  case C of alts  |  C `cast` co
+That's the kind of thing we are doing here, and I use that syntax in
+the comments.
 
-  | ApplyTo  DupFlag 
-	     InExpr SimplEnv		-- The argument, as yet unsimplified, 
-	     SimplCont			-- and its environment
 
-  | Select   DupFlag 
-	     InId [InAlt] SimplEnv	-- The case binder, alts, and subst-env
-	     SimplCont
+Key points:
+  * A SimplCont describes a *strict* context (just like 
+    evaluation contexts do).  E.g. Just [] is not a SimplCont
 
-  | ArgOf    LetRhsFlag		-- An arbitrary strict context: the argument 
-  	     			-- 	of a strict function, or a primitive-arg fn
-				-- 	or a PrimOp
-				-- No DupFlag, because we never duplicate it
-	     OutType		-- arg_ty: type of the argument itself
-	     OutType		-- cont_ty: the type of the expression being sought by the context
-				--	f (error "foo") ==> coerce t (error "foo")
-				-- when f is strict
-				-- We need to know the type t, to which to coerce.
+  * A SimplCont describes a context that *does not* bind
+    any variables.  E.g. \x. [] is not a SimplCont
 
-	     (SimplEnv -> OutExpr -> SimplM FloatsWithExpr)	-- What to do with the result
-				-- The result expression in the OutExprStuff has type cont_ty
+\begin{code}
+data SimplCont	
+  = Stop		-- An empty context, or hole, []     
+	CallCtxt	-- True <=> There is something interesting about
+			--          the context, and hence the inliner
+			--	    should be a bit keener (see interestingCallContext)
+			-- Specifically:
+			--     This is an argument of a function that has RULES
+			--     Inlining the call might allow the rule to fire
 
-data LetRhsFlag = AnArg		-- It's just an argument not a let RHS
-		| AnRhs 	-- It's the RHS of a let (so please float lets out of big lambdas)
+  | CoerceIt 		-- C `cast` co
+	OutCoercion		-- The coercion simplified
+	SimplCont
+
+  | ApplyTo  		-- C arg
+	DupFlag 
+	InExpr StaticEnv		-- The argument and its static env
+	SimplCont
+
+  | Select   		-- case C of alts
+	DupFlag 
+	InId [InAlt] StaticEnv	-- The case binder, alts, and subst-env
+	SimplCont
+
+  -- The two strict forms have no DupFlag, because we never duplicate them
+  | StrictBind 		-- (\x* \xs. e) C
+	InId [InBndr]		-- let x* = [] in e 	
+	InExpr StaticEnv	--	is a special case 
+	SimplCont	
+
+  | StrictArg 		-- f e1 ..en C
+ 	ArgInfo		-- Specifies f, e1..en, Whether f has rules, etc
+			--     plus strictness flags for *further* args
+        CallCtxt        -- Whether *this* argument position is interesting
+	SimplCont		
+
+data ArgInfo 
+  = ArgInfo {
+        ai_fun   :: Id,		-- The function
+	ai_args  :: [OutExpr],	-- ...applied to these args (which are in *reverse* order)
+	ai_rules :: [CoreRule],	-- Rules for this function
+
+	ai_encl :: Bool,	-- Flag saying whether this function 
+				-- or an enclosing one has rules (recursively)
+				--	True => be keener to inline in all args
+	
+	ai_strs :: [Bool],	-- Strictness of remaining arguments
+				--   Usually infinite, but if it is finite it guarantees
+				--   that the function diverges after being given
+				--   that number of args
+	ai_discs :: [Int]	-- Discounts for remaining arguments; non-zero => be keener to inline
+				--   Always infinite
+    }
 
-instance Outputable LetRhsFlag where
-  ppr AnArg = ptext SLIT("arg")
-  ppr AnRhs = ptext SLIT("rhs")
+addArgTo :: ArgInfo -> OutExpr -> ArgInfo
+addArgTo ai arg = ai { ai_args = arg : ai_args ai }
 
 instance Outputable SimplCont where
-  ppr (Stop ty is_rhs _)  	     = ptext SLIT("Stop") <> brackets (ppr is_rhs) <+> ppr ty
-  ppr (ApplyTo dup arg se cont)      = (ptext SLIT("ApplyTo") <+> ppr dup <+> ppr arg) $$ ppr cont
-  ppr (ArgOf _ _ _ _)   	     = ptext SLIT("ArgOf...")
-  ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$ 
-				       (nest 4 (ppr alts)) $$ ppr cont
-  ppr (CoerceIt ty cont)	     = (ptext SLIT("CoerceIt") <+> ppr ty) $$ ppr cont
+  ppr (Stop interesting)    	     = ptext (sLit "Stop") <> brackets (ppr interesting)
+  ppr (ApplyTo dup arg _ cont)       = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg)
+					  {-  $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
+  ppr (StrictBind b _ _ _ cont)      = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
+  ppr (StrictArg ai _ cont)          = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
+  ppr (Select dup bndr alts _ cont)  = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$ 
+				       (nest 4 (ppr alts)) $$ ppr cont 
+  ppr (CoerceIt co cont)	     = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
 
 data DupFlag = OkToDup | NoDup
 
 instance Outputable DupFlag where
-  ppr OkToDup = ptext SLIT("ok")
-  ppr NoDup   = ptext SLIT("nodup")
+  ppr OkToDup = ptext (sLit "ok")
+  ppr NoDup   = ptext (sLit "nodup")
 
 
--------------------
-mkBoringStop :: OutType -> SimplCont
-mkBoringStop ty = Stop ty AnArg False
 
-mkLazyArgStop :: OutType -> Bool -> SimplCont
-mkLazyArgStop ty has_rules = Stop ty AnArg (canUpdateInPlace ty || has_rules)
+-------------------
+mkBoringStop :: SimplCont
+mkBoringStop = Stop BoringCtxt
 
-mkRhsStop :: OutType -> SimplCont
-mkRhsStop ty = Stop ty AnRhs (canUpdateInPlace ty)
+mkRhsStop :: SimplCont	-- See Note [RHS of lets] in CoreUnfold
+mkRhsStop = Stop (ArgCtxt False)
 
-contIsRhs :: SimplCont -> Bool
-contIsRhs (Stop _ AnRhs _)    = True
-contIsRhs (ArgOf AnRhs _ _ _) = True
-contIsRhs other	              = False
+mkLazyArgStop :: CallCtxt -> SimplCont
+mkLazyArgStop cci = Stop cci
 
-contIsRhsOrArg (Stop _ _ _)    = True
-contIsRhsOrArg (ArgOf _ _ _ _) = True
-contIsRhsOrArg other	       = False
+-------------------
+contIsRhsOrArg :: SimplCont -> Bool
+contIsRhsOrArg (Stop {})       = True
+contIsRhsOrArg (StrictBind {}) = True
+contIsRhsOrArg (StrictArg {})  = True
+contIsRhsOrArg _               = False
 
 -------------------
 contIsDupable :: SimplCont -> Bool
-contIsDupable (Stop _ _ _)     		 = True
+contIsDupable (Stop {})                  = True
 contIsDupable (ApplyTo  OkToDup _ _ _)   = True
 contIsDupable (Select   OkToDup _ _ _ _) = True
 contIsDupable (CoerceIt _ cont)          = contIsDupable cont
-contIsDupable other	  	         = False
+contIsDupable _                          = False
 
 -------------------
-discardableCont :: SimplCont -> Bool
-discardableCont (Stop _ _ _)	    = False
-discardableCont (CoerceIt _ cont)   = discardableCont cont
-discardableCont other		    = True
-
-discardCont :: SimplCont	-- A continuation, expecting
-	    -> SimplCont	-- Replace the continuation with a suitable coerce
-discardCont cont = case cont of
-		     Stop to_ty is_rhs _ -> cont
-		     other               -> CoerceIt to_ty (mkBoringStop to_ty)
-		 where
-		   to_ty = contResultType cont
+contIsTrivial :: SimplCont -> Bool
+contIsTrivial (Stop {})                   = True
+contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
+contIsTrivial (CoerceIt _ cont)           = contIsTrivial cont
+contIsTrivial _                           = False
 
 -------------------
-contResultType :: SimplCont -> OutType
-contResultType (Stop to_ty _ _)	     = to_ty
-contResultType (ArgOf _ _ to_ty _)   = to_ty
-contResultType (ApplyTo _ _ _ cont)  = contResultType cont
-contResultType (CoerceIt _ cont)     = contResultType cont
-contResultType (Select _ _ _ _ cont) = contResultType cont
+contResultType :: SimplEnv -> OutType -> SimplCont -> OutType
+contResultType env ty cont
+  = go cont ty
+  where
+    subst_ty se ty = substTy (se `setInScope` env) ty
 
--------------------
-countValArgs :: SimplCont -> Int
-countValArgs (ApplyTo _ (Type ty) se cont) = countValArgs cont
-countValArgs (ApplyTo _ val_arg   se cont) = 1 + countValArgs cont
-countValArgs other			   = 0
+    go (Stop {})                      ty = ty
+    go (CoerceIt co cont)             _  = go cont (snd (coercionKind co))
+    go (StrictBind _ bs body se cont) _  = go cont (subst_ty se (exprType (mkLams bs body)))
+    go (StrictArg ai _ cont)          _  = go cont (funResultTy (argInfoResultTy ai))
+    go (Select _ _ alts se cont)      _  = go cont (subst_ty se (coreAltsType alts))
+    go (ApplyTo _ arg se cont)        ty = go cont (apply_to_arg ty arg se)
 
-countArgs :: SimplCont -> Int
-countArgs (ApplyTo _ arg se cont) = 1 + countArgs cont
-countArgs other			  = 0
+    apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
+    apply_to_arg ty _             _  = funResultTy ty
+
+argInfoResultTy :: ArgInfo -> OutType
+argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args })
+  = foldr (\arg fn_ty -> applyTypeToArg fn_ty arg) (idType fun) args
 
 -------------------
-pushContArgs :: SimplEnv -> [OutArg] -> SimplCont -> SimplCont
--- Pushes args with the specified environment
-pushContArgs env []           cont = cont
-pushContArgs env (arg : args) cont = ApplyTo NoDup arg env (pushContArgs env args cont)
-\end{code}
+countValArgs :: SimplCont -> Int
+countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
+countValArgs (ApplyTo _ _        _ cont) = 1 + countValArgs cont
+countValArgs _                           = 0
 
+countArgs :: SimplCont -> Int
+countArgs (ApplyTo _ _ _ cont) = 1 + countArgs cont
+countArgs _                    = 0
 
-\begin{code}
-getContArgs :: SwitchChecker
-	    -> OutId -> SimplCont 
-	    -> ([(InExpr, SimplEnv, Bool)],	-- Arguments; the Bool is true for strict args
-		SimplCont)			-- Remaining continuation
--- getContArgs id k = (args, k', inl)
--- 	args are the leading ApplyTo items in k
---	(i.e. outermost comes first)
---	augmented with demand info from the functionn
-getContArgs chkr fun orig_cont
-  = let
-		-- Ignore strictness info if the no-case-of-case
-		-- flag is on.  Strictness changes evaluation order
-		-- and that can change full laziness
-	stricts | switchIsOn chkr NoCaseOfCase = vanilla_stricts
-		| otherwise		       = computed_stricts
-    in
-    go [] stricts orig_cont
+contArgs :: SimplCont -> ([OutExpr], SimplCont)
+-- Uses substitution to turn each arg into an OutExpr
+contArgs cont = go [] cont
   where
-    ----------------------------
-
-	-- Type argument
-    go acc ss (ApplyTo _ arg@(Type _) se cont)
-	= go ((arg,se,False) : acc) ss cont
-		-- NB: don't bother to instantiate the function type
-
-	-- Value argument
-    go acc (s:ss) (ApplyTo _ arg se cont)
-	= go ((arg,se,s) : acc) ss cont
-
-	-- We're run out of arguments, or else we've run out of demands
-	-- The latter only happens if the result is guaranteed bottom
-	-- This is the case for
-	--	* case (error "hello") of { ... }
-	--	* (error "Hello") arg
-	--	* f (error "Hello") where f is strict
-	--	etc
-	-- Then, especially in the first of these cases, we'd like to discard
-	-- the continuation, leaving just the bottoming expression.  But the
-	-- type might not be right, so we may have to add a coerce.
-    go acc ss cont 
-	| null ss && discardableCont cont = (reverse acc, discardCont cont)
-	| otherwise			  = (reverse acc, cont)
-
-    ----------------------------
-    vanilla_stricts, computed_stricts :: [Bool]
-    vanilla_stricts  = repeat False
-    computed_stricts = zipWith (||) fun_stricts arg_stricts
-
-    ----------------------------
-    (val_arg_tys, _) = splitFunTys (dropForAlls (idType fun))
-    arg_stricts      = map isStrictType val_arg_tys ++ repeat False
-	-- These argument types are used as a cheap and cheerful way to find
-	-- unboxed arguments, which must be strict.  But it's an InType
-	-- and so there might be a type variable where we expect a function
-	-- type (the substitution hasn't happened yet).  And we don't bother
-	-- doing the type applications for a polymorphic function.
-	-- Hence the splitFunTys*IgnoringForAlls*
-
-    ----------------------------
-	-- If fun_stricts is finite, it means the function returns bottom
-	-- after that number of value args have been consumed
-	-- Otherwise it's infinite, extended with False
-    fun_stricts
-      = case splitStrictSig (idNewStrictness fun) of
-	  (demands, result_info)
-		| not (demands `lengthExceeds` countValArgs orig_cont)
-		-> 	-- Enough args, use the strictness given.
-			-- For bottoming functions we used to pretend that the arg
-			-- is lazy, so that we don't treat the arg as an
-			-- interesting context.  This avoids substituting
-			-- top-level bindings for (say) strings into 
-			-- calls to error.  But now we are more careful about
-			-- inlining lone variables, so its ok (see SimplUtils.analyseCont)
-		   if isBotRes result_info then
-			map isStrictDmd demands		-- Finite => result is bottom
-		   else
-			map isStrictDmd demands ++ vanilla_stricts
+    go args (ApplyTo _ arg se cont) = go (substExpr se arg : args) cont
+    go args cont		    = (reverse args, cont)
 
-	  other -> vanilla_stricts	-- Not enough args, or no strictness
+pushArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont
+pushArgs _env []         cont = cont
+pushArgs env  (arg:args) cont = ApplyTo NoDup arg env (pushArgs env args cont)
 
--------------------
-interestingArg :: OutExpr -> Bool
-	-- An argument is interesting if it has *some* structure
-	-- We are here trying to avoid unfolding a function that
-	-- is applied only to variables that have no unfolding
-	-- (i.e. they are probably lambda bound): f x y z
-	-- There is little point in inlining f here.
-interestingArg (Var v)	         = hasSomeUnfolding (idUnfolding v)
-					-- Was: isValueUnfolding (idUnfolding v')
-					-- But that seems over-pessimistic
-				 || isDataConWorkId v
-					-- This accounts for an argument like
-					-- () or [], which is definitely interesting
-interestingArg (Type _)	         = False
-interestingArg (App fn (Type _)) = interestingArg fn
-interestingArg (Note _ a)	 = interestingArg a
-interestingArg other	         = True
-	-- Consider 	let x = 3 in f x
-	-- The substitution will contain (x -> ContEx 3), and we want to
-	-- to say that x is an interesting argument.
-	-- But consider also (\x. f x y) y
-	-- The substitution will contain (x -> ContEx y), and we want to say
-	-- that x is not interesting (assuming y has no unfolding)
+dropArgs :: Int -> SimplCont -> SimplCont
+dropArgs 0 cont = cont
+dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
+dropArgs n other		= pprPanic "dropArgs" (ppr n <+> ppr other)
 \end{code}
 
-Comment about interestingCallContext
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Note [Interesting call context]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We want to avoid inlining an expression where there can't possibly be
 any gain, such as in an argument position.  Hence, if the continuation
 is interesting (eg. a case scrutinee, application etc.) then we
@@ -327,61 +272,30 @@ applies when x is bound to a lambda expression.  Hence
 contIsInteresting looks for case expressions with just a single
 default case.
 
+
 \begin{code}
-interestingCallContext :: Bool 		-- False <=> no args at all
-		       -> Bool		-- False <=> no value args
-		       -> SimplCont -> Bool
-	-- The "lone-variable" case is important.  I spent ages
-	-- messing about with unsatisfactory varaints, but this is nice.
-	-- The idea is that if a variable appear all alone
-	--	as an arg of lazy fn, or rhs	Stop
-	-- 	as scrutinee of a case		Select
-	--	as arg of a strict fn		ArgOf
-	-- then we should not inline it (unless there is some other reason,
-	-- e.g. is is the sole occurrence).  We achieve this by making
-	-- interestingCallContext return False for a lone variable.
-	--
-	-- Why?  At least in the case-scrutinee situation, turning
-	--	let x = (a,b) in case x of y -> ...
-	-- into
-	--	let x = (a,b) in case (a,b) of y -> ...
-	-- and thence to 
-	--	let x = (a,b) in let y = (a,b) in ...
-	-- is bad if the binding for x will remain.
-	--
-	-- Another example: I discovered that strings
-	-- were getting inlined straight back into applications of 'error'
-	-- because the latter is strict.
-	--	s = "foo"
-	--	f = \x -> ...(error s)...
-
-	-- Fundamentally such contexts should not ecourage inlining because
-	-- the context can ``see'' the unfolding of the variable (e.g. case or a RULE)
-	-- so there's no gain.
-	--
-	-- However, even a type application or coercion isn't a lone variable.
-	-- Consider
-	--	case $fMonadST @ RealWorld of { :DMonad a b c -> c }
-	-- We had better inline that sucker!  The case won't see through it.
-	--
-	-- For now, I'm treating treating a variable applied to types 
-	-- in a *lazy* context "lone". The motivating example was
-	--	f = /\a. \x. BIG
-	--	g = /\a. \y.  h (f a)
-	-- There's no advantage in inlining f here, and perhaps
-	-- a significant disadvantage.  Hence some_val_args in the Stop case
-
-interestingCallContext some_args some_val_args cont
+interestingCallContext :: SimplCont -> CallCtxt
+-- See Note [Interesting call context]
+interestingCallContext cont
   = interesting cont
   where
-    interesting (Select _ _ _ _ _)       = some_args
-    interesting (ApplyTo _ _ _ _)        = True	-- Can happen if we have (coerce t (f x)) y
-						-- Perhaps True is a bit over-keen, but I've
-						-- seen (coerce f) x, where f has an INLINE prag,
-						-- So we have to give some motivaiton for inlining it
-    interesting (ArgOf _ _ _ _)	         = some_val_args
-    interesting (Stop ty _ interesting)  = some_val_args && interesting
-    interesting (CoerceIt _ cont)        = interesting cont
+    interesting (Select _ bndr _ _ _)
+	| isDeadBinder bndr = CaseCtxt
+	| otherwise	    = ArgCtxt False	-- If the binder is used, this
+						-- is like a strict let
+						-- See Note [RHS of lets] in CoreUnfold
+		
+    interesting (ApplyTo _ arg _ cont)
+	| isTypeArg arg = interesting cont
+	| otherwise     = ValAppCtxt 	-- Can happen if we have (f Int |> co) y
+					-- If f has an INLINE prag we need to give it some
+					-- motivation to inline. See Note [Cast then apply]
+					-- in CoreUnfold
+
+    interesting (StrictArg _ cci _) = cci
+    interesting (StrictBind {})	    = BoringCtxt
+    interesting (Stop cci)   	    = cci
+    interesting (CoerceIt _ cont)   = interesting cont
 	-- If this call is the arg of a strict function, the context
 	-- is a bit interesting.  If we inline here, we may get useful
 	-- evaluation information to avoid repeated evals: e.g.
@@ -399,7 +313,82 @@ interestingCallContext some_args some_val_args cont
 
 
 -------------------
-interestingArgContext :: Id -> SimplCont -> Bool
+mkArgInfo :: Id
+	  -> [CoreRule]	-- Rules for function
+	  -> Int	-- Number of value args
+	  -> SimplCont	-- Context of the call
+	  -> ArgInfo
+
+mkArgInfo fun rules n_val_args call_cont
+  | n_val_args < idArity fun		-- Note [Unsaturated functions]
+  = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules
+            , ai_encl = False
+	    , ai_strs = vanilla_stricts 
+	    , ai_discs = vanilla_discounts }
+  | otherwise
+  = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules
+            , ai_encl = interestingArgContext rules call_cont
+	    , ai_strs  = add_type_str (idType fun) arg_stricts
+	    , ai_discs = arg_discounts }
+  where
+    vanilla_discounts, arg_discounts :: [Int]
+    vanilla_discounts = repeat 0
+    arg_discounts = case idUnfolding fun of
+			CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}}
+			      -> discounts ++ vanilla_discounts
+			_     -> vanilla_discounts
+
+    vanilla_stricts, arg_stricts :: [Bool]
+    vanilla_stricts  = repeat False
+
+    arg_stricts
+      = case splitStrictSig (idStrictness fun) of
+	  (demands, result_info)
+		| not (demands `lengthExceeds` n_val_args)
+		-> 	-- Enough args, use the strictness given.
+			-- For bottoming functions we used to pretend that the arg
+			-- is lazy, so that we don't treat the arg as an
+			-- interesting context.  This avoids substituting
+			-- top-level bindings for (say) strings into 
+			-- calls to error.  But now we are more careful about
+			-- inlining lone variables, so its ok (see SimplUtils.analyseCont)
+		   if isBotRes result_info then
+			map isStrictDmd demands		-- Finite => result is bottom
+		   else
+			map isStrictDmd demands ++ vanilla_stricts
+	       | otherwise
+	       -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun) 
+				<+> ppr n_val_args <+> ppr demands ) 
+		   vanilla_stricts	-- Not enough args, or no strictness
+
+    add_type_str :: Type -> [Bool] -> [Bool]
+    -- If the function arg types are strict, record that in the 'strictness bits'
+    -- No need to instantiate because unboxed types (which dominate the strict
+    -- types) can't instantiate type variables.
+    -- add_type_str is done repeatedly (for each call); might be better 
+    -- once-for-all in the function
+    -- But beware primops/datacons with no strictness
+    add_type_str _ [] = []
+    add_type_str fun_ty strs		-- Look through foralls
+	| Just (_, fun_ty') <- splitForAllTy_maybe fun_ty	-- Includes coercions
+	= add_type_str fun_ty' strs
+    add_type_str fun_ty (str:strs)	-- Add strict-type info
+	| Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
+	= (str || isStrictType arg_ty) : add_type_str fun_ty' strs
+    add_type_str _ strs
+	= strs
+
+{- Note [Unsaturated functions]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider (test eyeball/inline4)
+	x = a:as
+	y = f x
+where f has arity 2.  Then we do not want to inline 'x', because
+it'll just be floated out again.  Even if f has lots of discounts
+on its first argument -- it must be saturated for these to kick in
+-}
+
+interestingArgContext :: [CoreRule] -> SimplCont -> Bool
 -- If the argument has form (f x y), where x,y are boring,
 -- and f is marked INLINE, then we don't want to inline f.
 -- But if the context of the argument is
@@ -407,43 +396,30 @@ interestingArgContext :: Id -> SimplCont -> Bool
 -- where g has rules, then we *do* want to inline f, in case it
 -- exposes a rule that might fire.  Similarly, if the context is
 --	h (g (f x x))
--- where h has rules, then we do want to inline f.
--- The interesting_arg_ctxt flag makes this happen; if it's
+-- where h has rules, then we do want to inline f; hence the
+-- call_cont argument to interestingArgContext
+--
+-- The ai-rules flag makes this happen; if it's
 -- set, the inliner gets just enough keener to inline f 
 -- regardless of how boring f's arguments are, if it's marked INLINE
 --
 -- The alternative would be to *always* inline an INLINE function,
 -- regardless of how boring its context is; but that seems overkill
 -- For example, it'd mean that wrapper functions were always inlined
-interestingArgContext fn cont
-  = idHasRules fn || go cont
+interestingArgContext rules call_cont
+  = notNull rules || enclosing_fn_has_rules
   where
-    go (Select {})	      = False
-    go (ApplyTo {})	      = False
-    go (ArgOf {})	      = True
-    go (CoerceIt _ c)	      = go c
-    go (Stop _ _ interesting) = interesting
+    enclosing_fn_has_rules = go call_cont
 
--------------------
-canUpdateInPlace :: Type -> Bool
--- Consider   let x = <wurble> in ...
--- If <wurble> returns an explicit constructor, we might be able
--- to do update in place.  So we treat even a thunk RHS context
--- as interesting if update in place is possible.  We approximate
--- this by seeing if the type has a single constructor with a
--- small arity.  But arity zero isn't good -- we share the single copy
--- for that case, so no point in sharing.
-
-canUpdateInPlace ty 
-  | not opt_UF_UpdateInPlace = False
-  | otherwise
-  = case splitTyConApp_maybe ty of 
-	Nothing		-> False 
-	Just (tycon, _) -> case tyConDataCons_maybe tycon of
-				Just [dc]  -> arity == 1 || arity == 2
-				           where
-					      arity = dataConRepArity dc
-				other -> False
+    go (Select {})	   = False
+    go (ApplyTo {})	   = False
+    go (StrictArg _ cci _) = interesting cci
+    go (StrictBind {})	   = False	-- ??
+    go (CoerceIt _ c)	   = go c
+    go (Stop cci)          = interesting cci
+
+    interesting (ArgCtxt rules) = rules
+    interesting _               = False
 \end{code}
 
 
@@ -454,18 +430,61 @@ canUpdateInPlace ty
 %*									*
 %************************************************************************
 
-Inlining is controlled partly by the SimplifierMode switch.  This has two
-settings:
+\begin{code}
+simplEnvForGHCi :: SimplEnv
+simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $
+                  SimplGently { sm_rules = False, sm_inline = False }
+   -- Do not do any inlining, in case we expose some unboxed
+   -- tuple stuff that confuses the bytecode interpreter
+
+simplEnvForRules :: SimplEnv
+simplEnvForRules = mkSimplEnv allOffSwitchChecker $
+                   SimplGently { sm_rules = True, sm_inline = False }
+
+updModeForInlineRules :: SimplifierMode -> SimplifierMode
+updModeForInlineRules mode
+  = case mode of      
+      SimplGently {} -> mode	-- Don't modify mode if we already gentle
+      SimplPhase  {} -> SimplGently { sm_rules = True, sm_inline = True }
+	-- Simplify as much as possible, subject to the usual "gentle" rules
+\end{code}
 
+Inlining is controlled partly by the SimplifierMode switch.  This has two
+settings
+	
 	SimplGently	(a) Simplifying before specialiser/full laziness
-			(b) Simplifiying inside INLINE pragma
+			(b) Simplifiying inside InlineRules
 			(c) Simplifying the LHS of a rule
 			(d) Simplifying a GHCi expression or Template 
 				Haskell splice
 
-	SimplPhase n	Used at all other times
+	SimplPhase n _	 Used at all other times
+
+Note [Gentle mode]
+~~~~~~~~~~~~~~~~~~
+Gentle mode has a separate boolean flag to control
+	a) inlining (sm_inline flag)
+	b) rules    (sm_rules  flag)
+A key invariant about Gentle mode is that it is treated as the EARLIEST
+phase.  Something is inlined if the sm_inline flag is on AND the thing
+is inlinable in the earliest phase.  This is important. Example
+
+  {-# INLINE [~1] g #-}
+  g = ...
+  
+  {-# INLINE f #-}
+  f x = g (g x)
 
-The key thing about SimplGently is that it does no call-site inlining.
+If we were to inline g into f's inlining, then an importing module would
+never be able to do
+	f e --> g (g e) ---> RULE fires
+because the InlineRule for f has had g inlined into it.
+
+On the other hand, it is bad not to do ANY inlining into an
+InlineRule, because then recursive knots in instance declarations
+don't get unravelled.
+
+However, *sometimes* SimplGently must do no call-site inlining at all.
 Before full laziness we must be careful not to inline wrappers,
 because doing so inhibits floating
     e.g. ...(case f x of ...)...
@@ -479,17 +498,24 @@ running it, we don't want to use -O2.  Indeed, we don't want to inline
 anything, because the byte-code interpreter might get confused about 
 unboxed tuples and suchlike.
 
-INLINE pragmas
-~~~~~~~~~~~~~~
-SimplGently is also used as the mode to simplify inside an InlineMe note.
+Note [RULEs enabled in SimplGently]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+RULES are enabled when doing "gentle" simplification.  Two reasons:
 
-\begin{code}
-inlineMode :: SimplifierMode
-inlineMode = SimplGently
-\end{code}
+  * We really want the class-op cancellation to happen:
+        op (df d1 d2) --> $cop3 d1 d2
+    because this breaks the mutual recursion between 'op' and 'df'
+
+  * I wanted the RULE
+        lift String ===> ...
+    to work in Template Haskell when simplifying
+    splices, so we get simpler code for literal strings
 
-It really is important to switch off inlinings inside such
-expressions.  Consider the following example 
+Note [Simplifying gently inside InlineRules]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do much simplification inside InlineRules (which come from
+INLINE pragmas).  It really is important to switch off inlinings
+inside such expressions.  Consider the following example
 
      	let f = \pq -> BIG
      	in
@@ -498,16 +524,14 @@ expressions.  Consider the following example
      	in ...g...g...g...g...g...
 
 Now, if that's the ONLY occurrence of f, it will be inlined inside g,
-and thence copied multiple times when g is inlined.
+and thence copied multiple times when g is inlined.  
 
-
-This function may be inlinined in other modules, so we
-don't want to remove (by inlining) calls to functions that have
-specialisations, or that may have transformation rules in an importing
-scope.
+This function may be inlinined in other modules, so we don't want to
+remove (by inlining) calls to functions that have specialisations, or
+that may have transformation rules in an importing scope.
 
 E.g. 	{-# INLINE f #-}
-		f x = ...g...
+	f x = ...g...
 
 and suppose that g is strict *and* has specialisations.  If we inline
 g's wrapper, we deny f the chance of getting the specialised version
@@ -525,15 +549,14 @@ continuation.  That's why the keep_inline predicate returns True for
 ArgOf continuations.  It shouldn't do any harm not to dissolve the
 inline-me note under these circumstances.
 
-Note that the result is that we do very little simplification
-inside an InlineMe.  
+Although we do very little simplification inside an InlineRule,
+the RHS is simplified as normal.  For example:
 
 	all xs = foldr (&&) True xs
 	any p = all . map p  {-# INLINE any #-}
 
-Problem: any won't get deforested, and so if it's exported and the
-importer doesn't use the inlining, (eg passes it as an arg) then we
-won't get deforestation at all.  We havn't solved this problem yet!
+The RHS of 'any' will get optimised and deforested; but the InlineRule
+will still mention the original RHS.
 
 
 preInlineUnconditionally
@@ -578,7 +601,7 @@ y's occurrence info, which breaks the invariant.  It matters: y
 might have a BIG rhs, which will now be dup'd at every occurrenc of x.
 
 
-Evne RHSs labelled InlineMe aren't caught here, because there might be
+Even RHSs labelled InlineMe aren't caught here, because there might be
 no benefit from inlining at the call site.
 
 [Sept 01] Don't unconditionally inline a top-level thing, because that
@@ -600,27 +623,46 @@ seems a bit fragile.
 Conclusion: inline top level things gaily until Phase 0 (the last
 phase), at which point don't.
 
+Note [pre/postInlineUnconditionally in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in gentle mode we want to do preInlineUnconditionally.  The
+reason is that too little clean-up happens if you don't inline
+use-once things.  Also a bit of inlining is *good* for full laziness;
+it can expose constant sub-expressions.  Example in
+spectral/mandel/Mandel.hs, where the mandelset function gets a useful
+let-float if you inline windowToViewport
+
+However, as usual for Gentle mode, do not inline things that are
+inactive in the intial stages.  See Note [Gentle mode].
+
+Note [Top-level botomming Ids]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Don't inline top-level Ids that are bottoming, even if they are used just
+once, because FloatOut has gone to some trouble to extract them out.
+Inlining them won't make the program run faster!
+
 \begin{code}
 preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
 preInlineUnconditionally env top_lvl bndr rhs
-  | not active 		   = False
-  | opt_SimplNoPreInlining = False
+  | not active 		                     = False
+  | isTopLevel top_lvl && isBottomingId bndr = False	-- Note [Top-level bottoming Ids]
+  | opt_SimplNoPreInlining                   = False
   | otherwise = case idOccInfo bndr of
 		  IAmDead	     	     -> True	-- Happens in ((\x.1) v)
 	  	  OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
-		  other 		     -> False
+		  _                          -> False
   where
     phase = getMode env
     active = case phase of
-		   SimplGently  -> isAlwaysActive prag
-		   SimplPhase n -> isActive n prag
-    prag = idInlinePragma bndr
-
+		   SimplGently {} -> isEarlyActive act
+			-- See Note [pre/postInlineUnconditionally in gentle mode]
+		   SimplPhase n _ -> isActive n act
+    act = idInlineActivation bndr
     try_once in_lam int_cxt	-- There's one textual occurrence
 	| not in_lam = isNotTopLevel top_lvl || early_phase
 	| otherwise  = int_cxt && canInlineInLam rhs
 
--- Be very careful before inlining inside a lambda, becuase (a) we must not 
+-- Be very careful before inlining inside a lambda, because (a) we must not 
 -- invalidate occurrence information, and (b) we want to avoid pushing a
 -- single allocation (here) into multiple allocations (inside lambda).  
 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
@@ -641,14 +683,14 @@ preInlineUnconditionally env top_lvl bndr rhs
 	-- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
 	-- so substituting rhs inside a lambda doesn't change the occ info.
 	-- Sadly, not quite the same as exprIsHNF.
-    canInlineInLam (Lit l)		= True
+    canInlineInLam (Lit _)		= True
     canInlineInLam (Lam b e)		= isRuntimeVar b || canInlineInLam e
     canInlineInLam (Note _ e)		= canInlineInLam e
     canInlineInLam _			= False
 
     early_phase = case phase of
-			SimplPhase 0 -> False
-			other	     -> True
+			SimplPhase 0 _ -> False
+			_    	       -> True
 -- If we don't have this early_phase test, consider
 --	x = length [1,2,3]
 -- The full laziness pass carefully floats all the cons cells to
@@ -695,30 +737,49 @@ our new view that inlining is like a RULE, so I'm sticking to the 'active'
 story for now.
 
 \begin{code}
-postInlineUnconditionally :: SimplEnv -> TopLevelFlag -> OutId -> OccInfo -> OutExpr -> Unfolding -> Bool
+postInlineUnconditionally 
+    :: SimplEnv -> TopLevelFlag
+    -> OutId		-- The binder (an InId would be fine too)
+    -> OccInfo 		-- From the InId
+    -> OutExpr
+    -> Unfolding
+    -> Bool
 postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
-  | not active		   = False
-  | isLoopBreaker occ_info = False
-  | isExportedId bndr      = False
-  | exprIsTrivial rhs 	   = True
+  | not active		        = False
+  | isLoopBreaker occ_info      = False	-- If it's a loop-breaker of any kind, don't inline
+					-- because it might be referred to "earlier"
+  | isExportedId bndr           = False
+  | isStableUnfolding unfolding = False	-- Note [InlineRule and postInlineUnconditionally]
+  | exprIsTrivial rhs 	        = True
+  | isTopLevel top_lvl          = False	-- Note [Top level and postInlineUnconditionally]
   | otherwise
   = case occ_info of
-      OneOcc in_lam one_br int_cxt
-	->     (one_br || smallEnoughToInline unfolding)	-- Small enough to dup
+	-- The point of examining occ_info here is that for *non-values* 
+	-- that occur outside a lambda, the call-site inliner won't have
+	-- a chance (becuase it doesn't know that the thing
+	-- only occurs once).   The pre-inliner won't have gotten
+	-- it either, if the thing occurs in more than one branch
+	-- So the main target is things like
+	--	let x = f y in
+	--	case v of
+	--	   True  -> case x of ...
+	--	   False -> case x of ...
+	-- This is very important in practice; e.g. wheel-seive1 doubles 
+	-- in allocation if you miss this out
+      OneOcc in_lam _one_br int_cxt	-- OneOcc => no code-duplication issue
+	->     smallEnoughToInline unfolding	-- Small enough to dup
 			-- ToDo: consider discount on smallEnoughToInline if int_cxt is true
 			--
-		 	-- NB: Do we want to inline arbitrarily big things becuase
-			-- one_br is True? that can lead to inline cascades.  But
-			-- preInlineUnconditionlly has dealt with all the common cases
-			-- so perhaps it's worth the risk. Here's an example
-			--	let f = if b then Left (\x.BIG) else Right (\y.BIG)
-			--	in \y. ....f....
-			-- We can't preInlineUnconditionally because that woud invalidate
-			-- the occ info for b.  Yet f is used just once, and duplicating
-			-- the case work is fine (exprIsCheap).
-
-	   &&  ((isNotTopLevel top_lvl && not in_lam) || 
-			-- But outside a lambda, we want to be reasonably aggressive
+		 	-- NB: Do NOT inline arbitrarily big things, even if one_br is True
+			-- Reason: doing so risks exponential behaviour.  We simplify a big
+			--	   expression, inline it, and simplify it again.  But if the
+			--	   very same thing happens in the big expression, we get 
+			--	   exponential cost!
+			-- PRINCIPLE: when we've already simplified an expression once, 
+			-- make sure that we only inline it if it's reasonably small.
+
+           && (not in_lam || 
+			-- Outside a lambda, we want to be reasonably aggressive
 			-- about inlining into multiple branches of case
 			-- e.g. let x = <non-value> 
 			--	in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... } 
@@ -731,95 +792,202 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 			-- int_cxt to prevent us inlining inside a lambda without some 
 			-- good reason.  See the notes on int_cxt in preInlineUnconditionally
 
-      other -> False
-	-- The point here is that for *non-values* that occur
-	-- outside a lambda, the call-site inliner won't have
-	-- a chance (becuase it doesn't know that the thing
-	-- only occurs once).   The pre-inliner won't have gotten
-	-- it either, if the thing occurs in more than one branch
-	-- So the main target is things like
-	--	let x = f y in
-	--	case v of
-	--	   True  -> case x of ...
-	--	   False -> case x of ...
-	-- I'm not sure how important this is in practice
+      IAmDead -> True	-- This happens; for example, the case_bndr during case of
+			-- known constructor:  case (a,b) of x { (p,q) -> ... }
+			-- Here x isn't mentioned in the RHS, so we don't want to
+			-- create the (dead) let-binding  let x = (a,b) in ...
+
+      _ -> False
+
+-- Here's an example that we don't handle well:
+--	let f = if b then Left (\x.BIG) else Right (\y.BIG)
+--	in \y. ....case f of {...} ....
+-- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
+-- But
+--  - We can't preInlineUnconditionally because that woud invalidate
+--    the occ info for b.
+--  - We can't postInlineUnconditionally because the RHS is big, and
+--    that risks exponential behaviour
+--  - We can't call-site inline, because the rhs is big
+-- Alas!
+
   where
     active = case getMode env of
-		   SimplGently  -> isAlwaysActive prag
-		   SimplPhase n -> isActive n prag
-    prag = idInlinePragma bndr
+		   SimplGently {} -> isEarlyActive act
+			-- See Note [pre/postInlineUnconditionally in gentle mode]
+		   SimplPhase n _ -> isActive n act
+    act = idInlineActivation bndr
 
-activeInline :: SimplEnv -> OutId -> OccInfo -> Bool
-activeInline env id occ
+activeUnfolding :: SimplEnv -> IdUnfoldingFun
+activeUnfolding env
+  = case getMode env of
+      SimplGently { sm_inline = False } -> active_unfolding_minimal
+      SimplGently { sm_inline = True  } -> active_unfolding_gentle
+      SimplPhase n _                    -> active_unfolding n
+
+activeUnfInRule :: SimplEnv -> IdUnfoldingFun
+-- When matching in RULE, we want to "look through" an unfolding
+-- if *rules* are on, even if *inlinings* are not.  A notable example
+-- is DFuns, which really we want to match in rules like (op dfun)
+-- in gentle mode.
+activeUnfInRule env
   = case getMode env of
-      SimplGently -> isOneOcc occ && isAlwaysActive prag
-	-- No inlining at all when doing gentle stuff,
-	-- except for local things that occur once
-	-- The reason is that too little clean-up happens if you 
-	-- don't inline use-once things.   Also a bit of inlining is *good* for
-	-- full laziness; it can expose constant sub-expressions.
-	-- Example in spectral/mandel/Mandel.hs, where the mandelset 
-	-- function gets a useful let-float if you inline windowToViewport
-
-	-- NB: we used to have a second exception, for data con wrappers.
-	-- On the grounds that we use gentle mode for rule LHSs, and 
-	-- they match better when data con wrappers are inlined.
-	-- But that only really applies to the trivial wrappers (like (:)),
-	-- and they are now constructed as Compulsory unfoldings (in MkId)
-	-- so they'll happen anyway.
-
-      SimplPhase n -> isActive n prag
+      SimplGently { sm_rules = False } -> active_unfolding_minimal
+      SimplGently { sm_rules = True  } -> active_unfolding_gentle
+      SimplPhase n _                   -> active_unfolding n
+
+active_unfolding_minimal :: IdUnfoldingFun
+-- Compuslory unfoldings only
+-- Ignore SimplGently, because we want to inline regardless;
+-- the Id has no top-level binding at all
+--
+-- NB: we used to have a second exception, for data con wrappers.
+-- On the grounds that we use gentle mode for rule LHSs, and 
+-- they match better when data con wrappers are inlined.
+-- But that only really applies to the trivial wrappers (like (:)),
+-- and they are now constructed as Compulsory unfoldings (in MkId)
+-- so they'll happen anyway.
+active_unfolding_minimal id
+  | isCompulsoryUnfolding unf = unf
+  | otherwise                 = NoUnfolding
   where
-    prag = idInlinePragma id
-
-activeRule :: SimplEnv -> Maybe (Activation -> Bool)
+    unf = realIdUnfolding id	-- Never a loop breaker
+
+active_unfolding_gentle :: IdUnfoldingFun
+-- Anything that is early-active
+-- See Note [Gentle mode]
+active_unfolding_gentle id
+  | isEarlyActive (idInlineActivation id) = idUnfolding id
+  | otherwise                             = NoUnfolding
+      -- idUnfolding checks for loop-breakers
+      -- Things with an INLINE pragma may have 
+      -- an unfolding *and* be a loop breaker  
+      -- (maybe the knot is not yet untied)
+
+active_unfolding :: CompilerPhase -> IdUnfoldingFun
+active_unfolding n id
+  | isActive n (idInlineActivation id) = idUnfolding id
+  | otherwise                          = NoUnfolding
+
+activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
 -- Nothing => No rules at all
-activeRule env
-  | opt_RulesOff = Nothing
+activeRule dflags env
+  | not (dopt Opt_EnableRewriteRules dflags)
+  = Nothing	-- Rewriting is off
   | otherwise
   = case getMode env of
-	SimplGently  -> Just isAlwaysActive
-			-- Used to be Nothing (no rules in gentle mode)
-			-- Main motivation for changing is that I wanted
-			-- 	lift String ===> ...
-			-- to work in Template Haskell when simplifying
-			-- splices, so we get simpler code for literal strings
-	SimplPhase n -> Just (isActive n)
-\end{code}	
+      SimplGently { sm_rules = rules_on } 
+        | rules_on  -> Just isEarlyActive	-- Note [RULEs enabled in SimplGently]
+        | otherwise -> Nothing
+      SimplPhase n _ -> Just (isActive n)
+\end{code}
+
+Note [Top level and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do postInlineUnconditionally for top-level things (except
+ones that are trivial).  There is no point, because the main goal is
+to get rid of local bindings used in multiple case branches. And
+doing so risks replacing a single global allocation with local allocations.
+
+
+Note [InlineRule and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
+we lose the unfolding.  Example
+
+     -- f has InlineRule with rhs (e |> co)
+     --   where 'e' is big
+     f = e |> co
+
+Then there's a danger we'll optimise to
+
+     f' = e
+     f = f' |> co
+
+and now postInlineUnconditionally, losing the InlineRule on f.  Now f'
+won't inline because 'e' is too big.
 
 
 %************************************************************************
 %*									*
-\subsection{Rebuilding a lambda}
+	Rebuilding a lambda
 %*									*
 %************************************************************************
 
 \begin{code}
-mkLam :: SimplEnv -> [OutBinder] -> OutExpr -> SimplCont -> SimplM FloatsWithExpr
+mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
+-- mkLam tries three things
+--	a) eta reduction, if that gives a trivial expression
+--	b) eta expansion [only if there are some value lambdas]
+
+mkLam _b [] body 
+  = return body
+mkLam env bndrs body
+  = do	{ dflags <- getDOptsSmpl
+	; mkLam' dflags bndrs body }
+  where
+    mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
+    mkLam' dflags bndrs (Cast body co)
+      | not (any bad bndrs)
+	-- Note [Casts and lambdas]
+      = do { lam <- mkLam' dflags bndrs body
+	   ; return (mkCoerce (mkPiTypes bndrs co) lam) }
+      where
+	co_vars  = tyVarsOfType co
+	bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars      
+
+    mkLam' dflags bndrs body
+      | dopt Opt_DoEtaReduction dflags,
+        Just etad_lam <- tryEtaReduce bndrs body
+      = do { tick (EtaReduction (head bndrs))
+	   ; return etad_lam }
+
+      | dopt Opt_DoLambdaEtaExpansion dflags,
+        not (inGentleMode env),	      -- In gentle mode don't eta-expansion
+   	any isRuntimeVar bndrs	      -- because it can clutter up the code
+	    		 	      -- with casts etc that may not be removed
+      = do { let body' = tryEtaExpansion dflags body
+ 	   ; return (mkLams bndrs body') }
+   
+      | otherwise 
+      = return (mkLams bndrs body)
 \end{code}
 
-Try three things
-	a) eta reduction, if that gives a trivial expression
-	b) eta expansion [only if there are some value lambdas]
-	c) floating lets out through big lambdas 
-		[only if all tyvar lambdas, and only if this lambda
-		 is the RHS of a let]
-
-\begin{code}
-mkLam env bndrs body cont
- = getDOptsSmpl	 `thenSmpl` \dflags ->
-   mkLam' dflags env bndrs body cont
- where
- mkLam' dflags env bndrs body cont
-   | dopt Opt_DoEtaReduction dflags,
-     Just etad_lam <- tryEtaReduce bndrs body
-   = tick (EtaReduction (head bndrs)) 	`thenSmpl_`
-     returnSmpl (emptyFloats env, etad_lam)
-
-   | dopt Opt_DoLambdaEtaExpansion dflags,
-     any isRuntimeVar bndrs
-   = tryEtaExpansion body		`thenSmpl` \ body' ->
-     returnSmpl (emptyFloats env, mkLams bndrs body')
+Note [Casts and lambdas]
+~~~~~~~~~~~~~~~~~~~~~~~~
+Consider 
+	(\x. (\y. e) `cast` g1) `cast` g2
+There is a danger here that the two lambdas look separated, and the 
+full laziness pass might float an expression to between the two.
+
+So this equation in mkLam' floats the g1 out, thus:
+	(\x. e `cast` g1)  -->  (\x.e) `cast` (tx -> g1)
+where x:tx.
+
+In general, this floats casts outside lambdas, where (I hope) they
+might meet and cancel with some other cast:
+	\x. e `cast` co   ===>   (\x. e) `cast` (tx -> co)
+	/\a. e `cast` co  ===>   (/\a. e) `cast` (/\a. co)
+	/\g. e `cast` co  ===>   (/\g. e) `cast` (/\g. co)
+  		  	  (if not (g `in` co))
+
+Notice that it works regardless of 'e'.  Originally it worked only
+if 'e' was itself a lambda, but in some cases that resulted in 
+fruitless iteration in the simplifier.  A good example was when
+compiling Text.ParserCombinators.ReadPrec, where we had a definition 
+like	(\x. Get `cast` g)
+where Get is a constructor with nonzero arity.  Then mkLam eta-expanded
+the Get, and the next iteration eta-reduced it, and then eta-expanded 
+it again.
+
+Note also the side condition for the case of coercion binders.
+It does not make sense to transform
+	/\g. e `cast` g  ==>  (/\g.e) `cast` (/\g.g)
+because the latter is not well-kinded.
+
+--	c) floating lets out through big lambdas 
+--		[only if all tyvar lambdas, and only if this lambda
+--		 is the RHS of a let]
 
 {-	Sept 01: I'm experimenting with getting the
 	full laziness pass to float out past big lambdsa
@@ -828,59 +996,111 @@ mkLam env bndrs body cont
 			-- if this is indeed a right-hand side; otherwise
 			-- we end up floating the thing out, only for float-in
 			-- to float it right back in again!
- = tryRhsTyLam env bndrs body		`thenSmpl` \ (floats, body') ->
-   returnSmpl (floats, mkLams bndrs body')
+ = do (floats, body') <- tryRhsTyLam env bndrs body
+      return (floats, mkLams bndrs body')
 -}
 
-   | otherwise 
-   = returnSmpl (emptyFloats env, mkLams bndrs body)
-\end{code}
-
 
 %************************************************************************
 %*									*
-\subsection{Eta expansion and reduction}
+		Eta reduction
 %*									*
 %************************************************************************
 
-We try for eta reduction here, but *only* if we get all the 
-way to an exprIsTrivial expression.    
-We don't want to remove extra lambdas unless we are going 
-to avoid allocating this thing altogether
+Note [Eta reduction conditions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We try for eta reduction here, but *only* if we get all the way to an
+trivial expression.  We don't want to remove extra lambdas unless we
+are going to avoid allocating this thing altogether.
+
+There are some particularly delicate points here:
+
+* Eta reduction is not valid in general:  
+	\x. bot  /=  bot
+  This matters, partly for old-fashioned correctness reasons but,
+  worse, getting it wrong can yield a seg fault. Consider
+	f = \x.f x
+	h y = case (case y of { True -> f `seq` True; False -> False }) of
+		True -> ...; False -> ...
+
+  If we (unsoundly) eta-reduce f to get f=f, the strictness analyser
+  says f=bottom, and replaces the (f `seq` True) with just
+  (f `cast` unsafe-co).  BUT, as thing stand, 'f' got arity 1, and it
+  *keeps* arity 1 (perhaps also wrongly).  So CorePrep eta-expands 
+  the definition again, so that it does not termninate after all.
+  Result: seg-fault because the boolean case actually gets a function value.
+  See Trac #1947.
+
+  So it's important to to the right thing.
+
+* Note [Arity care]: we need to be careful if we just look at f's
+  arity. Currently (Dec07), f's arity is visible in its own RHS (see
+  Note [Arity robustness] in SimplEnv) so we must *not* trust the
+  arity when checking that 'f' is a value.  Otherwise we will
+  eta-reduce
+      f = \x. f x
+  to
+      f = f
+  Which might change a terminiating program (think (f `seq` e)) to a 
+  non-terminating one.  So we check for being a loop breaker first.
+
+  However for GlobalIds we can look at the arity; and for primops we
+  must, since they have no unfolding.  
+
+* Regardless of whether 'f' is a value, we always want to 
+  reduce (/\a -> f a) to f
+  This came up in a RULE: foldr (build (/\a -> g a))
+  did not match 	  foldr (build (/\b -> ...something complex...))
+  The type checker can insert these eta-expanded versions,
+  with both type and dictionary lambdas; hence the slightly 
+  ad-hoc isDictId
+
+* Never *reduce* arity. For example
+      f = \xy. g x y
+  Then if h has arity 1 we don't want to eta-reduce because then
+  f's arity would decrease, and that is bad
+
+These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
+Alas.
 
 \begin{code}
-tryEtaReduce :: [OutBinder] -> OutExpr -> Maybe OutExpr
+tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr
 tryEtaReduce bndrs body 
-	-- We don't use CoreUtils.etaReduce, because we can be more
-	-- efficient here:
-	--  (a) we already have the binders
-	--  (b) we can do the triviality test before computing the free vars
   = go (reverse bndrs) body
   where
+    incoming_arity = count isId bndrs
+
     go (b : bs) (App fun arg) | ok_arg b arg = go bs fun	-- Loop round
     go []       fun           | ok_fun fun   = Just fun		-- Success!
     go _        _			     = Nothing		-- Failure!
 
-    ok_fun fun =  exprIsTrivial fun
-	       && not (any (`elemVarSet` (exprFreeVars fun)) bndrs)
-	       && (exprIsHNF fun || all ok_lam bndrs)
+	-- Note [Eta reduction conditions]
+    ok_fun (App fun (Type ty)) 
+	| not (any (`elemVarSet` tyVarsOfType ty) bndrs)
+	=  ok_fun fun
+    ok_fun (Var fun_id)
+	=  not (fun_id `elem` bndrs)
+	&& (ok_fun_id fun_id || all ok_lam bndrs)
+    ok_fun _fun = False
+
+    ok_fun_id fun = fun_arity fun >= incoming_arity
+
+    fun_arity fun 	      -- See Note [Arity care]
+       | isLocalId fun && isLoopBreaker (idOccInfo fun) = 0
+       | otherwise = idArity fun   	      
+
     ok_lam v = isTyVar v || isDictId v
-	-- The exprIsHNF is because eta reduction is not 
-	-- valid in general:  \x. bot  /=  bot
-	-- So we need to be sure that the "fun" is a value.
-	--
-	-- However, we always want to reduce (/\a -> f a) to f
-	-- This came up in a RULE: foldr (build (/\a -> g a))
-	--	did not match 	   foldr (build (/\b -> ...something complex...))
-	-- The type checker can insert these eta-expanded versions,
-	-- with both type and dictionary lambdas; hence the slightly 
-	-- ad-hoc isDictTy
 
     ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
 \end{code}
 
 
-	Try eta expansion for RHSs
+%************************************************************************
+%*									*
+		Eta expansion
+%*									*
+%************************************************************************
+
 
 We go for:
    f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
@@ -895,19 +1115,28 @@ where (in both cases)
 	* N is a NORMAL FORM (i.e. no redexes anywhere)
 	  wanting a suitable number of extra args.
 
+The biggest reason for doing this is for cases like
+
+	f = \x -> case x of
+		    True  -> \y -> e1
+		    False -> \y -> e2
+
+Here we want to get the lambdas together.  A good exmaple is the nofib
+program fibheaps, which gets 25% more allocation if you don't do this
+eta-expansion.
+
 We may have to sandwich some coerces between the lambdas
 to make the types work.   exprEtaExpandArity looks through coerces
 when computing arity; and etaExpand adds the coerces as necessary when
 actually computing the expansion.
 
 \begin{code}
-tryEtaExpansion :: OutExpr -> SimplM OutExpr
+tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr
 -- There is at least one runtime binder in the binders
-tryEtaExpansion body
-  = getUniquesSmpl 			`thenSmpl` \ us ->
-    returnSmpl (etaExpand fun_arity us body (exprType body))
+tryEtaExpansion dflags body
+  = etaExpand fun_arity body
   where
-    fun_arity = exprEtaExpandArity body
+    fun_arity = exprEtaExpandArity dflags body
 \end{code}
 
 
@@ -917,8 +1146,35 @@ tryEtaExpansion body
 %*									*
 %************************************************************************
 
-tryRhsTyLam tries this transformation, when the big lambda appears as
-the RHS of a let(rec) binding:
+Note [Floating and type abstraction]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this:
+	x = /\a. C e1 e2
+We'd like to float this to 
+	y1 = /\a. e1
+	y2 = /\a. e2
+	x  = /\a. C (y1 a) (y2 a)
+for the usual reasons: we want to inline x rather vigorously.
+
+You may think that this kind of thing is rare.  But in some programs it is
+common.  For example, if you do closure conversion you might get:
+
+	data a :-> b = forall e. (e -> a -> b) :$ e
+
+	f_cc :: forall a. a :-> a
+	f_cc = /\a. (\e. id a) :$ ()
+
+Now we really want to inline that f_cc thing so that the
+construction of the closure goes away. 
+
+So I have elaborated simplLazyBind to understand right-hand sides that look
+like
+	/\ a1..an. body
+
+and treat them specially. The real work is done in SimplUtils.abstractFloats,
+but there is quite a bit of plumbing in simplLazyBind as well.
+
+The same transformation is good when there are lets in the body:
 
 	/\abc -> let(rec) x = e in b
    ==>
@@ -940,25 +1196,6 @@ let-floating.
 This optimisation is CRUCIAL in eliminating the junk introduced by
 desugaring mutually recursive definitions.  Don't eliminate it lightly!
 
-So far as the implementation is concerned:
-
-	Invariant: go F e = /\tvs -> F e
-	
-	Equalities:
-		go F (Let x=e in b)
-		= Let x' = /\tvs -> F e 
-		  in 
-		  go G b
-		where
-		    G = F . Let x = x' tvs
-	
-		go F (Letrec xi=ei in b)
-		= Letrec {xi' = /\tvs -> G ei} 
-		  in
-		  go G b
-		where
-		  G = F . Let {xi = xi' tvs}
-
 [May 1999]  If we do this transformation *regardless* then we can
 end up with some pretty silly stuff.  For example, 
 
@@ -980,43 +1217,34 @@ and is of the form
 If we abstract this wrt the tyvar we then can't do the case inline
 as we would normally do.
 
+That's why the whole transformation is part of the same process that
+floats let-bindings and constructor arguments out of RHSs.  In particular,
+it is guarded by the doFloatFromRhs call in simplLazyBind.
 
-\begin{code}
-{-	Trying to do this in full laziness
-
-tryRhsTyLam :: SimplEnv -> [OutTyVar] -> OutExpr -> SimplM FloatsWithExpr
--- Call ensures that all the binders are type variables
-
-tryRhsTyLam env tyvars body 		-- Only does something if there's a let
-  |  not (all isTyVar tyvars)
-  || not (worth_it body)		-- inside a type lambda, 
-  = returnSmpl (emptyFloats env, body)	-- and a WHNF inside that
-
-  | otherwise
-  = go env (\x -> x) body
 
+\begin{code}
+abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
+abstractFloats main_tvs body_env body
+  = ASSERT( notNull body_floats )
+    do	{ (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
+	; return (float_binds, CoreSubst.substExpr subst body) }
   where
-    worth_it e@(Let _ _) = whnf_in_middle e
-    worth_it e		 = False
-
-    whnf_in_middle (Let (NonRec x rhs) e) | isUnLiftedType (idType x) = False
-    whnf_in_middle (Let _ e) = whnf_in_middle e
-    whnf_in_middle e	     = exprIsCheap e
-
-    main_tyvar_set = mkVarSet tyvars
-
-    go env fn (Let bind@(NonRec var rhs) body)
-      | exprIsTrivial rhs
-      = go env (fn . Let bind) body
-
-    go env fn (Let (NonRec var rhs) body)
-      = mk_poly tyvars_here var							`thenSmpl` \ (var', rhs') ->
-	addAuxiliaryBind env (NonRec var' (mkLams tyvars_here (fn rhs)))	$ \ env -> 
-	go env (fn . Let (mk_silly_bind var rhs')) body
-
+    main_tv_set = mkVarSet main_tvs
+    body_floats = getFloats body_env
+    empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
+
+    abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
+    abstract subst (NonRec id rhs)
+      = do { (poly_id, poly_app) <- mk_poly tvs_here id
+	   ; let poly_rhs = mkLams tvs_here rhs'
+		 subst'   = CoreSubst.extendIdSubst subst id poly_app
+	   ; return (subst', (NonRec poly_id poly_rhs)) }
       where
-
-	tyvars_here = varSetElems (main_tyvar_set `intersectVarSet` exprSomeFreeVars isTyVar rhs)
+	rhs' = CoreSubst.substExpr subst rhs
+	tvs_here | any isCoVar main_tvs = main_tvs	-- Note [Abstract over coercions]
+		 | otherwise 
+		 = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
+	
 		-- Abstract only over the type variables free in the rhs
 		-- wrt which the new binding is abstracted.  But the naive
 		-- approach of abstract wrt the tyvars free in the Id's type
@@ -1033,28 +1261,35 @@ tryRhsTyLam env tyvars body 		-- Only does something if there's a let
 		-- abstracting wrt *all* the tyvars.  We'll see if that
 		-- gives rise to problems.   SLPJ June 98
 
-    go env fn (Let (Rec prs) body)
-       = mapAndUnzipSmpl (mk_poly tyvars_here) vars	`thenSmpl` \ (vars', rhss') ->
-	 let
-	    gn body = fn (foldr Let body (zipWith mk_silly_bind vars rhss'))
-	    pairs   = vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss]
-	 in
-	 addAuxiliaryBind env (Rec pairs)		$ \ env ->
-	 go env gn body 
+    abstract subst (Rec prs)
+       = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
+	    ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
+		  poly_rhss = [mkLams tvs_here (CoreSubst.substExpr subst' rhs) | rhs <- rhss]
+	    ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
        where
-	 (vars,rhss) = unzip prs
-	 tyvars_here = varSetElems (main_tyvar_set `intersectVarSet` exprsSomeFreeVars isTyVar (map snd prs))
-		-- See notes with tyvars_here above
-
-    go env fn body = returnSmpl (emptyFloats env, fn body)
-
-    mk_poly tyvars_here var
-      = getUniqueSmpl		`thenSmpl` \ uniq ->
-	let
-	    poly_name = setNameUnique (idName var) uniq		-- Keep same name
-	    poly_ty   = mkForAllTys tyvars_here (idType var)	-- But new type of course
-	    poly_id   = mkLocalId poly_name poly_ty 
-
+	 (ids,rhss) = unzip prs
+	 	-- For a recursive group, it's a bit of a pain to work out the minimal
+		-- set of tyvars over which to abstract:
+		--	/\ a b c.  let x = ...a... in
+		--	 	   letrec { p = ...x...q...
+		--			    q = .....p...b... } in
+		--		   ...
+		-- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
+		-- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.  
+		-- Since it's a pain, we just use the whole set, which is always safe
+		-- 
+		-- If you ever want to be more selective, remember this bizarre case too:
+		--	x::a = x
+		-- Here, we must abstract 'x' over 'a'.
+	 tvs_here = main_tvs
+
+    mk_poly tvs_here var
+      = do { uniq <- getUniqueM
+	   ; let  poly_name = setNameUnique (idName var) uniq		-- Keep same name
+		  poly_ty   = mkForAllTys tvs_here (idType var)	-- But new type of course
+		  poly_id   = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.lhs
+			      mkLocalId poly_name poly_ty 
+	   ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
 		-- In the olden days, it was crucial to copy the occInfo of the original var, 
 		-- because we were looking at occurrence-analysed but as yet unsimplified code!
 		-- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
@@ -1067,10 +1302,17 @@ tryRhsTyLam env tyvars body 		-- Only does something if there's a let
 		-- where x* has an INLINE prag on it.  Now, once x* is inlined,
 		-- the occurrences of x' will be just the occurrences originally
 		-- pinned on x.
-	in
-	returnSmpl (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tyvars_here))
+\end{code}
+
+Note [Abstract over coercions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
+type variable a.  Rather than sort this mess out, we simply bale out and abstract
+wrt all the type variables if any of them are coercion variables.
+
+
+Historical note: if you use let-bindings instead of a substitution, beware of this:
 
-    mk_silly_bind var rhs = NonRec var (Note InlineMe rhs)
 		-- Suppose we start with:
 		--
 		--	x = /\ a -> let g = G in E
@@ -1090,48 +1332,159 @@ tryRhsTyLam env tyvars body 		-- Only does something if there's a let
 		-- Solution: put an INLINE note on g's RHS, so that poly_g seems
 		--	     to appear many times.  (NB: mkInlineMe eliminates
 		--	     such notes on trivial RHSs, so do it manually.)
--}
-\end{code}
 
 %************************************************************************
 %*									*
-\subsection{Case absorption and identity-case elimination}
+		prepareAlts
 %*									*
 %************************************************************************
 
-mkCase puts a case expression back together, trying various transformations first.
+prepareAlts tries these things:
+
+1.  Eliminate alternatives that cannot match, including the
+    DEFAULT alternative.
+
+2.  If the DEFAULT alternative can match only one possible constructor,
+    then make that constructor explicit.
+    e.g.
+        case e of x { DEFAULT -> rhs }
+     ===>
+        case e of x { (a,b) -> rhs }
+    where the type is a single constructor type.  This gives better code
+    when rhs also scrutinises x or e.
+
+3. Returns a list of the constructors that cannot holds in the
+   DEFAULT alternative (if there is one)
+
+Here "cannot match" includes knowledge from GADTs
+
+It's a good idea do do this stuff before simplifying the alternatives, to
+avoid simplifying alternatives we know can't happen, and to come up with
+the list of constructors that are handled, to put into the IdInfo of the
+case binder, for use when simplifying the alternatives.
+
+Eliminating the default alternative in (1) isn't so obvious, but it can
+happen:
+
+data Colour = Red | Green | Blue
+
+f x = case x of
+        Red -> ..
+        Green -> ..
+        DEFAULT -> h x
+
+h y = case y of
+        Blue -> ..
+        DEFAULT -> [ case y of ... ]
+
+If we inline h into f, the default case of the inlined h can't happen.
+If we don't notice this, we may end up filtering out *all* the cases
+of the inner case y, which give us nowhere to go!
 
 \begin{code}
-mkCase :: OutExpr -> OutId -> OutType
-       -> [OutAlt]		-- Increasing order
-       -> SimplM OutExpr
-
-mkCase scrut case_bndr ty alts
-  = getDOptsSmpl  			`thenSmpl` \dflags ->
-    mkAlts dflags scrut case_bndr alts	`thenSmpl` \ better_alts ->
-    mkCase1 scrut case_bndr ty better_alts
+prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
+prepareAlts scrut case_bndr' alts
+  = do	{ let (alts_wo_default, maybe_deflt) = findDefault alts
+	      alt_cons = [con | (con,_,_) <- alts_wo_default]
+	      imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
+		-- "imposs_deflt_cons" are handled 
+		--   EITHER by the context, 
+		--   OR by a non-DEFAULT branch in this case expression.
+
+	; default_alts <- prepareDefault case_bndr' mb_tc_app 
+					 imposs_deflt_cons maybe_deflt
+
+	; let trimmed_alts = filterOut impossible_alt alts_wo_default
+	      merged_alts  = mergeAlts trimmed_alts default_alts
+		-- We need the mergeAlts in case the new default_alt 
+		-- has turned into a constructor alternative.
+		-- The merge keeps the inner DEFAULT at the front, if there is one
+		-- and interleaves the alternatives in the right order
+
+	; return (imposs_deflt_cons, merged_alts) }
+  where
+    mb_tc_app = splitTyConApp_maybe (idType case_bndr')
+    Just (_, inst_tys) = mb_tc_app 
+
+    imposs_cons = case scrut of
+		    Var v -> otherCons (idUnfolding v)
+		    _     -> []
+
+    impossible_alt :: CoreAlt -> Bool
+    impossible_alt (con, _, _) | con `elem` imposs_cons = True
+    impossible_alt (DataAlt con, _, _) = dataConCannotMatch inst_tys con
+    impossible_alt _                   = False
+
+
+prepareDefault :: OutId		-- Case binder; need just for its type. Note that as an
+				--   OutId, it has maximum information; this is important.
+				--   Test simpl013 is an example
+	       -> Maybe (TyCon, [Type])	-- Type of scrutinee, decomposed
+	       -> [AltCon]	-- These cons can't happen when matching the default
+	       -> Maybe InExpr	-- Rhs
+	       -> SimplM [InAlt]	-- Still unsimplified
+					-- We use a list because it's what mergeAlts expects,
+
+--------- Fill in known constructor -----------
+prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
+  | 	-- This branch handles the case where we are 
+	-- scrutinisng an algebraic data type
+    isAlgTyCon tycon		-- It's a data type, tuple, or unboxed tuples.  
+  , not (isNewTyCon tycon)	-- We can have a newtype, if we are just doing an eval:
+				-- 	case x of { DEFAULT -> e }
+				-- and we don't want to fill in a default for them!
+  , Just all_cons <- tyConDataCons_maybe tycon
+  , not (null all_cons)		-- This is a tricky corner case.  If the data type has no constructors,
+				-- which GHC allows, then the case expression will have at most a default
+				-- alternative.  We don't want to eliminate that alternative, because the
+				-- invariant is that there's always one alternative.  It's more convenient
+				-- to leave	
+				--	case x of { DEFAULT -> e }     
+				-- as it is, rather than transform it to
+				--	error "case cant match"
+				-- which would be quite legitmate.  But it's a really obscure corner, and
+				-- not worth wasting code on.
+  , let imposs_data_cons = [con | DataAlt con <- imposs_cons]	-- We now know it's a data type 
+	impossible con   = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
+  = case filterOut impossible all_cons of
+	[]    -> return []	-- Eliminate the default alternative
+				-- altogether if it can't match
+
+	[con] -> 	-- It matches exactly one constructor, so fill it in
+		 do { tick (FillInCaseDefault case_bndr)
+                    ; us <- getUniquesM
+                    ; let (ex_tvs, co_tvs, arg_ids) =
+                              dataConRepInstPat us con inst_tys
+                    ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
+
+	_ -> return [(DEFAULT, [], deflt_rhs)]
+
+  | debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon)
+	-- Check for no data constructors
+        -- This can legitimately happen for type families, so don't report that
+  = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
+        $ return [(DEFAULT, [], deflt_rhs)]
+
+--------- Catch-all cases -----------
+prepareDefault _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
+  = return [(DEFAULT, [], deflt_rhs)]
+
+prepareDefault _case_bndr _bndr_ty _imposs_cons Nothing
+  = return []	-- No default branch
 \end{code}
 
 
-mkAlts tries these things:
 
-1.  If several alternatives are identical, merge them into
-    a single DEFAULT alternative.  I've occasionally seen this 
-    making a big difference:
+%************************************************************************
+%*									*
+		mkCase
+%*									*
+%************************************************************************
 
-	case e of		=====>     case e of
-	  C _ -> f x			     D v -> ....v....
-	  D v -> ....v....		     DEFAULT -> f x
-	  DEFAULT -> f x
+mkCase tries these things
 
-   The point is that we merge common RHSs, at least for the DEFAULT case.
-   [One could do something more elaborate but I've never seen it needed.]
-   To avoid an expensive test, we just merge branches equal to the *first*
-   alternative; this picks up the common cases
-	a) all branches equal
-	b) some branches equal to the DEFAULT (which occurs first)
+1.  Merge Nested Cases
 
-2.  Case merging:
        case e of b {             ==>   case e of b {
     	 p1 -> rhs1	                 p1 -> rhs1
     	 ...	                         ...
@@ -1144,12 +1497,37 @@ mkAlts tries these things:
        }  
     
     which merges two cases in one case when -- the default alternative of
-    the outer case scrutises the same variable as the outer case This
+    the outer case scrutises the same variable as the outer case. This
     transformation is called Case Merging.  It avoids that the same
     variable is scrutinised multiple times.
 
+2.  Eliminate Identity Case
+
+	case e of 		===> e
+		True  -> True;
+		False -> False
+
+    and similar friends.
+
+3.  Merge identical alternatives.
+    If several alternatives are identical, merge them into
+    a single DEFAULT alternative.  I've occasionally seen this 
+    making a big difference:
+
+	case e of		=====>     case e of
+	  C _ -> f x			     D v -> ....v....
+	  D v -> ....v....		     DEFAULT -> f x
+	  DEFAULT -> f x
 
-The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs):
+   The point is that we merge common RHSs, at least for the DEFAULT case.
+   [One could do something more elaborate but I've never seen it needed.]
+   To avoid an expensive test, we just merge branches equal to the *first*
+   alternative; this picks up the common cases
+	a) all branches equal
+	b) some branches equal to the DEFAULT (which occurs first)
+
+The case where Merge Identical Alternatives transformation showed up
+was like this (base/Foreign/C/Err/Error.lhs):
 
 	x | p `is` 1 -> e1
 	  | p `is` 2 -> e2
@@ -1169,295 +1547,175 @@ This gave rise to a horrible sequence of cases
 and similarly in cascade for all the join points!
 
 
-
 \begin{code}
---------------------------------------------------
---	1. Merge identical branches
---------------------------------------------------
-mkAlts dflags scrut case_bndr alts@((con1,bndrs1,rhs1) : con_alts)
-  | all isDeadBinder bndrs1,			-- Remember the default 
-    length filtered_alts < length con_alts	-- alternative comes first
-  = tick (AltMerge case_bndr)			`thenSmpl_`
-    returnSmpl better_alts
-  where
-    filtered_alts	 = filter keep con_alts
-    keep (con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
-    better_alts		 = (DEFAULT, [], rhs1) : filtered_alts
-
+mkCase, mkCase1, mkCase2 
+   :: DynFlags 
+   -> OutExpr -> OutId
+   -> [OutAlt]		-- Alternatives in standard (increasing) order
+   -> SimplM OutExpr
 
 --------------------------------------------------
---	2.  Merge nested cases
+--	1. Merge Nested Cases
 --------------------------------------------------
 
-mkAlts dflags scrut outer_bndr outer_alts
-  | dopt Opt_CaseMerge dflags,
-    (outer_alts_without_deflt, maybe_outer_deflt)   <- findDefault outer_alts,
-    Just (Case (Var scrut_var) inner_bndr _ inner_alts) <- maybe_outer_deflt,
-    scruting_same_var scrut_var
-  = let
-  	munged_inner_alts = [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts]
-  	munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
-  
-  	new_alts = mergeAlts outer_alts_without_deflt munged_inner_alts
-		-- The merge keeps the inner DEFAULT at the front, if there is one
-		-- and eliminates any inner_alts that are shadowed by the outer_alts
-    in
-    tick (CaseMerge outer_bndr)				`thenSmpl_`
-    returnSmpl new_alts
-    	-- Warning: don't call mkAlts recursively!
+mkCase dflags scrut outer_bndr ((DEFAULT, _, deflt_rhs) : outer_alts)
+  | dopt Opt_CaseMerge dflags
+  , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
+  , inner_scrut_var == outer_bndr
+  = do	{ tick (CaseMerge outer_bndr)
+
+	; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
+                                          (con, args, wrap_rhs rhs)
+		-- Simplifier's no-shadowing invariant should ensure
+		-- that outer_bndr is not shadowed by the inner patterns
+              wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
+		-- The let is OK even for unboxed binders, 
+
+	      wrapped_alts | isDeadBinder inner_bndr = inner_alts
+                           | otherwise               = map wrap_alt inner_alts
+
+	      merged_alts = mergeAlts outer_alts wrapped_alts
+		-- NB: mergeAlts gives priority to the left
+		--	case x of 
+		--	  A -> e1
+		--	  DEFAULT -> case x of 
+		--			A -> e2
+		--			B -> e3
+		-- When we merge, we must ensure that e1 takes 
+		-- precedence over e2 as the value for A!  
+
+	; mkCase1 dflags scrut outer_bndr merged_alts
+	}
+    	-- Warning: don't call mkCase recursively!
     	-- Firstly, there's no point, because inner alts have already had
     	-- mkCase applied to them, so they won't have a case in their default
     	-- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
     	-- in munge_rhs may put a case into the DEFAULT branch!
-  where
-    	-- We are scrutinising the same variable if it's
-    	-- the outer case-binder, or if the outer case scrutinises a variable
-    	-- (and it's the same).  Testing both allows us not to replace the
-    	-- outer scrut-var with the outer case-binder (Simplify.simplCaseBinder).
-    scruting_same_var = case scrut of
-			  Var outer_scrut -> \ v -> v == outer_bndr || v == outer_scrut
-			  other		  -> \ v -> v == outer_bndr
-
-------------------------------------------------
---	Catch-all
-------------------------------------------------
-
-mkAlts dflags scrut case_bndr other_alts = returnSmpl other_alts
-\end{code}
-
-
-
-=================================================================================
-
-mkCase1 tries these things
-
-1.  Eliminate the case altogether if possible
-
-2.  Case-identity:
-
-	case e of 		===> e
-		True  -> True;
-		False -> False
-
-    and similar friends.
-
-
-Start with a simple situation:
-
-	case x# of	===>   e[x#/y#]
-	  y# -> e
-
-(when x#, y# are of primitive type, of course).  We can't (in general)
-do this for algebraic cases, because we might turn bottom into
-non-bottom!
-
-Actually, we generalise this idea to look for a case where we're
-scrutinising a variable, and we know that only the default case can
-match.  For example:
-\begin{verbatim}
-	case x of
-	  0#    -> ...
-	  other -> ...(case x of
-			 0#    -> ...
-			 other -> ...) ...
-\end{code}
-Here the inner case can be eliminated.  This really only shows up in
-eliminating error-checking code.
-
-We also make sure that we deal with this very common case:
-
- 	case e of 
-	  x -> ...x...
-
-Here we are using the case as a strict let; if x is used only once
-then we want to inline it.  We have to be careful that this doesn't 
-make the program terminate when it would have diverged before, so we
-check that 
-	- x is used strictly, or
-	- e is already evaluated (it may so if e is a variable)
-
-Lastly, we generalise the transformation to handle this:
-
-	case e of	===> r
-	   True  -> r
-	   False -> r
-
-We only do this for very cheaply compared r's (constructors, literals
-and variables).  If pedantic bottoms is on, we only do it when the
-scrutinee is a PrimOp which can't fail.
-
-We do it *here*, looking at un-simplified alternatives, because we
-have to check that r doesn't mention the variables bound by the
-pattern in each alternative, so the binder-info is rather useful.
-
-So the case-elimination algorithm is:
-
-	1. Eliminate alternatives which can't match
-
-	2. Check whether all the remaining alternatives
-		(a) do not mention in their rhs any of the variables bound in their pattern
-	   and  (b) have equal rhss
 
-	3. Check we can safely ditch the case:
-		   * PedanticBottoms is off,
-		or * the scrutinee is an already-evaluated variable
-		or * the scrutinee is a primop which is ok for speculation
-			-- ie we want to preserve divide-by-zero errors, and
-			-- calls to error itself!
+mkCase dflags scrut bndr alts = mkCase1 dflags scrut bndr alts
 
-		or * [Prim cases] the scrutinee is a primitive variable
-
-		or * [Alg cases] the scrutinee is a variable and
-		     either * the rhs is the same variable
-			(eg case x of C a b -> x  ===>   x)
-		     or     * there is only one alternative, the default alternative,
-				and the binder is used strictly in its scope.
-				[NB this is helped by the "use default binder where
-				 possible" transformation; see below.]
-
-
-If so, then we can replace the case with one of the rhss.
-
-Further notes about case elimination
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider:	test :: Integer -> IO ()
-		test = print
-
-Turns out that this compiles to:
-    Print.test
-      = \ eta :: Integer
-	  eta1 :: State# RealWorld ->
-	  case PrelNum.< eta PrelNum.zeroInteger of wild { __DEFAULT ->
-	  case hPutStr stdout
-		 (PrelNum.jtos eta ($w[] @ Char))
-		 eta1
-	  of wild1 { (# new_s, a4 #) -> PrelIO.lvl23 new_s  }}
-
-Notice the strange '<' which has no effect at all. This is a funny one.  
-It started like this:
-
-f x y = if x < 0 then jtos x
-          else if y==0 then "" else jtos x
-
-At a particular call site we have (f v 1).  So we inline to get
-
-	if v < 0 then jtos x 
-	else if 1==0 then "" else jtos x
-
-Now simplify the 1==0 conditional:
-
-	if v<0 then jtos v else jtos v
-
-Now common-up the two branches of the case:
-
-	case (v<0) of DEFAULT -> jtos v
-
-Why don't we drop the case?  Because it's strict in v.  It's technically
-wrong to drop even unnecessary evaluations, and in practice they
-may be a result of 'seq' so we *definitely* don't want to drop those.
-I don't really know how to improve this situation.
-
-
-\begin{code}
 --------------------------------------------------
---	0. Check for empty alternatives
+--	2. Eliminate Identity Case
 --------------------------------------------------
 
--- This isn't strictly an error.  It's possible that the simplifer might "see"
--- that an inner case has no accessible alternatives before it "sees" that the
--- entire branch of an outer case is inaccessible.  So we simply
--- put an error case here insteadd
-mkCase1 scrut case_bndr ty []
-  = pprTrace "mkCase1: null alts" (ppr case_bndr <+> ppr scrut) $
-    return (mkApps (Var eRROR_ID)
-		   [Type ty, Lit (mkStringLit "Impossible alternative")])
-
---------------------------------------------------
---	1. Eliminate the case altogether if poss
---------------------------------------------------
-
-mkCase1 scrut case_bndr ty [(con,bndrs,rhs)]
-  -- See if we can get rid of the case altogether
-  -- See the extensive notes on case-elimination above
-  -- mkCase made sure that if all the alternatives are equal, 
-  -- then there is now only one (DEFAULT) rhs
- |  all isDeadBinder bndrs,
-
-	-- Check that the scrutinee can be let-bound instead of case-bound
-    exprOkForSpeculation scrut
-		-- OK not to evaluate it
-		-- This includes things like (==# a# b#)::Bool
-		-- so that we simplify 
-		-- 	case ==# a# b# of { True -> x; False -> x }
-		-- to just
-		--	x
-		-- This particular example shows up in default methods for
-		-- comparision operations (e.g. in (>=) for Int.Int32)
-	|| exprIsHNF scrut			-- It's already evaluated
-	|| var_demanded_later scrut		-- It'll be demanded later
-
---      || not opt_SimplPedanticBottoms)	-- Or we don't care!
---	We used to allow improving termination by discarding cases, unless -fpedantic-bottoms was on,
--- 	but that breaks badly for the dataToTag# primop, which relies on a case to evaluate
--- 	its argument:  case x of { y -> dataToTag# y }
---	Here we must *not* discard the case, because dataToTag# just fetches the tag from
---	the info pointer.  So we'll be pedantic all the time, and see if that gives any
--- 	other problems
---	Also we don't want to discard 'seq's
-  = tick (CaseElim case_bndr)			`thenSmpl_` 
-    returnSmpl (bindCaseBndr case_bndr scrut rhs)
-
-  where
-	-- The case binder is going to be evaluated later, 
-	-- and the scrutinee is a simple variable
-    var_demanded_later (Var v) = isStrictDmd (idNewDemandInfo case_bndr)
-    var_demanded_later other   = False
-
-
---------------------------------------------------
---	2. Identity case
---------------------------------------------------
-
-mkCase1 scrut case_bndr ty alts	-- Identity case
+mkCase1 _dflags scrut case_bndr alts	-- Identity case
   | all identity_alt alts
-  = tick (CaseIdentity case_bndr)		`thenSmpl_`
-    returnSmpl (re_note scrut)
+  = do { tick (CaseIdentity case_bndr)
+       ; return (re_cast scrut) }
   where
-    identity_alt (con, args, rhs) = de_note rhs `cheapEqExpr` identity_rhs con args
+    identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
 
-    identity_rhs (DataAlt con) args = mkConApp con (arg_tys ++ map varToCoreExpr args)
-    identity_rhs (LitAlt lit)  _    = Lit lit
-    identity_rhs DEFAULT       _    = Var case_bndr
+    check_eq DEFAULT       _    (Var v)   = v == case_bndr
+    check_eq (LitAlt lit') _    (Lit lit) = lit == lit'
+    check_eq (DataAlt con) args rhs       = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
+					 || rhs `cheapEqExpr` Var case_bndr
+    check_eq _ _ _ = False
 
     arg_tys = map Type (tyConAppArgs (idType case_bndr))
 
 	-- We've seen this:
-	--	case coerce T e of x { _ -> coerce T' x }
-	-- And we definitely want to eliminate this case!
-	-- So we throw away notes from the RHS, and reconstruct
-	-- (at least an approximation) at the other end
-    de_note (Note _ e) = de_note e
-    de_note e	       = e
+	--	case e of x { _ -> x `cast` c }
+	-- And we definitely want to eliminate this case, to give
+	--	e `cast` c
+	-- So we throw away the cast from the RHS, and reconstruct
+	-- it at the other end.  All the RHS casts must be the same
+	-- if (all identity_alt alts) holds.
+	-- 
+	-- Don't worry about nested casts, because the simplifier combines them
+    de_cast (Cast e _) = e
+    de_cast e	       = e
+
+    re_cast scrut = case head alts of
+			(_,_,Cast _ co) -> Cast scrut co
+			_    	        -> scrut
 
-	-- re_note wraps a coerce if it might be necessary
-    re_note scrut = case head alts of
-			(_,_,rhs1@(Note _ _)) -> mkCoerce2 (exprType rhs1) (idType case_bndr) scrut
-			other		      -> scrut
+--------------------------------------------------
+--	3. Merge Identical Alternatives
+--------------------------------------------------
+mkCase1 dflags scrut case_bndr ((_con1,bndrs1,rhs1) : con_alts)
+  | all isDeadBinder bndrs1			-- Remember the default 
+  , length filtered_alts < length con_alts	-- alternative comes first
+	-- Also Note [Dead binders]
+  = do	{ tick (AltMerge case_bndr)
+	; mkCase2 dflags scrut case_bndr alts' }
+  where
+    alts' = (DEFAULT, [], rhs1) : filtered_alts
+    filtered_alts	  = filter keep con_alts
+    keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
 
+mkCase1 dflags scrut bndr alts = mkCase2 dflags scrut bndr alts
 
 --------------------------------------------------
 --	Catch-all
 --------------------------------------------------
-mkCase1 scrut bndr ty alts = returnSmpl (Case scrut bndr ty alts)
+mkCase2 _dflags scrut bndr alts 
+  = return (Case scrut bndr (coreAltsType alts) alts)
 \end{code}
 
-
-When adding auxiliary bindings for the case binder, it's worth checking if
-its dead, because it often is, and occasionally these mkCase transformations
-cascade rather nicely.
-
-\begin{code}
-bindCaseBndr bndr rhs body
-  | isDeadBinder bndr = body
-  | otherwise	      = bindNonRec bndr rhs body
-\end{code}
+Note [Dead binders]
+~~~~~~~~~~~~~~~~~~~~
+Note that dead-ness is maintained by the simplifier, so that it is
+accurate after simplification as well as before.
+
+
+Note [Cascading case merge]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Case merging should cascade in one sweep, because it
+happens bottom-up
+
+      case e of a {
+        DEFAULT -> case a of b 
+                      DEFAULT -> case b of c {
+                                     DEFAULT -> e
+                                     A -> ea
+                      B -> eb
+        C -> ec
+==>
+      case e of a {
+        DEFAULT -> case a of b 
+                      DEFAULT -> let c = b in e
+                      A -> let c = b in ea
+                      B -> eb
+        C -> ec
+==>
+      case e of a {
+        DEFAULT -> let b = a in let c = b in e
+        A -> let b = a in let c = b in ea
+        B -> let b = a in eb
+        C -> ec
+
+
+However here's a tricky case that we still don't catch, and I don't
+see how to catch it in one pass:
+
+  case x of c1 { I# a1 ->
+  case a1 of c2 ->
+    0 -> ...
+    DEFAULT -> case x of c3 { I# a2 ->
+               case a2 of ...
+
+After occurrence analysis (and its binder-swap) we get this
+ 
+  case x of c1 { I# a1 -> 
+  let x = c1 in		-- Binder-swap addition
+  case a1 of c2 -> 
+    0 -> ...
+    DEFAULT -> case x of c3 { I# a2 ->
+               case a2 of ...
+
+When we simplify the inner case x, we'll see that
+x=c1=I# a1.  So we'll bind a2 to a1, and get
+
+  case x of c1 { I# a1 -> 
+  case a1 of c2 -> 
+    0 -> ...
+    DEFAULT -> case a1 of ...
+
+This is corect, but we can't do a case merge in this sweep
+because c2 /= a1.  Reason: the binding c1=I# a1 went inwards
+without getting changed to c1=I# c2.  
+
+I don't think this is worth fixing, even if I knew how. It'll
+all come out in the next pass anyway.
+
+  
\ No newline at end of file