X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FsimplCore%2FSimplUtils.lhs;h=060d346bd3f0e2ed46b4faf953a2d571abddc7f0;hb=a33ae68ab331a16fbb6e7d6931d1c38bd8f37a85;hp=8acf9134e3f8fadb47012a978ba5bc8ac84d3cac;hpb=65b5fb0ff8dd2af5c8bed6db5f059b4f60eb05de;p=ghc-hetmet.git

diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs
index 8acf913..060d346 100644
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -4,6 +4,13 @@
 \section[SimplUtils]{The simplifier utilities}
 
 \begin{code}
+{-# OPTIONS -w #-}
+-- The above warning supression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
 module SimplUtils (
 	-- Rebuilding
 	mkLam, mkCase, prepareAlts, bindCaseBndr,
@@ -13,9 +20,9 @@ module SimplUtils (
 	activeInline, activeRule, inlineMode,
 
 	-- The continuation type
-	SimplCont(..), DupFlag(..), LetRhsFlag(..), 
+	SimplCont(..), DupFlag(..), ArgInfo(..),
 	contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, 
-	countValArgs, countArgs,
+	countValArgs, countArgs, splitInlineCont,
 	mkBoringStop, mkLazyArgStop, mkRhsStop, contIsRhsOrArg,
 	interestingCallContext, interestingArgContext,
 
@@ -39,16 +46,19 @@ import CoreUnfold
 import MkId
 import Name
 import Id
+import Var	( isCoVar )
 import NewDemand
 import SimplMonad
-import Type
+import Type	hiding( substTy )
 import TyCon
 import DataCon
 import Unify	( dataConCannotMatch )
 import VarSet
 import BasicTypes
 import Util
+import MonadUtils
 import Outputable
+
 import List( nub )
 \end{code}
 
@@ -83,14 +93,11 @@ Key points:
 data SimplCont	
   = Stop		-- An empty context, or hole, []     
 	OutType		-- Type of the result
-	LetRhsFlag
-	Bool		-- True <=> There is something interesting about
+	CallCtxt	-- True <=> There is something interesting about
 			--          the context, and hence the inliner
 			--	    should be a bit keener (see interestingCallContext)
-			-- Two cases:
-			-- (a) This is the RHS of a thunk whose type suggests
-			--     that update-in-place would be possible
-			-- (b) This is an argument of a function that has RULES
+			-- Specifically:
+			--     This is an argument of a function that has RULES
 			--     Inlining the call might allow the rule to fire
 
   | CoerceIt 		-- C `cast` co
@@ -115,22 +122,28 @@ data SimplCont
 
   | StrictArg 		-- e C
 	OutExpr OutType		-- e and its type
-	(Bool,[Bool])		-- Whether the function at the head of e has rules,
-	SimplCont		--     plus strictness flags for further args
-
-data LetRhsFlag = AnArg		-- It's just an argument not a let RHS
-		| AnRhs 	-- It's the RHS of a let (so please float lets out of big lambdas)
-
-instance Outputable LetRhsFlag where
-  ppr AnArg = ptext SLIT("arg")
-  ppr AnRhs = ptext SLIT("rhs")
+	CallCtxt		-- Whether *this* argument position is interesting
+ 	ArgInfo			-- Whether the function at the head of e has rules, etc
+	SimplCont		--     plus strictness flags for *further* args
+
+data ArgInfo 
+  = ArgInfo {
+	ai_rules :: Bool,	-- Function has rules (recursively)
+				--	=> be keener to inline in all args
+	ai_strs :: [Bool],	-- Strictness of arguments
+				--   Usually infinite, but if it is finite it guarantees
+				--   that the function diverges after being given
+				--   that number of args
+	ai_discs :: [Int]	-- Discounts for arguments; non-zero => be keener to inline
+				--   Always infinite
+    }
 
 instance Outputable SimplCont where
-  ppr (Stop ty is_rhs _)  	     = ptext SLIT("Stop") <> brackets (ppr is_rhs) <+> ppr ty
+  ppr (Stop ty _) 	 	     = ptext SLIT("Stop") <+> ppr ty
   ppr (ApplyTo dup arg se cont)      = ((ptext SLIT("ApplyTo") <+> ppr dup <+> pprParendExpr arg)
 					  {-  $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
   ppr (StrictBind b _ _ _ cont)      = (ptext SLIT("StrictBind") <+> ppr b) $$ ppr cont
-  ppr (StrictArg f _ _ cont)         = (ptext SLIT("StrictArg") <+> ppr f) $$ ppr cont
+  ppr (StrictArg f _ _ _ cont)       = (ptext SLIT("StrictArg") <+> ppr f) $$ ppr cont
   ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$ 
 				       (nest 4 (ppr alts)) $$ ppr cont 
   ppr (CoerceIt co cont)	     = (ptext SLIT("CoerceIt") <+> ppr co) $$ ppr cont
@@ -145,18 +158,19 @@ instance Outputable DupFlag where
 
 -------------------
 mkBoringStop :: OutType -> SimplCont
-mkBoringStop ty = Stop ty AnArg False
+mkBoringStop ty = Stop ty BoringCtxt
 
-mkLazyArgStop :: OutType -> Bool -> SimplCont
-mkLazyArgStop ty has_rules = Stop ty AnArg (canUpdateInPlace ty || has_rules)
+mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
+mkLazyArgStop ty cci = Stop ty cci
 
 mkRhsStop :: OutType -> SimplCont
-mkRhsStop ty = Stop ty AnRhs (canUpdateInPlace ty)
+mkRhsStop ty = Stop ty BoringCtxt
 
-contIsRhsOrArg (Stop {})       = True
-contIsRhsOrArg (StrictBind {}) = True
-contIsRhsOrArg (StrictArg {})  = True
-contIsRhsOrArg other	       = False
+-------------------
+contIsRhsOrArg (Stop {})       	 = True
+contIsRhsOrArg (StrictBind {}) 	 = True
+contIsRhsOrArg (StrictArg {})  	 = True
+contIsRhsOrArg other	         = False
 
 -------------------
 contIsDupable :: SimplCont -> Bool
@@ -175,8 +189,8 @@ contIsTrivial other			  = False
 
 -------------------
 contResultType :: SimplCont -> OutType
-contResultType (Stop to_ty _ _)	      	 = to_ty
-contResultType (StrictArg _ _ _ cont) 	 = contResultType cont
+contResultType (Stop to_ty _)	      	 = to_ty
+contResultType (StrictArg _ _ _ _ cont)  = contResultType cont
 contResultType (StrictBind _ _ _ _ cont) = contResultType cont
 contResultType (ApplyTo _ _ _ cont)   	 = contResultType cont
 contResultType (CoerceIt _ cont)      	 = contResultType cont
@@ -203,6 +217,26 @@ dropArgs :: Int -> SimplCont -> SimplCont
 dropArgs 0 cont = cont
 dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
 dropArgs n other		= pprPanic "dropArgs" (ppr n <+> ppr other)
+
+--------------------
+splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont)
+-- Returns Nothing if the continuation should dissolve an InlineMe Note
+-- Return Just (c1,c2) otherwise, 
+--	where c1 is the continuation to put inside the InlineMe 
+--	and   c2 outside
+
+-- Example: (__inline_me__ (/\a. e)) ty
+--	Here we want to do the beta-redex without dissolving the InlineMe
+-- See test simpl017 (and Trac #1627) for a good example of why this is important
+
+splitInlineCont (ApplyTo dup (Type ty) se c)
+  | Just (c1, c2) <- splitInlineCont c 	        = Just (ApplyTo dup (Type ty) se c1, c2)
+splitInlineCont cont@(Stop ty _)  	     	= Just (mkBoringStop ty, cont)
+splitInlineCont cont@(StrictBind bndr _ _ se _) = Just (mkBoringStop (substTy se (idType bndr)), cont)
+splitInlineCont cont@(StrictArg _ fun_ty _ _ _) = Just (mkBoringStop (funArgTy fun_ty), cont)
+splitInlineCont other				= Nothing
+	-- NB: the calculation of the type for mkBoringStop is an annoying
+	--     duplication of the same calucation in mkDupableCont
 \end{code}
 
 
@@ -273,62 +307,28 @@ applies when x is bound to a lambda expression.  Hence
 contIsInteresting looks for case expressions with just a single
 default case.
 
+
 \begin{code}
-interestingCallContext :: Bool 		-- False <=> no args at all
-		       -> Bool		-- False <=> no value args
-		       -> SimplCont -> Bool
-	-- The "lone-variable" case is important.  I spent ages
-	-- messing about with unsatisfactory varaints, but this is nice.
-	-- The idea is that if a variable appear all alone
-	--	as an arg of lazy fn, or rhs	Stop
-	-- 	as scrutinee of a case		Select
-	--	as arg of a strict fn		ArgOf
-	-- then we should not inline it (unless there is some other reason,
-	-- e.g. is is the sole occurrence).  We achieve this by making
-	-- interestingCallContext return False for a lone variable.
-	--
-	-- Why?  At least in the case-scrutinee situation, turning
-	--	let x = (a,b) in case x of y -> ...
-	-- into
-	--	let x = (a,b) in case (a,b) of y -> ...
-	-- and thence to 
-	--	let x = (a,b) in let y = (a,b) in ...
-	-- is bad if the binding for x will remain.
-	--
-	-- Another example: I discovered that strings
-	-- were getting inlined straight back into applications of 'error'
-	-- because the latter is strict.
-	--	s = "foo"
-	--	f = \x -> ...(error s)...
-
-	-- Fundamentally such contexts should not ecourage inlining because
-	-- the context can ``see'' the unfolding of the variable (e.g. case or a RULE)
-	-- so there's no gain.
-	--
-	-- However, even a type application or coercion isn't a lone variable.
-	-- Consider
-	--	case $fMonadST @ RealWorld of { :DMonad a b c -> c }
-	-- We had better inline that sucker!  The case won't see through it.
-	--
-	-- For now, I'm treating treating a variable applied to types 
-	-- in a *lazy* context "lone". The motivating example was
-	--	f = /\a. \x. BIG
-	--	g = /\a. \y.  h (f a)
-	-- There's no advantage in inlining f here, and perhaps
-	-- a significant disadvantage.  Hence some_val_args in the Stop case
-
-interestingCallContext some_args some_val_args cont
+interestingCallContext :: SimplCont -> CallCtxt
+interestingCallContext cont
   = interesting cont
   where
-    interesting (Select {})              = some_args
-    interesting (ApplyTo {})             = True	-- Can happen if we have (coerce t (f x)) y
-						-- Perhaps True is a bit over-keen, but I've
-						-- seen (coerce f) x, where f has an INLINE prag,
-						-- So we have to give some motivaiton for inlining it
-    interesting (StrictArg {})	         = some_val_args
-    interesting (StrictBind {})	         = some_val_args	-- ??
-    interesting (Stop ty _ interesting)  = some_val_args && interesting
-    interesting (CoerceIt _ cont)        = interesting cont
+    interestingCtxt = ArgCtxt False 2	-- Give *some* incentive!
+
+    interesting (Select _ bndr _ _ _)
+	| isDeadBinder bndr       = CaseCtxt
+	| otherwise	          = interestingCtxt
+		
+    interesting (ApplyTo {})      = interestingCtxt
+				-- Can happen if we have (coerce t (f x)) y
+				-- Perhaps interestingCtxt is a bit over-keen, but I've
+				-- seen (coerce f) x, where f has an INLINE prag,
+				-- So we have to give some motivation for inlining it
+
+    interesting (StrictArg _ _ cci _ _)	= cci
+    interesting (StrictBind {})	  	= BoringCtxt
+    interesting (Stop ty cci)   	= cci
+    interesting (CoerceIt _ cont) 	= interesting cont
 	-- If this call is the arg of a strict function, the context
 	-- is a bit interesting.  If we inline here, we may get useful
 	-- evaluation information to avoid repeated evals: e.g.
@@ -349,21 +349,24 @@ interestingCallContext some_args some_val_args cont
 mkArgInfo :: Id
 	  -> Int	-- Number of value args
 	  -> SimplCont	-- Context of the cal
-	  -> (Bool, [Bool])	-- Arg info
--- The arg info consists of
---  * A Bool indicating if the function has rules (recursively)
---  * A [Bool] indicating strictness for each arg
--- The [Bool] is usually infinite, but if it is finite it 
--- guarantees that the function diverges after being given
--- that number of args
+	  -> ArgInfo
 
 mkArgInfo fun n_val_args call_cont
-  = (interestingArgContext fun call_cont, fun_stricts)
+  = ArgInfo { ai_rules = interestingArgContext fun call_cont
+	    , ai_strs  = arg_stricts
+	    , ai_discs = arg_discounts }
   where
-    vanilla_stricts, fun_stricts :: [Bool]
+    vanilla_discounts, arg_discounts :: [Int]
+    vanilla_discounts = repeat 0
+    arg_discounts = case idUnfolding fun of
+			CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _)
+			      -> discounts ++ vanilla_discounts
+			other -> vanilla_discounts
+
+    vanilla_stricts, arg_stricts :: [Bool]
     vanilla_stricts  = repeat False
 
-    fun_stricts
+    arg_stricts
       = case splitStrictSig (idNewStrictness fun) of
 	  (demands, result_info)
 		| not (demands `lengthExceeds` n_val_args)
@@ -389,7 +392,9 @@ interestingArgContext :: Id -> SimplCont -> Bool
 -- where g has rules, then we *do* want to inline f, in case it
 -- exposes a rule that might fire.  Similarly, if the context is
 --	h (g (f x x))
--- where h has rules, then we do want to inline f.
+-- where h has rules, then we do want to inline f; hence the
+-- call_cont argument to interestingArgContext
+--
 -- The interesting_arg_ctxt flag makes this happen; if it's
 -- set, the inliner gets just enough keener to inline f 
 -- regardless of how boring f's arguments are, if it's marked INLINE
@@ -397,36 +402,18 @@ interestingArgContext :: Id -> SimplCont -> Bool
 -- The alternative would be to *always* inline an INLINE function,
 -- regardless of how boring its context is; but that seems overkill
 -- For example, it'd mean that wrapper functions were always inlined
-interestingArgContext fn cont
-  = idHasRules fn || go cont
+interestingArgContext fn call_cont
+  = idHasRules fn || go call_cont
   where
-    go (Select {})	      = False
-    go (ApplyTo {})	      = False
-    go (StrictArg {})	      = True
-    go (StrictBind {})	      = False	-- ??
-    go (CoerceIt _ c)	      = go c
-    go (Stop _ _ interesting) = interesting
-
--------------------
-canUpdateInPlace :: Type -> Bool
--- Consider   let x = <wurble> in ...
--- If <wurble> returns an explicit constructor, we might be able
--- to do update in place.  So we treat even a thunk RHS context
--- as interesting if update in place is possible.  We approximate
--- this by seeing if the type has a single constructor with a
--- small arity.  But arity zero isn't good -- we share the single copy
--- for that case, so no point in sharing.
-
-canUpdateInPlace ty 
-  | not opt_UF_UpdateInPlace = False
-  | otherwise
-  = case splitTyConApp_maybe ty of 
-	Nothing		-> False 
-	Just (tycon, _) -> case tyConDataCons_maybe tycon of
-				Just [dc]  -> arity == 1 || arity == 2
-				           where
-					      arity = dataConRepArity dc
-				other -> False
+    go (Select {})	       = False
+    go (ApplyTo {})	       = False
+    go (StrictArg _ _ cci _ _) = interesting cci
+    go (StrictBind {})	       = False	-- ??
+    go (CoerceIt _ c)	       = go c
+    go (Stop _ cci)            = interesting cci
+
+    interesting (ArgCtxt rules _) = rules
+    interesting other		  = False
 \end{code}
 
 
@@ -446,7 +433,7 @@ settings:
 			(d) Simplifying a GHCi expression or Template 
 				Haskell splice
 
-	SimplPhase n	Used at all other times
+	SimplPhase n _	 Used at all other times
 
 The key thing about SimplGently is that it does no call-site inlining.
 Before full laziness we must be careful not to inline wrappers,
@@ -561,7 +548,7 @@ y's occurrence info, which breaks the invariant.  It matters: y
 might have a BIG rhs, which will now be dup'd at every occurrenc of x.
 
 
-Evne RHSs labelled InlineMe aren't caught here, because there might be
+Even RHSs labelled InlineMe aren't caught here, because there might be
 no benefit from inlining at the call site.
 
 [Sept 01] Don't unconditionally inline a top-level thing, because that
@@ -595,8 +582,8 @@ preInlineUnconditionally env top_lvl bndr rhs
   where
     phase = getMode env
     active = case phase of
-		   SimplGently  -> isAlwaysActive prag
-		   SimplPhase n -> isActive n prag
+		   SimplGently    -> isAlwaysActive prag
+		   SimplPhase n _ -> isActive n prag
     prag = idInlinePragma bndr
 
     try_once in_lam int_cxt	-- There's one textual occurrence
@@ -630,8 +617,8 @@ preInlineUnconditionally env top_lvl bndr rhs
     canInlineInLam _			= False
 
     early_phase = case phase of
-			SimplPhase 0 -> False
-			other	     -> True
+			SimplPhase 0 _ -> False
+			other	       -> True
 -- If we don't have this early_phase test, consider
 --	x = length [1,2,3]
 -- The full laziness pass carefully floats all the cons cells to
@@ -751,8 +738,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 
   where
     active = case getMode env of
-		   SimplGently  -> isAlwaysActive prag
-		   SimplPhase n -> isActive n prag
+		   SimplGently    -> isAlwaysActive prag
+		   SimplPhase n _ -> isActive n prag
     prag = idInlinePragma bndr
 
 activeInline :: SimplEnv -> OutId -> Bool
@@ -774,7 +761,7 @@ activeInline env id
 	-- and they are now constructed as Compulsory unfoldings (in MkId)
 	-- so they'll happen anyway.
 
-      SimplPhase n -> isActive n prag
+      SimplPhase n _ -> isActive n prag
   where
     prag = idInlinePragma id
 
@@ -785,13 +772,13 @@ activeRule dflags env
   = Nothing	-- Rewriting is off
   | otherwise
   = case getMode env of
-	SimplGently  -> Just isAlwaysActive
+	SimplGently    -> Just isAlwaysActive
 			-- Used to be Nothing (no rules in gentle mode)
 			-- Main motivation for changing is that I wanted
 			-- 	lift String ===> ...
 			-- to work in Template Haskell when simplifying
 			-- splices, so we get simpler code for literal strings
-	SimplPhase n -> Just (isActive n)
+	SimplPhase n _ -> Just (isActive n)
 \end{code}
 
 
@@ -814,12 +801,14 @@ mkLam bndrs body
 	; mkLam' dflags bndrs body }
   where
     mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
-    mkLam' dflags bndrs (Cast body@(Lam _ _) co)
+    mkLam' dflags bndrs (Cast body co)
+      | not (any bad bndrs)
 	-- Note [Casts and lambdas]
-      = do { lam <- mkLam' dflags (bndrs ++ bndrs') body'
+      = do { lam <- mkLam' dflags bndrs body
 	   ; return (mkCoerce (mkPiTypes bndrs co) lam) }
-      where	
-	(bndrs',body') = collectBinders body
+      where
+	co_vars  = tyVarsOfType co
+	bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars      
 
     mkLam' dflags bndrs body
       | dopt Opt_DoEtaReduction dflags,
@@ -833,7 +822,7 @@ mkLam bndrs body
  	   ; return (mkLams bndrs body') }
    
       | otherwise 
-      = returnSmpl (mkLams bndrs body)
+      = return (mkLams bndrs body)
 \end{code}
 
 Note [Casts and lambdas]
@@ -847,9 +836,26 @@ So this equation in mkLam' floats the g1 out, thus:
 	(\x. e `cast` g1)  -->  (\x.e) `cast` (tx -> g1)
 where x:tx.
 
-In general, this floats casts outside lambdas, where (I hope) they might meet
-and cancel with some other cast.
-
+In general, this floats casts outside lambdas, where (I hope) they
+might meet and cancel with some other cast:
+	\x. e `cast` co   ===>   (\x. e) `cast` (tx -> co)
+	/\a. e `cast` co  ===>   (/\a. e) `cast` (/\a. co)
+	/\g. e `cast` co  ===>   (/\g. e) `cast` (/\g. co)
+  		  	  (if not (g `in` co))
+
+Notice that it works regardless of 'e'.  Originally it worked only
+if 'e' was itself a lambda, but in some cases that resulted in 
+fruitless iteration in the simplifier.  A good example was when
+compiling Text.ParserCombinators.ReadPrec, where we had a definition 
+like	(\x. Get `cast` g)
+where Get is a constructor with nonzero arity.  Then mkLam eta-expanded
+the Get, and the next iteration eta-reduced it, and then eta-expanded 
+it again.
+
+Note also the side condition for the case of coercion binders.
+It does not make sense to transform
+	/\g. e `cast` g  ==>  (/\g.e) `cast` (/\g.g)
+because the latter is not well-kinded.
 
 --	c) floating lets out through big lambdas 
 --		[only if all tyvar lambdas, and only if this lambda
@@ -862,55 +868,97 @@ and cancel with some other cast.
 			-- if this is indeed a right-hand side; otherwise
 			-- we end up floating the thing out, only for float-in
 			-- to float it right back in again!
- = tryRhsTyLam env bndrs body		`thenSmpl` \ (floats, body') ->
-   returnSmpl (floats, mkLams bndrs body')
+ = do (floats, body') <- tryRhsTyLam env bndrs body
+      return (floats, mkLams bndrs body')
 -}
 
 
 %************************************************************************
 %*									*
-\subsection{Eta expansion and reduction}
+		Eta reduction
 %*									*
 %************************************************************************
 
-We try for eta reduction here, but *only* if we get all the 
-way to an exprIsTrivial expression.    
-We don't want to remove extra lambdas unless we are going 
-to avoid allocating this thing altogether
+Note [Eta reduction conditions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We try for eta reduction here, but *only* if we get all the way to an
+trivial expression.  We don't want to remove extra lambdas unless we
+are going to avoid allocating this thing altogether.
+
+There are some particularly delicate points here:
+
+* Eta reduction is not valid in general:  
+	\x. bot  /=  bot
+  This matters, partly for old-fashioned correctness reasons but,
+  worse, getting it wrong can yield a seg fault. Consider
+	f = \x.f x
+	h y = case (case y of { True -> f `seq` True; False -> False }) of
+		True -> ...; False -> ...
+
+  If we (unsoundly) eta-reduce f to get f=f, the strictness analyser
+  says f=bottom, and replaces the (f `seq` True) with just
+  (f `cast` unsafe-co).  BUT, as thing stand, 'f' got arity 1, and it
+  *keeps* arity 1 (perhaps also wrongly).  So CorePrep eta-expands 
+  the definition again, so that it does not termninate after all.
+  Result: seg-fault because the boolean case actually gets a function value.
+  See Trac #1947.
+
+  So it's important to to the right thing.
+
+* We need to be careful if we just look at f's arity. Currently (Dec07),
+  f's arity is visible in its own RHS (see Note [Arity robustness] in 
+  SimplEnv) so we must *not* trust the arity when checking that 'f' is
+  a value.  Instead, look at the unfolding. 
+
+  However for GlobalIds we can look at the arity; and for primops we
+  must, since they have no unfolding.  
+
+* Regardless of whether 'f' is a vlaue, we always want to 
+  reduce (/\a -> f a) to f
+  This came up in a RULE: foldr (build (/\a -> g a))
+  did not match 	   foldr (build (/\b -> ...something complex...))
+  The type checker can insert these eta-expanded versions,
+  with both type and dictionary lambdas; hence the slightly 
+  ad-hoc isDictId
+
+These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
+Alas.
 
 \begin{code}
 tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr
 tryEtaReduce bndrs body 
-	-- We don't use CoreUtils.etaReduce, because we can be more
-	-- efficient here:
-	--  (a) we already have the binders
-	--  (b) we can do the triviality test before computing the free vars
   = go (reverse bndrs) body
   where
     go (b : bs) (App fun arg) | ok_arg b arg = go bs fun	-- Loop round
     go []       fun           | ok_fun fun   = Just fun		-- Success!
     go _        _			     = Nothing		-- Failure!
 
-    ok_fun fun =  exprIsTrivial fun
-	       && not (any (`elemVarSet` (exprFreeVars fun)) bndrs)
-	       && (exprIsHNF fun || all ok_lam bndrs)
+	-- Note [Eta reduction conditions]
+    ok_fun (App fun (Type ty)) 
+	| not (any (`elemVarSet` tyVarsOfType ty) bndrs)
+	=  ok_fun fun
+    ok_fun (Var fun_id)
+	=  not (fun_id `elem` bndrs)
+	&& (ok_fun_id fun_id || all ok_lam bndrs)
+    ok_fun _fun = False
+
+    ok_fun_id fun
+	| isLocalId fun       = isEvaldUnfolding (idUnfolding fun)
+	| isDataConWorkId fun = True
+	| isGlobalId fun      = idArity fun > 0
+
     ok_lam v = isTyVar v || isDictId v
-	-- The exprIsHNF is because eta reduction is not 
-	-- valid in general:  \x. bot  /=  bot
-	-- So we need to be sure that the "fun" is a value.
-	--
-	-- However, we always want to reduce (/\a -> f a) to f
-	-- This came up in a RULE: foldr (build (/\a -> g a))
-	--	did not match 	   foldr (build (/\b -> ...something complex...))
-	-- The type checker can insert these eta-expanded versions,
-	-- with both type and dictionary lambdas; hence the slightly 
-	-- ad-hoc isDictTy
 
     ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
 \end{code}
 
 
-	Try eta expansion for RHSs
+%************************************************************************
+%*									*
+		Eta expansion
+%*									*
+%************************************************************************
+
 
 We go for:
    f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
@@ -925,6 +973,16 @@ where (in both cases)
 	* N is a NORMAL FORM (i.e. no redexes anywhere)
 	  wanting a suitable number of extra args.
 
+The biggest reason for doing this is for cases like
+
+	f = \x -> case x of
+		    True  -> \y -> e1
+		    False -> \y -> e2
+
+Here we want to get the lambdas together.  A good exmaple is the nofib
+program fibheaps, which gets 25% more allocation if you don't do this
+eta-expansion.
+
 We may have to sandwich some coerces between the lambdas
 to make the types work.   exprEtaExpandArity looks through coerces
 when computing arity; and etaExpand adds the coerces as necessary when
@@ -933,9 +991,9 @@ actually computing the expansion.
 \begin{code}
 tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr
 -- There is at least one runtime binder in the binders
-tryEtaExpansion dflags body
-  = getUniquesSmpl 			`thenSmpl` \ us ->
-    returnSmpl (etaExpand fun_arity us body (exprType body))
+tryEtaExpansion dflags body = do
+    us <- getUniquesM
+    return (etaExpand fun_arity us body (exprType body))
   where
     fun_arity = exprEtaExpandArity dflags body
 \end{code}
@@ -1027,7 +1085,7 @@ it is guarded by the doFloatFromRhs call in simplLazyBind.
 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
 abstractFloats main_tvs body_env body
   = ASSERT( notNull body_floats )
-    do	{ (subst, float_binds) <- mapAccumLSmpl abstract empty_subst body_floats
+    do	{ (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
 	; return (float_binds, CoreSubst.substExpr subst body) }
   where
     main_tv_set = mkVarSet main_tvs
@@ -1041,8 +1099,11 @@ abstractFloats main_tvs body_env body
 		 subst'   = CoreSubst.extendIdSubst subst id poly_app
 	   ; return (subst', (NonRec poly_id poly_rhs)) }
       where
-	rhs'     = CoreSubst.substExpr subst rhs
-	tvs_here = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
+	rhs' = CoreSubst.substExpr subst rhs
+	tvs_here | any isCoVar main_tvs = main_tvs	-- Note [Abstract over coercions]
+		 | otherwise 
+		 = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
+	
 		-- Abstract only over the type variables free in the rhs
 		-- wrt which the new binding is abstracted.  But the naive
 		-- approach of abstract wrt the tyvars free in the Id's type
@@ -1060,7 +1121,7 @@ abstractFloats main_tvs body_env body
 		-- gives rise to problems.   SLPJ June 98
 
     abstract subst (Rec prs)
-       = do { (poly_ids, poly_apps) <- mapAndUnzipSmpl (mk_poly tvs_here) ids
+       = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
 	    ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
 		  poly_rhss = [mkLams tvs_here (CoreSubst.substExpr subst' rhs) | rhs <- rhss]
 	    ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
@@ -1082,7 +1143,7 @@ abstractFloats main_tvs body_env body
 	 tvs_here = main_tvs
 
     mk_poly tvs_here var
-      = do { uniq <- getUniqueSmpl
+      = do { uniq <- getUniqueM
 	   ; let  poly_name = setNameUnique (idName var) uniq		-- Keep same name
 		  poly_ty   = mkForAllTys tvs_here (idType var)	-- But new type of course
 		  poly_id   = mkLocalId poly_name poly_ty 
@@ -1101,6 +1162,13 @@ abstractFloats main_tvs body_env body
 		-- pinned on x.
 \end{code}
 
+Note [Abstract over coercions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
+type variable a.  Rather than sort this mess out, we simply bale out and abstract
+wrt all the type variables if any of them are coercion variables.
+
+
 Historical note: if you use let-bindings instead of a substitution, beware of this:
 
 		-- Suppose we start with:
@@ -1191,8 +1259,8 @@ have to check that r doesn't mention the variables bound by the
 pattern in each alternative, so the binder-info is rather useful.
 
 \begin{code}
-prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
-prepareAlts scrut case_bndr' alts
+prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
+prepareAlts env scrut case_bndr' alts
   = do	{ dflags <- getDOptsSmpl
 	; alts <- combineIdenticalAlts case_bndr' alts
 
@@ -1203,7 +1271,7 @@ prepareAlts scrut case_bndr' alts
 		--   EITHER by the context, 
 		--   OR by a non-DEFAULT branch in this case expression.
 
-	; default_alts <- prepareDefault dflags scrut case_bndr' mb_tc_app 
+	; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app 
 					 imposs_deflt_cons maybe_deflt
 
 	; let trimmed_alts = filterOut impossible_alt alts_wo_default
@@ -1249,7 +1317,7 @@ combineIdenticalAlts case_bndr alts = return alts
 --			Prepare the default alternative
 -------------------------------------------------------------------------
 prepareDefault :: DynFlags
-	       -> OutExpr	-- Scrutinee
+	       -> SimplEnv
 	       -> OutId		-- Case binder; need just for its type. Note that as an
 				--   OutId, it has maximum information; this is important.
 				--   Test simpl013 is an example
@@ -1261,10 +1329,16 @@ prepareDefault :: DynFlags
 					-- And becuase case-merging can cause many to show up
 
 -------	Merge nested cases ----------
-prepareDefault dflags scrut outer_bndr bndr_ty imposs_cons (Just deflt_rhs)
+prepareDefault dflags env outer_bndr bndr_ty imposs_cons (Just deflt_rhs)
   | dopt Opt_CaseMerge dflags
-  , Case (Var scrut_var) inner_bndr _ inner_alts <- deflt_rhs
-  , scruting_same_var scrut_var
+  , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
+  , DoneId inner_scrut_var' <- substId env inner_scrut_var
+	-- Remember, inner_scrut_var is an InId, but outer_bndr is an OutId
+  , inner_scrut_var' == outer_bndr
+	-- NB: the substId means that if the outer scrutinee was a 
+	--     variable, and inner scrutinee is the same variable, 
+	--     then inner_scrut_var' will be outer_bndr
+	--     via the magic of simplCaseBinder
   = do	{ tick (CaseMerge outer_bndr)
 
 	; let munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
@@ -1284,17 +1358,10 @@ prepareDefault dflags scrut outer_bndr bndr_ty imposs_cons (Just deflt_rhs)
     	-- mkCase applied to them, so they won't have a case in their default
     	-- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
     	-- in munge_rhs may put a case into the DEFAULT branch!
-  where
-    	-- We are scrutinising the same variable if it's
-    	-- the outer case-binder, or if the outer case scrutinises a variable
-    	-- (and it's the same).  Testing both allows us not to replace the
-    	-- outer scrut-var with the outer case-binder (Simplify.simplCaseBinder).
-    scruting_same_var = case scrut of
-			  Var outer_scrut -> \ v -> v == outer_bndr || v == outer_scrut
-			  other		  -> \ v -> v == outer_bndr
+
 
 --------- Fill in known constructor -----------
-prepareDefault dflags scrut case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
+prepareDefault dflags env case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
   | 	-- This branch handles the case where we are 
 	-- scrutinisng an algebraic data type
     isAlgTyCon tycon		-- It's a data type, tuple, or unboxed tuples.  
@@ -1320,7 +1387,7 @@ prepareDefault dflags scrut case_bndr (Just (tycon, inst_tys)) imposs_cons (Just
 
 	[con] -> 	-- It matches exactly one constructor, so fill it in
 		 do { tick (FillInCaseDefault case_bndr)
-                    ; us <- getUniquesSmpl
+                    ; us <- getUniquesM
                     ; let (ex_tvs, co_tvs, arg_ids) =
                               dataConRepInstPat us con inst_tys
                     ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
@@ -1328,10 +1395,10 @@ prepareDefault dflags scrut case_bndr (Just (tycon, inst_tys)) imposs_cons (Just
 	two_or_more -> return [(DEFAULT, [], deflt_rhs)]
 
 --------- Catch-all cases -----------
-prepareDefault dflags scrut case_bndr bndr_ty imposs_cons (Just deflt_rhs)
+prepareDefault dflags env case_bndr bndr_ty imposs_cons (Just deflt_rhs)
   = return [(DEFAULT, [], deflt_rhs)]
 
-prepareDefault dflags scrut case_bndr bndr_ty imposs_cons Nothing
+prepareDefault dflags env case_bndr bndr_ty imposs_cons Nothing
   = return []	-- No default branch
 \end{code}
 
@@ -1377,8 +1444,8 @@ mkCase scrut case_bndr ty []
 
 mkCase scrut case_bndr ty alts	-- Identity case
   | all identity_alt alts
-  = tick (CaseIdentity case_bndr)		`thenSmpl_`
-    returnSmpl (re_cast scrut)
+  = do tick (CaseIdentity case_bndr)
+       return (re_cast scrut)
   where
     identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
 
@@ -1411,7 +1478,7 @@ mkCase scrut case_bndr ty alts	-- Identity case
 --------------------------------------------------
 --	Catch-all
 --------------------------------------------------
-mkCase scrut bndr ty alts = returnSmpl (Case scrut bndr ty alts)
+mkCase scrut bndr ty alts = return (Case scrut bndr ty alts)
 \end{code}