From 4e6d579860228f1264558d1cb03f27f239333039 Mon Sep 17 00:00:00 2001
From: simonpj <unknown>
Date: Thu, 7 Sep 2000 16:32:24 +0000
Subject: [PATCH] [project @ 2000-09-07 16:32:23 by simonpj] A list of
 simplifier-related stuff, triggered 	by looking at GHC's
 performance.

	I don't guarantee that this lot will lead to
	a uniform improvement over 4.08, but it it should
	be a bit better.  More work probably required.


* Make the simplifier's Stop continuation record whether the expression being
  simplified is the RHS of a thunk, or (say) the body of a lambda or case RHS.
  In the thunk case we want to be a bit keener about inlining if the type of
  the thunk is amenable to update in place.

* Fix interestingArg, which was being too liberal, and hence doing
  too much inlining.

* Extended CoreUtils.exprIsCheap to make two more things cheap:
    - 	case (coerce x) of ...
    -   let x = y +# z
  This makes a bit more eta expansion happen.  It was provoked by
  a program of Marcin's.

* MkIface.ifaceBinds.   Make sure that we emit rules for things
  (like class operations) that don't get a top-level binding in the
  interface file.  Previously such rules were silently forgotten.

* Move transformRhs to *after* simplification, which makes it a
  little easier to do, and means that the arity it computes is
  readily available to completeBinding.  This gets much better
  arities.

* Do coerce splitting in completeBinding. This gets good code for
	newtype CInt = CInt Int

	test:: CInt -> Int
	test x = case x of
	      	   1 -> 2
	      	   2 -> 4
	      	   3 -> 8
	      	   4 -> 16
	      	   _ -> 0

* Modify the meaning of "arity" so that during compilation it means
  "if you apply this function to fewer args, it will do virtually
  no work".   So, for example
	f = coerce t (\x -> e)
  has arity at least 1.  When a function is exported, it's arity becomes
  the number of exposed, top-level lambdas, which is subtly different.
  But that's ok.

  I removed CoreUtils.exprArity altogether: it looked only at the exposed
  lambdas.  Instead, we use exprEtaExpandArity exclusively.

  All of this makes I/O programs work much better.
---
 ghc/compiler/basicTypes/Id.lhs        |   21 +--
 ghc/compiler/coreSyn/CoreFVs.lhs      |    4 +-
 ghc/compiler/coreSyn/CoreUnfold.lhs   |   26 ++-
 ghc/compiler/coreSyn/CoreUtils.lhs    |   49 ++----
 ghc/compiler/main/MkIface.lhs         |  125 ++++++++------
 ghc/compiler/simplCore/OccurAnal.lhs  |   18 +-
 ghc/compiler/simplCore/SetLevels.lhs  |   20 +--
 ghc/compiler/simplCore/SimplMonad.lhs |   50 +++++-
 ghc/compiler/simplCore/SimplUtils.lhs |  282 ++++++++++++++++++--------------
 ghc/compiler/simplCore/Simplify.lhs   |  291 +++++++++++++++++----------------
 ghc/compiler/stranal/WorkWrap.lhs     |   37 +++--
 11 files changed, 524 insertions(+), 399 deletions(-)
diff --git a/ghc/compiler/basicTypes/Id.lhs b/ghc/compiler/basicTypes/Id.lhs
index 0076c36..4901db0 100644
--- a/ghc/compiler/basicTypes/Id.lhs
+++ b/ghc/compiler/basicTypes/Id.lhs
@@ -27,16 +27,16 @@ module Id (
 	externallyVisibleId,
 	idFreeTyVars,
 	isIP,
-
-	-- Inline pragma stuff
-	idInlinePragma, setInlinePragma, modifyInlinePragma, 
-
 	isSpecPragmaId,	isRecordSelector,
 	isPrimOpId, isPrimOpId_maybe, 
 	isDataConId, isDataConId_maybe, isDataConWrapId, isDataConWrapId_maybe,
 	isBottomingId,
 	isExportedId, isUserExportedId,
-	mayHaveNoBinding,
+	hasNoBinding,
+
+	-- Inline pragma stuff
+	idInlinePragma, setInlinePragma, modifyInlinePragma, 
+
 
 	-- One shot lambda stuff
 	isOneShotLambda, setOneShotLambda, clearOneShotLambda,
@@ -237,16 +237,13 @@ isSpecPragmaId id = case idFlavour id of
 			SpecPragmaId -> True
 			other	     -> False
 
-mayHaveNoBinding id = case idFlavour id of
+hasNoBinding id = case idFlavour id of
 			DataConId _ -> True
 			PrimOpId _  -> True
 			other	    -> False
-	-- mayHaveNoBinding returns True of an Id which may not have a
+	-- hasNoBinding returns True of an Id which may not have a
 	-- binding, even though it is defined in this module.  Notably,
 	-- the constructors of a dictionary are in this situation.
-	--	
-	-- mayHaveNoBinding returns True of some things that *do* have a local binding,
-	-- so it's only an approximation.  That's ok... it's only use for assertions.
 
 -- Don't drop a binding for an exported Id,
 -- if it otherwise looks dead.  
@@ -294,9 +291,7 @@ exportWithOrigOccName id = omitIfaceSigForId id || isUserExportedId id
 
 \begin{code}
 isDeadBinder :: Id -> Bool
-isDeadBinder bndr | isId bndr = case idOccInfo bndr of
-					IAmDead -> True
-					other	-> False
+isDeadBinder bndr | isId bndr = isDeadOcc (idOccInfo bndr)
 		  | otherwise = False	-- TyVars count as not dead
 
 isIP id = isIPOcc (getOccName id)
diff --git a/ghc/compiler/coreSyn/CoreFVs.lhs b/ghc/compiler/coreSyn/CoreFVs.lhs
index 5784439..42dcee8 100644
--- a/ghc/compiler/coreSyn/CoreFVs.lhs
+++ b/ghc/compiler/coreSyn/CoreFVs.lhs
@@ -18,7 +18,7 @@ module CoreFVs (
 #include "HsVersions.h"
 
 import CoreSyn
-import Id		( Id, idFreeTyVars, mayHaveNoBinding, idSpecialisation )
+import Id		( Id, idFreeTyVars, hasNoBinding, idSpecialisation )
 import VarSet
 import Var		( Var, isId )
 import Name		( isLocallyDefined )
@@ -37,7 +37,7 @@ import Outputable
 mustHaveLocalBinding :: Var -> Bool
 -- True <=> the variable must have a binding in this module
 mustHaveLocalBinding v
-  | isId v    = isLocallyDefined v && not (mayHaveNoBinding v)
+  | isId v    = isLocallyDefined v && not (hasNoBinding v)
   | otherwise = True	-- TyVars etc must
 \end{code}
 
diff --git a/ghc/compiler/coreSyn/CoreUnfold.lhs b/ghc/compiler/coreSyn/CoreUnfold.lhs
index c170c47..ae9fbb6 100644
--- a/ghc/compiler/coreSyn/CoreUnfold.lhs
+++ b/ghc/compiler/coreSyn/CoreUnfold.lhs
@@ -214,7 +214,18 @@ sizeExpr (I# bOMB_OUT_SIZE) top_args expr
 
     size_up (Case (Var v) _ alts) 
 	| v `elem` top_args		-- We are scrutinising an argument variable
-	= case alts of
+	= 
+{-	I'm nuking this special case; BUT see the comment with case alternatives.
+
+	(a) It's too eager.  We don't want to inline a wrapper into a
+	    context with no benefit.  
+	    E.g.  \ x. f (x+x)   	o point in inlining (+) here!
+
+	(b) It's ineffective. Once g's wrapper is inlined, its case-expressions 
+	    aren't scrutinising arguments any more
+
+	    case alts of
+
 		[alt] -> size_up_alt alt `addSize` SizeIs 0# (unitBag (v, 1)) 0#
 		-- We want to make wrapper-style evaluation look cheap, so that
 		-- when we inline a wrapper it doesn't make call site (much) bigger
@@ -227,7 +238,9 @@ sizeExpr (I# bOMB_OUT_SIZE) top_args expr
 		-- ordering difference, we make (case a of (x,y) -> ...), 
 		-- *where a is one of the arguments* look free.
 
-		other -> alts_size (foldr addSize sizeOne alt_sizes)	-- The 1 is for the scrutinee
+		other -> 
+-}
+			 alts_size (foldr addSize sizeOne alt_sizes)	-- The 1 is for the scrutinee
 				   (foldr1 maxSize alt_sizes)
 
 		-- Good to inline if an arg is scrutinised, because
@@ -301,7 +314,8 @@ sizeExpr (I# bOMB_OUT_SIZE) top_args expr
 
     ------------ 
     size_up_alt (con, bndrs, rhs) = size_up rhs
-	    -- Don't charge for args, so that wrappers look cheap
+ 	-- Don't charge for args, so that wrappers look cheap
+	-- (See comments about wrappers with Case)
 
     ------------
 	-- We want to record if we're case'ing, or applying, an argument
@@ -602,7 +616,7 @@ callSiteInline black_listed inline_call occ id arg_infos interesting_cont
 #ifdef DEBUG
     if opt_D_dump_inlinings then
 	pprTrace "Considering inlining"
-		 (ppr id <+> vcat [text "black listed" <+> ppr black_listed,
+		 (ppr id <+> vcat [text "black listed:" <+> ppr black_listed,
 				   text "occ info:" <+> ppr occ,
 			  	   text "arg infos" <+> ppr arg_infos,
 				   text "interesting continuation" <+> ppr interesting_cont,
@@ -700,8 +714,8 @@ normal_case rule_vars phase v
 	  | from_INLINE -> has_rules	-- Black list until final phase
 	  | otherwise   -> True		-- Always blacklisted
 
-	IMustNotBeINLINEd from_inline (Just threshold)
-	  | from_inline -> (phase < threshold && has_rules)
+	IMustNotBeINLINEd from_INLINE (Just threshold)
+	  | from_INLINE -> (phase < threshold && has_rules)
 	  | otherwise   -> (phase < threshold || has_rules)
   where
     has_rules =  v `elemVarSet` rule_vars
diff --git a/ghc/compiler/coreSyn/CoreUtils.lhs b/ghc/compiler/coreSyn/CoreUtils.lhs
index 5e9736b..05a2520 100644
--- a/ghc/compiler/coreSyn/CoreUtils.lhs
+++ b/ghc/compiler/coreSyn/CoreUtils.lhs
@@ -11,7 +11,7 @@ module CoreUtils (
         mkPiType,
 
 	-- Properties of expressions
-	exprType, coreAltsType, exprArity,
+	exprType, coreAltsType, 
 	exprIsBottom, exprIsDupable, exprIsTrivial, exprIsCheap, 
 	exprIsValue,exprOkForSpeculation, exprIsBig, 
 	exprIsConApp_maybe,
@@ -300,19 +300,16 @@ shared.  The main examples of things which aren't WHNF but are
 
   * 	case e of
 	  pi -> ei
+	(where e, and all the ei are cheap)
 
-	where e, and all the ei are cheap; and
-
-  *	let x = e
-	in b
-
-	where e and b are cheap; and
+  *	let x = e in b
+	(where e and b are cheap)
 
   *	op x1 ... xn
-
-	where op is a cheap primitive operator
+	(where op is a cheap primitive operator)
 
   *	error "foo"
+	(because we are happy to substitute it inside a lambda)
 
 Notice that a variable is considered 'cheap': we can push it inside a lambda,
 because sharing will make sure it is only evaluated once.
@@ -324,10 +321,18 @@ exprIsCheap (Type _)        	  = True
 exprIsCheap (Var _)         	  = True
 exprIsCheap (Note _ e)      	  = exprIsCheap e
 exprIsCheap (Lam x e)       	  = if isId x then True else exprIsCheap e
-exprIsCheap (Case (Var v) _ alts) = and [exprIsCheap rhs | (_,_,rhs) <- alts]
+exprIsCheap (Case e _ alts)       = exprIsCheap e && 
+				    and [exprIsCheap rhs | (_,_,rhs) <- alts]
 	-- Experimentally, treat (case x of ...) as cheap
+	-- (and case __coerce x etc.)
 	-- This improves arities of overloaded functions where
 	-- there is only dictionary selection (no construction) involved
+exprIsCheap (Let (NonRec x _) e)  
+      | isUnLiftedType (idType x) = exprIsCheap e
+      | otherwise		  = False
+	-- strict lets always have cheap right hand sides, and
+	-- do no allocation.
+
 exprIsCheap other_expr 
   = go other_expr 0 True
   where
@@ -337,9 +342,8 @@ exprIsCheap other_expr
 
 	  || idAppIsBottom f n_args 
 			-- Application of a function which
-			-- always gives bottom; we treat this as
-			-- a WHNF, because it certainly doesn't
-			-- need to be shared!
+			-- always gives bottom; we treat this as cheap
+			-- because it certainly doesn't need to be shared!
 	
     go (App f a) n_args args_cheap 
 	| isTypeArg a = go f n_args 	  args_cheap
@@ -476,25 +480,6 @@ idAppIsValue id n_val_args
 \end{code}
 
 \begin{code}
-exprArity :: CoreExpr -> Int	-- How many value lambdas are at the top
-exprArity (Lam b e)     | isTyVar b	= exprArity e
-		        | otherwise	= 1 + exprArity e
-
-exprArity (Note note e) | ok_note note	= exprArity e
-			where
-			  ok_note (Coerce _ _) = True
-				-- We *do* look through coerces when getting arities.
-				-- Reason: arities are to do with *representation* and
-				-- work duplication. 
-			  ok_note InlineMe     = True
-			  ok_note InlineCall   = True
-			  ok_note other	       = False
-				-- SCC and TermUsg might be over-conservative?
-
-exprArity other	= 0
-\end{code}
-
-\begin{code}
 exprIsConApp_maybe :: CoreExpr -> Maybe (DataCon, [CoreExpr])
 exprIsConApp_maybe expr
   = analyse (collectArgs expr)
diff --git a/ghc/compiler/main/MkIface.lhs b/ghc/compiler/main/MkIface.lhs
index 3321130..da7b866 100644
--- a/ghc/compiler/main/MkIface.lhs
+++ b/ghc/compiler/main/MkIface.lhs
@@ -24,7 +24,7 @@ import RnMonad
 import TcInstUtil	( InstInfo(..) )
 
 import CmdLineOpts
-import Id		( Id, idType, idInfo, omitIfaceSigForId, isUserExportedId,
+import Id		( Id, idType, idInfo, omitIfaceSigForId, isUserExportedId, hasNoBinding,
 			  idSpecialisation
 			)
 import Var		( isId )
@@ -68,6 +68,7 @@ import Bag
 import Outputable
 
 import Maybe 		( isNothing )
+import List		( partition )
 import Monad 		( when )
 \end{code}
 
@@ -322,6 +323,7 @@ completeIface new_iface local_tycons local_classes
      all_decls = cls_dcls ++ ty_dcls ++ bagToList val_dcls
      (inst_dcls, inst_ids) = ifaceInstances inst_info
      cls_dcls = map ifaceClass local_classes
+  
      ty_dcls  = map ifaceTyCon (filter (not . isWiredInName . getName) local_tycons)
 
      (val_dcls, emitted_ids) = ifaceBinds (inst_ids `unionVarSet` orphan_rule_ids)
@@ -358,7 +360,10 @@ ifaceRules rules emitted
 				-- We can't print builtin rules in interface files
 				-- Since they are built in, an importing module
 				-- will have access to them anyway
-		     all (`elemVarSet` emitted) (varSetElems (ruleSomeLhsFreeVars interestingId rule))
+
+			-- Sept 00: I've disabled this test.  It doesn't stop many, if any, rules
+			-- from coming out, and to make it work properly we need to add 
+			     all (`elemVarSet` emitted) (varSetElems (ruleSomeLhsFreeVars interestingId rule))
 				-- Spit out a rule only if all its lhs free vars are emitted
 				-- This is a good reason not to do it when we emit the Id itself
 		   ]
@@ -489,6 +494,11 @@ ifaceBinds needed_ids final_ids binds
 			Nothing  -> pprTrace "ifaceBinds not found:" (ppr id) $
 				    idInfo id
 
+	-- The 'needed' set contains the Ids that are needed by earlier
+	-- interface file emissions.  If the Id isn't in this set, and isn't
+	-- exported, there's no need to emit anything
+    need_id needed_set id = id `elemVarSet` needed_set || isUserExportedId id 
+
     go needed [] decls emitted
 	| not (isEmptyVarSet needed) = pprTrace "ifaceBinds: free vars:" 
 					  (sep (map ppr (varSetElems needed)))
@@ -496,18 +506,24 @@ ifaceBinds needed_ids final_ids binds
 	| otherwise 		     = (decls, emitted)
 
     go needed (NonRec id rhs : binds) decls emitted
-	= case ifaceId get_idinfo needed False id rhs of
-		Nothing		      -> go needed binds decls emitted
-		Just (decl, extras) -> let
-			needed' = (needed `unionVarSet` extras) `delVarSet` id
-			-- 'extras' can include the Id itself via a rule
-			emitted' = emitted `extendVarSet` id
-			in
-			go needed' binds (decl `consBag` decls) emitted'
+	| need_id needed id
+	= if omitIfaceSigForId id then
+	    go (needed `delVarSet` id) binds decls (emitted `extendVarSet` id)
+	  else
+	    go ((needed `unionVarSet` extras) `delVarSet` id)
+	       binds
+	       (decl `consBag` decls)
+	       (emitted `extendVarSet` id)
+	| otherwise
+	= go needed binds decls emitted
+	where
+	  (decl, extras) = ifaceId get_idinfo False id rhs
 
 	-- Recursive groups are a bit more of a pain.  We may only need one to
 	-- start with, but it may call out the next one, and so on.  So we
-	-- have to look for a fixed point.
+	-- have to look for a fixed point.  We don't want necessarily them all, 
+	-- because without -O we may only need the first one (if we don't emit
+	-- its unfolding)
     go needed (Rec pairs : binds) decls emitted
 	= go needed' binds decls' emitted' 
 	where
@@ -519,42 +535,29 @@ ifaceBinds needed_ids final_ids binds
     go_rec :: IdSet -> [(Id,CoreExpr)] -> (Bag RdrNameHsDecl, IdSet, IdSet)
     go_rec needed pairs
 	| null decls = (emptyBag, emptyVarSet, emptyVarSet)
-	| otherwise	= (more_decls `unionBags`   listToBag decls, 
-			   more_emitted  `unionVarSet` mkVarSet emitted,
-			   more_extras   `unionVarSet` extras)
+	| otherwise  = (more_decls   `unionBags`   listToBag decls, 
+			more_emitted `unionVarSet` mkVarSet (map fst needed_prs),
+			more_extras  `unionVarSet` extras)
 	where
-	  maybes	     = map do_one pairs
-	  emitted	     = [id   | ((id,_), Just _)  <- pairs `zip` maybes]
-	  reduced_pairs	     = [pair | (pair,   Nothing) <- pairs `zip` maybes]
-	  (decls, extras_s)  = unzip (catMaybes maybes)
-	  extras	     = unionVarSets extras_s
-	  (more_decls, more_emitted, more_extras) = go_rec extras reduced_pairs
-
-	  do_one (id,rhs) = ifaceId get_idinfo needed True id rhs
+	  (needed_prs,leftover_prs) = partition is_needed pairs
+	  (decls, extras_s)         = unzip [ifaceId get_idinfo True id rhs 
+				            | (id,rhs) <- needed_prs, not (omitIfaceSigForId id)]
+	  extras	            = unionVarSets extras_s
+	  (more_decls, more_emitted, more_extras) = go_rec extras leftover_prs
+	  is_needed (id,_) = need_id needed id
 \end{code}
 
 
 \begin{code}
 ifaceId :: (Id -> IdInfo)	-- This function "knows" the extra info added
 				-- by the STG passes.  Sigh
-
-	-> IdSet		-- Set of Ids that are needed by earlier interface
-				-- file emissions.  If the Id isn't in this set, and isn't
-				-- exported, there's no need to emit anything
 	-> Bool			-- True <=> recursive, so don't print unfolding
 	-> Id
 	-> CoreExpr		-- The Id's right hand side
-	-> Maybe (RdrNameHsDecl, IdSet)	-- The emitted stuff, plus any *extra* needed Ids
-
-ifaceId get_idinfo needed_ids is_rec id rhs
-  | not (id `elemVarSet` needed_ids ||		-- Needed [no id in needed_ids has omitIfaceSigForId]
-	(isUserExportedId id && not (omitIfaceSigForId id)))	-- or exported and not to be omitted
-  = Nothing 		-- Well, that was easy!
+	-> (RdrNameHsDecl, IdSet)	-- The emitted stuff, plus any *extra* needed Ids
 
-ifaceId get_idinfo needed_ids is_rec id rhs
-  = ASSERT2( arity_matches_strictness, ppr id )
-    Just (SigD (IfaceSig (toRdrName id) (toHsType id_type) hs_idinfo noSrcLoc),
-	  new_needed_ids)
+ifaceId get_idinfo is_rec id rhs
+  = (SigD (IfaceSig (toRdrName id) (toHsType id_type) hs_idinfo noSrcLoc),  new_needed_ids)
   where
     id_type     = idType id
     core_idinfo = idInfo id
@@ -565,7 +568,8 @@ ifaceId get_idinfo needed_ids is_rec id rhs
 					   strict_hsinfo ++ wrkr_hsinfo ++ unfold_hsinfo
 
     ------------  Arity  --------------
-    arity_info     = arityInfo stg_idinfo
+    arity_info   = arityInfo stg_idinfo
+    stg_arity	 = arityLowerBound arity_info
     arity_hsinfo = case arityInfo stg_idinfo of
 			a@(ArityExactly n) -> [HsArity a]
 			other		   -> []
@@ -589,11 +593,40 @@ ifaceId get_idinfo needed_ids is_rec id rhs
 
 
     ------------  Worker  --------------
-    work_info     = workerInfo core_idinfo
-    has_worker    = workerExists work_info
-    wrkr_hsinfo   = case work_info of
-			HasWorker work_id _ -> [HsWorker (toRdrName work_id)]
-			other		    -> []
+	-- We only treat a function as having a worker if
+	-- the exported arity (which is now the number of visible lambdas)
+	-- is the same as the arity at the moment of the w/w split
+	-- If so, we can safely omit the unfolding inside the wrapper, and
+	-- instead re-generate it from the type/arity/strictness info
+	-- But if the arity has changed, we just take the simple path and
+	-- put the unfolding into the interface file, forgetting the fact
+	-- that it's a wrapper.  
+	--
+	-- How can this happen?  Sometimes we get
+	--	f = coerce t (\x y -> $wf x y)
+	-- at the moment of w/w split; but the eta reducer turns it into
+	--	f = coerce t $wf
+	-- which is perfectly fine except that the exposed arity so far as
+	-- the code generator is concerned (zero) differs from the arity
+	-- when we did the split (2).  
+	--
+	-- All this arises because we use 'arity' to mean "exactly how many
+	-- top level lambdas are there" in interface files; but during the
+	-- compilation of this module it means "how many things can I apply
+	-- this to".
+    work_info           = workerInfo core_idinfo
+    HasWorker work_id _ = work_info
+
+    has_worker = case work_info of
+		  HasWorker work_id wrap_arity 
+		   | wrap_arity == stg_arity -> True
+		   | otherwise		     -> pprTrace "ifaceId: arity change:" (ppr id) 
+						False
+							  
+		  other			     -> False
+
+    wrkr_hsinfo | has_worker = [HsWorker (toRdrName work_id)]
+		| otherwise  = []
 
     ------------  Unfolding  --------------
     inline_pragma  = inlinePragInfo core_idinfo
@@ -623,11 +656,10 @@ ifaceId get_idinfo needed_ids is_rec id rhs
 						unfold_ids	`unionVarSet`
 						spec_ids
 
-    worker_ids = case work_info of
-		   HasWorker work_id _ | interestingId work_id -> unitVarSet work_id
+    worker_ids | has_worker && interestingId work_id = unitVarSet work_id
 			-- Conceivably, the worker might come from
 			-- another module
-		   other -> emptyVarSet
+	       | otherwise = emptyVarSet
 
     spec_ids = filterVarSet interestingId (rulesRhsFreeVars spec_info)
 
@@ -644,7 +676,6 @@ ifaceId get_idinfo needed_ids is_rec id rhs
 	     HasWorker _ wrap_arity -> wrap_arity == arityLowerBound arity_info
 	     other		    -> True
     
-interestingId id = isId id && isLocallyDefined id &&
-		   not (omitIfaceSigForId id)
+interestingId id = isId id && isLocallyDefined id && not (hasNoBinding id)
 \end{code}
 
diff --git a/ghc/compiler/simplCore/OccurAnal.lhs b/ghc/compiler/simplCore/OccurAnal.lhs
index b7d7c22..ad9b70f 100644
--- a/ghc/compiler/simplCore/OccurAnal.lhs
+++ b/ghc/compiler/simplCore/OccurAnal.lhs
@@ -87,20 +87,20 @@ occurAnalyseRule (Rule str tpl_vars tpl_args rhs)
 
 In @occAnalTop@ we do indirection-shorting.  That is, if we have this:
 
-	loc = <expression>
+	x_local = <expression>
 	...
-	exp = loc
+	x_exported = loc
 
 where exp is exported, and loc is not, then we replace it with this:
 
-	loc = exp
-	exp = <expression>
+	x_local = x_exported
+	x_exported = <expression>
 	...
 
-Without this we never get rid of the exp = loc thing.  This save a
-gratuitous jump (from \tr{x_exported} to \tr{x_local}), and makes
-strictness information propagate better.  This used to happen in the
-final phase, but it's tidier to do it here.
+Without this we never get rid of the x_exported = x_local thing.  This
+save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
+makes strictness information propagate better.  This used to happen in
+the final phase, but it's tidier to do it here.
 
 If more than one exported thing is equal to a local thing (i.e., the
 local thing really is shared), then we do one only:
@@ -171,7 +171,7 @@ occurAnalyseBinds binds
 		   ind_env' = extendVarEnv ind_env local_id exported_id
 
 	    other -> 	-- Ho ho! The normal case
-		   (final_usage, ind_env, new_binds ++ binds')
+		     (final_usage, ind_env, new_binds ++ binds')
 		   
 initialTopEnv = OccEnv isLocallyDefined	-- Anything local is interesting
 		       emptyVarSet
diff --git a/ghc/compiler/simplCore/SetLevels.lhs b/ghc/compiler/simplCore/SetLevels.lhs
index 517d2d9..515185f 100644
--- a/ghc/compiler/simplCore/SetLevels.lhs
+++ b/ghc/compiler/simplCore/SetLevels.lhs
@@ -133,9 +133,6 @@ ltLvl (Level maj1 min1) (Level maj2 min2)
 
 ltMajLvl :: Level -> Level -> Bool
     -- Tells if one level belongs to a difft *lambda* level to another
-    -- But it returns True regardless if l1 is the top level
-    -- We always like to float to the top!	
-ltMajLvl (Level 0 0)    _	       = True
 ltMajLvl (Level maj1 _) (Level maj2 _) = maj1 < maj2
 
 isTopLvl :: Level -> Bool
@@ -144,6 +141,9 @@ isTopLvl other       = False
 
 instance Outputable Level where
   ppr (Level maj min) = hcat [ char '<', int maj, char ',', int min, char '>' ]
+
+instance Eq Level where
+  (Level maj1 min1) == (Level maj2 min2) = maj1==maj2 && min1==min2
 \end{code}
 
 %************************************************************************
@@ -226,8 +226,8 @@ lvlExpr ctxt_lvl env (_, AnnApp fun arg)
 	-- but we do if the function is big and hairy, like a case
 
 lvlExpr ctxt_lvl env (_, AnnNote InlineMe expr)
-	-- Don't float anything out of an InlineMe
-  = lvlExpr tOP_LEVEL env expr 		`thenLvl` \ expr' ->
+-- Don't float anything out of an InlineMe; hence the tOP_LEVEL
+  = lvlExpr tOP_LEVEL env expr 	`thenLvl` \ expr' ->
     returnLvl (Note InlineMe expr')
 
 lvlExpr ctxt_lvl env (_, AnnNote note expr)
@@ -305,6 +305,8 @@ lvlMFE strict_ctxt ctxt_lvl env ann_expr@(fvs, _)
   || not good_destination
   || exprIsTrivial expr				-- Is trivial
   || (strict_ctxt && exprIsBottom expr)		-- Strict context and is bottom
+						--  e.g. \x -> error "foo"
+						-- No gain from floating this
   = 	-- Don't float it out
     lvlExpr ctxt_lvl env ann_expr
 
@@ -734,11 +736,9 @@ subst_id_info (_, _, subst, _) ctxt_lvl dest_lvl v
 	-- VERY IMPORTANT: we must zap the demand info 
 	-- if the thing is going to float out past a lambda
     zap_dmd info
-	| float_past_lam && isStrict (demandInfo info)
-	= setDemandInfo info wwLazy
-	| otherwise
-	= info
+	| stays_put || not (isStrict (demandInfo info)) = info
+	| otherwise					= setDemandInfo info wwLazy
 
-    float_past_lam = ctxt_lvl `ltMajLvl` dest_lvl
+    stays_put = ctxt_lvl == dest_lvl
 \end{code}
 	
diff --git a/ghc/compiler/simplCore/SimplMonad.lhs b/ghc/compiler/simplCore/SimplMonad.lhs
index fac41a7..322f0f5 100644
--- a/ghc/compiler/simplCore/SimplMonad.lhs
+++ b/ghc/compiler/simplCore/SimplMonad.lhs
@@ -39,14 +39,19 @@ module SimplMonad (
 	getSubstEnv, extendSubst, extendSubstList,
 	getInScope, setInScope, modifyInScope, addNewInScopeIds,
 	setSubstEnv, zapSubstEnv,
-	getSimplBinderStuff, setSimplBinderStuff
+	getSimplBinderStuff, setSimplBinderStuff,
+
+	-- Adding bindings
+	addLetBind, addLetBinds, addAuxiliaryBind, addAuxiliaryBinds,
+	addCaseBind, needsCaseBinding, addNonRecBind
     ) where
 
 #include "HsVersions.h"
 
-import Id		( Id, mkSysLocal, idUnfolding, isDataConWrapId )
+import Id		( Id, mkSysLocal, idType, idUnfolding, isDataConWrapId )
 import CoreSyn
 import CoreUnfold	( isCompulsoryUnfolding )
+import CoreUtils	( exprOkForSpeculation )
 import PprCore		()	-- Instances
 import CostCentre	( CostCentreStack, subsumedCCS )
 import Name		( isLocallyDefined )
@@ -57,7 +62,7 @@ import qualified Subst
 import Subst		( Subst, mkSubst, substEnv, 
 			  InScopeSet, mkInScopeSet, substInScope, isInScope
 			)
-import Type             ( Type )
+import Type             ( Type, isUnLiftedType )
 import UniqSupply	( uniqsFromSupply, uniqFromSupply, splitUniqSupply,
 			  UniqSupply
 			)
@@ -106,6 +111,45 @@ type OutStuff a   = ([OutBind], a)
 	-- incrementally.  Comments just before simplExprB in Simplify.lhs
 \end{code}
 
+\begin{code}
+addLetBind :: CoreBind -> SimplM (OutStuff a) -> SimplM (OutStuff a)
+addLetBind bind thing_inside
+  = thing_inside	`thenSmpl` \ (binds, res) ->
+    returnSmpl (bind : binds, res)
+
+addLetBinds :: [CoreBind] -> SimplM (OutStuff a) -> SimplM (OutStuff a)
+addLetBinds binds1 thing_inside
+  = thing_inside	`thenSmpl` \ (binds2, res) ->
+    returnSmpl (binds1 ++ binds2, res)
+
+addAuxiliaryBinds :: [CoreBind] -> SimplM (OutStuff a) -> SimplM (OutStuff a)
+	-- Extends the in-scope environment as well as wrapping the bindings
+addAuxiliaryBinds binds1 thing_inside
+  = addNewInScopeIds (bindersOfBinds binds1)	$
+    addLetBinds binds1 thing_inside
+
+addAuxiliaryBind :: CoreBind -> SimplM (OutStuff a) -> SimplM (OutStuff a)
+	-- Extends the in-scope environment as well as wrapping the bindings
+addAuxiliaryBind bind thing_inside
+  = addNewInScopeIds (bindersOf bind)	$
+    addLetBind bind thing_inside
+
+needsCaseBinding ty rhs = isUnLiftedType ty && not (exprOkForSpeculation rhs)
+	-- Make a case expression instead of a let
+	-- These can arise either from the desugarer,
+	-- or from beta reductions: (\x.e) (x +# y)
+
+addCaseBind bndr rhs thing_inside
+  = getInScope 			`thenSmpl` \ in_scope ->
+    thing_inside		`thenSmpl` \ (floats, (_, body)) ->
+    returnSmpl ([], (in_scope, Case rhs bndr [(DEFAULT, [], mkLets floats body)]))
+
+addNonRecBind bndr rhs thing_inside
+	-- Checks for needing a case binding
+  | needsCaseBinding (idType bndr) rhs = addCaseBind bndr rhs thing_inside
+  | otherwise		     	       = addLetBind  (NonRec bndr rhs) thing_inside
+\end{code}
+
 
 %************************************************************************
 %*									*
diff --git a/ghc/compiler/simplCore/SimplUtils.lhs b/ghc/compiler/simplCore/SimplUtils.lhs
index 90a759d..235593c 100644
--- a/ghc/compiler/simplCore/SimplUtils.lhs
+++ b/ghc/compiler/simplCore/SimplUtils.lhs
@@ -11,7 +11,7 @@ module SimplUtils (
 
 	-- The continuation type
 	SimplCont(..), DupFlag(..), contIsDupable, contResultType,
-	countValArgs, countArgs,
+	countValArgs, countArgs, mkRhsStop, mkStop,
 	getContArgs, interestingCallContext, interestingArg, isStrictType, discardInline
 
     ) where
@@ -44,6 +44,7 @@ import DataCon		( dataConRepArity )
 import VarSet
 import VarEnv		( SubstEnv, SubstResult(..) )
 import Util		( lengthExceeds )
+import BasicTypes	( Arity )
 import Outputable
 \end{code}
 
@@ -56,7 +57,10 @@ import Outputable
 
 \begin{code}
 data SimplCont		-- Strict contexts
-  = Stop OutType		-- Type of the result
+  = Stop     OutType		-- Type of the result
+	     Bool		-- True => This is the RHS of a thunk whose type suggests
+				--	   that update-in-place would be possible
+				--	   (This makes the inliner a little keener.)
 
   | CoerceIt OutType			-- The To-type, simplified
 	     SimplCont
@@ -83,7 +87,7 @@ data SimplCont		-- Strict contexts
 				-- The result expression in the OutExprStuff has type cont_ty
 
 instance Outputable SimplCont where
-  ppr (Stop _)        		     = ptext SLIT("Stop")
+  ppr (Stop _ _)       		     = ptext SLIT("Stop")
   ppr (ApplyTo dup arg se cont)      = (ptext SLIT("ApplyTo") <+> ppr dup <+> ppr arg) $$ ppr cont
   ppr (ArgOf   dup _ _)   	     = ptext SLIT("ArgOf...") <+> ppr dup
   ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$ 
@@ -97,9 +101,16 @@ instance Outputable DupFlag where
   ppr OkToDup = ptext SLIT("ok")
   ppr NoDup   = ptext SLIT("nodup")
 
+
+-------------------
+mkRhsStop, mkStop :: OutType -> SimplCont
+mkStop    ty = Stop ty False
+mkRhsStop ty = Stop ty (canUpdateInPlace ty)
+
+
 -------------------
 contIsDupable :: SimplCont -> Bool
-contIsDupable (Stop _)       		 = True
+contIsDupable (Stop _ _)       		 = True
 contIsDupable (ApplyTo  OkToDup _ _ _)   = True
 contIsDupable (ArgOf    OkToDup _ _)     = True
 contIsDupable (Select   OkToDup _ _ _ _) = True
@@ -115,21 +126,22 @@ discardInline cont		   = cont
 
 -------------------
 discardableCont :: SimplCont -> Bool
-discardableCont (Stop _)	    = False
+discardableCont (Stop _ _)	    = False
 discardableCont (CoerceIt _ cont)   = discardableCont cont
 discardableCont (InlinePlease cont) = discardableCont cont
 discardableCont other		    = True
 
 discardCont :: SimplCont	-- A continuation, expecting
 	    -> SimplCont	-- Replace the continuation with a suitable coerce
-discardCont (Stop to_ty) = Stop to_ty
-discardCont cont	 = CoerceIt to_ty (Stop to_ty)
-			 where
-			   to_ty = contResultType cont
+discardCont cont = case cont of
+		     Stop to_ty _ -> cont
+		     other        -> CoerceIt to_ty (mkStop to_ty)
+		 where
+		   to_ty = contResultType cont
 
 -------------------
 contResultType :: SimplCont -> OutType
-contResultType (Stop to_ty)	     = to_ty
+contResultType (Stop to_ty _)	     = to_ty
 contResultType (ArgOf _ to_ty _)     = to_ty
 contResultType (ApplyTo _ _ _ cont)  = contResultType cont
 contResultType (CoerceIt _ cont)     = contResultType cont
@@ -257,15 +269,19 @@ interestingArg in_scope arg subst
   where
     analyse (Var v)
 	= case lookupIdSubst (mkSubst in_scope subst) v of
-	    DoneId v' _ -> hasSomeUnfolding (idUnfolding v')
-					-- was: isValueUnfolding (idUnfolding v')
-					-- But that seems over-pessimistic
-
-	    other	-> True		-- was: False
-					-- But that is *definitely* too pessimistic.
-					-- E.g. 	let x = 3 in f 
-					-- Here, x will be unconditionally substituted, via
-					-- the substitution!
+	    ContEx subst arg -> interestingArg in_scope arg subst
+	    DoneEx arg	     -> analyse arg
+	    DoneId v' _      -> hasSomeUnfolding (idUnfolding v')
+				-- Was: isValueUnfolding (idUnfolding v')
+				-- But that seems over-pessimistic
+
+	-- NB: it's too pessimistic to return False for ContEx/DoneEx
+	-- Consider 	let x = 3 in f x
+	-- The substitution will contain (x -> ContEx 3)
+	-- It's also too optimistic to return True for the ContEx/DoneEx case
+	-- Consider (\x. f x y) y
+	-- The substitution will contain (x -> ContEx y).
+
     analyse (Type _)	      = False
     analyse (App fn (Type _)) = analyse fn
     analyse (Note _ a)	      = analyse a
@@ -316,11 +332,15 @@ interestingCallContext :: Bool 		-- False <=> no args at all
 	-- 	as scrutinee of a case		Select
 	--	as arg of a strict fn		ArgOf
 	-- then we should not inline it (unless there is some other reason,
-	-- e.g. is is the sole occurrence).  
-	-- Why not?  At least in the case-scrutinee situation, turning
-	--	case x of y -> ...
+	-- e.g. is is the sole occurrence).  We achieve this by making
+	-- interestingCallContext return False for a lone variable.
+	--
+	-- Why?  At least in the case-scrutinee situation, turning
+	--	let x = (a,b) in case x of y -> ...
 	-- into
-	--	let y = (a,b) in ...
+	--	let x = (a,b) in case (a,b) of y -> ...
+	-- and thence to 
+	--	let x = (a,b) in let y = (a,b) in ...
 	-- is bad if the binding for x will remain.
 	--
 	-- Another example: I discovered that strings
@@ -333,12 +353,13 @@ interestingCallContext :: Bool 		-- False <=> no args at all
 	-- the context can ``see'' the unfolding of the variable (e.g. case or a RULE)
 	-- so there's no gain.
 	--
-	-- However, even a type application isn't a lone variable.  Consider
+	-- However, even a type application or coercion isn't a lone variable.
+	-- Consider
 	--	case $fMonadST @ RealWorld of { :DMonad a b c -> c }
 	-- We had better inline that sucker!  The case won't see through it.
 	--
-	-- For now, I'm treating treating a variable applied to types as
-	-- "lone". The motivating example was
+	-- For now, I'm treating treating a variable applied to types 
+	-- in a *lazy* context "lone". The motivating example was
 	--	f = /\a. \x. BIG
 	--	g = /\a. \y.  h (f a)
 	-- There's no advantage in inlining f here, and perhaps
@@ -347,12 +368,12 @@ interestingCallContext :: Bool 		-- False <=> no args at all
 interestingCallContext some_args some_val_args cont
   = interesting cont
   where
-    interesting (InlinePlease _)   = True
-    interesting (ApplyTo _ _ _ _)  = some_args	-- Can happen if we have (coerce t (f x)) y
-    interesting (Select _ _ _ _ _) = some_args
-    interesting (ArgOf _ _ _)	   = some_val_args
-    interesting (Stop ty) 	   = some_val_args && canUpdateInPlace ty
-    interesting (CoerceIt _ cont)  = interesting cont
+    interesting (InlinePlease _)       = True
+    interesting (Select _ _ _ _ _)     = some_args
+    interesting (ApplyTo _ _ _ _)      = some_args	-- Can happen if we have (coerce t (f x)) y
+    interesting (ArgOf _ _ _)	       = some_val_args
+    interesting (Stop ty upd_in_place) = some_val_args && upd_in_place
+    interesting (CoerceIt _ cont)      = interesting cont
 	-- If this call is the arg of a strict function, the context
 	-- is a bit interesting.  If we inline here, we may get useful
 	-- evaluation information to avoid repeated evals: e.g.
@@ -453,12 +474,13 @@ Try (a) eta expansion
     (b) type-lambda swizzling
 
 \begin{code}
-transformRhs :: InExpr -> SimplM InExpr
-transformRhs rhs 
-  = tryEtaExpansion body		`thenSmpl` \ body' ->
-    mkRhsTyLam tyvars body'
-  where
-    (tyvars, body) = collectTyBinders rhs
+transformRhs :: OutExpr 
+	     -> (Arity -> OutExpr -> SimplM (OutStuff a))
+	     -> SimplM (OutStuff a)
+
+transformRhs rhs thing_inside 
+  = tryRhsTyLam rhs			$ \ rhs1 ->
+    tryEtaExpansion rhs1 thing_inside
 \end{code}
 
 
@@ -491,7 +513,7 @@ let-floating.
 This optimisation is CRUCIAL in eliminating the junk introduced by
 desugaring mutually recursive definitions.  Don't eliminate it lightly!
 
-So far as the implemtation is concerned:
+So far as the implementation is concerned:
 
 	Invariant: go F e = /\tvs -> F e
 	
@@ -533,25 +555,31 @@ as we would normally do.
 
 
 \begin{code}
-mkRhsTyLam tyvars body			-- Only does something if there's a let
+tryRhsTyLam rhs thing_inside		-- Only does something if there's a let
   | null tyvars || not (worth_it body)	-- inside a type lambda, and a WHNF inside that
-  = returnSmpl (mkLams tyvars body)
+  = thing_inside rhs
   | otherwise
-  = go (\x -> x) body
+  = go (\x -> x) body		$ \ body' ->
+    thing_inside (mkLams tyvars body')
+
   where
+    (tyvars, body) = collectTyBinders rhs
+
     worth_it (Let _ e)	     = whnf_in_middle e
     worth_it other     	     = False
     whnf_in_middle (Let _ e) = whnf_in_middle e
     whnf_in_middle e	     = exprIsCheap e
 
 
-    go fn (Let bind@(NonRec var rhs) body) | exprIsTrivial rhs
-      = go (fn . Let bind) body
+    go fn (Let bind@(NonRec var rhs) body) thing_inside
+      | exprIsTrivial rhs
+      = go (fn . Let bind) body thing_inside
+
+    go fn (Let bind@(NonRec var rhs) body) thing_inside
+      = mk_poly tyvars_here var						`thenSmpl` \ (var', rhs') ->
+	addAuxiliaryBind (NonRec var' (mkLams tyvars_here (fn rhs))) 	$
+	go (fn . Let (mk_silly_bind var rhs')) body thing_inside
 
-    go fn (Let bind@(NonRec var rhs) body)
-      = mk_poly tyvars_here var				`thenSmpl` \ (var', rhs') ->
-	go (fn . Let (mk_silly_bind var rhs')) body	`thenSmpl` \ body' ->
-	returnSmpl (Let (NonRec var' (mkLams tyvars_here (fn rhs))) body')
       where
 	tyvars_here = tyvars
 		--	main_tyvar_set = mkVarSet tyvars
@@ -573,13 +601,13 @@ mkRhsTyLam tyvars body			-- Only does something if there's a let
 		-- abstracting wrt *all* the tyvars.  We'll see if that
 		-- gives rise to problems.   SLPJ June 98
 
-    go fn (Let (Rec prs) body)
+    go fn (Let (Rec prs) body) thing_inside
        = mapAndUnzipSmpl (mk_poly tyvars_here) vars	`thenSmpl` \ (vars', rhss') ->
 	 let
-	    gn body = fn $ foldr Let body (zipWith mk_silly_bind vars rhss')
+	    gn body = fn (foldr Let body (zipWith mk_silly_bind vars rhss'))
 	 in
-	 go gn body				`thenSmpl` \ body' ->
-	 returnSmpl (Let (Rec (vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss])) body')
+	 addAuxiliaryBind (Rec (vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss]))	$
+	 go gn body thing_inside
        where
 	 (vars,rhss) = unzip prs
 	 tyvars_here = tyvars
@@ -588,17 +616,19 @@ mkRhsTyLam tyvars body			-- Only does something if there's a let
 		-- See notes with tyvars_here above
 
 
-    go fn body = returnSmpl (mkLams tyvars (fn body))
+    go fn body thing_inside = thing_inside (fn body)
 
     mk_poly tyvars_here var
       = getUniqueSmpl		`thenSmpl` \ uniq ->
 	let
 	    poly_name = setNameUnique (idName var) uniq		-- Keep same name
 	    poly_ty   = mkForAllTys tyvars_here (idType var)	-- But new type of course
+	    poly_id   = mkId poly_name poly_ty vanillaIdInfo
 
-		-- It's crucial to copy the occInfo of the original var, because
-		-- we're looking at occurrence-analysed but as yet unsimplified code!
-		-- In particular, we mustn't lose the loop breakers.
+		-- In the olden days, it was crucial to copy the occInfo of the original var, 
+		-- because we were looking at occurrence-analysed but as yet unsimplified code!
+		-- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
+		-- at already simplified code, so it doesn't matter
 		-- 
 		-- It's even right to retain single-occurrence or dead-var info:
 		-- Suppose we started with  /\a -> let x = E in B
@@ -607,14 +637,11 @@ mkRhsTyLam tyvars body			-- Only does something if there's a let
 		-- where x* has an INLINE prag on it.  Now, once x* is inlined,
 		-- the occurrences of x' will be just the occurrences originally
 		-- pinned on x.
-	    poly_info = vanillaIdInfo `setOccInfo` idOccInfo var
-
-	    poly_id   = mkId poly_name poly_ty poly_info
+		-- 	   poly_info = vanillaIdInfo `setOccInfo` idOccInfo var
 	in
 	returnSmpl (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tyvars_here))
 
     mk_silly_bind var rhs = NonRec var rhs
-		-- We need to be careful about inlining.
 		-- Suppose we start with:
 		--
 		--	x = let g = /\a -> \x -> f x x
@@ -627,8 +654,7 @@ mkRhsTyLam tyvars body			-- Only does something if there's a let
 		--		* so we're back to square one
 		-- We rely on the simplifier not to inline g into the RHS of g*,
 		-- because it's a "lone" occurrence, and there is no benefit in
-		-- inlining.  But it's a slightly delicate property, and there's
-		-- a danger of making the simplifier loop here.
+		-- inlining.  But it's a slightly delicate property; hence this comment
 \end{code}
 
 
@@ -641,61 +667,94 @@ mkRhsTyLam tyvars body			-- Only does something if there's a let
 	Try eta expansion for RHSs
 
 We go for:
-		\x1..xn -> N	==>   \x1..xn y1..ym -> N y1..ym
-	AND		
-		N E1..En	==>   let z1=E1 .. zn=En in \y1..ym -> N z1..zn y1..ym
+   Case 1    f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
+		 (n >= 0)
+     OR		
+   Case 2    f = N E1..En      ==>   z1=E1
+		 (n > 0)		 .. 
+				     zn=En
+				     f = \y1..ym -> N z1..zn y1..ym
+
+where (in both cases) 
 
-where (in both cases) N is a NORMAL FORM (i.e. no redexes anywhere)
-wanting a suitable number of extra args.
+	* The xi can include type variables
 
-NB: the Ei may have unlifted type, but the simplifier (which is applied
-to the result) deals OK with this.
+	* The yi are all value variables
 
-There is no point in looking for a combination of the two, 
-because that would leave use with some lets sandwiched between lambdas;
-that's what the final test in the first equation is for.
+	* N is a NORMAL FORM (i.e. no redexes anywhere)
+	  wanting a suitable number of extra args.
+
+	* the Ei must not have unlifted type
+
+There is no point in looking for a combination of the two, because
+that would leave use with some lets sandwiched between lambdas; that's
+what the final test in the first equation is for.
 
 \begin{code}
-tryEtaExpansion :: InExpr -> SimplM InExpr
-tryEtaExpansion rhs
+tryEtaExpansion :: OutExpr 
+		-> (Arity -> OutExpr -> SimplM (OutStuff a))
+		-> SimplM (OutStuff a)
+tryEtaExpansion rhs thing_inside
   |  not opt_SimplDoLambdaEtaExpansion
-  || exprIsTrivial rhs				-- Don't eta-expand a trival RHS
-  || null y_tys					-- No useful expansion
-  || not (null x_bndrs || and trivial_args)	-- Not (no x-binders or no z-binds)
-  = returnSmpl rhs
-
-  | otherwise	-- Consider eta expansion
-  = newIds SLIT("y") y_tys					$ ( \ y_bndrs ->
-    tick (EtaExpansion (head y_bndrs))				`thenSmpl_`
-    mapAndUnzipSmpl bind_z_arg (args `zip` trivial_args)	`thenSmpl` (\ (maybe_z_binds, z_args) ->
-    returnSmpl (mkLams x_bndrs				$ 
-		mkLets (catMaybes maybe_z_binds)	$
- 		mkLams y_bndrs				$
-		mkApps (mkApps fun z_args) (map Var y_bndrs))))
+  || null y_tys				-- No useful expansion
+  || not (is_case1 || is_case2)		-- Neither case matches
+  = thing_inside final_arity rhs	-- So, no eta expansion, but
+					-- return a good arity
+
+  | is_case1
+  = make_y_bndrs			$ \ y_bndrs ->
+    thing_inside final_arity
+		 (mkLams x_bndrs $ mkLams y_bndrs $
+		  mkApps body (map Var y_bndrs))
+
+  | otherwise	-- Must be case 2
+  = mapAndUnzipSmpl bind_z_arg arg_infos		`thenSmpl` \ (maybe_z_binds, z_args) ->
+    addAuxiliaryBinds (catMaybes maybe_z_binds)		$
+    make_y_bndrs					$  \ y_bndrs ->
+    thing_inside final_arity
+	         (mkLams y_bndrs $
+		  mkApps (mkApps fun z_args) (map Var y_bndrs))
   where
-    (x_bndrs, body) = collectValBinders rhs
-    (fun, args)	    = collectArgs body
-    trivial_args    = map exprIsTrivial args
-    fun_arity	    = exprEtaExpandArity fun
+    all_trivial_args = all is_trivial arg_infos
+    is_case1	     = all_trivial_args
+    is_case2	     = null x_bndrs && not (any unlifted_non_trivial arg_infos)
+
+    (x_bndrs, body)  = collectBinders rhs	-- NB: x_bndrs can include type variables
+    x_arity	     = valBndrCount x_bndrs
 
-    bind_z_arg (arg, trivial_arg) 
+    (fun, args)	     = collectArgs body
+    arg_infos        = [(arg, exprType arg, exprIsTrivial arg) | arg <- args]
+
+    is_trivial		 (_, _,  triv) = triv
+    unlifted_non_trivial (_, ty, triv) = not triv && isUnLiftedType ty
+
+    fun_arity	     = exprEtaExpandArity fun
+
+    final_arity | all_trivial_args = x_arity + extra_args_wanted
+	        | otherwise	   = x_arity
+	-- Arity can be more than the number of lambdas
+	-- because of coerces. E.g.  \x -> coerce t (\y -> e) 
+	-- will have arity at least 2
+	-- The worker/wrapper pass will bring the coerce out to the top
+
+    bind_z_arg (arg, arg_ty, trivial_arg) 
 	| trivial_arg = returnSmpl (Nothing, arg)
-        | otherwise   = newId SLIT("z") (exprType arg)	$ \ z ->
+        | otherwise   = newId SLIT("z") arg_ty	$ \ z ->
 			returnSmpl (Just (NonRec z arg), Var z)
 
-	-- Note: I used to try to avoid the exprType call by using
-	-- the type of the binder.  But this type doesn't necessarily
-	-- belong to the same substitution environment as this rhs;
-	-- and we are going to make extra term binders (y_bndrs) from the type
-	-- which will be processed with the rhs substitution environment.
-	-- This only went wrong in a mind bendingly complicated case.
+    make_y_bndrs thing_inside 
+	= ASSERT( not (exprIsTrivial rhs) )
+    	  newIds SLIT("y") y_tys			$ \ y_bndrs ->
+	  tick (EtaExpansion (head y_bndrs))		`thenSmpl_`
+	  thing_inside y_bndrs
+
     (potential_extra_arg_tys, _) = splitFunTys (exprType body)
 	
     y_tys :: [InType]
-    y_tys  = take no_extras_wanted potential_extra_arg_tys
+    y_tys  = take extra_args_wanted potential_extra_arg_tys
 	
-    no_extras_wanted :: Int
-    no_extras_wanted = 0 `max`
+    extra_args_wanted :: Int	-- Number of extra args we want
+    extra_args_wanted = 0 `max` (fun_arity - valArgCount args)
 
 	-- We used to expand the arity to the previous arity fo the
 	-- function; but this is pretty dangerous.  Consdier
@@ -707,25 +766,6 @@ tryEtaExpansion rhs
 	--	f = \xy -> let z = BIG in e
 	--
 	-- (bndr_arity - no_of_xs)		`max`
-
-	-- See if the body could obviously do with more args
-	(fun_arity - valArgCount args)
-
--- This case is now deal with by exprEtaExpandArity
-	-- Finally, see if it's a state transformer, and xs is non-null
-	-- (so it's also a function not a thunk) in which
-	-- case we eta-expand on principle! This can waste work,
-	-- but usually doesn't.
-	-- I originally checked for a singleton type [ty] in this case
-	-- but then I found a situation in which I had
-	--	\ x -> let {..} in \ s -> f (...) s
-	-- AND f RETURNED A FUNCTION.  That is, 's' wasn't the only
-	-- potential extra arg.
---	case (x_bndrs, potential_extra_arg_tys) of
---	    (_:_, ty:_)  -> case splitTyConApp_maybe ty of
---				  Just (tycon,_) | tycon == statePrimTyCon -> 1
---				  other					   -> 0
---	    other -> 0
 \end{code}
 
 
diff --git a/ghc/compiler/simplCore/Simplify.lhs b/ghc/compiler/simplCore/Simplify.lhs
index 68f8c22..5c09ebc 100644
--- a/ghc/compiler/simplCore/Simplify.lhs
+++ b/ghc/compiler/simplCore/Simplify.lhs
@@ -15,20 +15,22 @@ import CmdLineOpts	( switchIsOn, opt_SimplDoEtaReduction,
 import SimplMonad
 import SimplUtils	( mkCase, transformRhs, findAlt, 
 			  simplBinder, simplBinders, simplIds, findDefault,
-			  SimplCont(..), DupFlag(..), 
+			  SimplCont(..), DupFlag(..), mkStop, mkRhsStop,
 			  contResultType, discardInline, countArgs, contIsDupable,
 			  getContArgs, interestingCallContext, interestingArg, isStrictType
 			)
 import Var		( mkSysTyVar, tyVarKind )
 import VarEnv
+import VarSet		( elemVarSet )
 import Id		( Id, idType, idInfo, isDataConId,
 			  idUnfolding, setIdUnfolding, isExportedId, isDeadBinder,
 			  idDemandInfo, setIdInfo,
 			  idOccInfo, setIdOccInfo,
 			  zapLamIdInfo, setOneShotLambda, 
 			)
-import IdInfo		( OccInfo(..), ArityInfo(..),
-			  setArityInfo, setUnfoldingInfo,
+import IdInfo		( OccInfo(..), isDeadOcc, isLoopBreaker,
+			  ArityInfo, setArityInfo, atLeastArity,
+			  setUnfoldingInfo,
 			  occInfo
 			)
 import Demand		( Demand, isStrict )
@@ -36,12 +38,12 @@ import DataCon		( dataConNumInstArgs, dataConRepStrictness,
 			  dataConSig, dataConArgTys
 			)
 import CoreSyn
-import CoreFVs		( mustHaveLocalBinding )
+import CoreFVs		( mustHaveLocalBinding, exprFreeVars )
 import CoreUnfold	( mkOtherCon, mkUnfolding, otherCons,
 			  callSiteInline
 			)
 import CoreUtils	( cheapEqExpr, exprIsDupable, exprIsTrivial, exprIsConApp_maybe,
-			  exprType, coreAltsType, exprArity, exprIsValue, idAppIsCheap,
+			  exprType, coreAltsType, exprIsValue, idAppIsCheap,
 			  exprOkForSpeculation, etaReduceExpr,
 			  mkCoerce, mkSCC, mkInlineMe, mkAltExpr
 			)
@@ -57,7 +59,6 @@ import Subst		( mkSubst, substTy, substExpr,
 import TyCon		( isDataTyCon, tyConDataConsIfAvailable	)
 import TysPrim		( realWorldStatePrimTy )
 import PrelInfo		( realWorldPrimId )
-import BasicTypes	( isLoopBreaker )
 import Maybes		( maybeToBool )
 import Util		( zipWithEqual )
 import Outputable
@@ -128,33 +129,6 @@ simplRecBind top_lvl pairs bndrs' thing_inside
 %*									*
 %************************************************************************
 
-\begin{code}
-addLetBind :: OutId -> OutExpr -> SimplM (OutStuff a) -> SimplM (OutStuff a)
-addLetBind bndr rhs thing_inside
-  = thing_inside	`thenSmpl` \ (binds, res) ->
-    returnSmpl (NonRec bndr rhs : binds, res)
-
-addLetBinds :: [CoreBind] -> SimplM (OutStuff a) -> SimplM (OutStuff a)
-addLetBinds binds1 thing_inside
-  = thing_inside	`thenSmpl` \ (binds2, res) ->
-    returnSmpl (binds1 ++ binds2, res)
-
-needsCaseBinding ty rhs = isUnLiftedType ty && not (exprOkForSpeculation rhs)
-	-- Make a case expression instead of a let
-	-- These can arise either from the desugarer,
-	-- or from beta reductions: (\x.e) (x +# y)
-
-addCaseBind bndr rhs thing_inside
-  = getInScope 			`thenSmpl` \ in_scope ->
-    thing_inside		`thenSmpl` \ (floats, (_, body)) ->
-    returnSmpl ([], (in_scope, Case rhs bndr [(DEFAULT, [], mkLets floats body)]))
-
-addNonRecBind bndr rhs thing_inside
-	-- Checks for needing a case binding
-  | needsCaseBinding (idType bndr) rhs = addCaseBind bndr rhs thing_inside
-  | otherwise		     	       = addLetBind  bndr rhs thing_inside
-\end{code}
-
 The reason for this OutExprStuff stuff is that we want to float *after*
 simplifying a RHS, not before.  If we do so naively we get quadratic
 behaviour as things float out.
@@ -196,7 +170,7 @@ might do the same again.
 \begin{code}
 simplExpr :: CoreExpr -> SimplM CoreExpr
 simplExpr expr = getSubst	`thenSmpl` \ subst ->
-		 simplExprC expr (Stop (substTy subst (exprType expr)))
+		 simplExprC expr (mkStop (substTy subst (exprType expr)))
 	-- The type in the Stop continuation is usually not used
 	-- It's only needed when discarding continuations after finding
 	-- a function that returns bottom.
@@ -235,7 +209,7 @@ simplExprF (Case scrut bndr alts) cont
 	-- If case-of-case is off, simply simplify the case expression
 	-- in a vanilla Stop context, and rebuild the result around it
 	simplExprC scrut (Select NoDup bndr alts subst_env 
-				 (Stop (contResultType cont)))	`thenSmpl` \ case_expr' ->
+				 (mkStop (contResultType cont)))	`thenSmpl` \ case_expr' ->
 	rebuild case_expr' cont
 
 
@@ -249,7 +223,7 @@ simplExprF (Let (Rec pairs) body) cont
 simplExprF expr@(Lam _ _) cont = simplLam expr cont
 
 simplExprF (Type ty) cont
-  = ASSERT( case cont of { Stop _ -> True; ArgOf _ _ _ -> True; other -> False } )
+  = ASSERT( case cont of { Stop _ _ -> True; ArgOf _ _ _ -> True; other -> False } )
     simplType ty	`thenSmpl` \ ty' ->
     rebuild (Type ty') cont
 
@@ -302,7 +276,7 @@ simplExprF (Note InlineCall e) cont
 
 simplExprF (Note InlineMe e) cont
   = case cont of
-	Stop _ -> 	-- Totally boring continuation
+	Stop _ _ -> 	-- Totally boring continuation
 			-- Don't inline inside an INLINE expression
 		  setBlackList noInlineBlackList (simplExpr e)	`thenSmpl` \ e' ->
 		  rebuild (mkInlineMe e') cont
@@ -352,22 +326,37 @@ simplLam fun cont
     go expr cont = simplExprF expr cont
 
 -- completeLam deals with the case where a lambda doesn't have an ApplyTo
--- continuation.  
--- We used to try for eta reduction here, but I found that this was
--- eta reducing things like 
---	f = \x -> (coerce (\x -> e))
--- This made f's arity reduce, which is a bad thing, so I removed the
--- eta reduction at this point, and now do it only when binding 
--- (at the call to postInlineUnconditionally)
-
-completeLam acc (Lam bndr body) cont
+-- continuation, so there are real lambdas left to put in the result
+
+-- We try for eta reduction here, but *only* if we get all the 
+-- way to an exprIsTrivial expression.    
+-- We don't want to remove extra lambdas unless we are going 
+-- to avoid allocating this thing altogether
+
+completeLam rev_bndrs (Lam bndr body) cont
   = simplBinder bndr			$ \ bndr' ->
-    completeLam (bndr':acc) body cont
+    completeLam (bndr':rev_bndrs) body cont
 
-completeLam acc body cont
+completeLam rev_bndrs body cont
   = simplExpr body 			`thenSmpl` \ body' ->
-    rebuild (foldl (flip Lam) body' acc) cont
-		-- Remember, acc is the *reversed* binders
+    case try_eta body' of
+	Just etad_lam -> tick (EtaReduction (head rev_bndrs)) 	`thenSmpl_`
+			 rebuild etad_lam cont
+
+	Nothing	      -> rebuild (foldl (flip Lam) body' rev_bndrs) cont
+  where
+	-- We don't use CoreUtils.etaReduceExpr, because we can be more
+	-- efficient here: (a) we already have the binders, (b) we can do
+	-- the triviality test before computing the free vars
+    try_eta body | not opt_SimplDoEtaReduction = Nothing
+		 | otherwise		       = go rev_bndrs body
+
+    go (b : bs) (App fun arg) | ok_arg b arg = go bs fun	-- Loop round
+    go []       body          | ok_body body = Just body	-- Success!
+    go _        _			     = Nothing		-- Failure!
+
+    ok_body body = exprIsTrivial body && not (any (`elemVarSet` exprFreeVars body) rev_bndrs)
+    ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
 
 mkLamBndrZapper :: CoreExpr 	-- Function
 		-> SimplCont	-- The context
@@ -465,28 +454,17 @@ simplValArg :: OutType		-- rhs_ty: Type of arg; used only occasionally
 
 simplValArg arg_ty is_strict arg arg_se cont_ty thing_inside
   | is_strict
-  = transformRhs arg			`thenSmpl` \ t_arg ->
-    getEnv 				`thenSmpl` \ env ->
+  = getEnv 				`thenSmpl` \ env ->
     setSubstEnv arg_se 				$
-    simplExprF t_arg (ArgOf NoDup cont_ty 	$ \ rhs' ->
+    simplExprF arg (ArgOf NoDup cont_ty 	$ \ rhs' ->
     setAllExceptInScope env			$
-    thing_inside (etaFirst rhs'))
+    thing_inside rhs')
 
   | otherwise
   = simplRhs False {- Not top level -} 
 	     True {- OK to float unboxed -}
 	     arg_ty arg arg_se 
 	     thing_inside
-   
--- Do eta-reduction on the simplified RHS, if eta reduction is on
--- But *only* if we get all the way to an exprIsTrivial expression.    
--- We don't want to remove extra lambdas unless we are going 
--- to avoid allocating this thing altogether
-etaFirst rhs 
-  | opt_SimplDoEtaReduction && exprIsTrivial rhs' = rhs'
-  | otherwise	  				  = rhs
- where
-   rhs' = etaReduceExpr rhs
 \end{code}
 
 
@@ -512,63 +490,130 @@ completeBinding :: InId 		-- Binder
 	   	-> SimplM (OutStuff a)
 
 completeBinding old_bndr new_bndr top_lvl black_listed new_rhs thing_inside
-  |  (case occ_info of		-- This happens; for example, the case_bndr during case of
-	IAmDead -> True		-- known constructor:  case (a,b) of x { (p,q) -> ... }
-	other	-> False)	-- Here x isn't mentioned in the RHS, so we don't want to
+  |  isDeadOcc occ_info 	-- This happens; for example, the case_bndr during case of
+				-- known constructor:  case (a,b) of x { (p,q) -> ... }
+				-- Here x isn't mentioned in the RHS, so we don't want to
 				-- create the (dead) let-binding  let x = (a,b) in ...
   =  thing_inside
 
-  |  postInlineUnconditionally black_listed occ_info old_bndr new_rhs
-	-- Maybe we don't need a let-binding!  Maybe we can just
-	-- inline it right away.  Unlike the preInlineUnconditionally case
-	-- we are allowed to look at the RHS.
+  | exprIsTrivial new_rhs
+  = completeTrivialBinding old_bndr new_bndr 
+			   black_listed loop_breaker new_rhs
+			   thing_inside
+
+  | Note coercion@(Coerce _ inner_ty) inner_rhs <- new_rhs
+	-- x = coerce t e  ==>  c = e; x = inline_me (coerce t c)
+	-- Now x can get inlined, which moves the coercion
+	-- to the usage site.  This is a bit like worker/wrapper stuff,
+	-- but it's useful to do it very promptly, so that
+	--	x = coerce T (I# 3)
+	-- get's w/wd to
+	--	c = I# 3
+	--	x = coerce T $wx
+	-- This in turn means that
+	--	case (coerce Int x) of ...
+	-- will inline x.  
+	-- Also the full-blown w/w thing isn't set up for non-functions
 	--
-	-- NB: a loop breaker never has postInlineUnconditionally True
-	-- and non-loop-breakers only have *forward* references
-	-- Hence, it's safe to discard the binding
-	--	
-	-- NB: You might think that postInlineUnconditionally is an optimisation,
-	-- but if we have
-	--	let x = f Bool in (x, y)
-	-- then because of the constructor, x will not be *inlined* in the pair,
-	-- so the trivial binding will stay.  But in this postInlineUnconditionally 
-	-- gag we use the *substitution* to substitute (f Bool) for x, and that *will*
-	-- happen.
-  =  tick (PostInlineUnconditionally old_bndr)	`thenSmpl_`
-     extendSubst old_bndr (DoneEx new_rhs)	
-     thing_inside
+	-- The inline_me note is so that the simplifier doesn't 
+	-- just substitute c back inside x's rhs!  (Typically, x will
+	-- get substituted away, but not if it's exported.)
+  = newId SLIT("c") inner_ty 					$ \ c_id ->
+    completeBinding c_id c_id top_lvl False inner_rhs		$
+    completeTrivialBinding old_bndr new_bndr black_listed loop_breaker
+		   	   (Note InlineMe (Note coercion (Var c_id)))	$
+    thing_inside
+
 
   |  otherwise
-  =  getSubst			`thenSmpl` \ subst ->
+  =  transformRhs new_rhs	$ \ arity new_rhs' ->
+     getSubst			`thenSmpl` \ subst ->
      let
 	-- We make new IdInfo for the new binder by starting from the old binder, 
 	-- doing appropriate substitutions.
 	-- Then we add arity and unfolding info to get the new binder
-	old_info      = idInfo old_bndr
 	new_bndr_info = substIdInfo subst old_info (idInfo new_bndr)
-		        `setArityInfo` ArityAtLeast (exprArity new_rhs)
+		        `setArityInfo` atLeastArity arity
 
 	-- Add the unfolding *only* for non-loop-breakers
 	-- Making loop breakers not have an unfolding at all 
 	-- means that we can avoid tests in exprIsConApp, for example.
 	-- This is important: if exprIsConApp says 'yes' for a recursive
 	-- thing, then we can get into an infinite loop
-	info_w_unf | isLoopBreaker (occInfo old_info) = new_bndr_info
-		   | otherwise = new_bndr_info `setUnfoldingInfo` mkUnfolding top_lvl new_rhs
+	info_w_unf | loop_breaker = new_bndr_info
+		   | otherwise	  = new_bndr_info `setUnfoldingInfo` mkUnfolding top_lvl new_rhs'
 
 	final_id = new_bndr `setIdInfo` info_w_unf
      in
 	-- These seqs forces the Id, and hence its IdInfo,
 	-- and hence any inner substitutions
      final_id				`seq`
-     addLetBind final_id new_rhs 	$
+     addLetBind (NonRec final_id new_rhs') 	$
      modifyInScope new_bndr final_id thing_inside
 
   where
-    occ_info = idOccInfo old_bndr
+    old_info     = idInfo old_bndr
+    occ_info     = occInfo old_info
+    loop_breaker = isLoopBreaker occ_info
 \end{code}    
 
 
+\begin{code}
+completeTrivialBinding old_bndr new_bndr black_listed loop_breaker new_rhs thing_inside
+	-- We're looking at a binding with a trivial RHS, so
+	-- perhaps we can discard it altogether!
+	--
+	-- NB: a loop breaker never has postInlineUnconditionally True
+	-- and non-loop-breakers only have *forward* references
+	-- Hence, it's safe to discard the binding
+	--	
+	-- NB: You might think that postInlineUnconditionally is an optimisation,
+	-- but if we have
+	--	let x = f Bool in (x, y)
+	-- then because of the constructor, x will not be *inlined* in the pair,
+	-- so the trivial binding will stay.  But in this postInlineUnconditionally 
+	-- gag we use the *substitution* to substitute (f Bool) for x, and that *will*
+	-- happen.
+
+	-- NOTE: This isn't our last opportunity to inline.
+	-- We're at the binding site right now, and
+	-- we'll get another opportunity when we get to the ocurrence(s)
+
+	-- Note that we do this unconditional inlining only for trival RHSs.
+	-- Don't inline even WHNFs inside lambdas; doing so may
+	-- simply increase allocation when the function is called
+	-- This isn't the last chance; see NOTE above.
+	--
+	-- NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here
+	-- Why?  Because we don't even want to inline them into the
+	-- RHS of constructor arguments. See NOTE above
+	--
+	-- NB: Even NOINLINEis ignored here: if the rhs is trivial
+	-- it's best to inline it anyway.  We often get a=E; b=a
+	-- from desugaring, with both a and b marked NOINLINE.
+
+  |  not keep_binding	-- Can discard binding, inlining everywhere
+  =  extendSubst old_bndr (DoneEx new_rhs)	$
+     tick (PostInlineUnconditionally old_bndr)	`thenSmpl_`
+     thing_inside
+    
+  | otherwise		-- We must keep the binding, but we may still inline
+  = getSubst			`thenSmpl` \ subst ->
+    let
+	new_bndr_info = substIdInfo subst (idInfo old_bndr) (idInfo new_bndr)
+ 	final_id      = new_bndr `setIdInfo` new_bndr_info
+    in
+    addLetBind (NonRec final_id new_rhs) 	$
+    if dont_inline then
+	modifyInScope new_bndr final_id	thing_inside
+    else
+	extendSubst old_bndr (DoneEx new_rhs) thing_inside
+  where
+    dont_inline  = black_listed || loop_breaker
+    keep_binding = dont_inline || isExportedId old_bndr
+\end{code}
+
+
 %************************************************************************
 %*									*
 \subsection{simplLazyBind}
@@ -621,17 +666,14 @@ simplLazyBind top_lvl bndr bndr' rhs thing_inside
 \begin{code}
 simplRhs :: Bool		-- True <=> Top level
 	 -> Bool		-- True <=> OK to float unboxed (speculative) bindings
+				--		False for (a) recursive and (b) top-level bindings
 	 -> OutType 		-- Type of RHS; used only occasionally
 	 -> InExpr -> SubstEnv
 	 -> (OutExpr -> SimplM (OutStuff a))
 	 -> SimplM (OutStuff a)
 simplRhs top_lvl float_ubx rhs_ty rhs rhs_se thing_inside
-  =    	-- Swizzle the inner lets past the big lambda (if any)
-	-- and try eta expansion
-    transformRhs rhs					`thenSmpl` \ t_rhs ->
-
-	-- Simplify it
-    setSubstEnv rhs_se (simplExprF t_rhs (Stop rhs_ty))	`thenSmpl` \ (floats, (in_scope', rhs')) ->
+  =	-- Simplify it
+    setSubstEnv rhs_se (simplExprF rhs (mkRhsStop rhs_ty))	`thenSmpl` \ (floats, (in_scope', rhs')) ->
 
 	-- Float lets out of RHS
     let
@@ -652,12 +694,12 @@ simplRhs top_lvl float_ubx rhs_ty rhs rhs_se thing_inside
 	WARN( any demanded_float floats_out, ppr floats_out )
 	addLetBinds floats_out 	$
 	setInScope in_scope'	$
-	thing_inside (etaFirst rhs'')
+	thing_inside rhs''
 		-- in_scope' may be excessive, but that's OK;
 		-- it's a superset of what's in scope
     else	
 		-- Don't do the float
-	thing_inside (etaFirst (mkLets floats rhs'))
+	thing_inside (mkLets floats rhs')
 
 -- In a let-from-let float, we just tick once, arbitrarily
 -- choosing the first floated binder to identify it
@@ -706,11 +748,7 @@ wantToExpose :: Int -> CoreExpr -> Bool
 --	v = E
 --	z = \w -> g v w
 -- Which is what we want; chances are z will be inlined now.
---
--- This defn isn't quite like 
---	exprIsCheap (it ignores non-cheap args)
---	exprIsValue (may not say True for a lone variable)
--- which is slightly weird
+
 wantToExpose n (Var v)		= idAppIsCheap v n
 wantToExpose n (Lit l)		= True
 wantToExpose n (Lam _ e)	= True
@@ -952,35 +990,6 @@ preInlineUnconditionally black_listed bndr
 	  	  OneOcc in_lam once -> not in_lam && once
 			-- Not inside a lambda, one occurrence ==> safe!
 		  other 	     -> False
-
-
-postInlineUnconditionally :: Bool  	-- Black listed
-			  -> OccInfo
-			  -> InId -> OutExpr -> Bool
-	-- Examines a (bndr = rhs) binding, AFTER the rhs has been simplified
-	-- It returns True if it's ok to discard the binding and inline the
-	-- RHS at every use site.
-
-	-- NOTE: This isn't our last opportunity to inline.
-	-- We're at the binding site right now, and
-	-- we'll get another opportunity when we get to the ocurrence(s)
-
-postInlineUnconditionally black_listed occ_info bndr rhs
-  | isExportedId bndr	   = False		-- Don't inline these, ever
-  | black_listed 	   = False
-  | isLoopBreaker occ_info = False
-  | otherwise	           = exprIsTrivial rhs	-- Duplicating is free
-	-- Don't inline even WHNFs inside lambdas; doing so may
-	-- simply increase allocation when the function is called
-	-- This isn't the last chance; see NOTE above.
-	--
-	-- NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here
-	-- Why?  Because we don't even want to inline them into the
-	-- RHS of constructor arguments. See NOTE above
-	--
-	-- NB: Even NOINLINEis ignored here: if the rhs is trivial
-	-- it's best to inline it anyway.  We often get a=E; b=a
-	-- from desugaring, with both a and b marked NOINLINE.
 \end{code}
 
 
@@ -1002,7 +1011,7 @@ rebuild_done expr
 rebuild :: OutExpr -> SimplCont -> SimplM OutExprStuff
 
 --	Stop continuation
-rebuild expr (Stop _) = rebuild_done expr
+rebuild expr (Stop _ _) = rebuild_done expr
 
 --	ArgOf continuation
 rebuild expr (ArgOf _ _ cont_fn) = cont_fn expr
@@ -1453,7 +1462,8 @@ mkDupableCont join_arg_ty (ArgOf _ cont_ty cont_fn) thing_inside
 	-- Want to tick here so that we go round again,
 	-- and maybe copy or inline the code;
 	-- not strictly CaseOf Case
-    addLetBind join_id join_rhs	(thing_inside new_cont)
+    addLetBind (NonRec join_id join_rhs)	$
+    thing_inside new_cont
 
 mkDupableCont ty (ApplyTo _ arg se cont) thing_inside
   = mkDupableCont (funResultTy ty) cont 		$ \ cont' ->
@@ -1468,7 +1478,7 @@ mkDupableCont ty (ApplyTo _ arg se cont) thing_inside
 	-- and maybe copy or inline the code;
 	-- not strictly CaseOf Case
 
-     addLetBind bndr arg'						$
+     addLetBind (NonRec bndr arg')		$
 	-- But what if the arg should be case-bound?  We can't use
 	-- addNonRecBind here because its type is too specific.
 	-- This has been this way for a long time, so I'll leave it,
@@ -1486,7 +1496,7 @@ mkDupableCont ty (Select _ case_bndr alts se cont) thing_inside
 	returnSmpl (concat alt_binds_s, alts')
     )					`thenSmpl` \ (alt_binds, alts') ->
 
-    addNewInScopeIds [b | NonRec b _ <- alt_binds]		$
+    addAuxiliaryBinds alt_binds 				$
 
 	-- NB that the new alternatives, alts', are still InAlts, using the original
 	-- binders.  That means we can keep the case_bndr intact. This is important
@@ -1495,15 +1505,14 @@ mkDupableCont ty (Select _ case_bndr alts se cont) thing_inside
 	-- This is VITAL when the type of case_bndr is an unboxed pair (often the
 	-- case in I/O rich code.  We aren't allowed a lambda bound
 	-- arg of unboxed tuple type, and indeed such a case_bndr is always dead
-    addLetBinds alt_binds 					$
-    thing_inside (Select OkToDup case_bndr alts' se (Stop (contResultType cont)))
+    thing_inside (Select OkToDup case_bndr alts' se (mkStop (contResultType cont)))
 
 mkDupableAlt :: InId -> OutId -> SimplCont -> InAlt -> SimplM (OutStuff InAlt)
 mkDupableAlt case_bndr case_bndr' cont alt@(con, bndrs, rhs)
   = simplBinders bndrs					$ \ bndrs' ->
     simplExprC rhs cont					`thenSmpl` \ rhs' ->
 
-    if (case cont of { Stop _ -> exprIsDupable rhs'; other -> False}) then
+    if (case cont of { Stop _ _ -> exprIsDupable rhs'; other -> False}) then
    	-- It is worth checking for a small RHS because otherwise we
 	-- get extra let bindings that may cause an extra iteration of the simplifier to
 	-- inline back in place.  Quite often the rhs is just a variable or constructor.
diff --git a/ghc/compiler/stranal/WorkWrap.lhs b/ghc/compiler/stranal/WorkWrap.lhs
index 0ad7546..b05737d 100644
--- a/ghc/compiler/stranal/WorkWrap.lhs
+++ b/ghc/compiler/stranal/WorkWrap.lhs
@@ -14,9 +14,9 @@ import CmdLineOpts	( opt_UF_CreationThreshold , opt_D_verbose_core2core,
                           opt_D_dump_worker_wrapper
 			)
 import CoreLint		( beginPass, endPass )
-import CoreUtils	( exprType, exprArity, exprEtaExpandArity )
+import CoreUtils	( exprType, exprEtaExpandArity )
 import MkId		( mkWorkerId )
-import Id		( Id, idType, idStrictness, setIdArityInfo, isOneShotLambda,
+import Id		( Id, idType, idStrictness, idArity, isOneShotLambda,
 			  setIdStrictness, idInlinePragma, 
 			  setIdWorkerInfo, idCprInfo, setInlinePragma )
 import Type		( Type, isNewType, splitForAllTys, splitFunTys )
@@ -186,17 +186,30 @@ tryWW	:: Bool				-- True <=> a non-recursive binding
 					-- if two, then a worker and a
 					-- wrapper.
 tryWW non_rec fn_id rhs
-  | isNeverInlinePrag inline_prag
+  | isNeverInlinePrag inline_prag || arity == 0
   = 	-- Don't split things that will never be inlined
     returnUs [ (fn_id, rhs) ]
 
-  | non_rec && certainlyWillInline fn_id
-	-- No point in worker/wrappering something that is going to be
+  | non_rec && not do_coerce_ww && certainlyWillInline fn_id
+	-- No point in worker/wrappering a function that is going to be
 	-- INLINEd wholesale anyway.  If the strictness analyser is run
 	-- twice, this test also prevents wrappers (which are INLINEd)
 	-- from being re-done.
 	--
+	-- The do_coerce_ww test is so that
+	-- a function with a coerce should w/w to get rid
+	-- of the coerces, which can significantly improve its arity.
+	-- Example:  f []     = return [] :: IO [Int]
+	--	     f (x:xs) = return (x:xs)
+	-- If we aren't careful we end up with
+	--	f = \ x -> case x of {
+	--		     x:xs -> __coerce (IO [Int]) (\ s -> (# s, x:xs #)
+	--		     []   -> lvl_sJ8
+	--
+	--
 	-- OUT OF DATE NOTE, kept for info:
+	-- It's out of date because now wrappers look very cheap 
+	-- even when they are inlined.
 	--   In this case we add an INLINE pragma to the RHS.  Why?
 	--   Because consider
 	--	  f = \x -> g x x
@@ -204,8 +217,6 @@ tryWW non_rec fn_id rhs
 	--   Then f is small, so we don't w/w it.  But g is big, and we do, so
 	--   g's wrapper will get inlined in f's RHS, which makes f look big now.
 	--   So f doesn't get inlined, but it is strict and we have failed to w/w it.
-	-- It's out of date because now wrappers look very cheap 
-	-- even when they are inlined.
   = returnUs [ (fn_id, rhs) ]
 
   | not (do_strict_ww || do_cpr_ww || do_coerce_ww)
@@ -222,13 +233,9 @@ tryWW non_rec fn_id rhs
 	work_id | has_strictness = proto_work_id `setIdStrictness` mkStrictnessInfo (work_demands, result_bot)
 		| otherwise	 = proto_work_id
 
-	wrap_arity = exprArity wrap_rhs		-- Might be greater than the current visible arity
-						-- if the function returns bottom
-						
 	wrap_rhs = wrap_fn work_id
 	wrap_id  = fn_id `setIdStrictness`      wrapper_strictness
-                         `setIdWorkerInfo`	HasWorker work_id wrap_arity
-			 `setIdArityInfo`	exactArity wrap_arity
+                         `setIdWorkerInfo`	HasWorker work_id arity
 			 `setInlinePragma`	NoInlinePragInfo	-- Put it on the worker instead
 		-- Add info to the wrapper:
 		--	(a) we want to set its arity
@@ -237,12 +244,12 @@ tryWW non_rec fn_id rhs
     in
     returnUs ([(work_id, work_rhs), (wrap_id, wrap_rhs)])
 	-- Worker first, because wrapper mentions it
-	-- Arrange to inline the wrapper unconditionally
+	-- mkWwBodies has already built a wrap_rhs with an INLINE pragma wrapped around it
   where
     fun_ty = idType fn_id
-    arity  = exprEtaExpandArity rhs
+    arity  = idArity fn_id	-- The arity is set by the simplifier using exprEtaExpandArity
+				-- So it may be more than the number of top-level-visible lambdas
 
-	-- Don't split something which is marked unconditionally NOINLINE
     inline_prag  = idInlinePragma fn_id
 
     strictness_info           = idStrictness fn_id
-- 
1.7.10.4