X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FsimplCore%2FSimplUtils.lhs;h=7e9a010051ee64822a2604bf0d628125e5b5df63;hb=6cec61d14a324285dbb8ce73d4c7215f1f8d6766;hp=20f26c2ce7dd61ee368da6cff40bd21dd6fd3b97;hpb=63e3a41126771e71c44705480c2bde7043a41df3;p=ghc-hetmet.git

diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs
index 20f26c2..7e9a010 100644
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -6,17 +6,19 @@
 \begin{code}
 module SimplUtils (
 	-- Rebuilding
-	mkLam, mkCase, prepareAlts, 
+	mkLam, mkCase, prepareAlts, tryEtaExpand,
 
 	-- Inlining,
 	preInlineUnconditionally, postInlineUnconditionally, 
-	activeUnfolding, activeUnfInRule, activeRule, 
-        simplEnvForGHCi, simplEnvForRules, updModeForInlineRules,
+	activeUnfolding, activeRule, 
+	getUnfoldingInRuleMatch, 
+        simplEnvForGHCi, updModeForInlineRules,
 
 	-- The continuation type
 	SimplCont(..), DupFlag(..), ArgInfo(..),
+        isSimplified,
 	contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, 
-	pushArgs, countValArgs, countArgs, addArgTo,
+	pushSimplifiedArgs, countValArgs, countArgs, addArgTo,
 	mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
 	interestingCallContext, 
 
@@ -28,7 +30,7 @@ module SimplUtils (
 #include "HsVersions.h"
 
 import SimplEnv
-import CoreMonad	( SimplifierMode(..), Tick(..) )
+import CoreMonad        ( SimplifierMode(..), Tick(..) )
 import DynFlags
 import StaticFlags
 import CoreSyn
@@ -36,13 +38,14 @@ import qualified CoreSubst
 import PprCore
 import CoreFVs
 import CoreUtils
-import CoreArity	( etaExpand, exprEtaExpandArity )
+import CoreArity
 import CoreUnfold
 import Name
 import Id
-import Var	( isCoVar )
+import Var
 import Demand
 import SimplMonad
+import TcType	( isDictLikeTy )
 import Type	hiding( substTy )
 import Coercion ( coercionKind )
 import TyCon
@@ -99,12 +102,12 @@ data SimplCont
 	SimplCont
 
   | ApplyTo  		-- C arg
-	DupFlag 
-	InExpr StaticEnv		-- The argument and its static env
+	DupFlag			-- See Note [DupFlag invariants]
+	InExpr StaticEnv	-- The argument and its static env
 	SimplCont
 
   | Select   		-- case C of alts
-	DupFlag 
+	DupFlag 	        -- See Note [DupFlag invariants]
 	InId [InAlt] StaticEnv	-- The case binder, alts, and subst-env
 	SimplCont
 
@@ -147,18 +150,35 @@ instance Outputable SimplCont where
 					  {-  $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
   ppr (StrictBind b _ _ _ cont)      = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
   ppr (StrictArg ai _ cont)          = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
-  ppr (Select dup bndr alts _ cont)  = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$ 
-				       (nest 4 (ppr alts)) $$ ppr cont 
+  ppr (Select dup bndr alts se cont) = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$ 
+				       (nest 2 $ vcat [ppr (seTvSubst se), ppr alts]) $$ ppr cont 
   ppr (CoerceIt co cont)	     = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
 
-data DupFlag = OkToDup | NoDup
+data DupFlag = NoDup       -- Unsimplified, might be big
+             | Simplified  -- Simplified
+             | OkToDup     -- Simplified and small
+
+isSimplified :: DupFlag -> Bool
+isSimplified NoDup = False
+isSimplified _     = True	-- Invariant: the subst-env is empty
 
 instance Outputable DupFlag where
-  ppr OkToDup = ptext (sLit "ok")
-  ppr NoDup   = ptext (sLit "nodup")
+  ppr OkToDup    = ptext (sLit "ok")
+  ppr NoDup      = ptext (sLit "nodup")
+  ppr Simplified = ptext (sLit "simpl")
+\end{code}
 
+Note [DupFlag invariants]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+In both (ApplyTo dup _ env k)
+   and  (Select dup _ _ env k)
+the following invariants hold
 
+  (a) if dup = OkToDup, then continuation k is also ok-to-dup
+  (b) if dup = OkToDup or Simplified, the subst-env is empty
+      (and and hence no need to re-simplify)
 
+\begin{code}
 -------------------
 mkBoringStop :: SimplCont
 mkBoringStop = Stop BoringCtxt
@@ -179,8 +199,8 @@ contIsRhsOrArg _               = False
 -------------------
 contIsDupable :: SimplCont -> Bool
 contIsDupable (Stop {})                  = True
-contIsDupable (ApplyTo  OkToDup _ _ _)   = True
-contIsDupable (Select   OkToDup _ _ _ _) = True
+contIsDupable (ApplyTo  OkToDup _ _ _)   = True	-- See Note [DupFlag invariants]
+contIsDupable (Select   OkToDup _ _ _ _) = True -- ...ditto...
 contIsDupable (CoerceIt _ cont)          = contIsDupable cont
 contIsDupable _                          = False
 
@@ -222,16 +242,26 @@ countArgs :: SimplCont -> Int
 countArgs (ApplyTo _ _ _ cont) = 1 + countArgs cont
 countArgs _                    = 0
 
-contArgs :: SimplCont -> ([OutExpr], SimplCont)
+contArgs :: SimplCont -> (Bool, [ArgSummary], SimplCont)
 -- Uses substitution to turn each arg into an OutExpr
-contArgs cont = go [] cont
+contArgs cont@(ApplyTo {})
+  = case go [] cont of { (args, cont') -> (False, args, cont') }
   where
-    go args (ApplyTo _ arg se cont) = go (substExpr se arg : args) cont
-    go args cont		    = (reverse args, cont)
+    go args (ApplyTo _ arg se cont)
+      | isTypeArg arg = go args                           cont
+      | otherwise     = go (is_interesting arg se : args) cont
+    go args cont      = (reverse args, cont)
+
+    is_interesting arg se = interestingArg (substExpr (text "contArgs") se arg)
+    		   -- Do *not* use short-cutting substitution here
+		   -- because we want to get as much IdInfo as possible
 
-pushArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont
-pushArgs _env []         cont = cont
-pushArgs env  (arg:args) cont = ApplyTo NoDup arg env (pushArgs env args cont)
+contArgs cont = (True, [], cont)
+
+pushSimplifiedArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont
+pushSimplifiedArgs _env []         cont = cont
+pushSimplifiedArgs env  (arg:args) cont = ApplyTo Simplified arg env (pushSimplifiedArgs env args cont)
+		   -- The env has an empty SubstEnv
 
 dropArgs :: Int -> SimplCont -> SimplCont
 dropArgs 0 cont = cont
@@ -424,33 +454,55 @@ interestingArgContext rules call_cont
 \end{code}
 
 
-
 %************************************************************************
 %*									*
-\subsection{Decisions about inlining}
+                  SimplifierMode
 %*									*
 %************************************************************************
 
-Inlining is controlled partly by the SimplifierMode switch.  This has two
-settings
-	
-	SimplGently	(a) Simplifying before specialiser/full laziness
-			(b) Simplifiying inside InlineRules
-			(c) Simplifying the LHS of a rule
-			(d) Simplifying a GHCi expression or Template 
-				Haskell splice
-
-	SimplPhase n _	 Used at all other times
-
-Note [Gentle mode]
-~~~~~~~~~~~~~~~~~~
-Gentle mode has a separate boolean flag to control
-	a) inlining (sm_inline flag)
-	b) rules    (sm_rules  flag)
-A key invariant about Gentle mode is that it is treated as the EARLIEST
-phase.  Something is inlined if the sm_inline flag is on AND the thing
-is inlinable in the earliest phase.  This is important. Example
+The SimplifierMode controls several switches; see its definition in
+CoreMonad
+        sm_rules      :: Bool     -- Whether RULES are enabled
+        sm_inline     :: Bool     -- Whether inlining is enabled
+        sm_case_case  :: Bool     -- Whether case-of-case is enabled
+        sm_eta_expand :: Bool     -- Whether eta-expansion is enabled
+
+\begin{code}
+simplEnvForGHCi :: DynFlags -> SimplEnv
+simplEnvForGHCi dflags
+  = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
+                           , sm_phase = InitialPhase
+                           , sm_rules = rules_on
+                           , sm_inline = False
+                           , sm_eta_expand = eta_expand_on
+                           , sm_case_case = True }
+  where
+    rules_on      = dopt Opt_EnableRewriteRules   dflags
+    eta_expand_on = dopt Opt_DoLambdaEtaExpansion dflags
+   -- Do not do any inlining, in case we expose some unboxed
+   -- tuple stuff that confuses the bytecode interpreter
+
+updModeForInlineRules :: Activation -> SimplifierMode -> SimplifierMode
+-- See Note [Simplifying inside InlineRules]
+updModeForInlineRules inline_rule_act current_mode
+  = current_mode { sm_phase = phaseFromActivation inline_rule_act
+                 , sm_inline = True
+                 , sm_eta_expand = False }
+		 -- For sm_rules, just inherit; sm_rules might be "off"
+		 -- becuase of -fno-enable-rewrite-rules
+  where
+    phaseFromActivation (ActiveAfter n) = Phase n
+    phaseFromActivation _               = InitialPhase
+\end{code}
 
+Note [Inlining in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Something is inlined if 
+   (i)   the sm_inline flag is on, AND
+   (ii)  the thing has an INLINE pragma, AND
+   (iii) the thing is inlinable in the earliest phase.  
+
+Example of why (iii) is important:
   {-# INLINE [~1] g #-}
   g = ...
   
@@ -466,9 +518,9 @@ On the other hand, it is bad not to do ANY inlining into an
 InlineRule, because then recursive knots in instance declarations
 don't get unravelled.
 
-However, *sometimes* SimplGently must do no call-site inlining at all.
-Before full laziness we must be careful not to inline wrappers,
-because doing so inhibits floating
+However, *sometimes* SimplGently must do no call-site inlining at all
+(hence sm_inline = False).  Before full laziness we must be careful
+not to inline wrappers, because doing so inhibits floating
     e.g. ...(case f x of ...)...
     ==> ...(case (case x of I# x# -> fw x#) of ...)...
     ==> ...(case x of I# x# -> case fw x# of ...)...
@@ -480,19 +532,6 @@ running it, we don't want to use -O2.  Indeed, we don't want to inline
 anything, because the byte-code interpreter might get confused about 
 unboxed tuples and suchlike.
 
-Note [RULEs enabled in SimplGently]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-RULES are enabled when doing "gentle" simplification.  Two reasons:
-
-  * We really want the class-op cancellation to happen:
-        op (df d1 d2) --> $cop3 d1 d2
-    because this breaks the mutual recursion between 'op' and 'df'
-
-  * I wanted the RULE
-        lift String ===> ...
-    to work in Template Haskell when simplifying
-    splices, so we get simpler code for literal strings
-
 Note [Simplifying inside InlineRules]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We must take care with simplification inside InlineRules (which come from
@@ -511,8 +550,9 @@ one; see OccurAnal.addRuleUsage.
 
 Second, we do want *do* to some modest rules/inlining stuff in InlineRules,
 partly to eliminate senseless crap, and partly to break the recursive knots
-generated by instance declarations.  To keep things simple, we always set 
-the phase to 'gentle' when processing InlineRules.  OK, so suppose we have
+generated by instance declarations.
+
+However, suppose we have
 	{-# INLINE <act> f #-}
 	f = <rhs>
 meaning "inline f in phases p where activation <act>(p) holds". 
@@ -522,12 +562,8 @@ f when it is inlined.  So our conservative plan (implemented by
 updModeForInlineRules) is this:
 
   -------------------------------------------------------------
-  When simplifying the RHS of an InlineRule,
-  If the InlineRule becomes active in phase p, then
-    if the current phase is *earlier than* p, 
-       make no inlinings or rules active when simplifying the RHS
-    otherwise 
-       set the phase to p when simplifying the RHS
+  When simplifying the RHS of an InlineRule, set the phase to the
+  phase in which the InlineRule first becomes active
   -------------------------------------------------------------
 
 That ensures that
@@ -541,13 +577,13 @@ That ensures that
      inlining the *original* rhs in phase p.
 
 For example, 
- 	{-# INLINE f #-}
-	f x = ...g...
+       	{-# INLINE f #-}
+      	f x = ...g...
 
-	{-# NOINLINE [1] g #-}
-	g y = ...
+      	{-# NOINLINE [1] g #-}
+      	g y = ...
 
-	{-# RULE h g = ... #-}
+      	{-# RULE h g = ... #-}
 Here we must not inline g into f's RHS, even when we get to phase 0,
 because when f is later inlined into some other module we want the
 rule for h to fire.
@@ -576,42 +612,75 @@ mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
 continuation. 
 
 \begin{code}
-simplEnvForGHCi :: SimplEnv
-simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $
-                  SimplGently { sm_rules = False, sm_inline = False }
-   -- Do not do any inlining, in case we expose some unboxed
-   -- tuple stuff that confuses the bytecode interpreter
-
-simplEnvForRules :: SimplEnv
-simplEnvForRules = mkSimplEnv allOffSwitchChecker $
-                   SimplGently { sm_rules = True, sm_inline = False }
+activeUnfolding :: SimplEnv -> Id -> Bool
+activeUnfolding env
+  | not (sm_inline mode) = active_unfolding_minimal
+  | otherwise            = case sm_phase mode of
+                             InitialPhase -> active_unfolding_gentle
+                             Phase n      -> active_unfolding n
+  where
+    mode = getMode env
 
-updModeForInlineRules :: Activation -> SimplifierMode -> SimplifierMode
--- See Note [Simplifying inside InlineRules]
---    Treat Gentle as phase "infinity"
---    If current_phase `earlier than` inline_rule_start_phase 
---      then no_op
---    else 
---    if current_phase `same phase` inline_rule_start_phase 
---      then current_phase   (keep gentle flags)
---      else inline_rule_start_phase
-updModeForInlineRules inline_rule_act current_mode
-  = case inline_rule_act of
-      NeverActive     -> no_op
-      AlwaysActive    -> mk_gentle current_mode
-      ActiveBefore {} -> mk_gentle current_mode
-      ActiveAfter n   -> mk_phase n current_mode
+getUnfoldingInRuleMatch :: SimplEnv -> IdUnfoldingFun
+-- When matching in RULE, we want to "look through" an unfolding
+-- (to see a constructor) if *rules* are on, even if *inlinings* 
+-- are not.  A notable example is DFuns, which really we want to 
+-- match in rules like (op dfun) in gentle mode. Another example
+-- is 'otherwise' which we want exprIsConApp_maybe to be able to
+-- see very early on
+getUnfoldingInRuleMatch env id
+  | unf_is_active = idUnfolding id
+  | otherwise     = NoUnfolding
   where
-    no_op = SimplGently { sm_rules = False, sm_inline = False }
+    mode = getMode env
+    unf_is_active
+     | not (sm_rules mode) = active_unfolding_minimal id
+     | otherwise           = isActive (sm_phase mode) (idInlineActivation id)
 
-    mk_gentle (SimplGently {}) = current_mode
-    mk_gentle _                = SimplGently { sm_rules = True, sm_inline = True }
+active_unfolding_minimal :: Id -> Bool
+-- Compuslory unfoldings only
+-- Ignore SimplGently, because we want to inline regardless;
+-- the Id has no top-level binding at all
+--
+-- NB: we used to have a second exception, for data con wrappers.
+-- On the grounds that we use gentle mode for rule LHSs, and 
+-- they match better when data con wrappers are inlined.
+-- But that only really applies to the trivial wrappers (like (:)),
+-- and they are now constructed as Compulsory unfoldings (in MkId)
+-- so they'll happen anyway.
+active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
+
+active_unfolding :: PhaseNum -> Id -> Bool
+active_unfolding n id = isActiveIn n (idInlineActivation id)
 
-    mk_phase n (SimplPhase _ ss) = SimplPhase n ss
-    mk_phase n (SimplGently {})  = SimplPhase n ["gentle-rules"]
+active_unfolding_gentle :: Id -> Bool
+-- Anything that is early-active
+-- See Note [Gentle mode]
+active_unfolding_gentle id
+  =  isInlinePragma prag
+  && isEarlyActive (inlinePragmaActivation prag)
+       -- NB: wrappers are not early-active
+  where
+    prag = idInlinePragma id
+
+----------------------
+activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
+-- Nothing => No rules at all
+activeRule _dflags env
+  | not (sm_rules mode) = Nothing     -- Rewriting is off
+  | otherwise           = Just (isActive (sm_phase mode))
+  where
+    mode = getMode env
 \end{code}
 
 
+
+%************************************************************************
+%*									*
+                  preInlineUnconditionally
+%*									*
+%************************************************************************
+
 preInlineUnconditionally
 ~~~~~~~~~~~~~~~~~~~~~~~~
 @preInlineUnconditionally@ examines a bndr to see if it is used just
@@ -688,6 +757,27 @@ let-float if you inline windowToViewport
 However, as usual for Gentle mode, do not inline things that are
 inactive in the intial stages.  See Note [Gentle mode].
 
+Note [InlineRule and preInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Surprisingly, do not pre-inline-unconditionally Ids with INLINE pragmas!
+Example
+
+   {-# INLINE f #-}
+   f :: Eq a => a -> a
+   f x = ...
+   
+   fInt :: Int -> Int
+   fInt = f Int dEqInt
+
+   ...fInt...fInt...fInt...
+
+Here f occurs just once, in the RHS of f1. But if we inline it there
+we'll lose the opportunity to inline at each of fInt's call sites.
+The INLINE pragma will only inline when the application is saturated
+for exactly this reason; and we don't want PreInlineUnconditionally
+to second-guess it.  A live example is Trac #3736.
+    c.f. Note [InlineRule and postInlineUnconditionally]
+
 Note [Top-level botomming Ids]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Don't inline top-level Ids that are bottoming, even if they are used just
@@ -698,6 +788,7 @@ Inlining them won't make the program run faster!
 preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
 preInlineUnconditionally env top_lvl bndr rhs
   | not active 		                     = False
+  | isStableUnfolding (idUnfolding bndr)     = False    -- Note [InlineRule and preInlineUnconditionally]
   | isTopLevel top_lvl && isBottomingId bndr = False	-- Note [Top-level bottoming Ids]
   | opt_SimplNoPreInlining                   = False
   | otherwise = case idOccInfo bndr of
@@ -705,11 +796,9 @@ preInlineUnconditionally env top_lvl bndr rhs
 	  	  OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
 		  _                          -> False
   where
-    phase = getMode env
-    active = case phase of
-		   SimplGently {} -> isEarlyActive act
-			-- See Note [pre/postInlineUnconditionally in gentle mode]
-		   SimplPhase n _ -> isActive n act
+    mode = getMode env
+    active = isActive (sm_phase mode) act
+             -- See Note [pre/postInlineUnconditionally in gentle mode]
     act = idInlineActivation bndr
     try_once in_lam int_cxt	-- There's one textual occurrence
 	| not in_lam = isNotTopLevel top_lvl || early_phase
@@ -741,9 +830,9 @@ preInlineUnconditionally env top_lvl bndr rhs
     canInlineInLam (Note _ e)		= canInlineInLam e
     canInlineInLam _			= False
 
-    early_phase = case phase of
-			SimplPhase 0 _ -> False
-			_    	       -> True
+    early_phase = case sm_phase mode of
+                    Phase 0 -> False
+                    _       -> True
 -- If we don't have this early_phase test, consider
 --	x = length [1,2,3]
 -- The full laziness pass carefully floats all the cons cells to
@@ -761,6 +850,12 @@ preInlineUnconditionally env top_lvl bndr rhs
 
 \end{code}
 
+%************************************************************************
+%*									*
+                  postInlineUnconditionally
+%*									*
+%************************************************************************
+
 postInlineUnconditionally
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 @postInlineUnconditionally@ decides whether to unconditionally inline
@@ -768,7 +863,7 @@ a thing based on the form of its RHS; in particular if it has a
 trivial RHS.  If so, we can inline and discard the binding altogether.
 
 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
-only have *forward* references Hence, it's safe to discard the binding
+only have *forward* references. Hence, it's safe to discard the binding
 	
 NOTE: This isn't our last opportunity to inline.  We're at the binding
 site right now, and we'll get another opportunity when we get to the
@@ -803,8 +898,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 					-- because it might be referred to "earlier"
   | isExportedId bndr           = False
   | isStableUnfolding unfolding = False	-- Note [InlineRule and postInlineUnconditionally]
-  | exprIsTrivial rhs 	        = True
   | isTopLevel top_lvl          = False	-- Note [Top level and postInlineUnconditionally]
+  | exprIsTrivial rhs 	        = True
   | otherwise
   = case occ_info of
 	-- The point of examining occ_info here is that for *non-values* 
@@ -865,86 +960,35 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 -- Alas!
 
   where
-    active = case getMode env of
-		   SimplGently {} -> isEarlyActive act
-			-- See Note [pre/postInlineUnconditionally in gentle mode]
-		   SimplPhase n _ -> isActive n act
-    act = idInlineActivation bndr
-
-activeUnfolding :: SimplEnv -> IdUnfoldingFun
-activeUnfolding env
-  = case getMode env of
-      SimplGently { sm_inline = False } -> active_unfolding_minimal
-      SimplGently { sm_inline = True  } -> active_unfolding_gentle
-      SimplPhase n _                    -> active_unfolding n
-
-activeUnfInRule :: SimplEnv -> IdUnfoldingFun
--- When matching in RULE, we want to "look through" an unfolding
--- if *rules* are on, even if *inlinings* are not.  A notable example
--- is DFuns, which really we want to match in rules like (op dfun)
--- in gentle mode.
-activeUnfInRule env
-  = case getMode env of
-      SimplGently { sm_rules = False } -> active_unfolding_minimal
-      SimplGently { sm_rules = True  } -> active_unfolding_gentle
-      SimplPhase n _                   -> active_unfolding n
-
-active_unfolding_minimal :: IdUnfoldingFun
--- Compuslory unfoldings only
--- Ignore SimplGently, because we want to inline regardless;
--- the Id has no top-level binding at all
---
--- NB: we used to have a second exception, for data con wrappers.
--- On the grounds that we use gentle mode for rule LHSs, and 
--- they match better when data con wrappers are inlined.
--- But that only really applies to the trivial wrappers (like (:)),
--- and they are now constructed as Compulsory unfoldings (in MkId)
--- so they'll happen anyway.
-active_unfolding_minimal id
-  | isCompulsoryUnfolding unf = unf
-  | otherwise                 = NoUnfolding
-  where
-    unf = realIdUnfolding id	-- Never a loop breaker
-
-active_unfolding_gentle :: IdUnfoldingFun
--- Anything that is early-active
--- See Note [Gentle mode]
-active_unfolding_gentle id
-  | isEarlyActive (idInlineActivation id) = idUnfolding id
-  | otherwise                             = NoUnfolding
-      -- idUnfolding checks for loop-breakers
-      -- Things with an INLINE pragma may have 
-      -- an unfolding *and* be a loop breaker  
-      -- (maybe the knot is not yet untied)
-
-active_unfolding :: CompilerPhase -> IdUnfoldingFun
-active_unfolding n id
-  | isActive n (idInlineActivation id) = idUnfolding id
-  | otherwise                          = NoUnfolding
-
-activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
--- Nothing => No rules at all
-activeRule dflags env
-  | not (dopt Opt_EnableRewriteRules dflags)
-  = Nothing	-- Rewriting is off
-  | otherwise
-  = case getMode env of
-      SimplGently { sm_rules = rules_on } 
-        | rules_on  -> Just isEarlyActive	-- Note [RULEs enabled in SimplGently]
-        | otherwise -> Nothing
-      SimplPhase n _ -> Just (isActive n)
+    active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
+        -- See Note [pre/postInlineUnconditionally in gentle mode]
 \end{code}
 
 Note [Top level and postInlineUnconditionally]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We don't do postInlineUnconditionally for top-level things (exept ones that
-are trivial):
-  * There is no point, because the main goal is to get rid of local
-    bindings used in multiple case branches.
+We don't do postInlineUnconditionally for top-level things (even for
+ones that are trivial):
+
   * Doing so will inline top-level error expressions that have been
     carefully floated out by FloatOut.  More generally, it might 
     replace static allocation with dynamic.
 
+  * Even for trivial expressions there's a problem.  Consider
+      {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
+      blah xs = reverse xs
+      ruggle = sort
+    In one simplifier pass we might fire the rule, getting 
+      blah xs = ruggle xs
+    but in *that* simplifier pass we must not do postInlineUnconditionally
+    on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
+
+    If the rhs is trivial it'll be inlined by callSiteInline, and then
+    the binding will be dead and discarded by the next use of OccurAnal
+
+  * There is less point, because the main goal is to get rid of local
+    bindings used in multiple case branches.  
+    
+
 Note [InlineRule and postInlineUnconditionally]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
@@ -962,6 +1006,8 @@ Then there's a danger we'll optimise to
 and now postInlineUnconditionally, losing the InlineRule on f.  Now f'
 won't inline because 'e' is too big.
 
+    c.f. Note [InlineRule and preInlineUnconditionally]
+
 
 %************************************************************************
 %*									*
@@ -977,7 +1023,7 @@ mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
 
 mkLam _b [] body 
   = return body
-mkLam env bndrs body
+mkLam _env bndrs body
   = do	{ dflags <- getDOptsSmpl
 	; mkLam' dflags bndrs body }
   where
@@ -991,23 +1037,22 @@ mkLam env bndrs body
 	co_vars  = tyVarsOfType co
 	bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars      
 
+    mkLam' dflags bndrs body@(Lam {})
+      = mkLam' dflags (bndrs ++ bndrs1) body1
+      where
+        (bndrs1, body1) = collectBinders body
+
     mkLam' dflags bndrs body
-      | dopt Opt_DoEtaReduction dflags,
-        Just etad_lam <- tryEtaReduce bndrs body
+      | dopt Opt_DoEtaReduction dflags
+      , Just etad_lam <- tryEtaReduce bndrs body
       = do { tick (EtaReduction (head bndrs))
 	   ; return etad_lam }
 
-      | dopt Opt_DoLambdaEtaExpansion dflags,
-        not (inGentleMode env),	      -- In gentle mode don't eta-expansion
-   	any isRuntimeVar bndrs	      -- because it can clutter up the code
-	    		 	      -- with casts etc that may not be removed
-      = do { let body' = tryEtaExpansion dflags body
- 	   ; return (mkLams bndrs body') }
-   
       | otherwise 
       = return (mkLams bndrs body)
 \end{code}
 
+
 Note [Casts and lambdas]
 ~~~~~~~~~~~~~~~~~~~~~~~~
 Consider 
@@ -1040,159 +1085,154 @@ It does not make sense to transform
 	/\g. e `cast` g  ==>  (/\g.e) `cast` (/\g.g)
 because the latter is not well-kinded.
 
---	c) floating lets out through big lambdas 
---		[only if all tyvar lambdas, and only if this lambda
---		 is the RHS of a let]
-
-{-	Sept 01: I'm experimenting with getting the
-	full laziness pass to float out past big lambdsa
- | all isTyVar bndrs,	-- Only for big lambdas
-   contIsRhs cont	-- Only try the rhs type-lambda floating
-			-- if this is indeed a right-hand side; otherwise
-			-- we end up floating the thing out, only for float-in
-			-- to float it right back in again!
- = do (floats, body') <- tryRhsTyLam env bndrs body
-      return (floats, mkLams bndrs body')
--}
-
-
 %************************************************************************
 %*									*
-		Eta reduction
+              Eta expansion									 
 %*									*
 %************************************************************************
 
-Note [Eta reduction conditions]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We try for eta reduction here, but *only* if we get all the way to an
-trivial expression.  We don't want to remove extra lambdas unless we
-are going to avoid allocating this thing altogether.
-
-There are some particularly delicate points here:
-
-* Eta reduction is not valid in general:  
-	\x. bot  /=  bot
-  This matters, partly for old-fashioned correctness reasons but,
-  worse, getting it wrong can yield a seg fault. Consider
-	f = \x.f x
-	h y = case (case y of { True -> f `seq` True; False -> False }) of
-		True -> ...; False -> ...
-
-  If we (unsoundly) eta-reduce f to get f=f, the strictness analyser
-  says f=bottom, and replaces the (f `seq` True) with just
-  (f `cast` unsafe-co).  BUT, as thing stand, 'f' got arity 1, and it
-  *keeps* arity 1 (perhaps also wrongly).  So CorePrep eta-expands 
-  the definition again, so that it does not termninate after all.
-  Result: seg-fault because the boolean case actually gets a function value.
-  See Trac #1947.
-
-  So it's important to to the right thing.
-
-* Note [Arity care]: we need to be careful if we just look at f's
-  arity. Currently (Dec07), f's arity is visible in its own RHS (see
-  Note [Arity robustness] in SimplEnv) so we must *not* trust the
-  arity when checking that 'f' is a value.  Otherwise we will
-  eta-reduce
-      f = \x. f x
-  to
-      f = f
-  Which might change a terminiating program (think (f `seq` e)) to a 
-  non-terminating one.  So we check for being a loop breaker first.
-
-  However for GlobalIds we can look at the arity; and for primops we
-  must, since they have no unfolding.  
-
-* Regardless of whether 'f' is a value, we always want to 
-  reduce (/\a -> f a) to f
-  This came up in a RULE: foldr (build (/\a -> g a))
-  did not match 	  foldr (build (/\b -> ...something complex...))
-  The type checker can insert these eta-expanded versions,
-  with both type and dictionary lambdas; hence the slightly 
-  ad-hoc isDictId
-
-* Never *reduce* arity. For example
-      f = \xy. g x y
-  Then if h has arity 1 we don't want to eta-reduce because then
-  f's arity would decrease, and that is bad
-
-These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
-Alas.
+When we meet a let-binding we try eta-expansion.  To find the 
+arity of the RHS we use a little fixpoint analysis; see Note [Arity analysis]
 
 \begin{code}
-tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr
-tryEtaReduce bndrs body 
-  = go (reverse bndrs) body
+tryEtaExpand :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
+-- See Note [Eta-expanding at let bindings]
+tryEtaExpand env bndr rhs
+  = do { dflags <- getDOptsSmpl
+       ; (new_arity, new_rhs) <- try_expand dflags
+
+       ; WARN( new_arity < old_arity || new_arity < _dmd_arity, 
+               (ptext (sLit "Arity decrease:") <+> (ppr bndr <+> ppr old_arity
+		<+> ppr new_arity <+> ppr _dmd_arity) $$ ppr new_rhs) )
+			-- Note [Arity decrease]
+         return (new_arity, new_rhs) }
   where
-    incoming_arity = count isId bndrs
-
-    go (b : bs) (App fun arg) | ok_arg b arg = go bs fun	-- Loop round
-    go []       fun           | ok_fun fun   = Just fun		-- Success!
-    go _        _			     = Nothing		-- Failure!
-
-	-- Note [Eta reduction conditions]
-    ok_fun (App fun (Type ty)) 
-	| not (any (`elemVarSet` tyVarsOfType ty) bndrs)
-	=  ok_fun fun
-    ok_fun (Var fun_id)
-	=  not (fun_id `elem` bndrs)
-	&& (ok_fun_id fun_id || all ok_lam bndrs)
-    ok_fun _fun = False
-
-    ok_fun_id fun = fun_arity fun >= incoming_arity
-
-    fun_arity fun 	      -- See Note [Arity care]
-       | isLocalId fun && isLoopBreaker (idOccInfo fun) = 0
-       | otherwise = idArity fun   	      
-
-    ok_lam v = isTyVar v || isDictId v
-
-    ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
+    try_expand dflags
+      | sm_eta_expand (getMode env)      -- Provided eta-expansion is on
+      , not (exprIsTrivial rhs)
+      , let dicts_cheap = dopt Opt_DictsCheap dflags
+            new_arity   = findArity dicts_cheap bndr rhs old_arity
+      , new_arity > rhs_arity
+      = do { tick (EtaExpansion bndr)
+           ; return (new_arity, etaExpand new_arity rhs) }
+      | otherwise
+      = return (rhs_arity, rhs)
+
+    rhs_arity  = exprArity rhs
+    old_arity  = idArity bndr
+    _dmd_arity = length $ fst $ splitStrictSig $ idStrictness bndr
+
+findArity :: Bool -> Id -> CoreExpr -> Arity -> Arity
+-- This implements the fixpoint loop for arity analysis
+-- See Note [Arity analysis]
+findArity dicts_cheap bndr rhs old_arity
+  = go (exprEtaExpandArity (mk_cheap_fn dicts_cheap init_cheap_app) rhs)
+       -- We always call exprEtaExpandArity once, but usually 
+       -- that produces a result equal to old_arity, and then
+       -- we stop right away (since arities should not decrease)
+       -- Result: the common case is that there is just one iteration
+  where
+    go :: Arity -> Arity
+    go cur_arity
+      | cur_arity <= old_arity = cur_arity	
+      | new_arity == cur_arity = cur_arity
+      | otherwise = ASSERT( new_arity < cur_arity )
+                    pprTrace "Exciting arity" 
+                       (vcat [ ppr bndr <+> ppr cur_arity <+> ppr new_arity
+                             , ppr rhs])
+                    go new_arity
+      where
+        new_arity = exprEtaExpandArity (mk_cheap_fn dicts_cheap cheap_app) rhs
+      
+        cheap_app :: CheapAppFun
+        cheap_app fn n_val_args
+          | fn == bndr = n_val_args < cur_arity
+          | otherwise  = isCheapApp fn n_val_args
+
+    init_cheap_app :: CheapAppFun
+    init_cheap_app fn n_val_args
+      | fn == bndr = True
+      | otherwise  = isCheapApp fn n_val_args
+ 
+mk_cheap_fn :: Bool -> CheapAppFun -> CheapFun
+mk_cheap_fn dicts_cheap cheap_app
+  | not dicts_cheap
+  = \e _     -> exprIsCheap' cheap_app e
+  | otherwise
+  = \e mb_ty -> exprIsCheap' cheap_app e
+             || case mb_ty of
+                  Nothing -> False
+                  Just ty -> isDictLikeTy ty
+	-- If the experimental -fdicts-cheap flag is on, we eta-expand through
+	-- dictionary bindings.  This improves arities. Thereby, it also
+	-- means that full laziness is less prone to floating out the
+	-- application of a function to its dictionary arguments, which
+	-- can thereby lose opportunities for fusion.  Example:
+	-- 	foo :: Ord a => a -> ...
+	--	foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
+	--		-- So foo has arity 1
+	--
+	--	f = \x. foo dInt $ bar x
+	--
+	-- The (foo DInt) is floated out, and makes ineffective a RULE 
+	--	foo (bar x) = ...
+	--
+	-- One could go further and make exprIsCheap reply True to any
+	-- dictionary-typed expression, but that's more work.
+	-- 
+	-- See Note [Dictionary-like types] in TcType.lhs for why we use
+  	-- isDictLikeTy here rather than isDictTy
 \end{code}
 
+Note [Eta-expanding at let bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We now eta expand at let-bindings, which is where the payoff 
+comes. 
+
+One useful consequence is this example:
+   genMap :: C a => ...
+   {-# INLINE genMap #-}
+   genMap f xs = ...
+
+   myMap :: D a => ...
+   {-# INLINE myMap #-}
+   myMap = genMap
+
+Notice that 'genMap' should only inline if applied to two arguments.
+In the InlineRule for myMap we'll have the unfolding 
+    (\d -> genMap Int (..d..))  
+We do not want to eta-expand to 
+    (\d f xs -> genMap Int (..d..) f xs) 
+because then 'genMap' will inline, and it really shouldn't: at least
+as far as the programmer is concerned, it's not applied to two
+arguments!
+
+Note [Arity analysis]
+~~~~~~~~~~~~~~~~~~~~~
+The motivating example for arity analysis is this:
+ 
+  f = \x. let g = f (x+1) 
+          in \y. ...g...
 
-%************************************************************************
-%*									*
-		Eta expansion
-%*									*
-%************************************************************************
-
-
-We go for:
-   f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
-				 (n >= 0)
-
-where (in both cases) 
-
-	* The xi can include type variables
-
-	* The yi are all value variables
-
-	* N is a NORMAL FORM (i.e. no redexes anywhere)
-	  wanting a suitable number of extra args.
-
-The biggest reason for doing this is for cases like
-
-	f = \x -> case x of
-		    True  -> \y -> e1
-		    False -> \y -> e2
+What arity does f have?  Really it should have arity 2, but a naive
+look at the RHS won't see that.  You need a fixpoint analysis which
+says it has arity "infinity" the first time round.
 
-Here we want to get the lambdas together.  A good exmaple is the nofib
-program fibheaps, which gets 25% more allocation if you don't do this
-eta-expansion.
+This example happens a lot; it first showed up in Andy Gill's thesis,
+fifteen years ago!  It also shows up in the code for 'rnf' on lists
+in Trac #4138.
 
-We may have to sandwich some coerces between the lambdas
-to make the types work.   exprEtaExpandArity looks through coerces
-when computing arity; and etaExpand adds the coerces as necessary when
-actually computing the expansion.
+The analysis is easy to achieve because exprEtaExpandArity takes an
+argument
+     type CheapFun = CoreExpr -> Maybe Type -> Bool
+used to decide if an expression is cheap enough to push inside a 
+lambda.  And exprIsCheap' in turn takes an argument
+     type CheapAppFun = Id -> Int -> Bool
+which tells when an application is cheap. This makes it easy to
+write the analysis loop.
 
-\begin{code}
-tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr
--- There is at least one runtime binder in the binders
-tryEtaExpansion dflags body
-  = etaExpand fun_arity body
-  where
-    fun_arity = exprEtaExpandArity dflags body
-\end{code}
+The analysis is cheap-and-cheerful because it doesn't deal with
+mutual recursion.  But the self-recursive case is the important one.
 
 
 %************************************************************************
@@ -1282,7 +1322,7 @@ abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExp
 abstractFloats main_tvs body_env body
   = ASSERT( notNull body_floats )
     do	{ (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
-	; return (float_binds, CoreSubst.substExpr subst body) }
+	; return (float_binds, CoreSubst.substExpr (text "abstract_floats1") subst body) }
   where
     main_tv_set = mkVarSet main_tvs
     body_floats = getFloats body_env
@@ -1295,10 +1335,10 @@ abstractFloats main_tvs body_env body
 		 subst'   = CoreSubst.extendIdSubst subst id poly_app
 	   ; return (subst', (NonRec poly_id poly_rhs)) }
       where
-	rhs' = CoreSubst.substExpr subst rhs
+	rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
 	tvs_here | any isCoVar main_tvs = main_tvs	-- Note [Abstract over coercions]
 		 | otherwise 
-		 = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
+		 = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyCoVar rhs')
 	
 		-- Abstract only over the type variables free in the rhs
 		-- wrt which the new binding is abstracted.  But the naive
@@ -1319,7 +1359,8 @@ abstractFloats main_tvs body_env body
     abstract subst (Rec prs)
        = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
 	    ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
-		  poly_rhss = [mkLams tvs_here (CoreSubst.substExpr subst' rhs) | rhs <- rhss]
+		  poly_rhss = [mkLams tvs_here (CoreSubst.substExpr (text "abstract_floats3") subst' rhs) 
+                              | rhs <- rhss]
 	    ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
        where
 	 (ids,rhss) = unzip prs
@@ -1489,16 +1530,17 @@ prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
 				-- 	case x of { DEFAULT -> e }
 				-- and we don't want to fill in a default for them!
   , Just all_cons <- tyConDataCons_maybe tycon
-  , not (null all_cons)		-- This is a tricky corner case.  If the data type has no constructors,
-				-- which GHC allows, then the case expression will have at most a default
-				-- alternative.  We don't want to eliminate that alternative, because the
-				-- invariant is that there's always one alternative.  It's more convenient
-				-- to leave	
-				--	case x of { DEFAULT -> e }     
-				-- as it is, rather than transform it to
-				--	error "case cant match"
-				-- which would be quite legitmate.  But it's a really obscure corner, and
-				-- not worth wasting code on.
+  , not (null all_cons)	
+	-- This is a tricky corner case.  If the data type has no constructors,
+	-- which GHC allows, then the case expression will have at most a default
+	-- alternative.  We don't want to eliminate that alternative, because the
+	-- invariant is that there's always one alternative.  It's more convenient
+	-- to leave	
+	--	case x of { DEFAULT -> e }     
+	-- as it is, rather than transform it to
+	--	error "case cant match"
+	-- which would be quite legitmate.  But it's a really obscure corner, and
+	-- not worth wasting code on.
   , let imposs_data_cons = [con | DataAlt con <- imposs_cons]	-- We now know it's a data type 
 	impossible con   = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
   = case filterOut impossible all_cons of
@@ -1514,9 +1556,12 @@ prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
 
 	_ -> return [(DEFAULT, [], deflt_rhs)]
 
-  | debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon)
+  | debugIsOn, isAlgTyCon tycon
+  , null (tyConDataCons tycon)
+  , not (isFamilyTyCon tycon || isAbstractTyCon tycon)
 	-- Check for no data constructors
-        -- This can legitimately happen for type families, so don't report that
+        -- This can legitimately happen for abstract types and type families,
+        -- so don't report that
   = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
         $ return [(DEFAULT, [], deflt_rhs)]