Preserve strictness when floating coercions

[ghc-hetmet.git] / compiler / simplCore / SimplUtils.lhs
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs

index be5c4b3..dfe9e83 100644 (file)
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -10,14 +10,15 @@ module SimplUtils (
  
         -- Inlining,
         preInlineUnconditionally, postInlineUnconditionally, 
  
         -- Inlining,
         preInlineUnconditionally, postInlineUnconditionally, 
-       activeInline, activeRule, inlineMode,
+       activeInline, activeRule, 
+        simplEnvForGHCi, simplEnvForRules, simplGentlyForInlineRules,
  
         -- The continuation type
         SimplCont(..), DupFlag(..), ArgInfo(..),
         contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, 
  
         -- The continuation type
         SimplCont(..), DupFlag(..), ArgInfo(..),
         contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, 
-       countValArgs, countArgs, splitInlineCont,
-       mkBoringStop, mkLazyArgStop, contIsRhsOrArg,
-       interestingCallContext, interestingArgContext,
+       countValArgs, countArgs, 
+       mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
+       interestingCallContext, 
  
         interestingArg, mkArgInfo,
         
  
         interestingArg, mkArgInfo,
         
@@ -34,6 +35,7 @@ import qualified CoreSubst
  import PprCore
  import CoreFVs
  import CoreUtils
  import PprCore
  import CoreFVs
  import CoreUtils
+import CoreArity       ( etaExpand, exprEtaExpandArity )
  import CoreUnfold
  import Name
  import Id
  import CoreUnfold
  import Name
  import Id
@@ -51,7 +53,7 @@ import MonadUtils
  import Outputable
  import FastString
  
  import Outputable
  import FastString
  
-import List( nub )
+import Data.List
  \end{code}
  
  
  \end{code}
  
  
@@ -112,7 +114,7 @@ data SimplCont
         SimplCont       
  
    | StrictArg          -- e C
         SimplCont       
  
    | StrictArg          -- e C
-       OutExpr                 -- e 
+       OutExpr                 -- e; *always* of form (Var v `App1` e1 .. `App` en)
         CallCtxt                -- Whether *this* argument position is interesting
         ArgInfo                 -- Whether the function at the head of e has rules, etc
         SimplCont               --     plus strictness flags for *further* args
         CallCtxt                -- Whether *this* argument position is interesting
         ArgInfo                 -- Whether the function at the head of e has rules, etc
         SimplCont               --     plus strictness flags for *further* args
@@ -151,6 +153,9 @@ instance Outputable DupFlag where
  mkBoringStop :: SimplCont
  mkBoringStop = Stop BoringCtxt
  
  mkBoringStop :: SimplCont
  mkBoringStop = Stop BoringCtxt
  
+mkRhsStop :: SimplCont -- See Note [RHS of lets] in CoreUnfold
+mkRhsStop = Stop (ArgCtxt False)
+
  mkLazyArgStop :: CallCtxt -> SimplCont
  mkLazyArgStop cci = Stop cci
  
  mkLazyArgStop :: CallCtxt -> SimplCont
  mkLazyArgStop cci = Stop cci
  
@@ -214,63 +219,11 @@ dropArgs :: Int -> SimplCont -> SimplCont
  dropArgs 0 cont = cont
  dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
  dropArgs n other               = pprPanic "dropArgs" (ppr n <+> ppr other)
  dropArgs 0 cont = cont
  dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
  dropArgs n other               = pprPanic "dropArgs" (ppr n <+> ppr other)
-
---------------------
-splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont)
--- Returns Nothing if the continuation should dissolve an InlineMe Note
--- Return Just (c1,c2) otherwise, 
---     where c1 is the continuation to put inside the InlineMe 
---     and   c2 outside
-
--- Example: (__inline_me__ (/\a. e)) ty
---     Here we want to do the beta-redex without dissolving the InlineMe
--- See test simpl017 (and Trac #1627) for a good example of why this is important
-
-splitInlineCont (ApplyTo dup (Type ty) se c)
-  | Just (c1, c2) <- splitInlineCont c = Just (ApplyTo dup (Type ty) se c1, c2)
-splitInlineCont cont@(Stop {})         = Just (mkBoringStop, cont)
-splitInlineCont cont@(StrictBind {})   = Just (mkBoringStop, cont)
-splitInlineCont cont@(StrictArg  {})   = Just (mkBoringStop, cont)
-splitInlineCont _                      = Nothing
-\end{code}
-
-
-\begin{code}
-interestingArg :: OutExpr -> Bool
-       -- An argument is interesting if it has *some* structure
-       -- We are here trying to avoid unfolding a function that
-       -- is applied only to variables that have no unfolding
-       -- (i.e. they are probably lambda bound): f x y z
-       -- There is little point in inlining f here.
-interestingArg (Var v)          = hasSomeUnfolding (idUnfolding v)
-                                       -- Was: isValueUnfolding (idUnfolding v')
-                                       -- But that seems over-pessimistic
-                                || isDataConWorkId v
-                                       -- This accounts for an argument like
-                                       -- () or [], which is definitely interesting
-interestingArg (Type _)                 = False
-interestingArg (App fn (Type _)) = interestingArg fn
-interestingArg (Note _ a)       = interestingArg a
-
--- Idea (from Sam B); I'm not sure if it's a good idea, so commented out for now
--- interestingArg expr | isUnLiftedType (exprType expr)
---        -- Unlifted args are only ever interesting if we know what they are
---  =                  case expr of
---                        Lit lit -> True
---                        _       -> False
-
-interestingArg _                 = True
-       -- Consider     let x = 3 in f x
-       -- The substitution will contain (x -> ContEx 3), and we want to
-       -- to say that x is an interesting argument.
-       -- But consider also (\x. f x y) y
-       -- The substitution will contain (x -> ContEx y), and we want to say
-       -- that x is not interesting (assuming y has no unfolding)
  \end{code}
  
  
  \end{code}
  
  
-Comment about interestingCallContext
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [Interesting call context]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  We want to avoid inlining an expression where there can't possibly be
  any gain, such as in an argument position.  Hence, if the continuation
  is interesting (eg. a case scrutinee, application etc.) then we
  We want to avoid inlining an expression where there can't possibly be
  any gain, such as in an argument position.  Hence, if the continuation
  is interesting (eg. a case scrutinee, application etc.) then we
@@ -305,20 +258,22 @@ default case.
  
  \begin{code}
  interestingCallContext :: SimplCont -> CallCtxt
  
  \begin{code}
  interestingCallContext :: SimplCont -> CallCtxt
+-- See Note [Interesting call context]
  interestingCallContext cont
    = interesting cont
    where
  interestingCallContext cont
    = interesting cont
    where
-    interestingCtxt = ArgCtxt False 2  -- Give *some* incentive!
-
      interesting (Select _ bndr _ _ _)
      interesting (Select _ bndr _ _ _)
-       | isDeadBinder bndr       = CaseCtxt
-       | otherwise               = interestingCtxt
+       | isDeadBinder bndr = CaseCtxt
+       | otherwise         = ArgCtxt False     -- If the binder is used, this
+                                               -- is like a strict let
+                                               -- See Note [RHS of lets] in CoreUnfold
                 
                 
-    interesting (ApplyTo {})      = interestingCtxt
-                               -- Can happen if we have (coerce t (f x)) y
-                               -- Perhaps interestingCtxt is a bit over-keen, but I've
-                               -- seen (coerce f) x, where f has an INLINE prag,
-                               -- So we have to give some motivation for inlining it
+    interesting (ApplyTo _ arg _ cont)
+       | isTypeArg arg = interesting cont
+       | otherwise     = ValAppCtxt    -- Can happen if we have (f Int |> co) y
+                                       -- If f has an INLINE prag we need to give it some
+                                       -- motivation to inline. See Note [Cast then apply]
+                                       -- in CoreUnfold
  
      interesting (StrictArg _ cci _ _)  = cci
      interesting (StrictBind {})                = BoringCtxt
  
      interesting (StrictArg _ cci _ _)  = cci
      interesting (StrictBind {})                = BoringCtxt
@@ -342,24 +297,25 @@ interestingCallContext cont
  
  -------------------
  mkArgInfo :: Id
  
  -------------------
  mkArgInfo :: Id
+         -> [CoreRule] -- Rules for function
           -> Int        -- Number of value args
           -> Int        -- Number of value args
-         -> SimplCont  -- Context of the cal
+         -> SimplCont  -- Context of the call
           -> ArgInfo
  
           -> ArgInfo
  
-mkArgInfo fun n_val_args call_cont
+mkArgInfo fun rules n_val_args call_cont
    | n_val_args < idArity fun           -- Note [Unsaturated functions]
    = ArgInfo { ai_rules = False
             , ai_strs = vanilla_stricts 
             , ai_discs = vanilla_discounts }
    | otherwise
    | n_val_args < idArity fun           -- Note [Unsaturated functions]
    = ArgInfo { ai_rules = False
             , ai_strs = vanilla_stricts 
             , ai_discs = vanilla_discounts }
    | otherwise
-  = ArgInfo { ai_rules = interestingArgContext fun call_cont
+  = ArgInfo { ai_rules = interestingArgContext rules call_cont
             , ai_strs  = add_type_str (idType fun) arg_stricts
             , ai_discs = arg_discounts }
    where
      vanilla_discounts, arg_discounts :: [Int]
      vanilla_discounts = repeat 0
      arg_discounts = case idUnfolding fun of
             , ai_strs  = add_type_str (idType fun) arg_stricts
             , ai_discs = arg_discounts }
    where
      vanilla_discounts, arg_discounts :: [Int]
      vanilla_discounts = repeat 0
      arg_discounts = case idUnfolding fun of
-                       CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _)
+                       CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_args = discounts}}
                               -> discounts ++ vanilla_discounts
                         _     -> vanilla_discounts
  
                               -> discounts ++ vanilla_discounts
                         _     -> vanilla_discounts
  
@@ -413,7 +369,7 @@ it'll just be floated out again.  Even if f has lots of discounts
  on its first argument -- it must be saturated for these to kick in
  -}
  
  on its first argument -- it must be saturated for these to kick in
  -}
  
-interestingArgContext :: Id -> SimplCont -> Bool
+interestingArgContext :: [CoreRule] -> SimplCont -> Bool
  -- If the argument has form (f x y), where x,y are boring,
  -- and f is marked INLINE, then we don't want to inline f.
  -- But if the context of the argument is
  -- If the argument has form (f x y), where x,y are boring,
  -- and f is marked INLINE, then we don't want to inline f.
  -- But if the context of the argument is
@@ -424,16 +380,18 @@ interestingArgContext :: Id -> SimplCont -> Bool
  -- where h has rules, then we do want to inline f; hence the
  -- call_cont argument to interestingArgContext
  --
  -- where h has rules, then we do want to inline f; hence the
  -- call_cont argument to interestingArgContext
  --
--- The interesting_arg_ctxt flag makes this happen; if it's
+-- The ai-rules flag makes this happen; if it's
  -- set, the inliner gets just enough keener to inline f 
  -- regardless of how boring f's arguments are, if it's marked INLINE
  --
  -- The alternative would be to *always* inline an INLINE function,
  -- regardless of how boring its context is; but that seems overkill
  -- For example, it'd mean that wrapper functions were always inlined
  -- set, the inliner gets just enough keener to inline f 
  -- regardless of how boring f's arguments are, if it's marked INLINE
  --
  -- The alternative would be to *always* inline an INLINE function,
  -- regardless of how boring its context is; but that seems overkill
  -- For example, it'd mean that wrapper functions were always inlined
-interestingArgContext fn call_cont
-  = idHasRules fn || go call_cont
+interestingArgContext rules call_cont
+  = notNull rules || enclosing_fn_has_rules
    where
    where
+    enclosing_fn_has_rules = go call_cont
+
      go (Select {})          = False
      go (ApplyTo {})         = False
      go (StrictArg _ cci _ _) = interesting cci
      go (Select {})          = False
      go (ApplyTo {})         = False
      go (StrictArg _ cci _ _) = interesting cci
@@ -441,8 +399,8 @@ interestingArgContext fn call_cont
      go (CoerceIt _ c)       = go c
      go (Stop cci)            = interesting cci
  
      go (CoerceIt _ c)       = go c
      go (Stop cci)            = interesting cci
  
-    interesting (ArgCtxt rules _) = rules
-    interesting _                 = False
+    interesting (ArgCtxt rules) = rules
+    interesting _               = False
  \end{code}
  
  
  \end{code}
  
  
@@ -453,18 +411,58 @@ interestingArgContext fn call_cont
  %*                                                                     *
  %************************************************************************
  
  %*                                                                     *
  %************************************************************************
  
-Inlining is controlled partly by the SimplifierMode switch.  This has two
-settings:
+\begin{code}
+simplEnvForGHCi :: SimplEnv
+simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $
+                  SimplGently { sm_rules = False, sm_inline = False }
+   -- Do not do any inlining, in case we expose some unboxed
+   -- tuple stuff that confuses the bytecode interpreter
+
+simplEnvForRules :: SimplEnv
+simplEnvForRules = mkSimplEnv allOffSwitchChecker $
+                   SimplGently { sm_rules = True, sm_inline = False }
+
+simplGentlyForInlineRules :: SimplifierMode
+simplGentlyForInlineRules = SimplGently { sm_rules = True, sm_inline = True }
+       -- Simplify as much as possible, subject to the usual "gentle" rules
+\end{code}
  
  
+Inlining is controlled partly by the SimplifierMode switch.  This has two
+settings
+       
         SimplGently     (a) Simplifying before specialiser/full laziness
         SimplGently     (a) Simplifying before specialiser/full laziness
-                       (b) Simplifiying inside INLINE pragma
+                       (b) Simplifiying inside InlineRules
                         (c) Simplifying the LHS of a rule
                         (d) Simplifying a GHCi expression or Template 
                                 Haskell splice
  
         SimplPhase n _   Used at all other times
  
                         (c) Simplifying the LHS of a rule
                         (d) Simplifying a GHCi expression or Template 
                                 Haskell splice
  
         SimplPhase n _   Used at all other times
  
-The key thing about SimplGently is that it does no call-site inlining.
+Note [Gentle mode]
+~~~~~~~~~~~~~~~~~~
+Gentle mode has a separate boolean flag to control
+       a) inlining (sm_inline flag)
+       b) rules    (sm_rules  flag)
+A key invariant about Gentle mode is that it is treated as the EARLIEST
+phase.  Something is inlined if the sm_inline flag is on AND the thing
+is inlinable in the earliest phase.  This is important. Example
+
+  {-# INLINE [~1] g #-}
+  g = ...
+  
+  {-# INLINE f #-}
+  f x = g (g x)
+
+If we were to inline g into f's inlining, then an importing module would
+never be able to do
+       f e --> g (g e) ---> RULE fires
+because the InlineRule for f has had g inlined into it.
+
+On the other hand, it is bad not to do ANY inlining into an
+InlineRule, because then recursive knots in instance declarations
+don't get unravelled.
+
+However, *sometimes* SimplGently must do no call-site inlining at all.
  Before full laziness we must be careful not to inline wrappers,
  because doing so inhibits floating
      e.g. ...(case f x of ...)...
  Before full laziness we must be careful not to inline wrappers,
  because doing so inhibits floating
      e.g. ...(case f x of ...)...
@@ -478,17 +476,11 @@ running it, we don't want to use -O2.  Indeed, we don't want to inline
  anything, because the byte-code interpreter might get confused about 
  unboxed tuples and suchlike.
  
  anything, because the byte-code interpreter might get confused about 
  unboxed tuples and suchlike.
  
-INLINE pragmas
-~~~~~~~~~~~~~~
-SimplGently is also used as the mode to simplify inside an InlineMe note.
-
-\begin{code}
-inlineMode :: SimplifierMode
-inlineMode = SimplGently
-\end{code}
-
-It really is important to switch off inlinings inside such
-expressions.  Consider the following example 
+Note [Simplifying gently inside InlineRules]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do much simplification inside InlineRules (which come from
+INLINE pragmas).  It really is important to switch off inlinings
+inside such expressions.  Consider the following example
  
         let f = \pq -> BIG
         in
  
         let f = \pq -> BIG
         in
@@ -497,16 +489,14 @@ expressions.  Consider the following example
         in ...g...g...g...g...g...
  
  Now, if that's the ONLY occurrence of f, it will be inlined inside g,
         in ...g...g...g...g...g...
  
  Now, if that's the ONLY occurrence of f, it will be inlined inside g,
-and thence copied multiple times when g is inlined.
+and thence copied multiple times when g is inlined.  
  
  
-
-This function may be inlinined in other modules, so we
-don't want to remove (by inlining) calls to functions that have
-specialisations, or that may have transformation rules in an importing
-scope.
+This function may be inlinined in other modules, so we don't want to
+remove (by inlining) calls to functions that have specialisations, or
+that may have transformation rules in an importing scope.
  
  E.g.   {-# INLINE f #-}
  
  E.g.   {-# INLINE f #-}
-               f x = ...g...
+       f x = ...g...
  
  and suppose that g is strict *and* has specialisations.  If we inline
  g's wrapper, we deny f the chance of getting the specialised version
  
  and suppose that g is strict *and* has specialisations.  If we inline
  g's wrapper, we deny f the chance of getting the specialised version
@@ -524,15 +514,14 @@ continuation.  That's why the keep_inline predicate returns True for
  ArgOf continuations.  It shouldn't do any harm not to dissolve the
  inline-me note under these circumstances.
  
  ArgOf continuations.  It shouldn't do any harm not to dissolve the
  inline-me note under these circumstances.
  
-Note that the result is that we do very little simplification
-inside an InlineMe.  
+Although we do very little simplification inside an InlineRule,
+the RHS is simplified as normal.  For example:
  
         all xs = foldr (&&) True xs
         any p = all . map p  {-# INLINE any #-}
  
  
         all xs = foldr (&&) True xs
         any p = all . map p  {-# INLINE any #-}
  
-Problem: any won't get deforested, and so if it's exported and the
-importer doesn't use the inlining, (eg passes it as an arg) then we
-won't get deforestation at all.  We havn't solved this problem yet!
+The RHS of 'any' will get optimised and deforested; but the InlineRule
+will still mention the original RHS.
  
  
  preInlineUnconditionally
  
  
  preInlineUnconditionally
@@ -599,6 +588,18 @@ seems a bit fragile.
  Conclusion: inline top level things gaily until Phase 0 (the last
  phase), at which point don't.
  
  Conclusion: inline top level things gaily until Phase 0 (the last
  phase), at which point don't.
  
+Note [pre/postInlineUnconditionally in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in gentle mode we want to do preInlineUnconditionally.  The
+reason is that too little clean-up happens if you don't inline
+use-once things.  Also a bit of inlining is *good* for full laziness;
+it can expose constant sub-expressions.  Example in
+spectral/mandel/Mandel.hs, where the mandelset function gets a useful
+let-float if you inline windowToViewport
+
+However, as usual for Gentle mode, do not inline things that are
+inactive in the intial stages.  See Note [Gentle mode].
+
  \begin{code}
  preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
  preInlineUnconditionally env top_lvl bndr rhs
  \begin{code}
  preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
  preInlineUnconditionally env top_lvl bndr rhs
@@ -611,9 +612,10 @@ preInlineUnconditionally env top_lvl bndr rhs
    where
      phase = getMode env
      active = case phase of
    where
      phase = getMode env
      active = case phase of
-                  SimplGently    -> isAlwaysActive prag
-                  SimplPhase n _ -> isActive n prag
-    prag = idInlinePragma bndr
+                  SimplGently {} -> isEarlyActive act
+                       -- See Note [pre/postInlineUnconditionally in gentle mode]
+                  SimplPhase n _ -> isActive n act
+    act = idInlineActivation bndr
  
      try_once in_lam int_cxt    -- There's one textual occurrence
         | not in_lam = isNotTopLevel top_lvl || early_phase
  
      try_once in_lam int_cxt    -- There's one textual occurrence
         | not in_lam = isNotTopLevel top_lvl || early_phase
@@ -696,7 +698,7 @@ story for now.
  \begin{code}
  postInlineUnconditionally 
      :: SimplEnv -> TopLevelFlag
  \begin{code}
  postInlineUnconditionally 
      :: SimplEnv -> TopLevelFlag
-    -> InId            -- The binder (an OutId would be fine too)
+    -> OutId           -- The binder (an InId would be fine too)
      -> OccInfo                 -- From the InId
      -> OutExpr
      -> Unfolding
      -> OccInfo                 -- From the InId
      -> OutExpr
      -> Unfolding
@@ -706,6 +708,7 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
    | isLoopBreaker occ_info = False     -- If it's a loop-breaker of any kind, don't inline
                                         -- because it might be referred to "earlier"
    | isExportedId bndr      = False
    | isLoopBreaker occ_info = False     -- If it's a loop-breaker of any kind, don't inline
                                         -- because it might be referred to "earlier"
    | isExportedId bndr      = False
+  | isInlineRule unfolding = False     -- Note [InlineRule and postInlineUnconditionally]
    | exprIsTrivial rhs     = True
    | otherwise
    = case occ_info of
    | exprIsTrivial rhs     = True
    | otherwise
    = case occ_info of
@@ -767,21 +770,17 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
  
    where
      active = case getMode env of
  
    where
      active = case getMode env of
-                  SimplGently    -> isAlwaysActive prag
-                  SimplPhase n _ -> isActive n prag
-    prag = idInlinePragma bndr
+                  SimplGently {} -> isEarlyActive act
+                       -- See Note [pre/postInlineUnconditionally in gentle mode]
+                  SimplPhase n _ -> isActive n act
+    act = idInlineActivation bndr
  
  activeInline :: SimplEnv -> OutId -> Bool
  activeInline env id
    = case getMode env of
  
  activeInline :: SimplEnv -> OutId -> Bool
  activeInline env id
    = case getMode env of
-      SimplGently -> False
-       -- No inlining at all when doing gentle stuff,
-       -- except for local things that occur once (pre/postInlineUnconditionally)
-       -- The reason is that too little clean-up happens if you 
-       -- don't inline use-once things.   Also a bit of inlining is *good* for
-       -- full laziness; it can expose constant sub-expressions.
-       -- Example in spectral/mandel/Mandel.hs, where the mandelset 
-       -- function gets a useful let-float if you inline windowToViewport
+      SimplGently { sm_inline = inlining_on } 
+         -> inlining_on && isEarlyActive act
+       -- See Note [Gentle mode]
  
         -- NB: we used to have a second exception, for data con wrappers.
         -- On the grounds that we use gentle mode for rule LHSs, and 
  
         -- NB: we used to have a second exception, for data con wrappers.
         -- On the grounds that we use gentle mode for rule LHSs, and 
@@ -790,26 +789,45 @@ activeInline env id
         -- and they are now constructed as Compulsory unfoldings (in MkId)
         -- so they'll happen anyway.
  
         -- and they are now constructed as Compulsory unfoldings (in MkId)
         -- so they'll happen anyway.
  
-      SimplPhase n _ -> isActive n prag
+      SimplPhase n _ -> isActive n act
    where
    where
-    prag = idInlinePragma id
+    act = idInlineActivation id
  
  activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
  -- Nothing => No rules at all
  activeRule dflags env
  
  activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
  -- Nothing => No rules at all
  activeRule dflags env
-  | not (dopt Opt_RewriteRules dflags)
+  | not (dopt Opt_EnableRewriteRules dflags)
    = Nothing    -- Rewriting is off
    | otherwise
    = case getMode env of
    = Nothing    -- Rewriting is off
    | otherwise
    = case getMode env of
-       SimplGently    -> Just isAlwaysActive
+      SimplGently { sm_rules = rules_on } 
+        | rules_on  -> Just isEarlyActive
+        | otherwise -> Nothing
                         -- Used to be Nothing (no rules in gentle mode)
                         -- Main motivation for changing is that I wanted
                         --      lift String ===> ...
                         -- to work in Template Haskell when simplifying
                         -- splices, so we get simpler code for literal strings
                         -- Used to be Nothing (no rules in gentle mode)
                         -- Main motivation for changing is that I wanted
                         --      lift String ===> ...
                         -- to work in Template Haskell when simplifying
                         -- splices, so we get simpler code for literal strings
-       SimplPhase n _ -> Just (isActive n)
+      SimplPhase n _ -> Just (isActive n)
  \end{code}
  
  \end{code}
  
+Note [InlineRule and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
+we lose the unfolding.  Example
+
+     -- f has InlineRule with rhs (e |> co)
+     --   where 'e' is big
+     f = e |> co
+
+Then there's a danger we'll optimise to
+
+     f' = e
+     f = f' |> co
+
+and now postInlineUnconditionally, losing the InlineRule on f.  Now f'
+won't inline because 'e' is too big.
+
  
  %************************************************************************
  %*                                                                     *
  
  %************************************************************************
  %*                                                                     *
@@ -818,14 +836,14 @@ activeRule dflags env
  %************************************************************************
  
  \begin{code}
  %************************************************************************
  
  \begin{code}
-mkLam :: [OutBndr] -> OutExpr -> SimplM OutExpr
+mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
  -- mkLam tries three things
  --     a) eta reduction, if that gives a trivial expression
  --     b) eta expansion [only if there are some value lambdas]
  
  -- mkLam tries three things
  --     a) eta reduction, if that gives a trivial expression
  --     b) eta expansion [only if there are some value lambdas]
  
-mkLam [] body 
+mkLam _b [] body 
    = return body
    = return body
-mkLam bndrs body
+mkLam env bndrs body
    = do { dflags <- getDOptsSmpl
         ; mkLam' dflags bndrs body }
    where
    = do { dflags <- getDOptsSmpl
         ; mkLam' dflags bndrs body }
    where
@@ -846,8 +864,10 @@ mkLam bndrs body
            ; return etad_lam }
  
        | dopt Opt_DoLambdaEtaExpansion dflags,
            ; return etad_lam }
  
        | dopt Opt_DoLambdaEtaExpansion dflags,
-       any isRuntimeVar bndrs
-      = do { body' <- tryEtaExpansion dflags body
+        not (inGentleMode env),              -- In gentle mode don't eta-expansion
+       any isRuntimeVar bndrs        -- because it can clutter up the code
+                                     -- with casts etc that may not be removed
+      = do { let body' = tryEtaExpansion dflags body
            ; return (mkLams bndrs body') }
     
        | otherwise 
            ; return (mkLams bndrs body') }
     
        | otherwise 
@@ -934,10 +954,16 @@ There are some particularly delicate points here:
  
    So it's important to to the right thing.
  
  
    So it's important to to the right thing.
  
-* We need to be careful if we just look at f's arity. Currently (Dec07),
-  f's arity is visible in its own RHS (see Note [Arity robustness] in 
-  SimplEnv) so we must *not* trust the arity when checking that 'f' is
-  a value.  Instead, look at the unfolding. 
+* Note [Arity care]: we need to be careful if we just look at f's
+  arity. Currently (Dec07), f's arity is visible in its own RHS (see
+  Note [Arity robustness] in SimplEnv) so we must *not* trust the
+  arity when checking that 'f' is a value.  Otherwise we will
+  eta-reduce
+      f = \x. f x
+  to
+      f = f
+  Which might change a terminiating program (think (f `seq` e)) to a 
+  non-terminating one.  So we check for being a loop breaker first.
  
    However for GlobalIds we can look at the arity; and for primops we
    must, since they have no unfolding.  
  
    However for GlobalIds we can look at the arity; and for primops we
    must, since they have no unfolding.  
@@ -950,6 +976,11 @@ There are some particularly delicate points here:
    with both type and dictionary lambdas; hence the slightly 
    ad-hoc isDictId
  
    with both type and dictionary lambdas; hence the slightly 
    ad-hoc isDictId
  
+* Never *reduce* arity. For example
+      f = \xy. g x y
+  Then if h has arity 1 we don't want to eta-reduce because then
+  f's arity would decrease, and that is bad
+
  These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
  Alas.
  
  These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
  Alas.
  
@@ -958,6 +989,8 @@ tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr
  tryEtaReduce bndrs body 
    = go (reverse bndrs) body
    where
  tryEtaReduce bndrs body 
    = go (reverse bndrs) body
    where
+    incoming_arity = count isId bndrs
+
      go (b : bs) (App fun arg) | ok_arg b arg = go bs fun       -- Loop round
      go []       fun           | ok_fun fun   = Just fun                -- Success!
      go _        _                           = Nothing          -- Failure!
      go (b : bs) (App fun arg) | ok_arg b arg = go bs fun       -- Loop round
      go []       fun           | ok_fun fun   = Just fun                -- Success!
      go _        _                           = Nothing          -- Failure!
@@ -971,11 +1004,11 @@ tryEtaReduce bndrs body
         && (ok_fun_id fun_id || all ok_lam bndrs)
      ok_fun _fun = False
  
         && (ok_fun_id fun_id || all ok_lam bndrs)
      ok_fun _fun = False
  
-    ok_fun_id fun
-       | isLocalId fun       = isEvaldUnfolding (idUnfolding fun)
-       | isDataConWorkId fun = True
-       | isGlobalId fun      = idArity fun > 0
-        | otherwise           = panic "tryEtaReduce/ok_fun_id"
+    ok_fun_id fun = fun_arity fun >= incoming_arity
+
+    fun_arity fun            -- See Note [Arity care]
+       | isLocalId fun && isLoopBreaker (idOccInfo fun) = 0
+       | otherwise = idArity fun             
  
      ok_lam v = isTyVar v || isDictId v
  
  
      ok_lam v = isTyVar v || isDictId v
  
@@ -1019,11 +1052,10 @@ when computing arity; and etaExpand adds the coerces as necessary when
  actually computing the expansion.
  
  \begin{code}
  actually computing the expansion.
  
  \begin{code}
-tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr
+tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr
  -- There is at least one runtime binder in the binders
  -- There is at least one runtime binder in the binders
-tryEtaExpansion dflags body = do
-    us <- getUniquesM
-    return (etaExpand fun_arity us body (exprType body))
+tryEtaExpansion dflags body
+  = etaExpand fun_arity body
    where
      fun_arity = exprEtaExpandArity dflags body
  \end{code}
    where
      fun_arity = exprEtaExpandArity dflags body
  \end{code}
@@ -1176,7 +1208,7 @@ abstractFloats main_tvs body_env body
        = do { uniq <- getUniqueM
            ; let  poly_name = setNameUnique (idName var) uniq           -- Keep same name
                   poly_ty   = mkForAllTys tvs_here (idType var) -- But new type of course
        = do { uniq <- getUniqueM
            ; let  poly_name = setNameUnique (idName var) uniq           -- Keep same name
                   poly_ty   = mkForAllTys tvs_here (idType var) -- But new type of course
-                 poly_id   = transferPolyIdInfo var $  -- Note [transferPolyIdInfo] in Id.lhs
+                 poly_id   = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.lhs
                               mkLocalId poly_name poly_ty 
            ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
                 -- In the olden days, it was crucial to copy the occInfo of the original var, 
                               mkLocalId poly_name poly_ty 
            ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
                 -- In the olden days, it was crucial to copy the occInfo of the original var, 
@@ -1425,9 +1457,9 @@ prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rh
  
         _ -> return [(DEFAULT, [], deflt_rhs)]
  
  
         _ -> return [(DEFAULT, [], deflt_rhs)]
  
-  | debugIsOn, isAlgTyCon tycon, [] <- tyConDataCons tycon
+  | debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon)
+       -- This can legitimately happen for type families, so don't report that
    = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
    = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
-       -- This can legitimately happen for type families
          $ return [(DEFAULT, [], deflt_rhs)]
  
  --------- Catch-all cases -----------
          $ return [(DEFAULT, [], deflt_rhs)]
  
  --------- Catch-all cases -----------