add -fsimpleopt-before-flatten

[ghc-hetmet.git] / compiler / simplCore / SimplUtils.lhs
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs

index 1618525..7e9a010 100644 (file)
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -10,8 +10,9 @@ module SimplUtils (
  
         -- Inlining,
         preInlineUnconditionally, postInlineUnconditionally, 
-       activeUnfolding, activeUnfInRule, activeRule, 
-        simplEnvForGHCi, simplEnvForRules, updModeForInlineRules,
+       activeUnfolding, activeRule, 
+       getUnfoldingInRuleMatch, 
+        simplEnvForGHCi, updModeForInlineRules,
  
         -- The continuation type
         SimplCont(..), DupFlag(..), ArgInfo(..),
@@ -29,7 +30,7 @@ module SimplUtils (
  #include "HsVersions.h"
  
  import SimplEnv
-import CoreMonad       ( SimplifierMode(..), Tick(..) )
+import CoreMonad        ( SimplifierMode(..), Tick(..) )
  import DynFlags
  import StaticFlags
  import CoreSyn
@@ -44,6 +45,7 @@ import Id
  import Var
  import Demand
  import SimplMonad
+import TcType  ( isDictLikeTy )
  import Type    hiding( substTy )
  import Coercion ( coercionKind )
  import TyCon
@@ -454,44 +456,43 @@ interestingArgContext rules call_cont
  
  %************************************************************************
  %*                                                                     *
-                  Gentle mode                                                                  
+                  SimplifierMode
  %*                                                                     *
  %************************************************************************
  
-Inlining is controlled partly by the SimplifierMode switch.  This has two
-settings
-       
-       SimplGently     (a) Simplifying before specialiser/full laziness
-                       (b) Simplifiying inside InlineRules
-                       (c) Simplifying the LHS of a rule
-                       (d) Simplifying a GHCi expression or Template 
-                               Haskell splice
-
-       SimplPhase n _   Used at all other times
-
-Note [Gentle mode]
-~~~~~~~~~~~~~~~~~~
-Gentle mode has a separate boolean flag to control
-       a) inlining (sm_inline flag)
-       b) rules    (sm_rules  flag)
-A key invariant about Gentle mode is that it is treated as the EARLIEST
-phase.  
+The SimplifierMode controls several switches; see its definition in
+CoreMonad
+        sm_rules      :: Bool     -- Whether RULES are enabled
+        sm_inline     :: Bool     -- Whether inlining is enabled
+        sm_case_case  :: Bool     -- Whether case-of-case is enabled
+        sm_eta_expand :: Bool     -- Whether eta-expansion is enabled
  
  \begin{code}
-simplEnvForGHCi :: SimplEnv
-simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $
-                  SimplGently { sm_rules = True, sm_inline = False }
+simplEnvForGHCi :: DynFlags -> SimplEnv
+simplEnvForGHCi dflags
+  = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
+                           , sm_phase = InitialPhase
+                           , sm_rules = rules_on
+                           , sm_inline = False
+                           , sm_eta_expand = eta_expand_on
+                           , sm_case_case = True }
+  where
+    rules_on      = dopt Opt_EnableRewriteRules   dflags
+    eta_expand_on = dopt Opt_DoLambdaEtaExpansion dflags
     -- Do not do any inlining, in case we expose some unboxed
     -- tuple stuff that confuses the bytecode interpreter
  
-simplEnvForRules :: SimplEnv
-simplEnvForRules = mkSimplEnv allOffSwitchChecker $
-                   SimplGently { sm_rules = True, sm_inline = False }
-
  updModeForInlineRules :: Activation -> SimplifierMode -> SimplifierMode
  -- See Note [Simplifying inside InlineRules]
-updModeForInlineRules _inline_rule_act _current_mode
-  = SimplGently { sm_rules = True, sm_inline = True }
+updModeForInlineRules inline_rule_act current_mode
+  = current_mode { sm_phase = phaseFromActivation inline_rule_act
+                 , sm_inline = True
+                 , sm_eta_expand = False }
+                -- For sm_rules, just inherit; sm_rules might be "off"
+                -- becuase of -fno-enable-rewrite-rules
+  where
+    phaseFromActivation (ActiveAfter n) = Phase n
+    phaseFromActivation _               = InitialPhase
  \end{code}
  
  Note [Inlining in gentle mode]
@@ -531,25 +532,6 @@ running it, we don't want to use -O2.  Indeed, we don't want to inline
  anything, because the byte-code interpreter might get confused about 
  unboxed tuples and suchlike.
  
-Note [RULEs enabled in SimplGently]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-RULES are enabled when doing "gentle" simplification.  Two reasons:
-
-  * We really want the class-op cancellation to happen:
-        op (df d1 d2) --> $cop3 d1 d2
-    because this breaks the mutual recursion between 'op' and 'df'
-
-  * I wanted the RULE
-        lift String ===> ...
-    to work in Template Haskell when simplifying
-    splices, so we get simpler code for literal strings
-
-But watch out: list fusion can prevent floating.  So use phase control
-to switch off those rules until after floating.
-
-Currently (Oct10) I think that sm_rules is always True, so we
-could remove it.
-
  Note [Simplifying inside InlineRules]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  We must take care with simplification inside InlineRules (which come from
@@ -568,8 +550,55 @@ one; see OccurAnal.addRuleUsage.
  
  Second, we do want *do* to some modest rules/inlining stuff in InlineRules,
  partly to eliminate senseless crap, and partly to break the recursive knots
-generated by instance declarations.  To keep things simple, we always set 
-the phase to 'gentle' when processing InlineRules.  
+generated by instance declarations.
+
+However, suppose we have
+       {-# INLINE <act> f #-}
+       f = <rhs>
+meaning "inline f in phases p where activation <act>(p) holds". 
+Then what inlinings/rules can we apply to the copy of <rhs> captured in
+f's InlineRule?  Our model is that literally <rhs> is substituted for
+f when it is inlined.  So our conservative plan (implemented by 
+updModeForInlineRules) is this:
+
+  -------------------------------------------------------------
+  When simplifying the RHS of an InlineRule, set the phase to the
+  phase in which the InlineRule first becomes active
+  -------------------------------------------------------------
+
+That ensures that
+
+  a) Rules/inlinings that *cease* being active before p will 
+     not apply to the InlineRule rhs, consistent with it being
+     inlined in its *original* form in phase p.
+
+  b) Rules/inlinings that only become active *after* p will
+     not apply to the InlineRule rhs, again to be consistent with
+     inlining the *original* rhs in phase p.
+
+For example, 
+               {-# INLINE f #-}
+       f x = ...g...
+
+       {-# NOINLINE [1] g #-}
+       g y = ...
+
+       {-# RULE h g = ... #-}
+Here we must not inline g into f's RHS, even when we get to phase 0,
+because when f is later inlined into some other module we want the
+rule for h to fire.
+
+Similarly, consider
+       {-# INLINE f #-}
+       f x = ...g...
+
+       g y = ...
+and suppose that there are auto-generated specialisations and a strictness
+wrapper for g.  The specialisations get activation AlwaysActive, and the
+strictness wrapper get activation (ActiveAfter 0).  So the strictness
+wrepper fails the test and won't be inlined into f's InlineRule. That
+means f can inline, expose the specialised call to g, so the specialisation
+rules can fire.
  
  A note about wrappers
  ~~~~~~~~~~~~~~~~~~~~~
@@ -583,31 +612,32 @@ mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
  continuation. 
  
  \begin{code}
-activeUnfolding :: SimplEnv -> IdUnfoldingFun
+activeUnfolding :: SimplEnv -> Id -> Bool
  activeUnfolding env
-  = case getMode env of
-      SimplGently { sm_inline = False } -> active_unfolding_minimal
-      SimplGently { sm_inline = True  } -> active_unfolding_gentle
-      SimplPhase n _                    -> active_unfolding n
+  | not (sm_inline mode) = active_unfolding_minimal
+  | otherwise            = case sm_phase mode of
+                             InitialPhase -> active_unfolding_gentle
+                             Phase n      -> active_unfolding n
+  where
+    mode = getMode env
  
-activeUnfInRule :: SimplEnv -> IdUnfoldingFun
+getUnfoldingInRuleMatch :: SimplEnv -> IdUnfoldingFun
  -- When matching in RULE, we want to "look through" an unfolding
  -- (to see a constructor) if *rules* are on, even if *inlinings* 
  -- are not.  A notable example is DFuns, which really we want to 
  -- match in rules like (op dfun) in gentle mode. Another example
  -- is 'otherwise' which we want exprIsConApp_maybe to be able to
  -- see very early on
-activeUnfInRule env
-  = case getMode env of
-      SimplGently { sm_rules = False } -> active_unfolding_minimal
-      SimplGently { sm_rules = True  } -> active_unfolding_early
-      SimplPhase n _                   -> active_unfolding n
+getUnfoldingInRuleMatch env id
+  | unf_is_active = idUnfolding id
+  | otherwise     = NoUnfolding
    where
-    active_unfolding_early id
-      | isEarlyActive (idInlineActivation id) = idUnfolding id
-      | otherwise                             = idUnfolding id
+    mode = getMode env
+    unf_is_active
+     | not (sm_rules mode) = active_unfolding_minimal id
+     | otherwise           = isActive (sm_phase mode) (idInlineActivation id)
  
-active_unfolding_minimal :: IdUnfoldingFun
+active_unfolding_minimal :: Id -> Bool
  -- Compuslory unfoldings only
  -- Ignore SimplGently, because we want to inline regardless;
  -- the Id has no top-level binding at all
@@ -618,113 +648,31 @@ active_unfolding_minimal :: IdUnfoldingFun
  -- But that only really applies to the trivial wrappers (like (:)),
  -- and they are now constructed as Compulsory unfoldings (in MkId)
  -- so they'll happen anyway.
-active_unfolding_minimal id
-  | isCompulsoryUnfolding unf = unf
-  | otherwise                 = NoUnfolding
-  where
-    unf = idUnfolding id
+active_unfolding_minimal id = isCompulsoryUnfolding (realIdUnfolding id)
  
-active_unfolding_gentle :: IdUnfoldingFun
+active_unfolding :: PhaseNum -> Id -> Bool
+active_unfolding n id = isActiveIn n (idInlineActivation id)
+
+active_unfolding_gentle :: Id -> Bool
  -- Anything that is early-active
  -- See Note [Gentle mode]
  active_unfolding_gentle id
-  | isStableUnfolding unf
-  , isEarlyActive (idInlineActivation id) = unf
+  =  isInlinePragma prag
+  && isEarlyActive (inlinePragmaActivation prag)
         -- NB: wrappers are not early-active
-  | otherwise                             = NoUnfolding
    where
-    unf = idUnfolding id
-      -- idUnfolding checks for loop-breakers
-      -- Things with an INLINE pragma may have 
-      -- an unfolding *and* be a loop breaker  
-      -- (maybe the knot is not yet untied)
-
-active_unfolding :: CompilerPhase -> IdUnfoldingFun
-active_unfolding n id
-  | isActive n (idInlineActivation id) = idUnfolding id
-  | otherwise                          = NoUnfolding
+    prag = idInlinePragma id
  
+----------------------
  activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
  -- Nothing => No rules at all
-activeRule dflags env
-  | not (dopt Opt_EnableRewriteRules dflags)
-  = Nothing    -- Rewriting is off
-  | otherwise
-  = case getMode env of
-      SimplGently { sm_rules = rules_on } 
-        | rules_on  -> Just isEarlyActive      -- Note [RULEs enabled in SimplGently]
-        | otherwise -> Nothing
-      SimplPhase n _ -> Just (isActive n)
+activeRule _dflags env
+  | not (sm_rules mode) = Nothing     -- Rewriting is off
+  | otherwise           = Just (isActive (sm_phase mode))
+  where
+    mode = getMode env
  \end{code}
  
---------------------------------------------------------------
-                OLD NOTES, now wrong
-           Preserved just for now (Oct 10)
---------------------------------------------------------------
-
-      OK, so suppose we have
-       {-# INLINE <act> f #-}
-       f = <rhs>
-      meaning "inline f in phases p where activation <act>(p) holds". 
-      Then what inlinings/rules can we apply to the copy of <rhs> captured in
-      f's InlineRule?  Our model is that literally <rhs> is substituted for
-      f when it is inlined.  So our conservative plan (implemented by 
-      updModeForInlineRules) is this:
-
-        -------------------------------------------------------------
-        When simplifying the RHS of an InlineRule,
-        If the InlineRule becomes active in phase p, then
-          if the current phase is *earlier than* p, 
-             make no inlinings or rules active when simplifying the RHS
-          otherwise 
-             set the phase to p when simplifying the RHS
-
-      --    Treat Gentle as phase "infinity"
-      --    If current_phase `earlier than` inline_rule_start_phase 
-      --      then no_op
-      --    else 
-      --    if current_phase `same phase` inline_rule_start_phase 
-      --      then current_phase   (keep gentle flags)
-      --      else inline_rule_start_phase
-        -------------------------------------------------------------
-
-      That ensures that
-
-        a) Rules/inlinings that *cease* being active before p will 
-           not apply to the InlineRule rhs, consistent with it being
-           inlined in its *original* form in phase p.
-
-        b) Rules/inlinings that only become active *after* p will
-           not apply to the InlineRule rhs, again to be consistent with
-           inlining the *original* rhs in phase p.
-
-      For example, 
-               {-# INLINE f #-}
-       f x = ...g...
-
-       {-# NOINLINE [1] g #-}
-       g y = ...
-
-       {-# RULE h g = ... #-}
-      Here we must not inline g into f's RHS, even when we get to phase 0,
-      because when f is later inlined into some other module we want the
-      rule for h to fire.
-
-      Similarly, consider
-               {-# INLINE f #-}
-       f x = ...g...
-
-       g y = ...
-      and suppose that there are auto-generated specialisations and a strictness
-      wrapper for g.  The specialisations get activation AlwaysActive, and the
-      strictness wrapper get activation (ActiveAfter 0).  So the strictness
-      wrepper fails the test and won't be inlined into f's InlineRule. That
-      means f can inline, expose the specialised call to g, so the specialisation
-      rules can fire.
-
---------------------------------------------------------------
-                END OF OLD NOTES
---------------------------------------------------------------
  
  
  %************************************************************************
@@ -848,11 +796,9 @@ preInlineUnconditionally env top_lvl bndr rhs
                   OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
                   _                          -> False
    where
-    phase = getMode env
-    active = case phase of
-                  SimplGently {} -> isEarlyActive act
-                       -- See Note [pre/postInlineUnconditionally in gentle mode]
-                  SimplPhase n _ -> isActive n act
+    mode = getMode env
+    active = isActive (sm_phase mode) act
+             -- See Note [pre/postInlineUnconditionally in gentle mode]
      act = idInlineActivation bndr
      try_once in_lam int_cxt    -- There's one textual occurrence
         | not in_lam = isNotTopLevel top_lvl || early_phase
@@ -884,9 +830,9 @@ preInlineUnconditionally env top_lvl bndr rhs
      canInlineInLam (Note _ e)          = canInlineInLam e
      canInlineInLam _                   = False
  
-    early_phase = case phase of
-                       SimplPhase 0 _ -> False
-                       _              -> True
+    early_phase = case sm_phase mode of
+                    Phase 0 -> False
+                    _       -> True
  -- If we don't have this early_phase test, consider
  --     x = length [1,2,3]
  -- The full laziness pass carefully floats all the cons cells to
@@ -917,7 +863,7 @@ a thing based on the form of its RHS; in particular if it has a
  trivial RHS.  If so, we can inline and discard the binding altogether.
  
  NB: a loop breaker has must_keep_binding = True and non-loop-breakers
-only have *forward* references Hence, it's safe to discard the binding
+only have *forward* references. Hence, it's safe to discard the binding
         
  NOTE: This isn't our last opportunity to inline.  We're at the binding
  site right now, and we'll get another opportunity when we get to the
@@ -952,8 +898,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
                                         -- because it might be referred to "earlier"
    | isExportedId bndr           = False
    | isStableUnfolding unfolding = False        -- Note [InlineRule and postInlineUnconditionally]
-  | exprIsTrivial rhs          = True
    | isTopLevel top_lvl          = False        -- Note [Top level and postInlineUnconditionally]
+  | exprIsTrivial rhs          = True
    | otherwise
    = case occ_info of
         -- The point of examining occ_info here is that for *non-values* 
@@ -1014,23 +960,35 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
  -- Alas!
  
    where
-    active = case getMode env of
-                  SimplGently {} -> isEarlyActive act
-                       -- See Note [pre/postInlineUnconditionally in gentle mode]
-                  SimplPhase n _ -> isActive n act
-    act = idInlineActivation bndr
+    active = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
+        -- See Note [pre/postInlineUnconditionally in gentle mode]
  \end{code}
  
  Note [Top level and postInlineUnconditionally]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We don't do postInlineUnconditionally for top-level things (exept ones that
-are trivial):
-  * There is no point, because the main goal is to get rid of local
-    bindings used in multiple case branches.
+We don't do postInlineUnconditionally for top-level things (even for
+ones that are trivial):
+
    * Doing so will inline top-level error expressions that have been
      carefully floated out by FloatOut.  More generally, it might 
      replace static allocation with dynamic.
  
+  * Even for trivial expressions there's a problem.  Consider
+      {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
+      blah xs = reverse xs
+      ruggle = sort
+    In one simplifier pass we might fire the rule, getting 
+      blah xs = ruggle xs
+    but in *that* simplifier pass we must not do postInlineUnconditionally
+    on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
+
+    If the rhs is trivial it'll be inlined by callSiteInline, and then
+    the binding will be dead and discarded by the next use of OccurAnal
+
+  * There is less point, because the main goal is to get rid of local
+    bindings used in multiple case branches.  
+    
+
  Note [InlineRule and postInlineUnconditionally]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
@@ -1133,6 +1091,9 @@ because the latter is not well-kinded.
  %*                                                                     *
  %************************************************************************
  
+When we meet a let-binding we try eta-expansion.  To find the 
+arity of the RHS we use a little fixpoint analysis; see Note [Arity analysis]
+
  \begin{code}
  tryEtaExpand :: SimplEnv -> OutId -> OutExpr -> SimplM (Arity, OutExpr)
  -- See Note [Eta-expanding at let bindings]
@@ -1147,20 +1108,80 @@ tryEtaExpand env bndr rhs
           return (new_arity, new_rhs) }
    where
      try_expand dflags
-      | dopt Opt_DoLambdaEtaExpansion dflags 
+      | sm_eta_expand (getMode env)      -- Provided eta-expansion is on
        , not (exprIsTrivial rhs)
-      , not (inGentleMode env)  -- In gentle mode don't eta-expansion
-                               -- because it can clutter up the code
-                               -- with casts etc that may not be removed
-      , let new_arity = exprEtaExpandArity dflags rhs
-      , new_arity > old_arity
+      , let dicts_cheap = dopt Opt_DictsCheap dflags
+            new_arity   = findArity dicts_cheap bndr rhs old_arity
+      , new_arity > rhs_arity
        = do { tick (EtaExpansion bndr)
             ; return (new_arity, etaExpand new_arity rhs) }
        | otherwise
-      = return (exprArity rhs, rhs)
+      = return (rhs_arity, rhs)
  
+    rhs_arity  = exprArity rhs
      old_arity  = idArity bndr
      _dmd_arity = length $ fst $ splitStrictSig $ idStrictness bndr
+
+findArity :: Bool -> Id -> CoreExpr -> Arity -> Arity
+-- This implements the fixpoint loop for arity analysis
+-- See Note [Arity analysis]
+findArity dicts_cheap bndr rhs old_arity
+  = go (exprEtaExpandArity (mk_cheap_fn dicts_cheap init_cheap_app) rhs)
+       -- We always call exprEtaExpandArity once, but usually 
+       -- that produces a result equal to old_arity, and then
+       -- we stop right away (since arities should not decrease)
+       -- Result: the common case is that there is just one iteration
+  where
+    go :: Arity -> Arity
+    go cur_arity
+      | cur_arity <= old_arity = cur_arity     
+      | new_arity == cur_arity = cur_arity
+      | otherwise = ASSERT( new_arity < cur_arity )
+                    pprTrace "Exciting arity" 
+                       (vcat [ ppr bndr <+> ppr cur_arity <+> ppr new_arity
+                             , ppr rhs])
+                    go new_arity
+      where
+        new_arity = exprEtaExpandArity (mk_cheap_fn dicts_cheap cheap_app) rhs
+      
+        cheap_app :: CheapAppFun
+        cheap_app fn n_val_args
+          | fn == bndr = n_val_args < cur_arity
+          | otherwise  = isCheapApp fn n_val_args
+
+    init_cheap_app :: CheapAppFun
+    init_cheap_app fn n_val_args
+      | fn == bndr = True
+      | otherwise  = isCheapApp fn n_val_args
+ 
+mk_cheap_fn :: Bool -> CheapAppFun -> CheapFun
+mk_cheap_fn dicts_cheap cheap_app
+  | not dicts_cheap
+  = \e _     -> exprIsCheap' cheap_app e
+  | otherwise
+  = \e mb_ty -> exprIsCheap' cheap_app e
+             || case mb_ty of
+                  Nothing -> False
+                  Just ty -> isDictLikeTy ty
+       -- If the experimental -fdicts-cheap flag is on, we eta-expand through
+       -- dictionary bindings.  This improves arities. Thereby, it also
+       -- means that full laziness is less prone to floating out the
+       -- application of a function to its dictionary arguments, which
+       -- can thereby lose opportunities for fusion.  Example:
+       --      foo :: Ord a => a -> ...
+       --      foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
+       --              -- So foo has arity 1
+       --
+       --      f = \x. foo dInt $ bar x
+       --
+       -- The (foo DInt) is floated out, and makes ineffective a RULE 
+       --      foo (bar x) = ...
+       --
+       -- One could go further and make exprIsCheap reply True to any
+       -- dictionary-typed expression, but that's more work.
+       -- 
+       -- See Note [Dictionary-like types] in TcType.lhs for why we use
+       -- isDictLikeTy here rather than isDictTy
  \end{code}
  
  Note [Eta-expanding at let bindings]
@@ -1186,6 +1207,33 @@ because then 'genMap' will inline, and it really shouldn't: at least
  as far as the programmer is concerned, it's not applied to two
  arguments!
  
+Note [Arity analysis]
+~~~~~~~~~~~~~~~~~~~~~
+The motivating example for arity analysis is this:
+ 
+  f = \x. let g = f (x+1) 
+          in \y. ...g...
+
+What arity does f have?  Really it should have arity 2, but a naive
+look at the RHS won't see that.  You need a fixpoint analysis which
+says it has arity "infinity" the first time round.
+
+This example happens a lot; it first showed up in Andy Gill's thesis,
+fifteen years ago!  It also shows up in the code for 'rnf' on lists
+in Trac #4138.
+
+The analysis is easy to achieve because exprEtaExpandArity takes an
+argument
+     type CheapFun = CoreExpr -> Maybe Type -> Bool
+used to decide if an expression is cheap enough to push inside a 
+lambda.  And exprIsCheap' in turn takes an argument
+     type CheapAppFun = Id -> Int -> Bool
+which tells when an application is cheap. This makes it easy to
+write the analysis loop.
+
+The analysis is cheap-and-cheerful because it doesn't deal with
+mutual recursion.  But the self-recursive case is the important one.
+
  
  %************************************************************************
  %*                                                                     *