Allow inlining in "SimplGentle" mode

[ghc-hetmet.git] / compiler / simplCore / SimplUtils.lhs
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs

index d697fb3..dfe9e83 100644 (file)
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -10,14 +10,15 @@ module SimplUtils (
  
         -- Inlining,
         preInlineUnconditionally, postInlineUnconditionally, 
-       activeInline, activeRule, inlineMode,
+       activeInline, activeRule, 
+        simplEnvForGHCi, simplEnvForRules, simplGentlyForInlineRules,
  
         -- The continuation type
         SimplCont(..), DupFlag(..), ArgInfo(..),
         contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, 
-       countValArgs, countArgs, splitInlineCont,
-       mkBoringStop, mkLazyArgStop, contIsRhsOrArg,
-       interestingCallContext, interestingArgContext,
+       countValArgs, countArgs, 
+       mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
+       interestingCallContext, 
  
         interestingArg, mkArgInfo,
         
@@ -34,6 +35,7 @@ import qualified CoreSubst
  import PprCore
  import CoreFVs
  import CoreUtils
+import CoreArity       ( etaExpand, exprEtaExpandArity )
  import CoreUnfold
  import Name
  import Id
@@ -51,7 +53,7 @@ import MonadUtils
  import Outputable
  import FastString
  
-import List( nub )
+import Data.List
  \end{code}
  
  
@@ -112,7 +114,7 @@ data SimplCont
         SimplCont       
  
    | StrictArg          -- e C
-       OutExpr                 -- e 
+       OutExpr                 -- e; *always* of form (Var v `App1` e1 .. `App` en)
         CallCtxt                -- Whether *this* argument position is interesting
         ArgInfo                 -- Whether the function at the head of e has rules, etc
         SimplCont               --     plus strictness flags for *further* args
@@ -151,6 +153,9 @@ instance Outputable DupFlag where
  mkBoringStop :: SimplCont
  mkBoringStop = Stop BoringCtxt
  
+mkRhsStop :: SimplCont -- See Note [RHS of lets] in CoreUnfold
+mkRhsStop = Stop (ArgCtxt False)
+
  mkLazyArgStop :: CallCtxt -> SimplCont
  mkLazyArgStop cci = Stop cci
  
@@ -214,63 +219,11 @@ dropArgs :: Int -> SimplCont -> SimplCont
  dropArgs 0 cont = cont
  dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
  dropArgs n other               = pprPanic "dropArgs" (ppr n <+> ppr other)
-
---------------------
-splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont)
--- Returns Nothing if the continuation should dissolve an InlineMe Note
--- Return Just (c1,c2) otherwise, 
---     where c1 is the continuation to put inside the InlineMe 
---     and   c2 outside
-
--- Example: (__inline_me__ (/\a. e)) ty
---     Here we want to do the beta-redex without dissolving the InlineMe
--- See test simpl017 (and Trac #1627) for a good example of why this is important
-
-splitInlineCont (ApplyTo dup (Type ty) se c)
-  | Just (c1, c2) <- splitInlineCont c = Just (ApplyTo dup (Type ty) se c1, c2)
-splitInlineCont cont@(Stop {})         = Just (mkBoringStop, cont)
-splitInlineCont cont@(StrictBind {})   = Just (mkBoringStop, cont)
-splitInlineCont cont@(StrictArg  {})   = Just (mkBoringStop, cont)
-splitInlineCont _                      = Nothing
  \end{code}
  
  
-\begin{code}
-interestingArg :: OutExpr -> Bool
-       -- An argument is interesting if it has *some* structure
-       -- We are here trying to avoid unfolding a function that
-       -- is applied only to variables that have no unfolding
-       -- (i.e. they are probably lambda bound): f x y z
-       -- There is little point in inlining f here.
-interestingArg (Var v)          = hasSomeUnfolding (idUnfolding v)
-                                       -- Was: isValueUnfolding (idUnfolding v')
-                                       -- But that seems over-pessimistic
-                                || isDataConWorkId v
-                                       -- This accounts for an argument like
-                                       -- () or [], which is definitely interesting
-interestingArg (Type _)                 = False
-interestingArg (App fn (Type _)) = interestingArg fn
-interestingArg (Note _ a)       = interestingArg a
-
--- Idea (from Sam B); I'm not sure if it's a good idea, so commented out for now
--- interestingArg expr | isUnLiftedType (exprType expr)
---        -- Unlifted args are only ever interesting if we know what they are
---  =                  case expr of
---                        Lit lit -> True
---                        _       -> False
-
-interestingArg _                 = True
-       -- Consider     let x = 3 in f x
-       -- The substitution will contain (x -> ContEx 3), and we want to
-       -- to say that x is an interesting argument.
-       -- But consider also (\x. f x y) y
-       -- The substitution will contain (x -> ContEx y), and we want to say
-       -- that x is not interesting (assuming y has no unfolding)
-\end{code}
-
-
-Comment about interestingCallContext
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [Interesting call context]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  We want to avoid inlining an expression where there can't possibly be
  any gain, such as in an argument position.  Hence, if the continuation
  is interesting (eg. a case scrutinee, application etc.) then we
@@ -305,20 +258,22 @@ default case.
  
  \begin{code}
  interestingCallContext :: SimplCont -> CallCtxt
+-- See Note [Interesting call context]
  interestingCallContext cont
    = interesting cont
    where
-    interestingCtxt = ArgCtxt False 2  -- Give *some* incentive!
-
      interesting (Select _ bndr _ _ _)
-       | isDeadBinder bndr       = CaseCtxt
-       | otherwise               = interestingCtxt
+       | isDeadBinder bndr = CaseCtxt
+       | otherwise         = ArgCtxt False     -- If the binder is used, this
+                                               -- is like a strict let
+                                               -- See Note [RHS of lets] in CoreUnfold
                 
-    interesting (ApplyTo {})      = interestingCtxt
-                               -- Can happen if we have (coerce t (f x)) y
-                               -- Perhaps interestingCtxt is a bit over-keen, but I've
-                               -- seen (coerce f) x, where f has an INLINE prag,
-                               -- So we have to give some motivation for inlining it
+    interesting (ApplyTo _ arg _ cont)
+       | isTypeArg arg = interesting cont
+       | otherwise     = ValAppCtxt    -- Can happen if we have (f Int |> co) y
+                                       -- If f has an INLINE prag we need to give it some
+                                       -- motivation to inline. See Note [Cast then apply]
+                                       -- in CoreUnfold
  
      interesting (StrictArg _ cci _ _)  = cci
      interesting (StrictBind {})                = BoringCtxt
@@ -342,24 +297,25 @@ interestingCallContext cont
  
  -------------------
  mkArgInfo :: Id
+         -> [CoreRule] -- Rules for function
           -> Int        -- Number of value args
-         -> SimplCont  -- Context of the cal
+         -> SimplCont  -- Context of the call
           -> ArgInfo
  
-mkArgInfo fun n_val_args call_cont
+mkArgInfo fun rules n_val_args call_cont
    | n_val_args < idArity fun           -- Note [Unsaturated functions]
    = ArgInfo { ai_rules = False
             , ai_strs = vanilla_stricts 
             , ai_discs = vanilla_discounts }
    | otherwise
-  = ArgInfo { ai_rules = interestingArgContext fun call_cont
+  = ArgInfo { ai_rules = interestingArgContext rules call_cont
             , ai_strs  = add_type_str (idType fun) arg_stricts
             , ai_discs = arg_discounts }
    where
      vanilla_discounts, arg_discounts :: [Int]
      vanilla_discounts = repeat 0
      arg_discounts = case idUnfolding fun of
-                       CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _)
+                       CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_args = discounts}}
                               -> discounts ++ vanilla_discounts
                         _     -> vanilla_discounts
  
@@ -413,7 +369,7 @@ it'll just be floated out again.  Even if f has lots of discounts
  on its first argument -- it must be saturated for these to kick in
  -}
  
-interestingArgContext :: Id -> SimplCont -> Bool
+interestingArgContext :: [CoreRule] -> SimplCont -> Bool
  -- If the argument has form (f x y), where x,y are boring,
  -- and f is marked INLINE, then we don't want to inline f.
  -- But if the context of the argument is
@@ -424,16 +380,18 @@ interestingArgContext :: Id -> SimplCont -> Bool
  -- where h has rules, then we do want to inline f; hence the
  -- call_cont argument to interestingArgContext
  --
--- The interesting_arg_ctxt flag makes this happen; if it's
+-- The ai_rules flag makes this happen; if it's
  -- set, the inliner gets just enough keener to inline f 
  -- regardless of how boring f's arguments are, if it's marked INLINE
  --
  -- The alternative would be to *always* inline an INLINE function,
  -- regardless of how boring its context is; but that seems overkill
  -- For example, it'd mean that wrapper functions were always inlined
-interestingArgContext fn call_cont
-  = idHasRules fn || go call_cont
+interestingArgContext rules call_cont
+  = notNull rules || enclosing_fn_has_rules
    where
+    enclosing_fn_has_rules = go call_cont
+
      go (Select {})          = False
      go (ApplyTo {})         = False
      go (StrictArg _ cci _ _) = interesting cci
@@ -441,8 +399,8 @@ interestingArgContext fn call_cont
      go (CoerceIt _ c)       = go c
      go (Stop cci)            = interesting cci
  
-    interesting (ArgCtxt rules _) = rules
-    interesting _                 = False
+    interesting (ArgCtxt rules) = rules
+    interesting _               = False
  \end{code}
  
  
@@ -453,18 +411,58 @@ interestingArgContext fn call_cont
  %*                                                                     *
  %************************************************************************
  
-Inlining is controlled partly by the SimplifierMode switch.  This has two
-settings:
+\begin{code}
+simplEnvForGHCi :: SimplEnv
+simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $
+                  SimplGently { sm_rules = False, sm_inline = False }
+   -- Do not do any inlining, in case we expose some unboxed
+   -- tuple stuff that confuses the bytecode interpreter
+
+simplEnvForRules :: SimplEnv
+simplEnvForRules = mkSimplEnv allOffSwitchChecker $
+                   SimplGently { sm_rules = True, sm_inline = False }
+
+simplGentlyForInlineRules :: SimplifierMode
+simplGentlyForInlineRules = SimplGently { sm_rules = True, sm_inline = True }
+       -- Simplify as much as possible, subject to the usual "gentle" rules
+\end{code}
  
+Inlining is controlled partly by the SimplifierMode switch.  This has two
+settings:
+       
         SimplGently     (a) Simplifying before specialiser/full laziness
-                       (b) Simplifiying inside INLINE pragma
+                       (b) Simplifying inside InlineRules
                         (c) Simplifying the LHS of a rule
                         (d) Simplifying a GHCi expression or Template 
                                 Haskell splice
  
         SimplPhase n _   Used at all other times
  
-The key thing about SimplGently is that it does no call-site inlining.
+Note [Gentle mode]
+~~~~~~~~~~~~~~~~~~
+Gentle mode has a separate boolean flag to control
+       a) inlining (sm_inline flag)
+       b) rules    (sm_rules  flag)
+A key invariant about Gentle mode is that it is treated as the EARLIEST
+phase.  Something is inlined if the sm_inline flag is on AND the thing
+is inlinable in the earliest phase.  This is important. Example
+
+  {-# INLINE [~1] g #-}
+  g = ...
+  
+  {-# INLINE f #-}
+  f x = g (g x)
+
+If we were to inline g into f's inlining, then an importing module would
+never be able to do
+       f e --> g (g e) ---> RULE fires
+because the InlineRule for f has had g inlined into it.
+
+On the other hand, it is bad not to do ANY inlining into an
+InlineRule, because then recursive knots in instance declarations
+don't get unravelled.
+
+However, *sometimes* SimplGently must do no call-site inlining at all.
  Before full laziness we must be careful not to inline wrappers,
  because doing so inhibits floating
      e.g. ...(case f x of ...)...
@@ -478,17 +476,11 @@ running it, we don't want to use -O2.  Indeed, we don't want to inline
  anything, because the byte-code interpreter might get confused about 
  unboxed tuples and suchlike.
  
-INLINE pragmas
-~~~~~~~~~~~~~~
-SimplGently is also used as the mode to simplify inside an InlineMe note.
-
-\begin{code}
-inlineMode :: SimplifierMode
-inlineMode = SimplGently
-\end{code}
-
-It really is important to switch off inlinings inside such
-expressions.  Consider the following example 
+Note [Simplifying gently inside InlineRules]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do much simplification inside InlineRules (which come from
+INLINE pragmas).  It really is important to switch off inlinings
+inside such expressions.  Consider the following example
  
         let f = \pq -> BIG
         in
@@ -497,16 +489,14 @@ expressions.  Consider the following example
         in ...g...g...g...g...g...
  
  Now, if that's the ONLY occurrence of f, it will be inlined inside g,
-and thence copied multiple times when g is inlined.
-
+and thence copied multiple times when g is inlined.  
  
-This function may be inlinined in other modules, so we
-don't want to remove (by inlining) calls to functions that have
-specialisations, or that may have transformation rules in an importing
-scope.
+This function may be inlined in other modules, so we don't want to
+remove (by inlining) calls to functions that have specialisations, or
+that may have transformation rules in an importing scope.
  
  E.g.   {-# INLINE f #-}
-               f x = ...g...
+       f x = ...g...
  
  and suppose that g is strict *and* has specialisations.  If we inline
  g's wrapper, we deny f the chance of getting the specialised version
@@ -524,15 +514,14 @@ continuation.  That's why the keep_inline predicate returns True for
  ArgOf continuations.  It shouldn't do any harm not to dissolve the
  inline-me note under these circumstances.
  
-Note that the result is that we do very little simplification
-inside an InlineMe.  
+Although we do very little simplification inside an InlineRule,
+the RHS is simplified as normal.  For example:
  
         all xs = foldr (&&) True xs
         any p = all . map p  {-# INLINE any #-}
  
-Problem: any won't get deforested, and so if it's exported and the
-importer doesn't use the inlining, (eg passes it as an arg) then we
-won't get deforestation at all.  We havn't solved this problem yet!
+The RHS of 'any' will get optimised and deforested; but the InlineRule
+will still mention the original RHS.
  
  
  preInlineUnconditionally
@@ -599,6 +588,18 @@ seems a bit fragile.
  Conclusion: inline top level things gaily until Phase 0 (the last
  phase), at which point don't.
  
+Note [pre/postInlineUnconditionally in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in gentle mode we want to do preInlineUnconditionally.  The
+reason is that too little clean-up happens if you don't inline
+use-once things.  Also a bit of inlining is *good* for full laziness;
+it can expose constant sub-expressions.  Example in
+spectral/mandel/Mandel.hs, where the mandelset function gets a useful
+let-float if you inline windowToViewport
+
+However, as usual for Gentle mode, do not inline things that are
+inactive in the initial stages.  See Note [Gentle mode].
+
  \begin{code}
  preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
  preInlineUnconditionally env top_lvl bndr rhs
@@ -611,9 +612,10 @@ preInlineUnconditionally env top_lvl bndr rhs
    where
      phase = getMode env
      active = case phase of
-                  SimplGently    -> isAlwaysActive prag
-                  SimplPhase n _ -> isActive n prag
-    prag = idInlinePragma bndr
+                  SimplGently {} -> isEarlyActive act
+                       -- See Note [pre/postInlineUnconditionally in gentle mode]
+                  SimplPhase n _ -> isActive n act
+    act = idInlineActivation bndr
  
      try_once in_lam int_cxt    -- There's one textual occurrence
         | not in_lam = isNotTopLevel top_lvl || early_phase
@@ -696,7 +698,7 @@ story for now.
  \begin{code}
  postInlineUnconditionally 
      :: SimplEnv -> TopLevelFlag
-    -> InId            -- The binder (an OutId would be fine too)
+    -> OutId           -- The binder (an InId would be fine too)
      -> OccInfo                 -- From the InId
      -> OutExpr
      -> Unfolding
@@ -706,6 +708,7 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
    | isLoopBreaker occ_info = False     -- If it's a loop-breaker of any kind, don't inline
                                         -- because it might be referred to "earlier"
    | isExportedId bndr      = False
+  | isInlineRule unfolding = False     -- Note [InlineRule and postInlineUnconditionally]
    | exprIsTrivial rhs     = True
    | otherwise
    = case occ_info of
@@ -767,21 +770,17 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
  
    where
      active = case getMode env of
-                  SimplGently    -> isAlwaysActive prag
-                  SimplPhase n _ -> isActive n prag
-    prag = idInlinePragma bndr
+                  SimplGently {} -> isEarlyActive act
+                       -- See Note [pre/postInlineUnconditionally in gentle mode]
+                  SimplPhase n _ -> isActive n act
+    act = idInlineActivation bndr
  
  activeInline :: SimplEnv -> OutId -> Bool
  activeInline env id
    = case getMode env of
-      SimplGently -> False
-       -- No inlining at all when doing gentle stuff,
-       -- except for local things that occur once (pre/postInlineUnconditionally)
-       -- The reason is that too little clean-up happens if you 
-       -- don't inline use-once things.   Also a bit of inlining is *good* for
-       -- full laziness; it can expose constant sub-expressions.
-       -- Example in spectral/mandel/Mandel.hs, where the mandelset 
-       -- function gets a useful let-float if you inline windowToViewport
+      SimplGently { sm_inline = inlining_on } 
+         -> inlining_on && isEarlyActive act
+       -- See Note [Gentle mode]
  
         -- NB: we used to have a second exception, for data con wrappers.
         -- On the grounds that we use gentle mode for rule LHSs, and 
@@ -790,9 +789,9 @@ activeInline env id
         -- and they are now constructed as Compulsory unfoldings (in MkId)
         -- so they'll happen anyway.
  
-      SimplPhase n _ -> isActive n prag
+      SimplPhase n _ -> isActive n act
    where
-    prag = idInlinePragma id
+    act = idInlineActivation id
  
  activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
  -- Nothing => No rules at all
@@ -801,15 +800,34 @@ activeRule dflags env
    = Nothing    -- Rewriting is off
    | otherwise
    = case getMode env of
-       SimplGently    -> Just isAlwaysActive
+      SimplGently { sm_rules = rules_on } 
+        | rules_on  -> Just isEarlyActive
+        | otherwise -> Nothing
                         -- Used to be Nothing (no rules in gentle mode)
                         -- Main motivation for changing is that I wanted
                         --      lift String ===> ...
                         -- to work in Template Haskell when simplifying
                         -- splices, so we get simpler code for literal strings
-       SimplPhase n _ -> Just (isActive n)
+      SimplPhase n _ -> Just (isActive n)
  \end{code}
  
+Note [InlineRule and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
+we lose the unfolding.  Example
+
+     -- f has InlineRule with rhs (e |> co)
+     --   where 'e' is big
+     f = e |> co
+
+Then there's a danger we'll optimise to
+
+     f' = e
+     f = f' |> co
+
+and now postInlineUnconditionally fires on f, losing its InlineRule.  Now f'
+won't inline because 'e' is too big.
+
  
  %************************************************************************
  %*                                                                     *
@@ -818,14 +836,14 @@ activeRule dflags env
  %************************************************************************
  
  \begin{code}
-mkLam :: [OutBndr] -> OutExpr -> SimplM OutExpr
+mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
  -- mkLam tries three things
  --     a) eta reduction, if that gives a trivial expression
  --     b) eta expansion [only if there are some value lambdas]
  
-mkLam [] body 
+mkLam _b [] body 
    = return body
-mkLam bndrs body
+mkLam env bndrs body
    = do { dflags <- getDOptsSmpl
         ; mkLam' dflags bndrs body }
    where
@@ -846,8 +864,10 @@ mkLam bndrs body
            ; return etad_lam }
  
        | dopt Opt_DoLambdaEtaExpansion dflags,
-       any isRuntimeVar bndrs
-      = do { body' <- tryEtaExpansion dflags body
+        not (inGentleMode env),              -- In gentle mode don't eta-expansion
+       any isRuntimeVar bndrs        -- because it can clutter up the code
+                                     -- with casts etc that may not be removed
+      = do { let body' = tryEtaExpansion dflags body
            ; return (mkLams bndrs body') }
     
        | otherwise 
@@ -1032,11 +1052,10 @@ when computing arity; and etaExpand adds the coerces as necessary when
  actually computing the expansion.
  
  \begin{code}
-tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr
+tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr
  -- There is at least one runtime binder in the binders
-tryEtaExpansion dflags body = do
-    us <- getUniquesM
-    return (etaExpand fun_arity us body (exprType body))
+tryEtaExpansion dflags body
+  = etaExpand fun_arity body
    where
      fun_arity = exprEtaExpandArity dflags body
  \end{code}
@@ -1189,7 +1208,7 @@ abstractFloats main_tvs body_env body
        = do { uniq <- getUniqueM
            ; let  poly_name = setNameUnique (idName var) uniq           -- Keep same name
                   poly_ty   = mkForAllTys tvs_here (idType var) -- But new type of course
-                 poly_id   = transferPolyIdInfo var $  -- Note [transferPolyIdInfo] in Id.lhs
+                 poly_id   = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.lhs
                               mkLocalId poly_name poly_ty 
            ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
                 -- In the olden days, it was crucial to copy the occInfo of the original var, 
@@ -1438,9 +1457,9 @@ prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rh
  
         _ -> return [(DEFAULT, [], deflt_rhs)]
  
-  | debugIsOn, isAlgTyCon tycon, [] <- tyConDataCons tycon
+  | debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon)
+       -- This can legitimately happen for type families, so don't report that
    = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
-       -- This can legitimately happen for type families
          $ return [(DEFAULT, [], deflt_rhs)]
  
  --------- Catch-all cases -----------