From d8c0a66c1e5d357eae882963ee6834f485868d22 Mon Sep 17 00:00:00 2001 From: "simonpj@microsoft.com" Date: Fri, 25 Jan 2008 10:46:16 +0000 Subject: [PATCH] Be a little keener to inline This is really a bug. A saturated call in an "interesting" context should inline, but there was a strange "n_val_args > 0" condition, which was stopping it. Reported by Roman. --- compiler/coreSyn/CoreUnfold.lhs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index 9617c59..d8e0cb0 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -496,7 +496,7 @@ If the thing is in WHNF, there's no danger of duplicating work, so we can inline if it occurs once, or is small NOTE: we don't want to inline top-level functions that always diverge. -It just makes the code bigger. Tt turns out that the convenient way to prevent +It just makes the code bigger. It turns out that the convenient way to prevent them inlining is to give them a NOINLINE pragma, which we do in StrictAnal.addStrictnessInfoToTopId @@ -562,10 +562,10 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info -> True | otherwise - -> some_benefit && small_enough - + -> some_benefit && small_enough where enough_args = n_val_args >= n_vals_wanted + -- Note [Enough args] some_benefit = or arg_infos || really_interesting_cont -- There must be something interesting @@ -583,7 +583,7 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info = case cont_info of BoringCont -> not is_top && n_vals_wanted > 0 -- Note [Nested functions] CaseCont -> not lone_variable || not is_value -- Note [Lone variables] - InterestingCont -> n_vals_wanted > 0 + InterestingCont -> True -- Something else interesting about continuation small_enough = (size - discount) <= opt_UF_UseThreshold discount = computeDiscount n_vals_wanted arg_discounts @@ -604,7 +604,8 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info pprTrace "Considering inlining" (ppr id <+> vcat [text "active:" <+> ppr active_inline, text "arg infos" <+> ppr arg_infos, - text "interesting continuation" <+> ppr cont_info, + text "interesting continuation" <+> ppr cont_info <+> + ppr n_val_args, text "is value:" <+> ppr is_value, text "is cheap:" <+> ppr is_cheap, text "guidance" <+> ppr guidance, @@ -615,6 +616,16 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info } \end{code} +Note [Enough args] +~~~~~~~~~~~~~~~~~~ +At one stage we considered only inlining a function that has enough +arguments to saturate its arity. But we can lose from this. For +example (f . g) might not be a saturated application of (.), but +nevertheless f and g might usefully optimise with each other if we +inlined (.) and f and g. + +Current story (Jan08): inline even if not saturated. + Note [Nested functions] ~~~~~~~~~~~~~~~~~~~~~~~ If a function has a nested defn we also record some-benefit, on the @@ -628,7 +639,7 @@ increase the chance that the constructor won't be allocated at all in the branches that don't use it. Note [Lone variables] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~ The "lone-variable" case is important. I spent ages messing about with unsatisfactory varaints, but this is nice. The idea is that if a variable appears all alone -- 1.7.10.4