From 196ccd219fb3b7bdd4dc24e8804da8640af83bb2 Mon Sep 17 00:00:00 2001 From: simonmar Date: Fri, 12 Aug 2005 10:45:36 +0000 Subject: [PATCH] [project @ 2005-08-12 10:45:36 by simonmar] More inlining changes from SimonPJ & me: The Plan is to avoid relying on knowledge of OneOcc occurrences after postInlineUnconditionally, so we now attempt to make use of OneOcc as far as possible in in pre/postInlineUnconditionally rather than the call site. Plenty of comments in the code with more details. This change almost always improves performance on nofib; we have one program that goes slower, which we'll investigate in due course. --- ghc/compiler/coreSyn/CoreUnfold.lhs | 12 ++++-- ghc/compiler/simplCore/SimplUtils.lhs | 72 ++++++++++++++++++++++++++------- ghc/compiler/simplCore/Simplify.lhs | 2 +- 3 files changed, 68 insertions(+), 18 deletions(-) diff --git a/ghc/compiler/coreSyn/CoreUnfold.lhs b/ghc/compiler/coreSyn/CoreUnfold.lhs index 14a0f4d..e8549a7 100644 --- a/ghc/compiler/coreSyn/CoreUnfold.lhs +++ b/ghc/compiler/coreSyn/CoreUnfold.lhs @@ -23,7 +23,7 @@ module CoreUnfold ( hasUnfolding, hasSomeUnfolding, neverUnfold, couldBeSmallEnoughToInline, - certainlyWillInline, + certainlyWillInline, smallEnoughToInline, callSiteInline ) where @@ -460,6 +460,12 @@ certainlyWillInline (CoreUnfolding _ _ _ is_cheap (UnfoldIfGoodArgs n_vals _ siz = is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold certainlyWillInline other = False + +smallEnoughToInline :: Unfolding -> Bool +smallEnoughToInline (CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ _ size _)) + = size <= opt_UF_UseThreshold +smallEnoughToInline other + = False \end{code} %************************************************************************ @@ -520,7 +526,7 @@ callSiteInline dflags active_inline inline_call occ id arg_infos interesting_con | otherwise = case occ of IAmDead -> pprTrace "callSiteInline: dead" (ppr id) False IAmALoopBreaker -> False - OneOcc in_lam _ _ -> (not in_lam || is_cheap) && consider_safe True + --OneOcc in_lam _ _ -> (not in_lam || is_cheap) && consider_safe True other -> is_cheap && consider_safe False -- we consider even the once-in-one-branch -- occurrences, because they won't all have been @@ -551,7 +557,7 @@ callSiteInline dflags active_inline inline_call occ id arg_infos interesting_con where some_benefit = or arg_infos || really_interesting_cont || - (not is_top && (once || (n_vals_wanted > 0 && enough_args))) + (not is_top && ({- once || -} (n_vals_wanted > 0 && enough_args))) -- [was (once && not in_lam)] -- If it occurs more than once, there must be -- something interesting about some argument, or the diff --git a/ghc/compiler/simplCore/SimplUtils.lhs b/ghc/compiler/simplCore/SimplUtils.lhs index 5e12c5e..975f6a5 100644 --- a/ghc/compiler/simplCore/SimplUtils.lhs +++ b/ghc/compiler/simplCore/SimplUtils.lhs @@ -30,10 +30,11 @@ import StaticFlags ( opt_UF_UpdateInPlace, opt_SimplNoPreInlining, import CoreSyn import CoreFVs ( exprFreeVars ) -import CoreUtils ( cheapEqExpr, exprType, exprIsTrivial, +import CoreUtils ( cheapEqExpr, exprType, exprIsTrivial, exprIsCheap, etaExpand, exprEtaExpandArity, bindNonRec, mkCoerce2, findDefault, exprOkForSpeculation, exprIsHNF ) +import CoreUnfold ( smallEnoughToInline ) import Id ( idType, isDataConWorkId, idOccInfo, isDictId, idArity, mkSysLocal, isDeadBinder, idNewDemandInfo, isExportedId, idUnfolding, idNewStrictness, idInlinePragma, @@ -593,9 +594,18 @@ preInlineUnconditionally env top_lvl bndr rhs prag = idInlinePragma bndr try_once in_lam int_cxt -- There's one textual occurrence - = not in_lam && (isNotTopLevel top_lvl || early_phase) - || (canInlineInLam rhs && int_cxt) - -- + | not in_lam = isNotTopLevel top_lvl || early_phase + | otherwise = int_cxt && canInlineInLam rhs + +-- Be very careful before inlining inside a lambda, becuase (a) we must not +-- invalidate occurrence information, and (b) we want to avoid pushing a +-- single allocation (here) into multiple allocations (inside lambda). +-- Inlining a *function* with a single *saturated* call would be ok, mind you. +-- || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok) +-- where +-- is_cheap = exprIsCheap rhs +-- ok = is_cheap && int_cxt + -- int_cxt The context isn't totally boring -- E.g. let f = \ab.BIG in \y. map f xs -- Don't want to substitute for f, because then we allocate @@ -608,18 +618,11 @@ preInlineUnconditionally env top_lvl bndr rhs -- canInlineInLam => free vars of rhs are (Once in_lam) or Many, -- so substituting rhs inside a lambda doesn't change the occ info. -- Sadly, not quite the same as exprIsHNF. - canInlineInLam (Var x) = occ_info_ok (idOccInfo x) canInlineInLam (Lit l) = True - canInlineInLam (Type ty) = True canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e - canInlineInLam (App e (Type _)) = canInlineInLam e canInlineInLam (Note _ e) = canInlineInLam e canInlineInLam _ = False - occ_info_ok (OneOcc in_lam _ _) = in_lam - occ_info_ok NoOccInfo = True - occ_info_ok _ = False - early_phase = case phase of SimplPhase 0 -> False other -> True @@ -669,13 +672,54 @@ our new view that inlining is like a RULE, so I'm sticking to the 'active' story for now. \begin{code} -postInlineUnconditionally :: SimplEnv -> OutId -> OccInfo -> OutExpr -> Bool -postInlineUnconditionally env bndr occ_info rhs +postInlineUnconditionally :: SimplEnv -> TopLevelFlag -> OutId -> OccInfo -> OutExpr -> Unfolding -> Bool +postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding | not active = False | isLoopBreaker occ_info = False | isExportedId bndr = False | exprIsTrivial rhs = True - | otherwise = False + | otherwise + = case occ_info of + OneOcc in_lam one_br int_cxt + -> (one_br || smallEnoughToInline unfolding) -- Small enough to dup + -- ToDo: consider discount on smallEnoughToInline if int_cxt is true + -- + -- NB: Do we want to inline arbitrarily big things becuase + -- one_br is True? that can lead to inline cascades. But + -- preInlineUnconditionlly has dealt with all the common cases + -- so perhaps it's worth the risk. Here's an example + -- let f = if b then Left (\x.BIG) else Right (\y.BIG) + -- in \y. ....f.... + -- We can't preInlineUnconditionally because that woud invalidate + -- the occ info for b. Yet f is used just once, and duplicating + -- the case work is fine (exprIsCheap). + + && ((isNotTopLevel top_lvl && not in_lam) || + -- But outside a lambda, we want to be reasonably aggressive + -- about inlining into multiple branches of case + -- e.g. let x = + -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... } + -- Inlining can be a big win if C3 is the hot-spot, even if + -- the uses in C1, C2 are not 'interesting' + -- An example that gets worse if you add int_cxt here is 'clausify' + + (isCheapUnfolding unfolding && int_cxt)) + -- isCheap => acceptable work duplication; in_lam may be true + -- int_cxt to prevent us inlining inside a lambda without some + -- good reason. See the notes on int_cxt in preInlineUnconditionally + + other -> False + -- The point here is that for *non-values* that occur + -- outside a lambda, the call-site inliner won't have + -- a chance (becuase it doesn't know that the thing + -- only occurs once). The pre-inliner won't have gotten + -- it either, if the thing occurs in more than one branch + -- So the main target is things like + -- let x = f y in + -- case v of + -- True -> case x of ... + -- False -> case x of ... + -- I'm not sure how important this is in practice where active = case getMode env of SimplGently -> isAlwaysActive prag diff --git a/ghc/compiler/simplCore/Simplify.lhs b/ghc/compiler/simplCore/Simplify.lhs index 144b26a..610882d 100644 --- a/ghc/compiler/simplCore/Simplify.lhs +++ b/ghc/compiler/simplCore/Simplify.lhs @@ -615,7 +615,7 @@ completeLazyBind :: SimplEnv -- (as usual) use the in-scope-env from the floats completeLazyBind env top_lvl old_bndr new_bndr new_rhs - | postInlineUnconditionally env new_bndr occ_info new_rhs + | postInlineUnconditionally env top_lvl new_bndr occ_info new_rhs unfolding = -- Drop the binding tick (PostInlineUnconditionally old_bndr) `thenSmpl_` returnSmpl (emptyFloats env, extendIdSubst env old_bndr (DoneEx new_rhs)) -- 1.7.10.4