X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;ds=sidebyside;f=compiler%2FcoreSyn%2FCoreUnfold.lhs;h=258cd46cf26caf6e61fc54d3f3bd21fa992c1468;hb=d95ce839533391e7118257537044f01cbb1d6694;hp=b6706c14cd71318682b3e8dfaee2bdb0a3b300f4;hpb=444c62505e1e9790db08322fea8625dedd81d446;p=ghc-hetmet.git diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index b6706c1..258cd46 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -18,12 +18,10 @@ find, unsurprisingly, a Core expression. module CoreUnfold ( Unfolding, UnfoldingGuidance, -- Abstract types - noUnfolding, mkTopUnfolding, mkImplicitUnfolding, mkUnfolding, - mkCompulsoryUnfolding, seqUnfolding, - evaldUnfolding, mkOtherCon, otherCons, - unfoldingTemplate, maybeUnfoldingTemplate, - isEvaldUnfolding, isValueUnfolding, isCheapUnfolding, isCompulsoryUnfolding, - hasUnfolding, hasSomeUnfolding, neverUnfold, + noUnfolding, mkImplicitUnfolding, + mkTopUnfolding, mkUnfolding, + mkInlineRule, mkWwInlineRule, + mkCompulsoryUnfolding, couldBeSmallEnoughToInline, certainlyWillInline, smallEnoughToInline, @@ -37,7 +35,7 @@ import DynFlags import CoreSyn import PprCore () -- Instances import OccurAnal -import CoreSubst ( Subst, emptySubst, substTy, extendIdSubst, extendTvSubst +import CoreSubst ( emptySubst, substTy, extendIdSubst, extendTvSubst , lookupIdSubst, substBndr, substBndrs, substRecBndrs ) import CoreUtils import Id @@ -45,7 +43,9 @@ import DataCon import Literal import PrimOp import IdInfo +import BasicTypes ( Arity ) import Type hiding( substTy, extendTvSubst ) +import Maybes import PrelNames import Bag import FastTypes @@ -68,24 +68,37 @@ mkTopUnfolding expr = mkUnfolding True {- Top level -} expr mkImplicitUnfolding :: CoreExpr -> Unfolding -- For implicit Ids, do a tiny bit of optimising first mkImplicitUnfolding expr - = CoreUnfolding (simpleOptExpr emptySubst expr) + = CoreUnfolding (simpleOptExpr expr) True (exprIsHNF expr) (exprIsCheap expr) (calcUnfoldingGuidance opt_UF_CreationThreshold expr) +mkInlineRule :: CoreExpr -> Arity -> Unfolding +mkInlineRule expr arity + = InlineRule { uf_tmpl = simpleOptExpr expr, + uf_is_top = True, -- Conservative; this gets set more + -- accuately by the simplifier (slight hack) + -- in SimplEnv.substUnfolding + uf_arity = arity, + uf_is_value = exprIsHNF expr, + uf_worker = Nothing } + +mkWwInlineRule :: CoreExpr -> Arity -> Id -> Unfolding +mkWwInlineRule expr arity wkr + = InlineRule { uf_tmpl = simpleOptExpr expr, + uf_is_top = True, -- Conservative; see mkInlineRule + uf_arity = arity, + uf_is_value = exprIsHNF expr, + uf_worker = Just wkr } + mkUnfolding :: Bool -> CoreExpr -> Unfolding mkUnfolding top_lvl expr - = CoreUnfolding (occurAnalyseExpr expr) - top_lvl - - (exprIsHNF expr) - -- Already evaluated - - (exprIsCheap expr) - -- OK to inline inside a lambda - - (calcUnfoldingGuidance opt_UF_CreationThreshold expr) + = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr, + uf_is_top = top_lvl, + uf_is_value = exprIsHNF expr, + uf_is_cheap = exprIsCheap expr, + uf_guidance = calcUnfoldingGuidance opt_UF_CreationThreshold expr } -- Sometimes during simplification, there's a large let-bound thing -- which has been substituted, and so is now dead; so 'expr' contains -- two copies of the thing while the occurrence-analysed expression doesn't @@ -95,14 +108,6 @@ mkUnfolding top_lvl expr -- This can occasionally mean that the guidance is very pessimistic; -- it gets fixed up next round -instance Outputable Unfolding where - ppr NoUnfolding = ptext (sLit "No unfolding") - ppr (OtherCon cs) = ptext (sLit "OtherCon") <+> ppr cs - ppr (CompulsoryUnfolding e) = ptext (sLit "Compulsory") <+> ppr e - ppr (CoreUnfolding e top hnf cheap g) - = ptext (sLit "Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr g, - ppr e] - mkCompulsoryUnfolding :: CoreExpr -> Unfolding mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded = CompulsoryUnfolding (occurAnalyseExpr expr) @@ -116,75 +121,27 @@ mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded %************************************************************************ \begin{code} -instance Outputable UnfoldingGuidance where - ppr UnfoldNever = ptext (sLit "NEVER") - ppr (UnfoldIfGoodArgs v cs size discount) - = hsep [ ptext (sLit "IF_ARGS"), int v, - brackets (hsep (map int cs)), - int size, - int discount ] -\end{code} - - -\begin{code} calcUnfoldingGuidance :: Int -- bomb out if size gets bigger than this -> CoreExpr -- expression to look at -> UnfoldingGuidance calcUnfoldingGuidance bOMB_OUT_SIZE expr - = case collect_val_bndrs expr of { (inline, val_binders, body) -> + = case collectBinders expr of { (binders, body) -> let + val_binders = filter isId binders n_val_binders = length val_binders - - max_inline_size = n_val_binders+2 - -- The idea is that if there is an INLINE pragma (inline is True) - -- and there's a big body, we give a size of n_val_binders+2. This - -- This is just enough to fail the no-size-increase test in callSiteInline, - -- so that INLINE things don't get inlined into entirely boring contexts, - -- but no more. - in case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of - - TooBig - | not inline -> UnfoldNever - -- A big function with an INLINE pragma must - -- have an UnfoldIfGoodArgs guidance - | otherwise -> UnfoldIfGoodArgs n_val_binders - (map (const 0) val_binders) - max_inline_size 0 - + TooBig -> UnfoldNever SizeIs size cased_args scrut_discount - -> UnfoldIfGoodArgs - n_val_binders - (map discount_for val_binders) - final_size - (iBox scrut_discount) + -> UnfoldIfGoodArgs { ug_arity = n_val_binders + , ug_args = map discount_for val_binders + , ug_size = iBox size + , ug_res = iBox scrut_discount } where - boxed_size = iBox size - - final_size | inline = boxed_size `min` max_inline_size - | otherwise = boxed_size - - -- Sometimes an INLINE thing is smaller than n_val_binders+2. - -- A particular case in point is a constructor, which has size 1. - -- We want to inline this regardless, hence the `min` - discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc) 0 cased_args } - where - collect_val_bndrs e = go False [] e - -- We need to be a bit careful about how we collect the - -- value binders. In ptic, if we see - -- __inline_me (\x y -> e) - -- We want to say "2 value binders". Why? So that - -- we take account of information given for the arguments - - go _ rev_vbs (Note InlineMe e) = go True rev_vbs e - go inline rev_vbs (Lam b e) | isId b = go inline (b:rev_vbs) e - | otherwise = go inline rev_vbs e - go inline rev_vbs e = (inline, reverse rev_vbs, e) \end{code} \begin{code} @@ -197,21 +154,10 @@ sizeExpr :: FastInt -- Bomb out if it gets bigger than this sizeExpr bOMB_OUT_SIZE top_args expr = size_up expr where - size_up (Type _) = sizeZero -- Types cost nothing + size_up (Type _) = sizeZero -- Types cost nothing size_up (Var _) = sizeOne - - size_up (Note InlineMe _) = sizeOne -- Inline notes make it look very small - -- This can be important. If you have an instance decl like this: - -- instance Foo a => Foo [a] where - -- {-# INLINE op1, op2 #-} - -- op1 = ... - -- op2 = ... - -- then we'll get a dfun which is a pair of two INLINE lambdas - - size_up (Note _ body) = size_up body -- Other notes cost nothing - + size_up (Note _ body) = size_up body -- Notes cost nothing size_up (Cast e _) = size_up e - size_up (App fun (Type _)) = size_up fun size_up (App fun arg) = size_up_app fun [arg] @@ -484,13 +430,17 @@ couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold certainlyWillInline :: Unfolding -> Bool -- Sees if the unfolding is pretty certain to inline -certainlyWillInline (CoreUnfolding _ _ _ is_cheap (UnfoldIfGoodArgs n_vals _ size _)) +certainlyWillInline (CompulsoryUnfolding {}) = True +certainlyWillInline (InlineRule {}) = True +certainlyWillInline (CoreUnfolding + { uf_is_cheap = is_cheap + , uf_guidance = UnfoldIfGoodArgs {ug_arity = n_vals, ug_size = size}}) = is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold certainlyWillInline _ = False smallEnoughToInline :: Unfolding -> Bool -smallEnoughToInline (CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ _ size _)) +smallEnoughToInline (CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_size = size}}) = size <= opt_UF_UseThreshold smallEnoughToInline _ = False @@ -536,6 +486,10 @@ data CallCtxt = BoringCtxt -- => be keener to inline -- INVARIANT: ArgCtxt False 0 ==> BoringCtxt + | ValAppCtxt -- We're applied to at least one value arg + -- This arises when we have ((f x |> co) y) + -- Then the (f x) has argument 'x' but in a ValAppCtxt + | CaseCtxt -- We're the scrutinee of a case -- that decomposes its scrutinee @@ -543,9 +497,13 @@ instance Outputable CallCtxt where ppr BoringCtxt = ptext (sLit "BoringCtxt") ppr (ArgCtxt _ _) = ptext (sLit "ArgCtxt") ppr CaseCtxt = ptext (sLit "CaseCtxt") + ppr ValAppCtxt = ptext (sLit "ValAppCtxt") callSiteInline dflags active_inline id lone_variable arg_infos cont_info - = case idUnfolding id of { + = let + n_val_args = length arg_infos + in + case idUnfolding id of { NoUnfolding -> Nothing ; OtherCon _ -> Nothing ; @@ -556,14 +514,45 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info -- compulsory unfoldings (see MkId.lhs). -- We don't allow them to be inactive - CoreUnfolding unf_template is_top is_value is_cheap guidance -> + InlineRule { uf_tmpl = unf_template, uf_arity = arity, uf_is_top = is_top + , uf_is_value = is_value, uf_worker = mb_worker } + -> let yes_or_no | not active_inline = False + | n_val_args < arity = yes_unsat -- Not enough value args + | n_val_args == arity = yes_exact -- Exactly saturated + | otherwise = True -- Over-saturated + result | yes_or_no = Just unf_template + | otherwise = Nothing + + -- See Note [Inlining an InlineRule] + is_wrapper = isJust mb_worker + yes_unsat | is_wrapper = or arg_infos + | otherwise = False + + yes_exact = or arg_infos || interesting_saturated_call + interesting_saturated_call + = case cont_info of + BoringCtxt -> not is_top -- Note [Nested functions] + CaseCtxt -> not lone_variable || not is_value -- Note [Lone variables] + ArgCtxt {} -> arity > 0 -- Note [Inlining in ArgCtxt] + ValAppCtxt -> True -- Note [Cast then apply] + in + if dopt Opt_D_dump_inlinings dflags then + pprTrace ("Considering InlineRule for: " ++ showSDoc (ppr id)) + (vcat [text "active:" <+> ppr active_inline, + text "arg infos" <+> ppr arg_infos, + text "interesting call" <+> ppr interesting_saturated_call, + text "is value:" <+> ppr is_value, + text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) + result + else result ; + + CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value, + uf_is_cheap = is_cheap, uf_guidance = guidance } -> let result | yes_or_no = Just unf_template | otherwise = Nothing - n_val_args = length arg_infos - yes_or_no = active_inline && is_cheap && consider_safe -- We consider even the once-in-one-branch -- occurrences, because they won't all have been @@ -579,7 +568,8 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info -- work-duplication issue (the caller checks that). = case guidance of UnfoldNever -> False - UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount + UnfoldIfGoodArgs { ug_arity = n_vals_wanted, ug_args = arg_discounts + , ug_res = res_discount, ug_size = size } | enough_args && size <= (n_vals_wanted + 1) -- Inline unconditionally if there no size increase -- Size of call is n_vals_wanted (+1 for the function) @@ -610,8 +600,8 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info = case cont_info of BoringCtxt -> not is_top && n_vals_wanted > 0 -- Note [Nested functions] CaseCtxt -> not lone_variable || not is_value -- Note [Lone variables] - ArgCtxt {} -> n_vals_wanted > 0 - -- See Note [Inlining in ArgCtxt] + ArgCtxt {} -> n_vals_wanted > 0 -- Note [Inlining in ArgCtxt] + ValAppCtxt -> True -- Note [Cast then apply] small_enough = (size - discount) <= opt_UF_UseThreshold discount = computeDiscount n_vals_wanted arg_discounts @@ -619,7 +609,7 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info res_discount' = case cont_info of BoringCtxt -> 0 CaseCtxt -> res_discount - ArgCtxt _ _ -> 4 `min` res_discount + _other -> 4 `min` res_discount -- res_discount can be very large when a function returns -- construtors; but we only want to invoke that large discount -- when there's a case continuation. @@ -629,20 +619,35 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info in if dopt Opt_D_dump_inlinings dflags then - pprTrace "Considering inlining" - (ppr id <+> vcat [text "active:" <+> ppr active_inline, - text "arg infos" <+> ppr arg_infos, - text "interesting continuation" <+> ppr cont_info, - text "is value:" <+> ppr is_value, - text "is cheap:" <+> ppr is_cheap, - text "guidance" <+> ppr guidance, - text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) + pprTrace ("Considering inlining: " ++ showSDoc (ppr id)) + (vcat [text "active:" <+> ppr active_inline, + text "arg infos" <+> ppr arg_infos, + text "interesting continuation" <+> ppr cont_info, + text "is value:" <+> ppr is_value, + text "is cheap:" <+> ppr is_cheap, + text "guidance" <+> ppr guidance, + text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) result else result } \end{code} +Note [Inlining an InlineRule] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An InlineRules is used for + (a) pogrammer INLINE pragmas + (b) inlinings from worker/wrapper + +For (a) the RHS may be large, and our contract is that we *only* inline +when the function is applied to all the arguments on the LHS of the +source-code defn. (The uf_arity in the rule.) + +However for worker/wrapper it may be worth inlining even if the +arity is not satisfied (as we do in the CoreUnfolding case) so we don't +require saturation. + + Note [Nested functions] ~~~~~~~~~~~~~~~~~~~~~~~ If a function has a nested defn we also record some-benefit, on the @@ -655,6 +660,16 @@ branches. Then inlining it doesn't increase allocation, but it does increase the chance that the constructor won't be allocated at all in the branches that don't use it. +Note [Cast then apply] +~~~~~~~~~~~~~~~~~~~~~~ +Consider + myIndex = __inline_me ( (/\a. ) |> co ) + co :: (forall a. a -> a) ~ (forall a. T a) + ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ... + +We need to inline myIndex to unravel this; but the actual call (myIndex a) has +no value arguments. The ValAppCtxt gives it enough incentive to inline. + Note [Inlining in ArgCtxt] ~~~~~~~~~~~~~~~~~~~~~~~~~~ The condition (n_vals_wanted > 0) here is very important, because otherwise @@ -757,14 +772,14 @@ computeDiscount n_vals_wanted arg_discounts result_discount arg_infos \begin{code} -simpleOptExpr :: Subst -> CoreExpr -> CoreExpr +simpleOptExpr :: CoreExpr -> CoreExpr -- Return an occur-analysed and slightly optimised expression -- The optimisation is very straightforward: just -- inline non-recursive bindings that are used only once, -- or wheere the RHS is trivial -simpleOptExpr subst expr - = go subst (occurAnalyseExpr expr) +simpleOptExpr expr + = go emptySubst (occurAnalyseExpr expr) where go subst (Var v) = lookupIdSubst subst v go subst (App e1 e2) = App (go subst e1) (go subst e2)