X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FcoreSyn%2FCoreUnfold.lhs;h=d7ec4c718e7fe79edcfd25a66b5df36ea00ead38;hb=85f8276b368d39c93e137fa7b0a8a96ab3c6b389;hp=767006076d60c7ed9d5e07bd9965442236865cac;hpb=fa1c8a7e7013b1e9a37326b80abadec737c9347e;p=ghc-hetmet.git diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index 7670060..d7ec4c7 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -18,7 +18,8 @@ find, unsurprisingly, a Core expression. module CoreUnfold ( Unfolding, UnfoldingGuidance, -- Abstract types - noUnfolding, mkTopUnfolding, mkUnfolding, mkCompulsoryUnfolding, seqUnfolding, + noUnfolding, mkTopUnfolding, mkImplicitUnfolding, mkUnfolding, + mkCompulsoryUnfolding, seqUnfolding, evaldUnfolding, mkOtherCon, otherCons, unfoldingTemplate, maybeUnfoldingTemplate, isEvaldUnfolding, isValueUnfolding, isCheapUnfolding, isCompulsoryUnfolding, @@ -31,23 +32,24 @@ module CoreUnfold ( ) where -#include "HsVersions.h" - import StaticFlags import DynFlags import CoreSyn import PprCore () -- Instances import OccurAnal +import CoreSubst ( Subst, emptySubst, substTy, extendIdSubst, extendTvSubst + , lookupIdSubst, substBndr, substBndrs, substRecBndrs ) import CoreUtils import Id import DataCon import Literal import PrimOp import IdInfo -import Type +import Type hiding( substTy, extendTvSubst ) import PrelNames import Bag import FastTypes +import FastString import Outputable \end{code} @@ -63,6 +65,15 @@ import Outputable mkTopUnfolding :: CoreExpr -> Unfolding mkTopUnfolding expr = mkUnfolding True {- Top level -} expr +mkImplicitUnfolding :: CoreExpr -> Unfolding +-- For implicit Ids, do a tiny bit of optimising first +mkImplicitUnfolding expr + = CoreUnfolding (simpleOptExpr emptySubst expr) + True + (exprIsHNF expr) + (exprIsCheap expr) + (calcUnfoldingGuidance opt_UF_CreationThreshold expr) + mkUnfolding :: Bool -> CoreExpr -> Unfolding mkUnfolding top_lvl expr = CoreUnfolding (occurAnalyseExpr expr) @@ -85,11 +96,11 @@ mkUnfolding top_lvl expr -- it gets fixed up next round instance Outputable Unfolding where - ppr NoUnfolding = ptext SLIT("No unfolding") - ppr (OtherCon cs) = ptext SLIT("OtherCon") <+> ppr cs - ppr (CompulsoryUnfolding e) = ptext SLIT("Compulsory") <+> ppr e + ppr NoUnfolding = ptext (sLit "No unfolding") + ppr (OtherCon cs) = ptext (sLit "OtherCon") <+> ppr cs + ppr (CompulsoryUnfolding e) = ptext (sLit "Compulsory") <+> ppr e ppr (CoreUnfolding e top hnf cheap g) - = ptext SLIT("Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr g, + = ptext (sLit "Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr g, ppr e] mkCompulsoryUnfolding :: CoreExpr -> Unfolding @@ -106,9 +117,9 @@ mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded \begin{code} instance Outputable UnfoldingGuidance where - ppr UnfoldNever = ptext SLIT("NEVER") + ppr UnfoldNever = ptext (sLit "NEVER") ppr (UnfoldIfGoodArgs v cs size discount) - = hsep [ ptext SLIT("IF_ARGS"), int v, + = hsep [ ptext (sLit "IF_ARGS"), int v, brackets (hsep (map int cs)), int size, int discount ] @@ -525,13 +536,18 @@ data CallCtxt = BoringCtxt -- => be keener to inline -- INVARIANT: ArgCtxt False 0 ==> BoringCtxt + | ValAppCtxt -- We're applied to at least one value arg + -- This arises when we have ((f x |> co) y) + -- Then the (f x) has argument 'x' but in a ValAppCtxt + | CaseCtxt -- We're the scrutinee of a case -- that decomposes its scrutinee instance Outputable CallCtxt where - ppr BoringCtxt = ptext SLIT("BoringCtxt") - ppr (ArgCtxt _ _) = ptext SLIT("ArgCtxt") - ppr CaseCtxt = ptext SLIT("CaseCtxt") + ppr BoringCtxt = ptext (sLit "BoringCtxt") + ppr (ArgCtxt _ _) = ptext (sLit "ArgCtxt") + ppr CaseCtxt = ptext (sLit "CaseCtxt") + ppr ValAppCtxt = ptext (sLit "ValAppCtxt") callSiteInline dflags active_inline id lone_variable arg_infos cont_info = case idUnfolding id of { @@ -575,10 +591,13 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info -> True | otherwise - -> some_benefit && small_enough + -> some_benefit && small_enough && inline_enough_args where enough_args = n_val_args >= n_vals_wanted + inline_enough_args = + not (dopt Opt_InlineIfEnoughArgs dflags) || enough_args + some_benefit = or arg_infos || really_interesting_cont -- There must be something interesting @@ -596,8 +615,8 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info = case cont_info of BoringCtxt -> not is_top && n_vals_wanted > 0 -- Note [Nested functions] CaseCtxt -> not lone_variable || not is_value -- Note [Lone variables] - ArgCtxt {} -> True - -- Was: n_vals_wanted > 0; but see test eyeball/inline1.hs + ArgCtxt {} -> n_vals_wanted > 0 -- Note [Inlining in ArgCtxt] + ValAppCtxt -> True -- Note [Cast then apply] small_enough = (size - discount) <= opt_UF_UseThreshold discount = computeDiscount n_vals_wanted arg_discounts @@ -605,7 +624,7 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info res_discount' = case cont_info of BoringCtxt -> 0 CaseCtxt -> res_discount - ArgCtxt _ _ -> 4 `min` res_discount + _other -> 4 `min` res_discount -- res_discount can be very large when a function returns -- construtors; but we only want to invoke that large discount -- when there's a case continuation. @@ -641,6 +660,29 @@ branches. Then inlining it doesn't increase allocation, but it does increase the chance that the constructor won't be allocated at all in the branches that don't use it. +Note [Cast then apply] +~~~~~~~~~~~~~~~~~~~~~~ +Consider + myIndex = __inline_me ( (/\a. ) |> co ) + co :: (forall a. a -> a) ~ (forall a. T a) + ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ... + +We need to inline myIndex to unravel this; but the actual call (myIndex a) has +no value arguments. The ValAppCtxt gives it enough incentive to inline. + +Note [Inlining in ArgCtxt] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +The condition (n_vals_wanted > 0) here is very important, because otherwise +we end up inlining top-level stuff into useless places; eg + x = I# 3# + f = \y. g x +This can make a very big difference: it adds 16% to nofib 'integer' allocs, +and 20% to 'power'. + +At one stage I replaced this condition by 'True' (leading to the above +slow-down). The motivation was test eyeball/inline1.hs; but that seems +to work ok now. + Note [Lone variables] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The "lone-variable" case is important. I spent ages messing about @@ -721,3 +763,75 @@ computeDiscount n_vals_wanted arg_discounts result_discount arg_infos mk_arg_discount discount is_evald | is_evald = discount | otherwise = 0 \end{code} + +%************************************************************************ +%* * + The Very Simple Optimiser +%* * +%************************************************************************ + + +\begin{code} +simpleOptExpr :: Subst -> CoreExpr -> CoreExpr +-- Return an occur-analysed and slightly optimised expression +-- The optimisation is very straightforward: just +-- inline non-recursive bindings that are used only once, +-- or wheere the RHS is trivial + +simpleOptExpr subst expr + = go subst (occurAnalyseExpr expr) + where + go subst (Var v) = lookupIdSubst subst v + go subst (App e1 e2) = App (go subst e1) (go subst e2) + go subst (Type ty) = Type (substTy subst ty) + go _ (Lit lit) = Lit lit + go subst (Note note e) = Note note (go subst e) + go subst (Cast e co) = Cast (go subst e) (substTy subst co) + go subst (Let bind body) = go_bind subst bind body + go subst (Lam bndr body) = Lam bndr' (go subst' body) + where + (subst', bndr') = substBndr subst bndr + + go subst (Case e b ty as) = Case (go subst e) b' + (substTy subst ty) + (map (go_alt subst') as) + where + (subst', b') = substBndr subst b + + + ---------------------- + go_alt subst (con, bndrs, rhs) = (con, bndrs', go subst' rhs) + where + (subst', bndrs') = substBndrs subst bndrs + + ---------------------- + go_bind subst (Rec prs) body = Let (Rec (bndrs' `zip` rhss')) + (go subst' body) + where + (bndrs, rhss) = unzip prs + (subst', bndrs') = substRecBndrs subst bndrs + rhss' = map (go subst') rhss + + go_bind subst (NonRec b r) body = go_nonrec subst b (go subst r) body + + ---------------------- + go_nonrec subst b (Type ty') body + | isTyVar b = go (extendTvSubst subst b ty') body + -- let a::* = TYPE ty in + go_nonrec subst b r' body + | isId b -- let x = e in + , exprIsTrivial r' || safe_to_inline (idOccInfo b) + = go (extendIdSubst subst b r') body + go_nonrec subst b r' body + = Let (NonRec b' r') (go subst' body) + where + (subst', b') = substBndr subst b + + ---------------------- + -- Unconditionally safe to inline + safe_to_inline :: OccInfo -> Bool + safe_to_inline IAmDead = True + safe_to_inline (OneOcc in_lam one_br _) = not in_lam && one_br + safe_to_inline (IAmALoopBreaker {}) = False + safe_to_inline NoOccInfo = False +\end{code} \ No newline at end of file