X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FcoreSyn%2FCoreUnfold.lhs;h=f32d5b1482c386aaf21d6979c22b8b7ad99e0962;hp=0c7e9e485b4a0a2381a77924c3e1a2ab9e65bf5c;hb=72462499b891d5779c19f3bda03f96e24f9554ae;hpb=ad23a496a860063ab01025051d9c9baf45725a61 diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index 0c7e9e4..f32d5b1 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -18,12 +18,10 @@ find, unsurprisingly, a Core expression. module CoreUnfold ( Unfolding, UnfoldingGuidance, -- Abstract types - noUnfolding, mkTopUnfolding, mkImplicitUnfolding, mkUnfolding, - mkCompulsoryUnfolding, seqUnfolding, - evaldUnfolding, mkOtherCon, otherCons, - unfoldingTemplate, maybeUnfoldingTemplate, - isEvaldUnfolding, isValueUnfolding, isExpandableUnfolding, isCompulsoryUnfolding, - hasUnfolding, hasSomeUnfolding, neverUnfold, + noUnfolding, mkImplicitUnfolding, + mkTopUnfolding, mkUnfolding, mkCoreUnfolding, + mkInlineRule, mkWwInlineRule, + mkCompulsoryUnfolding, mkDFunUnfolding, interestingArg, ArgSummary(..), @@ -32,24 +30,32 @@ module CoreUnfold ( callSiteInline, CallCtxt(..), + exprIsConApp_maybe + ) where +#include "HsVersions.h" + import StaticFlags import DynFlags import CoreSyn import PprCore () -- Instances import OccurAnal -import CoreSubst ( Subst, emptySubst, substTy, extendIdSubst, extendTvSubst - , lookupIdSubst, substBndr, substBndrs, substRecBndrs ) +import CoreSubst hiding( substTy ) import CoreUtils import Id import DataCon +import TyCon import Literal import PrimOp import IdInfo -import Type hiding( substTy, extendTvSubst ) +import BasicTypes ( Arity ) +import TcType ( tcSplitDFunTy ) +import Type +import Coercion import PrelNames import Bag +import Util import FastTypes import FastString import Outputable @@ -69,28 +75,34 @@ mkTopUnfolding expr = mkUnfolding True {- Top level -} expr mkImplicitUnfolding :: CoreExpr -> Unfolding -- For implicit Ids, do a tiny bit of optimising first -mkImplicitUnfolding expr - = CoreUnfolding (simpleOptExpr emptySubst expr) - True - (exprIsHNF expr) - (exprIsCheap expr) - (exprIsExpandable expr) - (calcUnfoldingGuidance opt_UF_CreationThreshold expr) - -mkUnfolding :: Bool -> CoreExpr -> Unfolding -mkUnfolding top_lvl expr - = CoreUnfolding (occurAnalyseExpr expr) - top_lvl - - (exprIsHNF expr) - -- Already evaluated +mkImplicitUnfolding expr = mkTopUnfolding (simpleOptExpr expr) - (exprIsCheap expr) - -- OK to inline inside a lambda +mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding +mkWwInlineRule id = mkInlineRule (InlWrapper id) - (exprIsExpandable expr) +mkInlineRule :: InlineRuleInfo -> CoreExpr -> Arity -> Unfolding +mkInlineRule inl_info expr arity + = mkCoreUnfolding True -- Note [Top-level flag on inline rules] + expr' arity + (InlineRule { ug_ir_info = inl_info, ug_small = small }) + where + expr' = simpleOptExpr expr + small = case calcUnfoldingGuidance (arity+1) expr' of + (arity_e, UnfoldIfGoodArgs { ug_size = size_e }) + -> uncondInline arity_e size_e + _other {- actually UnfoldNever -} -> False + +-- Note [Top-level flag on inline rules] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Slight hack: note that mk_inline_rules conservatively sets the +-- top-level flag to True. It gets set more accurately by the simplifier +-- Simplify.simplUnfolding. - (calcUnfoldingGuidance opt_UF_CreationThreshold expr) +mkUnfolding :: Bool -> CoreExpr -> Unfolding +mkUnfolding top_lvl expr + = mkCoreUnfolding top_lvl expr arity guidance + where + (arity, guidance) = calcUnfoldingGuidance opt_UF_CreationThreshold expr -- Sometimes during simplification, there's a large let-bound thing -- which has been substituted, and so is now dead; so 'expr' contains -- two copies of the thing while the occurrence-analysed expression doesn't @@ -100,17 +112,23 @@ mkUnfolding top_lvl expr -- This can occasionally mean that the guidance is very pessimistic; -- it gets fixed up next round -instance Outputable Unfolding where - ppr NoUnfolding = ptext (sLit "No unfolding") - ppr (OtherCon cs) = ptext (sLit "OtherCon") <+> ppr cs - ppr (CompulsoryUnfolding e) = ptext (sLit "Compulsory") <+> ppr e - ppr (CoreUnfolding e top hnf cheap expable g) - = ptext (sLit "Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr expable <+> ppr g, - ppr e] +mkCoreUnfolding :: Bool -> CoreExpr -> Arity -> UnfoldingGuidance -> Unfolding +-- Occurrence-analyses the expression before capturing it +mkCoreUnfolding top_lvl expr arity guidance + = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr, + uf_arity = arity, + uf_is_top = top_lvl, + uf_is_value = exprIsHNF expr, + uf_is_cheap = exprIsCheap expr, + uf_expandable = exprIsExpandable expr, + uf_guidance = guidance } + +mkDFunUnfolding :: DataCon -> [Id] -> Unfolding +mkDFunUnfolding con ops = DFunUnfolding con (map Var ops) mkCompulsoryUnfolding :: CoreExpr -> Unfolding mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded - = CompulsoryUnfolding (occurAnalyseExpr expr) + = mkCoreUnfolding True expr 0 UnfoldAlways -- Arity of unfolding doesn't matter \end{code} @@ -121,75 +139,26 @@ mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded %************************************************************************ \begin{code} -instance Outputable UnfoldingGuidance where - ppr UnfoldNever = ptext (sLit "NEVER") - ppr (UnfoldIfGoodArgs v cs size discount) - = hsep [ ptext (sLit "IF_ARGS"), int v, - brackets (hsep (map int cs)), - int size, - int discount ] -\end{code} - - -\begin{code} calcUnfoldingGuidance :: Int -- bomb out if size gets bigger than this -> CoreExpr -- expression to look at - -> UnfoldingGuidance + -> (Arity, UnfoldingGuidance) calcUnfoldingGuidance bOMB_OUT_SIZE expr - = case collect_val_bndrs expr of { (inline, val_binders, body) -> + = case collectBinders expr of { (binders, body) -> let + val_binders = filter isId binders n_val_binders = length val_binders - - max_inline_size = n_val_binders+2 - -- The idea is that if there is an INLINE pragma (inline is True) - -- and there's a big body, we give a size of n_val_binders+2. This - -- This is just enough to fail the no-size-increase test in callSiteInline, - -- so that INLINE things don't get inlined into entirely boring contexts, - -- but no more. - in case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of - - TooBig - | not inline -> UnfoldNever - -- A big function with an INLINE pragma must - -- have an UnfoldIfGoodArgs guidance - | otherwise -> UnfoldIfGoodArgs n_val_binders - (map (const 0) val_binders) - max_inline_size 0 - + TooBig -> (n_val_binders, UnfoldNever) SizeIs size cased_args scrut_discount - -> UnfoldIfGoodArgs - n_val_binders - (map discount_for val_binders) - final_size - (iBox scrut_discount) + -> (n_val_binders, UnfoldIfGoodArgs { ug_args = map discount_for val_binders + , ug_size = iBox size + , ug_res = iBox scrut_discount }) where - boxed_size = iBox size - - final_size | inline = boxed_size `min` max_inline_size - | otherwise = boxed_size - - -- Sometimes an INLINE thing is smaller than n_val_binders+2. - -- A particular case in point is a constructor, which has size 1. - -- We want to inline this regardless, hence the `min` - discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc) 0 cased_args - } - where - collect_val_bndrs e = go False [] e - -- We need to be a bit careful about how we collect the - -- value binders. In ptic, if we see - -- __inline_me (\x y -> e) - -- We want to say "2 value binders". Why? So that - -- we take account of information given for the arguments - - go _ rev_vbs (Note InlineMe e) = go True rev_vbs e - go inline rev_vbs (Lam b e) | isId b = go inline (b:rev_vbs) e - | otherwise = go inline rev_vbs e - go inline rev_vbs e = (inline, reverse rev_vbs, e) + } \end{code} Note [Computing the size of an expression] @@ -222,18 +191,28 @@ Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's a function call to account for. Notice also that constructor applications are very cheap, because exposing them to a caller is so valuable. -Thing to watch out for - -* We inline *unconditionally* if inlined thing is smaller (using sizeExpr) - than the thing it's replacing. Notice that +Note [Unconditional inlining] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We inline *unconditionally* if inlined thing is smaller (using sizeExpr) +than the thing it's replacing. Notice that (f x) --> (g 3) -- YES, unconditionally (f x) --> x : [] -- YES, *even though* there are two -- arguments to the cons x --> g 3 -- NO x --> Just v -- NO - It's very important not to unconditionally replace a variable by - a non-atomic term. +It's very important not to unconditionally replace a variable by +a non-atomic term. + +\begin{code} +uncondInline :: Arity -> Int -> Bool +-- Inline unconditionally if there no size increase +-- Size of call is arity (+1 for the function) +-- See Note [Unconditional inlining] +uncondInline arity size + | arity == 0 = size == 0 + | otherwise = size <= arity + 1 +\end{code} \begin{code} @@ -248,20 +227,12 @@ sizeExpr :: FastInt -- Bomb out if it gets bigger than this sizeExpr bOMB_OUT_SIZE top_args expr = size_up expr where + size_up (Cast e _) = size_up e + size_up (Note _ e) = size_up e size_up (Type _) = sizeZero -- Types cost nothing size_up (Lit lit) = sizeN (litSize lit) - size_up (Var f) = size_up_call f 0 -- Make sure we get constructor + size_up (Var f) = size_up_call f [] -- Make sure we get constructor -- discounts even on nullary constructors - size_up (Cast e _) = size_up e - - size_up (Note InlineMe _) = sizeOne -- Inline notes make it look very small - -- This can be important. If you have an instance decl like this: - -- instance Foo a => Foo [a] where - -- {-# INLINE op1, op2 #-} - -- op1 = ... - -- op2 = ... - -- then we'll get a dfun which is a pair of two INLINE lambdas - size_up (Note _ body) = size_up body -- Other notes cost nothing size_up (App fun (Type _)) = size_up fun size_up (App fun arg) = size_up_app fun [arg] @@ -324,17 +295,18 @@ sizeExpr bOMB_OUT_SIZE top_args expr | isTypeArg arg = size_up_app fun args | otherwise = size_up_app fun (arg:args) `addSize` nukeScrutDiscount (size_up arg) - size_up_app (Var fun) args = size_up_call fun (length args) + size_up_app (Var fun) args = size_up_call fun args size_up_app other args = size_up other `addSizeN` length args ------------ - size_up_call :: Id -> Int -> ExprSize - size_up_call fun n_val_args + size_up_call :: Id -> [CoreExpr] -> ExprSize + size_up_call fun val_args = case idDetails fun of FCallId _ -> sizeN opt_UF_DearOp - DataConWorkId dc -> conSize dc n_val_args - PrimOpId op -> primOpSize op n_val_args - _ -> funSize top_args fun n_val_args + DataConWorkId dc -> conSize dc (length val_args) + PrimOpId op -> primOpSize op (length val_args) + ClassOpId _ -> classOpSize top_args val_args + _ -> funSize top_args fun (length val_args) ------------ size_up_alt (_con, _bndrs, rhs) = size_up rhs @@ -365,6 +337,22 @@ litSize _other = 0 -- Must match size of nullary constructors -- Key point: if x |-> 4, then x must inline unconditionally -- (eg via case binding) +classOpSize :: [Id] -> [CoreExpr] -> ExprSize +-- See Note [Conlike is interesting] +classOpSize _ [] + = sizeZero +classOpSize top_args (arg1 : other_args) + = SizeIs (iUnbox size) arg_discount (_ILIT(0)) + where + size = 2 + length other_args + -- If the class op is scrutinising a lambda bound dictionary then + -- give it a discount, to encourage the inlining of this function + -- The actual discount is rather arbitrarily chosen + arg_discount = case arg1 of + Var dict | dict `elem` top_args + -> unitBag (dict, opt_UF_DictDiscount) + _other -> emptyBag + funSize :: [Id] -> Id -> Int -> ExprSize -- Size for functions that are not constructors or primops -- Note [Function applications] @@ -450,6 +438,35 @@ lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount) lamScrutDiscount TooBig = TooBig \end{code} +Note [Discounts and thresholds] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Constants for discounts and thesholds are defined in main/StaticFlags, +all of form opt_UF_xxxx. They are: + +opt_UF_CreationThreshold (45) + At a definition site, if the unfolding is bigger than this, we + may discard it altogether + +opt_UF_UseThreshold (6) + At a call site, if the unfolding, less discounts, is smaller than + this, then it's small enough inline + +opt_UF_KeennessFactor (1.5) + Factor by which the discounts are multiplied before + subtracting from size + +opt_UF_DictDiscount (1) + The discount for each occurrence of a dictionary argument + as an argument of a class method. Should be pretty small + else big functions may get inlined + +opt_UF_FunAppDiscount (6) + Discount for a function argument that is applied. Quite + large, because if we inline we avoid the higher-order call. + +opt_UF_DearOp (4) + The size of a foreign call or not-dupable PrimOp + Note [Function applications] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -508,52 +525,38 @@ sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0)) %* * %************************************************************************ -We have very limited information about an unfolding expression: (1)~so -many type arguments and so many value arguments expected---for our -purposes here, we assume we've got those. (2)~A ``size'' or ``cost,'' -a single integer. (3)~An ``argument info'' vector. For this, what we -have at the moment is a Boolean per argument position that says, ``I -will look with great favour on an explicit constructor in this -position.'' (4)~The ``discount'' to subtract if the expression -is being scrutinised. - -Assuming we have enough type- and value arguments (if not, we give up -immediately), then we see if the ``discounted size'' is below some -(semi-arbitrary) threshold. It works like this: for every argument -position where we're looking for a constructor AND WE HAVE ONE in our -hands, we get a (again, semi-arbitrary) discount [proportion to the -number of constructors in the type being scrutinized]. - -If we're in the context of a scrutinee ( \tr{(case of A .. -> ...;.. )}) -and the expression in question will evaluate to a constructor, we use -the computed discount size *for the result only* rather than -computing the argument discounts. Since we know the result of -the expression is going to be taken apart, discounting its size -is more accurate (see @sizeExpr@ above for how this discount size -is computed). - -We use this one to avoid exporting inlinings that we ``couldn't possibly -use'' on the other side. Can be overridden w/ flaggery. -Just the same as smallEnoughToInline, except that it has no actual arguments. +We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that +we ``couldn't possibly use'' on the other side. Can be overridden w/ +flaggery. Just the same as smallEnoughToInline, except that it has no +actual arguments. \begin{code} couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool -couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold rhs of - UnfoldNever -> False - _ -> True - -certainlyWillInline :: Unfolding -> Bool - -- Sees if the unfolding is pretty certain to inline -certainlyWillInline (CoreUnfolding _ _ _ is_cheap _ (UnfoldIfGoodArgs n_vals _ size _)) - = is_cheap && size - (n_vals+1) <= opt_UF_UseThreshold -certainlyWillInline _ - = False +couldBeSmallEnoughToInline threshold rhs + = case calcUnfoldingGuidance threshold rhs of + (_, UnfoldNever) -> False + _ -> True +---------------- smallEnoughToInline :: Unfolding -> Bool -smallEnoughToInline (CoreUnfolding _ _ _ _ _ (UnfoldIfGoodArgs _ _ size _)) +smallEnoughToInline (CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_size = size}}) = size <= opt_UF_UseThreshold smallEnoughToInline _ = False + +---------------- +certainlyWillInline :: Unfolding -> Bool + -- Sees if the unfolding is pretty certain to inline +certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance }) + = case guidance of + UnfoldAlways {} -> True + UnfoldNever -> False + InlineRule {} -> True + UnfoldIfGoodArgs { ug_size = size} + -> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold + +certainlyWillInline _ + = False \end{code} %************************************************************************ @@ -610,87 +613,81 @@ data CallCtxt = BoringCtxt instance Outputable CallCtxt where ppr BoringCtxt = ptext (sLit "BoringCtxt") - ppr (ArgCtxt _ _) = ptext (sLit "ArgCtxt") + ppr (ArgCtxt rules disc) = ptext (sLit "ArgCtxt") <> ppr (rules,disc) ppr CaseCtxt = ptext (sLit "CaseCtxt") ppr ValAppCtxt = ptext (sLit "ValAppCtxt") callSiteInline dflags active_inline id lone_variable arg_infos cont_info - = case idUnfolding id of { - NoUnfolding -> Nothing ; - OtherCon _ -> Nothing ; - - CompulsoryUnfolding unf_template -> Just unf_template ; - -- CompulsoryUnfolding => there is no top-level binding - -- for these things, so we must inline it. - -- Only a couple of primop-like things have - -- compulsory unfoldings (see MkId.lhs). - -- We don't allow them to be inactive - - CoreUnfolding unf_template is_top is_value is_cheap is_expable guidance -> - + = let + n_val_args = length arg_infos + in + case idUnfolding id of { + NoUnfolding -> Nothing ; + OtherCon _ -> Nothing ; + DFunUnfolding {} -> Nothing ; -- Never unfold a DFun + CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value, + uf_is_cheap = is_cheap, uf_arity = uf_arity, uf_guidance = guidance } -> + -- uf_arity will typically be equal to (idArity id), + -- but may be less for InlineRules let result | yes_or_no = Just unf_template | otherwise = Nothing - n_val_args = length arg_infos - - yes_or_no = active_inline && is_cheap && consider_safe - -- We consider even the once-in-one-branch - -- occurrences, because they won't all have been - -- caught by preInlineUnconditionally. In particular, - -- if the occurrence is once inside a lambda, and the - -- rhs is cheap but not a manifest lambda, then - -- pre-inline will not have inlined it for fear of - -- invalidating the occurrence info in the rhs. - - consider_safe - -- consider_safe decides whether it's a good idea to - -- inline something, given that there's no - -- work-duplication issue (the caller checks that). + interesting_args = any nonTriv arg_infos + -- NB: (any nonTriv arg_infos) looks at the + -- over-saturated args too which is "wrong"; + -- but if over-saturated we inline anyway. + + -- some_benefit is used when the RHS is small enough + -- and the call has enough (or too many) value + -- arguments (ie n_val_args >= arity). But there must + -- be *something* interesting about some argument, or the + -- result context, to make it worth inlining + some_benefit = interesting_args + || n_val_args > uf_arity -- Over-saturated + || interesting_saturated_call -- Exactly saturated + + interesting_saturated_call + = case cont_info of + BoringCtxt -> not is_top && uf_arity > 0 -- Note [Nested functions] + CaseCtxt -> not (lone_variable && is_value) -- Note [Lone variables] + ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt] + ValAppCtxt -> True -- Note [Cast then apply] + + yes_or_no = case guidance of UnfoldNever -> False - UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount - | uncond_inline -> True - | otherwise -> some_benefit && small_enough && inline_enough_args - - where - -- Inline unconditionally if there no size increase - -- Size of call is n_vals_wanted (+1 for the function) - uncond_inline - | n_vals_wanted == 0 = size == 0 - | otherwise = enough_args && (size <= n_vals_wanted + 1) - - enough_args = n_val_args >= n_vals_wanted - inline_enough_args = - not (dopt Opt_InlineIfEnoughArgs dflags) || enough_args - - - some_benefit = any nonTriv arg_infos || really_interesting_cont - -- There must be something interesting - -- about some argument, or the result - -- context, to make it worth inlining - - -- NB: (any nonTriv arg_infos) looks at the over-saturated - -- args too which is wrong; but if over-saturated - -- we'll probably inline anyway. - - really_interesting_cont - | n_val_args < n_vals_wanted = False -- Too few args - | n_val_args == n_vals_wanted = interesting_saturated_call - | otherwise = True -- Extra args - -- really_interesting_cont tells if the result of the - -- call is in an interesting context. - - interesting_saturated_call - = case cont_info of - BoringCtxt -> not is_top && n_vals_wanted > 0 -- Note [Nested functions] - CaseCtxt -> not lone_variable || not is_value -- Note [Lone variables] - ArgCtxt {} -> n_vals_wanted > 0 -- Note [Inlining in ArgCtxt] - ValAppCtxt -> True -- Note [Cast then apply] - - small_enough = (size - discount) <= opt_UF_UseThreshold - discount = computeDiscount n_vals_wanted arg_discounts - res_discount arg_infos cont_info + + UnfoldAlways -> True + -- UnfoldAlways => there is no top-level binding for + -- these things, so we must inline it. Only a few + -- primop-like things have compulsory unfoldings (see + -- MkId.lhs). Ignore is_active because we want to + -- inline even if SimplGently is on. + + InlineRule { ug_ir_info = inl_info, ug_small = uncond_inline } + | not active_inline -> False + | n_val_args < uf_arity -> yes_unsat -- Not enough value args + | uncond_inline -> True -- Note [INLINE for small functions] + | otherwise -> some_benefit -- Saturated or over-saturated + where + -- See Note [Inlining an InlineRule] + yes_unsat = case inl_info of + InlSat -> False + _other -> interesting_args + + UnfoldIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size } + | not active_inline -> False + | not is_cheap -> False + | n_val_args < uf_arity -> interesting_args && small_enough + -- Note [Unsaturated applications] + | uncondInline uf_arity size -> True + | otherwise -> some_benefit && small_enough + + where + small_enough = (size - discount) <= opt_UF_UseThreshold + discount = computeDiscount uf_arity arg_discounts + res_discount arg_infos cont_info in if dopt Opt_D_dump_inlinings dflags then @@ -700,7 +697,6 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info text "interesting continuation" <+> ppr cont_info, text "is value:" <+> ppr is_value, text "is cheap:" <+> ppr is_cheap, - text "is expandable:" <+> ppr is_expable, text "guidance" <+> ppr guidance, text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) result @@ -709,6 +705,44 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info } \end{code} +Note [Unsaturated applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When a call is not saturated, we *still* inline if one of the +arguments has interesting structure. That's sometimes very important. +A good example is the Ord instance for Bool in Base: + + Rec { + $fOrdBool =GHC.Classes.D:Ord + @ Bool + ... + $cmin_ajX + + $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool + $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool + } + +But the defn of GHC.Classes.$dmmin is: + + $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a + {- Arity: 3, HasNoCafRefs, Strictness: SLL, + Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a -> + case @ a GHC.Classes.<= @ a $dOrd x y of wild { + GHC.Bool.False -> y GHC.Bool.True -> x }) -} + +We *really* want to inline $dmmin, even though it has arity 3, in +order to unravel the recursion. + + +Note [INLINE for small functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider {-# INLINE f #-} + f x = Just x + g y = f y +Then f's RHS is no larger than its LHS, so we should inline it +into even the most boring context. (We do so if there is no INLINE +pragma!) That's the reason for the 'inl_small' flag on an InlineRule. + + Note [Things to watch] ~~~~~~~~~~~~~~~~~~~~~~ * { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... } @@ -720,6 +754,21 @@ Note [Things to watch] Make sure that x does not inline unconditionally! Lest we get extra allocation. +Note [Inlining an InlineRule] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An InlineRules is used for + (a) pogrammer INLINE pragmas + (b) inlinings from worker/wrapper + +For (a) the RHS may be large, and our contract is that we *only* inline +when the function is applied to all the arguments on the LHS of the +source-code defn. (The uf_arity in the rule.) + +However for worker/wrapper it may be worth inlining even if the +arity is not satisfied (as we do in the CoreUnfolding case) so we don't +require saturation. + + Note [Nested functions] ~~~~~~~~~~~~~~~~~~~~~~~ If a function has a nested defn we also record some-benefit, on the @@ -744,7 +793,7 @@ no value arguments. The ValAppCtxt gives it enough incentive to inline. Note [Inlining in ArgCtxt] ~~~~~~~~~~~~~~~~~~~~~~~~~~ -The condition (n_vals_wanted > 0) here is very important, because otherwise +The condition (arity > 0) here is very important, because otherwise we end up inlining top-level stuff into useless places; eg x = I# 3# f = \y. g x @@ -760,11 +809,13 @@ Note [Lone variables] The "lone-variable" case is important. I spent ages messing about with unsatisfactory varaints, but this is nice. The idea is that if a variable appears all alone - as an arg of lazy fn, or rhs Stop - as scrutinee of a case Select - as arg of a strict fn ArgOf + + as an arg of lazy fn, or rhs BoringCtxt + as scrutinee of a case CaseCtxt + as arg of a fn ArgCtxt AND it is bound to a value + then we should not inline it (unless there is some other reason, e.g. is is the sole occurrence). That is what is happening at the use of 'lone_variable' in 'interesting_saturated_call'. @@ -798,6 +849,11 @@ However, watch out: important: in the NDP project, 'bar' generates a closure data structure rather than a list. + So the non-inlining of lone_variables should only apply if the + unfolding is regarded as cheap; because that is when exprIsConApp_maybe + looks through the unfolding. Hence the "&& is_cheap" in the + InlineRule branch. + * Even a type application or coercion isn't a lone variable. Consider case $fMonadST @ RealWorld of { :DMonad a b c -> c } @@ -873,10 +929,21 @@ But we don't regard (f x y) as interesting, unless f is unsaturated. If it's saturated and f hasn't inlined, then it's probably not going to now! +Note [Conlike is interesting] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f d = ...((*) d x y)... + ... f (df d')... +where df is con-like. Then we'd really like to inline so that the +rule for (*) (df d) can fire. To do this + a) we give a discount for being an argument of a class-op (eg (*) d) + b) we say that a con-like argument (eg (df d)) is interesting + \begin{code} data ArgSummary = TrivArg -- Nothing interesting | NonTrivArg -- Arg has structure | ValueArg -- Arg is a con-app or PAP + -- ..or con-like. Note [Conlike is interesting] interestingArg :: CoreExpr -> ArgSummary -- See Note [Interesting arguments] @@ -885,7 +952,8 @@ interestingArg e = go e 0 -- n is # value args to which the expression is applied go (Lit {}) _ = ValueArg go (Var v) n - | isDataConWorkId v = ValueArg + | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that + -- data constructors here | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding | n > 0 = NonTrivArg -- Saturated or unknown call | evald_unfolding = ValueArg -- n==0; look for a value @@ -910,75 +978,169 @@ nonTriv TrivArg = False nonTriv _ = True \end{code} - %************************************************************************ %* * - The Very Simple Optimiser + exprIsConApp_maybe %* * %************************************************************************ +Note [exprIsConApp_maybe] +~~~~~~~~~~~~~~~~~~~~~~~~~ +exprIsConApp_maybe is a very important function. There are two principal +uses: + * case e of { .... } + * cls_op e, where cls_op is a class operation + +In both cases you want to know if e is of form (C e1..en) where C is +a data constructor. + +However e might not *look* as if \begin{code} -simpleOptExpr :: Subst -> CoreExpr -> CoreExpr --- Return an occur-analysed and slightly optimised expression --- The optimisation is very straightforward: just --- inline non-recursive bindings that are used only once, --- or wheere the RHS is trivial - -simpleOptExpr subst expr - = go subst (occurAnalyseExpr expr) +-- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is +-- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@, +-- where t1..tk are the *universally-qantified* type args of 'dc' +exprIsConApp_maybe :: CoreExpr -> Maybe (DataCon, [Type], [CoreExpr]) + +exprIsConApp_maybe (Note _ expr) + = exprIsConApp_maybe expr + -- We ignore all notes. For example, + -- case _scc_ "foo" (C a b) of + -- C a b -> e + -- should be optimised away, but it will be only if we look + -- through the SCC note. + +exprIsConApp_maybe (Cast expr co) + = -- Here we do the KPush reduction rule as described in the FC paper + -- The transformation applies iff we have + -- (C e1 ... en) `cast` co + -- where co :: (T t1 .. tn) ~ to_ty + -- The left-hand one must be a T, because exprIsConApp returned True + -- but the right-hand one might not be. (Though it usually will.) + + case exprIsConApp_maybe expr of { + Nothing -> Nothing ; + Just (dc, _dc_univ_args, dc_args) -> + + let (_from_ty, to_ty) = coercionKind co + dc_tc = dataConTyCon dc + in + case splitTyConApp_maybe to_ty of { + Nothing -> Nothing ; + Just (to_tc, to_tc_arg_tys) + | dc_tc /= to_tc -> Nothing + -- These two Nothing cases are possible; we might see + -- (C x y) `cast` (g :: T a ~ S [a]), + -- where S is a type function. In fact, exprIsConApp + -- will probably not be called in such circumstances, + -- but there't nothing wrong with it + + | otherwise -> + let + tc_arity = tyConArity dc_tc + dc_univ_tyvars = dataConUnivTyVars dc + dc_ex_tyvars = dataConExTyVars dc + arg_tys = dataConRepArgTys dc + + dc_eqs :: [(Type,Type)] -- All equalities from the DataCon + dc_eqs = [(mkTyVarTy tv, ty) | (tv,ty) <- dataConEqSpec dc] ++ + [getEqPredTys eq_pred | eq_pred <- dataConEqTheta dc] + + (ex_args, rest1) = splitAtList dc_ex_tyvars dc_args + (co_args, val_args) = splitAtList dc_eqs rest1 + + -- Make the "theta" from Fig 3 of the paper + gammas = decomposeCo tc_arity co + theta = zipOpenTvSubst (dc_univ_tyvars ++ dc_ex_tyvars) + (gammas ++ stripTypeArgs ex_args) + + -- Cast the existential coercion arguments + cast_co (ty1, ty2) (Type co) + = Type $ mkSymCoercion (substTy theta ty1) + `mkTransCoercion` co + `mkTransCoercion` (substTy theta ty2) + cast_co _ other_arg = pprPanic "cast_co" (ppr other_arg) + new_co_args = zipWith cast_co dc_eqs co_args + + -- Cast the value arguments (which include dictionaries) + new_val_args = zipWith cast_arg arg_tys val_args + cast_arg arg_ty arg = mkCoerce (substTy theta arg_ty) arg + in +#ifdef DEBUG + let dump_doc = vcat [ppr dc, ppr dc_univ_tyvars, ppr dc_ex_tyvars, + ppr arg_tys, ppr dc_args, ppr _dc_univ_args, + ppr ex_args, ppr val_args] + ASSERT2( coreEqType from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc ) + ASSERT2( all isTypeArg (ex_args ++ co_args), dump_doc ) + ASSERT2( equalLength val_args arg_tys, dump_doc ) +#endif + + Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args) + }} + +exprIsConApp_maybe expr + = analyse expr [] where - go subst (Var v) = lookupIdSubst subst v - go subst (App e1 e2) = App (go subst e1) (go subst e2) - go subst (Type ty) = Type (substTy subst ty) - go _ (Lit lit) = Lit lit - go subst (Note note e) = Note note (go subst e) - go subst (Cast e co) = Cast (go subst e) (substTy subst co) - go subst (Let bind body) = go_bind subst bind body - go subst (Lam bndr body) = Lam bndr' (go subst' body) - where - (subst', bndr') = substBndr subst bndr - - go subst (Case e b ty as) = Case (go subst e) b' - (substTy subst ty) - (map (go_alt subst') as) - where - (subst', b') = substBndr subst b - - - ---------------------- - go_alt subst (con, bndrs, rhs) = (con, bndrs', go subst' rhs) - where - (subst', bndrs') = substBndrs subst bndrs - - ---------------------- - go_bind subst (Rec prs) body = Let (Rec (bndrs' `zip` rhss')) - (go subst' body) - where - (bndrs, rhss) = unzip prs - (subst', bndrs') = substRecBndrs subst bndrs - rhss' = map (go subst') rhss - - go_bind subst (NonRec b r) body = go_nonrec subst b (go subst r) body - - ---------------------- - go_nonrec subst b (Type ty') body - | isTyVar b = go (extendTvSubst subst b ty') body - -- let a::* = TYPE ty in - go_nonrec subst b r' body - | isId b -- let x = e in - , exprIsTrivial r' || safe_to_inline (idOccInfo b) - = go (extendIdSubst subst b r') body - go_nonrec subst b r' body - = Let (NonRec b' r') (go subst' body) - where - (subst', b') = substBndr subst b - - ---------------------- - -- Unconditionally safe to inline - safe_to_inline :: OccInfo -> Bool - safe_to_inline IAmDead = True - safe_to_inline (OneOcc in_lam one_br _) = not in_lam && one_br - safe_to_inline (IAmALoopBreaker {}) = False - safe_to_inline NoOccInfo = False -\end{code} \ No newline at end of file + analyse (App fun arg) args = analyse fun (arg:args) + analyse fun@(Lam {}) args = beta fun [] args + + analyse (Var fun) args + | Just con <- isDataConWorkId_maybe fun + , is_saturated + , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args + = Just (con, stripTypeArgs univ_ty_args, rest_args) + + -- Look through dictionary functions; see Note [Unfolding DFuns] + | DFunUnfolding con ops <- unfolding + , is_saturated + , let (dfun_tvs, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun) + subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args)) + = Just (con, substTys subst dfun_res_tys, + [mkApps op args | op <- ops]) + + -- Look through unfoldings, but only cheap ones, because + -- we are effectively duplicating the unfolding + | CoreUnfolding { uf_expandable = expand_me, uf_tmpl = rhs } <- unfolding + , expand_me = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $ + analyse rhs args + where + is_saturated = count isValArg args == idArity fun + unfolding = idUnfolding fun + + analyse _ _ = Nothing + + ----------- + beta (Lam v body) pairs (arg : args) + | isTypeArg arg + = beta body ((v,arg):pairs) args + + beta (Lam {}) _ _ -- Un-saturated, or not a type lambda + = Nothing + + beta fun pairs args + = case analyse (substExpr (mkOpenSubst pairs) fun) args of + Nothing -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $ + Nothing + Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $ + Just ans + where + -- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args] + + +stripTypeArgs :: [CoreExpr] -> [Type] +stripTypeArgs args = ASSERT2( all isTypeArg args, ppr args ) + [ty | Type ty <- args] +\end{code} + +Note [Unfolding DFuns] +~~~~~~~~~~~~~~~~~~~~~~ +DFuns look like + + df :: forall a b. (Eq a, Eq b) -> Eq (a,b) + df a b d_a d_b = MkEqD (a,b) ($c1 a b d_a d_b) + ($c2 a b d_a d_b) + +So to split it up we just need to apply the ops $c1, $c2 etc +to the very same args as the dfun. It takes a little more work +to compute the type arguments to the dictionary constructor. +