X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FcoreSyn%2FCoreUnfold.lhs;h=eaeba103c73ae8f3b59493887329bc5ca55d992b;hb=21eea25f1212ec306aac806233a2ec048212d529;hp=45b8b927763121dcac331b0d77948af5e0f3501f;hpb=109eb1e9f31cc9fbec194dcc9cfd732d990c2678;p=ghc-hetmet.git diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index 45b8b92..eaeba10 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -22,7 +22,7 @@ module CoreUnfold ( mkCompulsoryUnfolding, seqUnfolding, evaldUnfolding, mkOtherCon, otherCons, unfoldingTemplate, maybeUnfoldingTemplate, - isEvaldUnfolding, isValueUnfolding, isCheapUnfolding, isCompulsoryUnfolding, + isEvaldUnfolding, isValueUnfolding, isExpandableUnfolding, isCompulsoryUnfolding, hasUnfolding, hasSomeUnfolding, neverUnfold, couldBeSmallEnoughToInline, @@ -71,7 +71,8 @@ mkImplicitUnfolding expr = CoreUnfolding (simpleOptExpr emptySubst expr) True (exprIsHNF expr) - (exprIsCheap expr) + (exprIsCheap expr) + (exprIsExpandable expr) (calcUnfoldingGuidance opt_UF_CreationThreshold expr) mkUnfolding :: Bool -> CoreExpr -> Unfolding @@ -85,6 +86,8 @@ mkUnfolding top_lvl expr (exprIsCheap expr) -- OK to inline inside a lambda + (exprIsExpandable expr) + (calcUnfoldingGuidance opt_UF_CreationThreshold expr) -- Sometimes during simplification, there's a large let-bound thing -- which has been substituted, and so is now dead; so 'expr' contains @@ -99,8 +102,8 @@ instance Outputable Unfolding where ppr NoUnfolding = ptext (sLit "No unfolding") ppr (OtherCon cs) = ptext (sLit "OtherCon") <+> ppr cs ppr (CompulsoryUnfolding e) = ptext (sLit "Compulsory") <+> ppr e - ppr (CoreUnfolding e top hnf cheap g) - = ptext (sLit "Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr g, + ppr (CoreUnfolding e top hnf cheap expable g) + = ptext (sLit "Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr expable <+> ppr g, ppr e] mkCompulsoryUnfolding :: CoreExpr -> Unfolding @@ -309,7 +312,7 @@ sizeExpr bOMB_OUT_SIZE top_args expr | fun `hasKey` buildIdKey = buildSize | fun `hasKey` augmentIdKey = augmentSize | otherwise - = case globalIdDetails fun of + = case idDetails fun of DataConWorkId dc -> conSizeN dc (valArgCount args) FCallId _ -> sizeN opt_UF_DearOp @@ -484,13 +487,13 @@ couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold certainlyWillInline :: Unfolding -> Bool -- Sees if the unfolding is pretty certain to inline -certainlyWillInline (CoreUnfolding _ _ _ is_cheap (UnfoldIfGoodArgs n_vals _ size _)) +certainlyWillInline (CoreUnfolding _ _ _ is_cheap _ (UnfoldIfGoodArgs n_vals _ size _)) = is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold certainlyWillInline _ = False smallEnoughToInline :: Unfolding -> Bool -smallEnoughToInline (CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ _ size _)) +smallEnoughToInline (CoreUnfolding _ _ _ _ _ (UnfoldIfGoodArgs _ _ size _)) = size <= opt_UF_UseThreshold smallEnoughToInline _ = False @@ -536,6 +539,10 @@ data CallCtxt = BoringCtxt -- => be keener to inline -- INVARIANT: ArgCtxt False 0 ==> BoringCtxt + | ValAppCtxt -- We're applied to at least one value arg + -- This arises when we have ((f x |> co) y) + -- Then the (f x) has argument 'x' but in a ValAppCtxt + | CaseCtxt -- We're the scrutinee of a case -- that decomposes its scrutinee @@ -543,6 +550,7 @@ instance Outputable CallCtxt where ppr BoringCtxt = ptext (sLit "BoringCtxt") ppr (ArgCtxt _ _) = ptext (sLit "ArgCtxt") ppr CaseCtxt = ptext (sLit "CaseCtxt") + ppr ValAppCtxt = ptext (sLit "ValAppCtxt") callSiteInline dflags active_inline id lone_variable arg_infos cont_info = case idUnfolding id of { @@ -556,7 +564,7 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info -- compulsory unfoldings (see MkId.lhs). -- We don't allow them to be inactive - CoreUnfolding unf_template is_top is_value is_cheap guidance -> + CoreUnfolding unf_template is_top is_value is_cheap is_expable guidance -> let result | yes_or_no = Just unf_template @@ -586,10 +594,13 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info -> True | otherwise - -> some_benefit && small_enough + -> some_benefit && small_enough && inline_enough_args where enough_args = n_val_args >= n_vals_wanted + inline_enough_args = + not (dopt Opt_InlineIfEnoughArgs dflags) || enough_args + some_benefit = or arg_infos || really_interesting_cont -- There must be something interesting @@ -607,8 +618,8 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info = case cont_info of BoringCtxt -> not is_top && n_vals_wanted > 0 -- Note [Nested functions] CaseCtxt -> not lone_variable || not is_value -- Note [Lone variables] - ArgCtxt {} -> n_vals_wanted > 0 - -- See Note [Inlining in ArgCtxt] + ArgCtxt {} -> n_vals_wanted > 0 -- Note [Inlining in ArgCtxt] + ValAppCtxt -> True -- Note [Cast then apply] small_enough = (size - discount) <= opt_UF_UseThreshold discount = computeDiscount n_vals_wanted arg_discounts @@ -616,7 +627,7 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info res_discount' = case cont_info of BoringCtxt -> 0 CaseCtxt -> res_discount - ArgCtxt _ _ -> 4 `min` res_discount + _other -> 4 `min` res_discount -- res_discount can be very large when a function returns -- construtors; but we only want to invoke that large discount -- when there's a case continuation. @@ -626,14 +637,15 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info in if dopt Opt_D_dump_inlinings dflags then - pprTrace "Considering inlining" - (ppr id <+> vcat [text "active:" <+> ppr active_inline, - text "arg infos" <+> ppr arg_infos, - text "interesting continuation" <+> ppr cont_info, - text "is value:" <+> ppr is_value, - text "is cheap:" <+> ppr is_cheap, - text "guidance" <+> ppr guidance, - text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) + pprTrace ("Considering inlining: " ++ showSDoc (ppr id)) + (vcat [text "active:" <+> ppr active_inline, + text "arg infos" <+> ppr arg_infos, + text "interesting continuation" <+> ppr cont_info, + text "is value:" <+> ppr is_value, + text "is cheap:" <+> ppr is_cheap, + text "is expandable:" <+> ppr is_expable, + text "guidance" <+> ppr guidance, + text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) result else result @@ -652,6 +664,16 @@ branches. Then inlining it doesn't increase allocation, but it does increase the chance that the constructor won't be allocated at all in the branches that don't use it. +Note [Cast then apply] +~~~~~~~~~~~~~~~~~~~~~~ +Consider + myIndex = __inline_me ( (/\a. ) |> co ) + co :: (forall a. a -> a) ~ (forall a. T a) + ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ... + +We need to inline myIndex to unravel this; but the actual call (myIndex a) has +no value arguments. The ValAppCtxt gives it enough incentive to inline. + Note [Inlining in ArgCtxt] ~~~~~~~~~~~~~~~~~~~~~~~~~~ The condition (n_vals_wanted > 0) here is very important, because otherwise @@ -666,7 +688,7 @@ slow-down). The motivation was test eyeball/inline1.hs; but that seems to work ok now. Note [Lone variables] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~ The "lone-variable" case is important. I spent ages messing about with unsatisfactory varaints, but this is nice. The idea is that if a variable appears all alone