X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FcoreSyn%2FCoreUnfold.lhs;h=38513af2b49e4b99eba6867be9cb340deb519f8c;hb=6af98b2f2302d06f8e545c610eb5f2881ed88540;hp=b6706c14cd71318682b3e8dfaee2bdb0a3b300f4;hpb=444c62505e1e9790db08322fea8625dedd81d446;p=ghc-hetmet.git diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index b6706c1..38513af 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -309,7 +309,7 @@ sizeExpr bOMB_OUT_SIZE top_args expr | fun `hasKey` buildIdKey = buildSize | fun `hasKey` augmentIdKey = augmentSize | otherwise - = case globalIdDetails fun of + = case idDetails fun of DataConWorkId dc -> conSizeN dc (valArgCount args) FCallId _ -> sizeN opt_UF_DearOp @@ -536,6 +536,10 @@ data CallCtxt = BoringCtxt -- => be keener to inline -- INVARIANT: ArgCtxt False 0 ==> BoringCtxt + | ValAppCtxt -- We're applied to at least one value arg + -- This arises when we have ((f x |> co) y) + -- Then the (f x) has argument 'x' but in a ValAppCtxt + | CaseCtxt -- We're the scrutinee of a case -- that decomposes its scrutinee @@ -543,6 +547,7 @@ instance Outputable CallCtxt where ppr BoringCtxt = ptext (sLit "BoringCtxt") ppr (ArgCtxt _ _) = ptext (sLit "ArgCtxt") ppr CaseCtxt = ptext (sLit "CaseCtxt") + ppr ValAppCtxt = ptext (sLit "ValAppCtxt") callSiteInline dflags active_inline id lone_variable arg_infos cont_info = case idUnfolding id of { @@ -610,8 +615,8 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info = case cont_info of BoringCtxt -> not is_top && n_vals_wanted > 0 -- Note [Nested functions] CaseCtxt -> not lone_variable || not is_value -- Note [Lone variables] - ArgCtxt {} -> n_vals_wanted > 0 - -- See Note [Inlining in ArgCtxt] + ArgCtxt {} -> n_vals_wanted > 0 -- Note [Inlining in ArgCtxt] + ValAppCtxt -> True -- Note [Cast then apply] small_enough = (size - discount) <= opt_UF_UseThreshold discount = computeDiscount n_vals_wanted arg_discounts @@ 
-619,7 +624,7 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info res_discount' = case cont_info of BoringCtxt -> 0 CaseCtxt -> res_discount - ArgCtxt _ _ -> 4 `min` res_discount + _other -> 4 `min` res_discount -- res_discount can be very large when a function returns -- construtors; but we only want to invoke that large discount -- when there's a case continuation. @@ -655,6 +660,16 @@ branches. Then inlining it doesn't increase allocation, but it does increase the chance that the constructor won't be allocated at all in the branches that don't use it. +Note [Cast then apply] +~~~~~~~~~~~~~~~~~~~~~~ +Consider + myIndex = __inline_me ( (/\a. <blah>) |> co ) + co :: (forall a. a -> a) ~ (forall a. T a) + ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ... + +We need to inline myIndex to unravel this; but the actual call (myIndex a) has +no value arguments. The ValAppCtxt gives it enough incentive to inline. + Note [Inlining in ArgCtxt] ~~~~~~~~~~~~~~~~~~~~~~~~~~ The condition (n_vals_wanted > 0) here is very important, because otherwise @@ -669,7 +684,7 @@ slow-down). The motivation was test eyeball/inline1.hs; but that seems to work ok now. Note [Lone variables] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~ The "lone-variable" case is important. I spent ages messing about with unsatisfactory varaints, but this is nice. The idea is that if a variable appears all alone