X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FcoreSyn%2FCoreUnfold.lhs;h=c443c10e4b4cd1efb411a91930e373f958f1c7ea;hp=f8043d47573147b81d62ff60e366702c52dfc70e;hb=4922b3bf7e17c55b63f717fea2d9b9998bc071c6;hpb=b44b34500edbb94194d4fd6ff2acfac0b5ce7a16 diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index f8043d4..c443c10 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -43,6 +43,7 @@ import PprCore () -- Instances import OccurAnal import CoreSubst hiding( substTy ) import CoreFVs ( exprFreeVars ) +import CoreArity ( manifestArity ) import CoreUtils import Id import DataCon @@ -140,13 +141,17 @@ mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolde expr 0 -- Arity of unfolding doesn't matter (UnfWhen unSaturatedOk boringCxtOk) -mkInlineRule :: Bool -> CoreExpr -> Arity -> Unfolding -mkInlineRule unsat_ok expr arity +mkInlineRule :: CoreExpr -> Maybe Arity -> Unfolding +mkInlineRule expr mb_arity = mkCoreUnfolding True InlineRule -- Note [Top-level flag on inline rules] expr' arity (UnfWhen unsat_ok boring_ok) where expr' = simpleOptExpr expr + (unsat_ok, arity) = case mb_arity of + Nothing -> (unSaturatedOk, manifestArity expr') + Just ar -> (needSaturated, ar) + boring_ok = case calcUnfoldingGuidance True -- Treat as cheap False -- But not bottoming (arity+1) expr' of @@ -181,9 +186,9 @@ calcUnfoldingGuidance expr_is_cheap top_bot bOMB_OUT_SIZE expr = case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of TooBig -> UnfNever SizeIs size cased_bndrs scrut_discount - | uncondInline n_val_bndrs (iBox size) && expr_is_cheap - -> UnfWhen needSaturated boringCxtOk - + | uncondInline n_val_bndrs (iBox size) + , expr_is_cheap + -> UnfWhen unSaturatedOk boringCxtOk -- Note [INLINE for small functions] | top_bot -- See Note [Do not inline top-level bottoming functions] -> UnfNever @@ -239,24 +244,52 @@ Do not re-inline them! 
But we *do* still inline if they are very small (the uncondInline stuff). -Note [Unconditional inlining] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We inline *unconditionally* if inlined thing is smaller (using sizeExpr) -than the thing it's replacing. Notice that +Note [INLINE for small functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider {-# INLINE f #-} + f x = Just x + g y = f y +Then f's RHS is no larger than its LHS, so we should inline it into +even the most boring context. In general, if the function is +sufficiently small that its body is as small as the call itself, then +inline unconditionally, regardless of how boring the context is. + +Things to note: + + * We inline *unconditionally* if the inlined thing is smaller (using sizeExpr) + than the thing it's replacing. Notice that (f x) --> (g 3) -- YES, unconditionally (f x) --> x : [] -- YES, *even though* there are two -- arguments to the cons x --> g 3 -- NO x --> Just v -- NO -It's very important not to unconditionally replace a variable by -a non-atomic term. + It's very important not to unconditionally replace a variable by + a non-atomic term. + +* We do this even if the thing isn't saturated, else we end up with the + silly situation that + f x y = x + ...map (f 3)... + doesn't inline. Even in a boring context, inlining without being + saturated will give a lambda instead of a PAP, and will be more + efficient at runtime. + +* However, when the function's arity > 0, we do insist that it + has at least one value argument at the call site. Otherwise we find this: + f = /\a \x:a. x + d = /\b. MkD (f b) + If we inline f here we get + d = /\b. MkD (\x:b. x) + and then prepareRhs floats out the argument, abstracting the type + variables, so we end up with the original again! 
+ \begin{code} uncondInline :: Arity -> Int -> Bool -- Inline unconditionally if there no size increase -- Size of call is arity (+1 for the function) --- See Note [Unconditional inlining] +-- See Note [INLINE for small functions] uncondInline arity size | arity == 0 = size == 0 | otherwise = size <= arity + 1 @@ -444,21 +477,44 @@ funSize top_args fun n_val_args conSize :: DataCon -> Int -> ExprSize conSize dc n_val_args - | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables + | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables + +-- See Note [Constructor size] | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1)) - | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1)) - -- Treat a constructors application as size 1, regardless of how - -- many arguments it has; we are keen to expose them - -- (and we charge separately for their args). We can't treat - -- them as size zero, else we find that (Just x) has size 0, - -- which is the same as a lone variable; and hence 'v' will - -- always be replaced by (Just x), where v is bound to Just x. - -- - -- However, unboxed tuples count as size zero - -- I found occasions where we had - -- f x y z = case op# x y z of { s -> (# s, () #) } - -- and f wasn't getting inlined +-- See Note [Unboxed tuple result discount] +-- | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (_ILIT(0)) + +-- See Note [Constructor size] + | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1)) +\end{code} + +Note [Constructor size] +~~~~~~~~~~~~~~~~~~~~~~~ +Treat a constructors application as size 1, regardless of how many +arguments it has; we are keen to expose them (and we charge separately +for their args). We can't treat them as size zero, else we find that +(Just x) has size 0, which is the same as a lone variable; and hence +'v' will always be replaced by (Just x), where v is bound to Just x. 
+ +However, unboxed tuples count as size zero. I found occasions where we had + f x y z = case op# x y z of { s -> (# s, () #) } +and f wasn't getting inlined. + +Note [Unboxed tuple result discount] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +I tried giving unboxed tuples a *result discount* of zero (see the +commented-out line). Why? When returned as a result they do not +allocate, so maybe we don't want to charge so much for them. If you +have a non-zero discount here, we find that workers often get inlined +back into wrappers, because it looks like + f x = case $wf x of (# a,b #) -> (a,b) +and we are keener because of the case. However while this change +shrank binary sizes by 0.5% it also made spectral/boyer allocate 5% +more. All other changes were very small. So it's not a big deal but I +didn't adopt the idea. + +\begin{code} primOpSize :: PrimOp -> Int -> ExprSize primOpSize op n_val_args | not (primOpIsDupable op) = sizeN opt_UF_DearOp @@ -597,9 +653,11 @@ actual arguments. \begin{code} couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool couldBeSmallEnoughToInline threshold rhs - = case calcUnfoldingGuidance False False threshold rhs of - (_, UnfNever) -> False - _ -> True + = case sizeExpr (iUnbox threshold) [] body of + TooBig -> False + _ -> True + where + (_, body) = collectBinders rhs ---------------- smallEnoughToInline :: Unfolding -> Bool @@ -726,10 +784,10 @@ callSiteInline dflags id unfolding lone_variable arg_infos cont_info = case guidance of UnfNever -> (False, empty) - UnfWhen unsat_ok boring_ok -> ( (unsat_ok || saturated) - && (boring_ok || some_benefit) - , empty ) - -- For the boring_ok part see Note [INLINE for small functions] + UnfWhen unsat_ok boring_ok + -> (enough_args && (boring_ok || some_benefit), empty ) + where -- See Note [INLINE for small functions] + enough_args = saturated || (unsat_ok && n_val_args > 0) UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size } -> ( is_cheap && some_benefit && 
small_enough @@ -741,7 +799,7 @@ callSiteInline dflags id unfolding lone_variable arg_infos cont_info res_discount arg_infos cont_info in - if dopt Opt_D_dump_inlinings dflags then + if (dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags) then pprTrace ("Considering inlining: " ++ showSDoc (ppr id)) (vcat [text "arg infos" <+> ppr arg_infos, text "uf arity" <+> ppr uf_arity, @@ -795,16 +853,6 @@ We *really* want to inline $dmmin, even though it has arity 3, in order to unravel the recursion. -Note [INLINE for small functions] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider {-# INLINE f #-} - f x = Just x - g y = f y -Then f's RHS is no larger than its LHS, so we should inline it -into even the most boring context. (We do so if there is no INLINE -pragma!) - - Note [Things to watch] ~~~~~~~~~~~~~~~~~~~~~~ * { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... } @@ -1187,7 +1235,7 @@ exprIsConApp_maybe id_unf expr = Nothing beta fun pairs args - = case analyse (substExpr subst fun) args of + = case analyse (substExpr (text "subst-expr-is-con-app") subst fun) args of Nothing -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $ Nothing Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $