X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fstranal%2FWorkWrap.lhs;h=ac10b1b7737bd84347e76282d05e16b1df56fd63;hp=33ca298d1f0619ed2b824fc5a4eaeb00ca952e57;hb=f537dd87c4a07526e2b1fc1bd1c125d652833641;hpb=76dfa3944cbf149a30398d29e6762a44772c0174 diff --git a/compiler/stranal/WorkWrap.lhs b/compiler/stranal/WorkWrap.lhs index 33ca298..ac10b1b 100644 --- a/compiler/stranal/WorkWrap.lhs +++ b/compiler/stranal/WorkWrap.lhs @@ -7,23 +7,16 @@ module WorkWrap ( wwTopBinds, mkWrapper ) where import CoreSyn -import CoreUnfold ( certainlyWillInline, mkInlineRule, mkWwInlineRule ) +import CoreUnfold ( certainlyWillInline, mkInlineUnfolding, mkWwInlineRule ) import CoreUtils ( exprType, exprIsHNF ) import CoreArity ( exprArity ) import Var -import Id ( idType, isOneShotLambda, idUnfolding, - setIdStrictness, mkWorkerId, setInlinePragma, - setInlineActivation, setIdUnfolding, - setIdArity ) +import Id import Type ( Type ) import IdInfo -import Demand ( Demand(..), StrictSig(..), DmdType(..), DmdResult(..), - Demands(..), mkTopDmdType, isBotRes, returnsCPR, topSig, isAbsent - ) +import Demand import UniqSupply -import BasicTypes ( RecFlag(..), isNonRec, isNeverActive, - Activation(..), InlinePragma(..), - inlinePragmaActivation, inlinePragmaRuleMatchInfo ) +import BasicTypes import VarEnv ( isEmptyVarEnv ) import Maybes ( orElse ) import WwLib @@ -107,6 +100,7 @@ matching by looking for strict arguments of the correct type. wwExpr :: CoreExpr -> UniqSM CoreExpr wwExpr e@(Type {}) = return e +wwExpr e@(Coercion {}) = return e wwExpr e@(Lit {}) = return e wwExpr e@(Var {}) = return e @@ -148,9 +142,8 @@ the business. The only reason this is monadised is for the unique supply. -Note [Don't w/w inline things (a)] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - +Note [Don't w/w INLINE things] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It's very important to refrain from w/w-ing an INLINE function (ie one with an InlineRule) because the wrapper will then overwrite the InlineRule unfolding. @@ -167,18 +160,45 @@ Notice that we refrain from w/w'ing an INLINE function even if it is in a recursive group. It might not be the loop breaker. (We could test for loop-breaker-hood, but I'm not sure that ever matters.) -Note [Don't w/w inline things (b)] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In general, we refrain from w/w-ing *small* functions, because they'll -inline anyway. But we must take care: it may look small now, but get -to be big later after other inling has happened. So we take the -precaution of adding an INLINE pragma to any such functions. +Note [Don't w/w INLINABLE things] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + {-# INLINABLE f #-} + f x y = .... +then in principle we might get a more efficient loop by w/w'ing f. +But that would make a new unfolding which would overwrite the old +one. So we leave INLINABLE things alone too. + +This is a slight infelicity really, because it means that adding +an INLINABLE pragma could make a program a bit less efficient, +because you lose the worker/wrapper stuff. But I don't see a way +to avoid that. + +Note [Don't w/w inline small non-loop-breaker things] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In general, we refrain from w/w-ing *small* functions, which are not +loop breakers, because they'll inline anyway. But we must take care: +it may look small now, but get to be big later after other inlining +has happened. So we take the precaution of adding an INLINE pragma to +any such functions. I made this change when I observed a big function at the end of -compilation with a useful strictness signature but no w-w. When -I measured it on nofib, it didn't make much difference; just a few -percent improved allocation on one benchmark (bspt/Euclid.space). -But nothing got worse. +compilation with a useful strictness signature but no w-w. (It was +small during demand analysis, we refrained from w/w, and then got big +when something was inlined in its rhs.) When I measured it on nofib, +it didn't make much difference; just a few percent improved allocation +on one benchmark (bspt/Euclid.space). But nothing got worse. + +There is an infelicity though. We may get something like + f = g val +==> + g x = case gw x of r -> I# r + + f {- InlineStable, Template = g val -} + f = case gw x of r -> I# r + +The code for f duplicates that for g, without any real benefit. It +won't really be executed, because calls to f will go via the inlining. Note [Wrapper activation] ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -229,6 +249,7 @@ tryWW is_rec fn_id rhs = return [ (fn_id, rhs) ] | is_thunk && worthSplittingThunk maybe_fn_dmd res_info + -- See Note [Thunk splitting] = ASSERT2( isNonRec is_rec, ppr new_fn_id ) -- The thunk must be non-recursive checkSize new_fn_id rhs $ splitThunk new_fn_id rhs @@ -265,20 +286,23 @@ tryWW is_rec fn_id rhs --------------------- checkSize :: Id -> CoreExpr -> UniqSM [(Id,CoreExpr)] -> UniqSM [(Id,CoreExpr)] - -- See Note [Don't w/w inline things (a) and (b)] checkSize fn_id rhs thing_inside - | isStableUnfolding unfolding -- For DFuns and INLINE things, leave their - = return [ (fn_id, rhs) ] -- unfolding unchanged; but still attach - -- strictness info to the Id + | isStableUnfolding (realIdUnfolding fn_id) + = return [ (fn_id, rhs) ] + -- See Note [Don't w/w INLINABLE things] + -- and Note [Don't w/w INLINABLABLE things] + -- NB: use realIdUnfolding because we want to see the unfolding + -- even if it's a loop breaker! - | certainlyWillInline unfolding + | certainlyWillInline (idUnfolding fn_id) = return [ (fn_id `setIdUnfolding` inline_rule, rhs) ] - -- Note [Don't w/w inline things (b)] + -- Note [Don't w/w inline small non-loop-breaker things] + -- NB: use idUnfolding because we don't want to apply + -- this criterion to a loop breaker! | otherwise = thing_inside where - unfolding = idUnfolding fn_id - inline_rule = mkInlineRule unSaturatedOk rhs (unfoldingArity unfolding) + inline_rule = mkInlineUnfolding Nothing rhs --------------------- splitFun :: Id -> IdInfo -> [Demand] -> DmdResult -> Expr Var @@ -292,6 +316,12 @@ splitFun fn_id fn_info wrap_dmds res_info rhs ; let work_rhs = work_fn rhs work_id = mkWorkerId work_uniq fn_id (exprType work_rhs) + `setIdOccInfo` occInfo fn_info + -- Copy over occurrence info from parent + -- Notably whether it's a loop breaker + -- Doesn't matter much, since we will simplify next, but + -- seems right-er to do so + `setInlineActivation` (inlinePragmaActivation inl_prag) -- Any inline activation (which sets when inlining is active) -- on the original function is duplicated on the worker @@ -310,16 +340,20 @@ splitFun fn_id fn_info wrap_dmds res_info rhs -- arity is consistent with the demand type goes through wrap_rhs = wrap_fn work_id - wrap_prag = InlinePragma { inl_inline = True + wrap_prag = InlinePragma { inl_inline = Inline + , inl_sat = Nothing , inl_act = ActiveAfter 0 , inl_rule = rule_match_info } + -- See Note [Wrapper activation] + -- The RuleMatchInfo is (and must be) unaffected + -- The inl_inline is bound to be False, else we would not be + -- making a wrapper wrap_id = fn_id `setIdUnfolding` mkWwInlineRule work_id wrap_rhs arity `setInlinePragma` wrap_prag - -- See Note [Wrapper activation] - -- The RuleMatchInfo is (and must be) unaffected - -- The inl_inline is bound to be False, else we would not be - -- making a wrapper + `setIdOccInfo` NoOccInfo + -- Zap any loop-breaker-ness, to avoid bleating from Lint + -- about a loop breaker with an INLINE rule ; return ([(work_id, work_rhs), (wrap_id, wrap_rhs)]) }) -- Worker first, because wrapper mentions it @@ -349,8 +383,8 @@ get_one_shots (Note _ e) = get_one_shots e get_one_shots _ = noOneShotInfo \end{code} -Thunk splitting -~~~~~~~~~~~~~~~ +Note [Thunk splitting] +~~~~~~~~~~~~~~~~~~~~~~ Suppose x is used strictly (never mind whether it has the CPR property). @@ -384,6 +418,7 @@ function, so that if x's demand is deeper (say U(U(L,L),L)) then the splitting will go deeper too. \begin{code} +-- See Note [Thunk splitting] -- splitThunk converts the *non-recursive* binding -- x = e -- into @@ -391,6 +426,11 @@ then the splitting will go deeper too. -- in case x of -- I# y -> let x = I# y in x } -- See comments above. Is it not beautifully short? +-- Moreover, it works just as well when there are +-- several binders, and if the binders are lifted +-- E.g. x = e +-- --> x = let x = e in +-- case x of (a,b) -> let x = (a,b) in x splitThunk :: Var -> Expr Var -> UniqSM [(Var, Expr Var)] splitThunk fn_id rhs = do