-%
+calcU%
% (c) The University of Glasgow 2006
% (c) The AQUA Project, Glasgow University, 1994-1998
%
-- For implicit Ids, do a tiny bit of optimising first
mkImplicitUnfolding expr = mkTopUnfolding (simpleOptExpr expr)
-mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
-mkWwInlineRule id = mkInlineRule (InlWrapper id)
-
-mkInlineRule :: InlineRuleInfo -> CoreExpr -> Arity -> Unfolding
-mkInlineRule inl_info expr arity
- = mkCoreUnfolding True -- Note [Top-level flag on inline rules]
- expr' arity
- (InlineRule { ug_ir_info = inl_info, ug_small = small })
- where
- expr' = simpleOptExpr expr
- small = case calcUnfoldingGuidance (arity+1) expr' of
- (arity_e, UnfoldIfGoodArgs { ug_size = size_e })
- -> uncondInline arity_e size_e
- _other {- actually UnfoldNever -} -> False
-
-- Note [Top-level flag on inline rules]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Slight hack: note that mk_inline_rules conservatively sets the
mkUnfolding :: Bool -> CoreExpr -> Unfolding
mkUnfolding top_lvl expr
- = mkCoreUnfolding top_lvl expr arity guidance
+ = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
+ uf_src = InlineRhs,
+ uf_arity = arity,
+ uf_is_top = top_lvl,
+ uf_is_value = exprIsHNF expr,
+ uf_is_conlike = exprIsConLike expr,
+ uf_expandable = exprIsExpandable expr,
+ uf_is_cheap = is_cheap,
+ uf_guidance = guidance }
where
- (arity, guidance) = calcUnfoldingGuidance opt_UF_CreationThreshold expr
+ is_cheap = exprIsCheap expr
+ (arity, guidance) = calcUnfoldingGuidance is_cheap opt_UF_CreationThreshold expr
-- Sometimes during simplification, there's a large let-bound thing
-- which has been substituted, and so is now dead; so 'expr' contains
-- two copies of the thing while the occurrence-analysed expression doesn't
- -- Nevertheless, we don't occ-analyse before computing the size because the
+ -- Nevertheless, we *don't* occ-analyse before computing the size because the
-- size computation bales out after a while, whereas occurrence analysis does not.
--
-- This can occasionally mean that the guidance is very pessimistic;
- -- it gets fixed up next round
+ -- it gets fixed up next round. And it should be rare, because large
+ -- let-bound things that are dead are usually caught by preInlineUnconditionally
-mkCoreUnfolding :: Bool -> CoreExpr -> Arity -> UnfoldingGuidance -> Unfolding
+mkCoreUnfolding :: Bool -> UnfoldingSource -> CoreExpr
+ -> Arity -> UnfoldingGuidance -> Unfolding
-- Occurrence-analyses the expression before capturing it
-mkCoreUnfolding top_lvl expr arity guidance
+mkCoreUnfolding top_lvl src expr arity guidance
= CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
+ uf_src = src,
uf_arity = arity,
uf_is_top = top_lvl,
- uf_is_value = exprIsHNF expr,
- uf_is_cheap = exprIsCheap expr,
+ uf_is_value = exprIsHNF expr,
+ uf_is_conlike = exprIsConLike expr,
+ uf_is_cheap = exprIsCheap expr,
uf_expandable = exprIsExpandable expr,
uf_guidance = guidance }
mkDFunUnfolding :: DataCon -> [Id] -> Unfolding
mkDFunUnfolding con ops = DFunUnfolding con (map Var ops)
+mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
+mkWwInlineRule id expr arity
+ = mkCoreUnfolding True (InlineWrapper id)
+ (simpleOptExpr expr) arity
+ (UnfWhen unSaturatedOk boringCxtNotOk)
+
mkCompulsoryUnfolding :: CoreExpr -> Unfolding
-mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded
- = mkCoreUnfolding True expr 0 UnfoldAlways -- Arity of unfolding doesn't matter
+mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded
+ = mkCoreUnfolding True InlineCompulsory
+ expr 0 -- Arity of unfolding doesn't matter
+ (UnfWhen unSaturatedOk boringCxtOk)
+
+mkInlineRule :: Bool -> CoreExpr -> Arity -> Unfolding
+mkInlineRule unsat_ok expr arity
+ = mkCoreUnfolding True InlineRule -- Note [Top-level flag on inline rules]
+ expr' arity
+ (UnfWhen unsat_ok boring_ok)
+ where
+ expr' = simpleOptExpr expr
+ boring_ok = case calcUnfoldingGuidance True -- Treat as cheap
+ (arity+1) expr' of
+ (_, UnfWhen _ boring_ok) -> boring_ok
+ _other -> boringCxtNotOk
+ -- See Note [INLINE for small functions]
\end{code}
\begin{code}
calcUnfoldingGuidance
- :: Int -- bomb out if size gets bigger than this
- -> CoreExpr -- expression to look at
+ :: Bool -- True <=> the rhs is cheap, or we want to treat it
+ -- as cheap (INLINE things)
+ -> Int -- Bomb out if size gets bigger than this
+ -> CoreExpr -- Expression to look at
-> (Arity, UnfoldingGuidance)
-calcUnfoldingGuidance bOMB_OUT_SIZE expr
- = case collectBinders expr of { (binders, body) ->
+calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
+ = case collectBinders expr of { (bndrs, body) ->
let
- val_binders = filter isId binders
- n_val_binders = length val_binders
+ val_bndrs = filter isId bndrs
+ n_val_bndrs = length val_bndrs
+
+ guidance
+ = case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
+ TooBig -> UnfNever
+ SizeIs size cased_bndrs scrut_discount
+ | uncondInline n_val_bndrs (iBox size) && expr_is_cheap
+ -> UnfWhen needSaturated boringCxtOk
+
+ | otherwise
+ -> UnfIfGoodArgs { ug_args = map (discount cased_bndrs) val_bndrs
+ , ug_size = iBox size
+ , ug_res = iBox scrut_discount }
+
+ discount cbs bndr
+ = foldlBag (\acc (b',n) -> if bndr==b' then acc+n else acc)
+ 0 cbs
in
- case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of
- TooBig -> (n_val_binders, UnfoldNever)
- SizeIs size cased_args scrut_discount
- -> (n_val_binders, UnfoldIfGoodArgs { ug_args = map discount_for val_binders
- , ug_size = iBox size
- , ug_res = iBox scrut_discount })
- where
- discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc)
- 0 cased_args
- }
+ (n_val_bndrs, guidance) }
\end{code}
Note [Computing the size of an expression]
--------------
0 42#
0 x
+ 0 True
2 f x
1 Just x
4 f (g x)
size_up (Case (Var v) _ _ alts)
| v `elem` top_args -- We are scrutinising an argument variable
- = alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the case itself
+ = alts_size (foldr1 addSize alt_sizes) -- The 1 is for the case itself
(foldr1 maxSize alt_sizes)
-- Good to inline if an arg is scrutinised, because
-- that may eliminate allocation in the caller
-- the case when we are scrutinising an argument variable
alts_size (SizeIs tot tot_disc _tot_scrut) -- Size of all alternatives
(SizeIs max _max_disc max_scrut) -- Size of biggest alternative
- = SizeIs tot (unitBag (v, iBox (_ILIT(1) +# tot -# max)) `unionBags` tot_disc) max_scrut
+ = SizeIs tot (unitBag (v, iBox (_ILIT(2) +# tot -# max)) `unionBags` tot_disc) max_scrut
-- If the variable is known, we produce a discount that
-- will take us back to 'max', the size of the largest alternative
-- The 1+ is a little discount for reduced allocation in the caller
size_up (Case e _ _ alts) = foldr (addSize . size_up_alt)
(nukeScrutDiscount (size_up e))
alts
- `addSizeN` 1 -- Add 1 for the case itself
-- We don't charge for the case itself
-- It's a strict thing, and the price of the call
-- is paid by scrut. Also consider
-- case f x of DEFAULT -> e
-- This is just ';'! Don't charge for it.
+ --
+ -- Moreover, we charge one per alternative.
------------
-- size_up_app is used when there's ONE OR MORE value args
_ -> funSize top_args fun (length val_args)
------------
- size_up_alt (_con, _bndrs, rhs) = size_up rhs
+ size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 1
-- Don't charge for args, so that wrappers look cheap
-- (See comments about wrappers with Case)
+ --
+ -- IMPORATANT: *do* charge 1 for the alternative, else we
+ -- find that giant case nests are treated as practically free
+ -- A good example is Foreign.C.Error.errrnoToIOError
------------
-- These addSize things have to be here because
conSize :: DataCon -> Int -> ExprSize
conSize dc n_val_args
- | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1))
+ | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables
| isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
| otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
-- Treat a constructors application as size 1, regardless of how
maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1
| otherwise = s2
-sizeZero, sizeOne :: ExprSize
+sizeZero :: ExprSize
sizeN :: Int -> ExprSize
sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0))
-sizeOne = SizeIs (_ILIT(1)) emptyBag (_ILIT(0))
sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0))
\end{code}
-
-
%************************************************************************
%* *
\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
\begin{code}
couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
couldBeSmallEnoughToInline threshold rhs
- = case calcUnfoldingGuidance threshold rhs of
- (_, UnfoldNever) -> False
- _ -> True
+ = case calcUnfoldingGuidance False threshold rhs of
+ (_, UnfNever) -> False
+ _ -> True
----------------
smallEnoughToInline :: Unfolding -> Bool
-smallEnoughToInline (CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_size = size}})
+smallEnoughToInline (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}})
= size <= opt_UF_UseThreshold
smallEnoughToInline _
= False
-- Sees if the unfolding is pretty certain to inline
certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
= case guidance of
- UnfoldAlways {} -> True
- UnfoldNever -> False
- InlineRule {} -> True
- UnfoldIfGoodArgs { ug_size = size}
+ UnfNever -> False
+ UnfWhen {} -> True
+ UnfIfGoodArgs { ug_size = size}
-> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
certainlyWillInline _
\begin{code}
callSiteInline :: DynFlags
- -> Bool -- True <=> the Id can be inlined
-> Id -- The Id
+ -> Unfolding -- Its unfolding (if active)
-> Bool -- True if there are are no arguments at all (incl type args)
-> [ArgSummary] -- One for each value arg; True if it is interesting
-> CallCtxt -- True <=> continuation is interesting
data CallCtxt = BoringCtxt
- | ArgCtxt Bool -- We're somewhere in the RHS of function with rules
- -- => be keener to inline
- Int -- We *are* the argument of a function with this arg discount
- -- => be keener to inline
- -- INVARIANT: ArgCtxt False 0 ==> BoringCtxt
+ | ArgCtxt -- We are somewhere in the argument of a function
+ Bool -- True <=> we're somewhere in the RHS of function with rules
+ -- False <=> we *are* the argument of a function with non-zero
+ -- arg discount
+ -- OR
+ -- we *are* the RHS of a let Note [RHS of lets]
+ -- In both cases, be a little keener to inline
| ValAppCtxt -- We're applied to at least one value arg
-- This arises when we have ((f x |> co) y)
-- that decomposes its scrutinee
instance Outputable CallCtxt where
- ppr BoringCtxt = ptext (sLit "BoringCtxt")
- ppr (ArgCtxt rules disc) = ptext (sLit "ArgCtxt") <> ppr (rules,disc)
- ppr CaseCtxt = ptext (sLit "CaseCtxt")
- ppr ValAppCtxt = ptext (sLit "ValAppCtxt")
-
-callSiteInline dflags active_inline id lone_variable arg_infos cont_info
- = let
- n_val_args = length arg_infos
- in
- case idUnfolding id of {
+ ppr BoringCtxt = ptext (sLit "BoringCtxt")
+ ppr (ArgCtxt rules) = ptext (sLit "ArgCtxt") <+> ppr rules
+ ppr CaseCtxt = ptext (sLit "CaseCtxt")
+ ppr ValAppCtxt = ptext (sLit "ValAppCtxt")
+
+callSiteInline dflags id unfolding lone_variable arg_infos cont_info
+ = case unfolding of {
NoUnfolding -> Nothing ;
OtherCon _ -> Nothing ;
DFunUnfolding {} -> Nothing ; -- Never unfold a DFun
-- uf_arity will typically be equal to (idArity id),
-- but may be less for InlineRules
let
+ n_val_args = length arg_infos
+ saturated = n_val_args >= uf_arity
+
result | yes_or_no = Just unf_template
| otherwise = Nothing
-- arguments (ie n_val_args >= arity). But there must
-- be *something* interesting about some argument, or the
-- result context, to make it worth inlining
- some_benefit = interesting_args
- || n_val_args > uf_arity -- Over-saturated
- || interesting_saturated_call -- Exactly saturated
+ some_benefit
+ | not saturated = interesting_args -- Under-saturated
+ -- Note [Unsaturated applications]
+ | n_val_args > uf_arity = True -- Over-saturated
+ | otherwise = interesting_args -- Saturated
+ || interesting_saturated_call
interesting_saturated_call
= case cont_info of
ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt]
ValAppCtxt -> True -- Note [Cast then apply]
- yes_or_no
+ (yes_or_no, extra_doc)
= case guidance of
- UnfoldNever -> False
-
- UnfoldAlways -> True
- -- UnfoldAlways => there is no top-level binding for
- -- these things, so we must inline it. Only a few
- -- primop-like things have compulsory unfoldings (see
- -- MkId.lhs). Ignore is_active because we want to
- -- inline even if SimplGently is on.
-
- InlineRule { ug_ir_info = inl_info, ug_small = uncond_inline }
- | not active_inline -> False
- | n_val_args < uf_arity -> yes_unsat -- Not enough value args
- | uncond_inline -> True -- Note [INLINE for small functions]
- | otherwise -> some_benefit -- Saturated or over-saturated
- where
- -- See Note [Inlining an InlineRule]
- yes_unsat = case inl_info of
- InlSat -> False
- _other -> interesting_args
-
- UnfoldIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
- | not active_inline -> False
- | not is_cheap -> False
- | n_val_args < uf_arity -> interesting_args && small_enough
- -- Note [Unsaturated applications]
- | uncondInline uf_arity size -> True
- | otherwise -> some_benefit && small_enough
+ UnfNever -> (False, empty)
+
+ UnfWhen unsat_ok boring_ok -> ( (unsat_ok || saturated)
+ && (boring_ok || some_benefit)
+ , empty )
+ -- For the boring_ok part see Note [INLINE for small functions]
+ UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
+ -> ( is_cheap && some_benefit && small_enough
+ , (text "discounted size =" <+> int discounted_size) )
where
- small_enough = (size - discount) <= opt_UF_UseThreshold
+ discounted_size = size - discount
+ small_enough = discounted_size <= opt_UF_UseThreshold
discount = computeDiscount uf_arity arg_discounts
res_discount arg_infos cont_info
in
if dopt Opt_D_dump_inlinings dflags then
pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
- (vcat [text "active:" <+> ppr active_inline,
- text "arg infos" <+> ppr arg_infos,
+ (vcat [text "arg infos" <+> ppr arg_infos,
+ text "uf arity" <+> ppr uf_arity,
text "interesting continuation" <+> ppr cont_info,
+ text "some_benefit" <+> ppr some_benefit,
text "is value:" <+> ppr is_value,
text "is cheap:" <+> ppr is_cheap,
text "guidance" <+> ppr guidance,
+ extra_doc,
text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
result
else
}
\end{code}
+Note [RHS of lets]
+~~~~~~~~~~~~~~~~~~
+Be a tiny bit keener to inline in the RHS of a let, because that might
+lead to good thing later
+ f y = (y,y,y)
+ g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
+We'd inline 'f' if the call was in a case context, and it kind-of-is,
+only we can't see it. So we treat the RHS of a let as not-totally-boring.
+
Note [Unsaturated applications]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When a call is not saturated, we *still* inline if one of the
g y = f y
Then f's RHS is no larger than its LHS, so we should inline it
into even the most boring context. (We do so if there is no INLINE
-pragma!) That's the reason for the 'inl_small' flag on an InlineRule.
+pragma!)
Note [Things to watch]
Note [Inlining an InlineRule]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An InlineRules is used for
- (a) pogrammer INLINE pragmas
+ (a) programmer INLINE pragmas
(b) inlinings from worker/wrapper
For (a) the RHS may be large, and our contract is that we *only* inline
slow-down). The motivation was test eyeball/inline1.hs; but that seems
to work ok now.
+NOTE: arguably, we should inline in ArgCtxt only if the result of the
+call is at least CONLIKE. At least for the cases where we use ArgCtxt
+for the RHS of a 'let', we only profit from the inlining if we get a
+CONLIKE thing (modulo lets).
+
Note [Lone variables]
~~~~~~~~~~~~~~~~~~~~~
The "lone-variable" case is important. I spent ages messing about
CaseCtxt -> res_discount
_other -> 4 `min` res_discount
-- res_discount can be very large when a function returns
- -- construtors; but we only want to invoke that large discount
+ -- constructors; but we only want to invoke that large discount
-- when there's a case continuation.
-- Otherwise we, rather arbitrarily, threshold it. Yuk.
-- But we want to aovid inlining large functions that return
Consider
f d = ...((*) d x y)...
... f (df d')...
-where df is con-like. Then we'd really like to inline so that the
+where df is con-like. Then we'd really like to inline 'f' so that the
rule for (*) (df d) can fire. To do this
a) we give a discount for being an argument of a class-op (eg (*) d)
b) we say that a con-like argument (eg (df d)) is interesting
-- data constructors here
| idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
| n > 0 = NonTrivArg -- Saturated or unknown call
- | evald_unfolding = ValueArg -- n==0; look for a value
+ | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
+ -- See Note [Conlike is interesting]
| otherwise = TrivArg -- n==0, no useful unfolding
where
- evald_unfolding = isEvaldUnfolding (idUnfolding v)
+ conlike_unfolding = isConLikeUnfolding (idUnfolding v)
go (Type _) _ = TrivArg
go (App fn (Type _)) n = go fn n
-- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is
-- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@,
-- where t1..tk are the *universally-qantified* type args of 'dc'
-exprIsConApp_maybe :: CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
+exprIsConApp_maybe :: IdUnfoldingFun -> CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
-exprIsConApp_maybe (Note _ expr)
- = exprIsConApp_maybe expr
+exprIsConApp_maybe id_unf (Note _ expr)
+ = exprIsConApp_maybe id_unf expr
-- We ignore all notes. For example,
-- case _scc_ "foo" (C a b) of
-- C a b -> e
-- should be optimised away, but it will be only if we look
-- through the SCC note.
-exprIsConApp_maybe (Cast expr co)
+exprIsConApp_maybe id_unf (Cast expr co)
= -- Here we do the KPush reduction rule as described in the FC paper
-- The transformation applies iff we have
-- (C e1 ... en) `cast` co
-- The left-hand one must be a T, because exprIsConApp returned True
-- but the right-hand one might not be. (Though it usually will.)
- case exprIsConApp_maybe expr of {
+ case exprIsConApp_maybe id_unf expr of {
Nothing -> Nothing ;
Just (dc, _dc_univ_args, dc_args) ->
let dump_doc = vcat [ppr dc, ppr dc_univ_tyvars, ppr dc_ex_tyvars,
ppr arg_tys, ppr dc_args, ppr _dc_univ_args,
ppr ex_args, ppr val_args]
- ASSERT2( coreEqType from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
+ in
+ ASSERT2( coreEqType _from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
ASSERT2( all isTypeArg (ex_args ++ co_args), dump_doc )
ASSERT2( equalLength val_args arg_tys, dump_doc )
#endif
Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args)
}}
-exprIsConApp_maybe expr
+exprIsConApp_maybe id_unf expr
= analyse expr []
where
analyse (App fun arg) args = analyse fun (arg:args)
analyse rhs args
where
is_saturated = count isValArg args == idArity fun
- unfolding = idUnfolding fun
+ unfolding = id_unf fun -- Does not look through loop breakers
+ -- ToDo: we *may* look through variables that are NOINLINE
+ -- in this phase, and that is really not right
analyse _ _ = Nothing