Unfolding, UnfoldingGuidance, -- Abstract types
noUnfolding, mkImplicitUnfolding,
- mkTopUnfolding, mkUnfolding, mkCoreUnfolding,
- mkInlineRule, mkWwInlineRule,
+ mkUnfolding, mkCoreUnfolding,
+ mkTopUnfolding, mkSimpleUnfolding,
+ mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule,
mkCompulsoryUnfolding, mkDFunUnfolding,
interestingArg, ArgSummary(..),
- couldBeSmallEnoughToInline,
+ couldBeSmallEnoughToInline, inlineBoringOk,
certainlyWillInline, smallEnoughToInline,
callSiteInline, CallCtxt(..),
import DynFlags
import CoreSyn
import PprCore () -- Instances
-import OccurAnal
+import TcType ( tcSplitDFunTy )
+import OccurAnal ( occurAnalyseExpr )
import CoreSubst hiding( substTy )
import CoreFVs ( exprFreeVars )
+import CoreArity ( manifestArity, exprBotStrictness_maybe )
import CoreUtils
import Id
import DataCon
import PrimOp
import IdInfo
import BasicTypes ( Arity )
-import TcType ( tcSplitDFunTy )
-import Type
+import Type
import Coercion
import PrelNames
import VarEnv ( mkInScopeSet )
import Bag
import Util
+import Pair
import FastTypes
import FastString
import Outputable
+import ForeignCall
+import Data.Maybe
\end{code}
\begin{code}
mkTopUnfolding :: Bool -> CoreExpr -> Unfolding
-mkTopUnfolding is_bottoming expr
- = mkUnfolding True {- Top level -} is_bottoming expr
+mkTopUnfolding = mkUnfolding InlineRhs True {- Top level -}
mkImplicitUnfolding :: CoreExpr -> Unfolding
-- For implicit Ids, do a tiny bit of optimising first
-- top-level flag to True. It gets set more accurately by the simplifier
-- Simplify.simplUnfolding.
-mkUnfolding :: Bool -> Bool -> CoreExpr -> Unfolding
-mkUnfolding top_lvl is_bottoming expr
+mkSimpleUnfolding :: CoreExpr -> Unfolding
+mkSimpleUnfolding = mkUnfolding InlineRhs False False
+
+mkDFunUnfolding :: Type -> [DFunArg CoreExpr] -> Unfolding
+mkDFunUnfolding dfun_ty ops
+ = DFunUnfolding dfun_nargs data_con ops
+ where
+ (tvs, n_theta, cls, _) = tcSplitDFunTy dfun_ty
+ dfun_nargs = length tvs + n_theta
+ data_con = classDataCon cls
+
+mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
+mkWwInlineRule id expr arity
+ = mkCoreUnfolding (InlineWrapper id) True
+ (simpleOptExpr expr) arity
+ (UnfWhen unSaturatedOk boringCxtNotOk)
+
+mkCompulsoryUnfolding :: CoreExpr -> Unfolding
+mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded
+ = mkCoreUnfolding InlineCompulsory True
+ (simpleOptExpr expr) 0 -- Arity of unfolding doesn't matter
+ (UnfWhen unSaturatedOk boringCxtOk)
+
+mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding
+mkInlineUnfolding mb_arity expr
+ = mkCoreUnfolding InlineStable
+ True -- Note [Top-level flag on inline rules]
+ expr' arity
+ (UnfWhen unsat_ok boring_ok)
+ where
+ expr' = simpleOptExpr expr
+ (unsat_ok, arity) = case mb_arity of
+ Nothing -> (unSaturatedOk, manifestArity expr')
+ Just ar -> (needSaturated, ar)
+
+ boring_ok = inlineBoringOk expr'
+
+mkInlinableUnfolding :: CoreExpr -> Unfolding
+mkInlinableUnfolding expr
+ = mkUnfolding InlineStable True is_bot expr'
+ where
+ expr' = simpleOptExpr expr
+ is_bot = isJust (exprBotStrictness_maybe expr')
+\end{code}
+
+Internal functions
+
+\begin{code}
+mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
+ -> Arity -> UnfoldingGuidance -> Unfolding
+-- Occurrence-analyses the expression before capturing it
+mkCoreUnfolding src top_lvl expr arity guidance
= CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
- uf_src = InlineRhs,
+ uf_src = src,
+ uf_arity = arity,
+ uf_is_top = top_lvl,
+ uf_is_value = exprIsHNF expr,
+ uf_is_conlike = exprIsConLike expr,
+ uf_is_cheap = exprIsCheap expr,
+ uf_expandable = exprIsExpandable expr,
+ uf_guidance = guidance }
+
+mkUnfolding :: UnfoldingSource -> Bool -> Bool -> CoreExpr -> Unfolding
+-- Calculates unfolding guidance
+-- Occurrence-analyses the expression before capturing it
+mkUnfolding src top_lvl is_bottoming expr
+ | top_lvl && is_bottoming
+ , not (exprIsTrivial expr)
+ = NoUnfolding -- See Note [Do not inline top-level bottoming functions]
+ | otherwise
+ = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
+ uf_src = src,
uf_arity = arity,
uf_is_top = top_lvl,
uf_is_value = exprIsHNF expr,
uf_guidance = guidance }
where
is_cheap = exprIsCheap expr
- (arity, guidance) = calcUnfoldingGuidance is_cheap (top_lvl && is_bottoming)
+ (arity, guidance) = calcUnfoldingGuidance is_cheap
opt_UF_CreationThreshold expr
-- Sometimes during simplification, there's a large let-bound thing
-- which has been substituted, and so is now dead; so 'expr' contains
-- This can occasionally mean that the guidance is very pessimistic;
-- it gets fixed up next round. And it should be rare, because large
-- let-bound things that are dead are usually caught by preInlineUnconditionally
-
-mkCoreUnfolding :: Bool -> UnfoldingSource -> CoreExpr
- -> Arity -> UnfoldingGuidance -> Unfolding
--- Occurrence-analyses the expression before capturing it
-mkCoreUnfolding top_lvl src expr arity guidance
- = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
- uf_src = src,
- uf_arity = arity,
- uf_is_top = top_lvl,
- uf_is_value = exprIsHNF expr,
- uf_is_conlike = exprIsConLike expr,
- uf_is_cheap = exprIsCheap expr,
- uf_expandable = exprIsExpandable expr,
- uf_guidance = guidance }
-
-mkDFunUnfolding :: DataCon -> [Id] -> Unfolding
-mkDFunUnfolding con ops = DFunUnfolding con (map Var ops)
-
-mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
-mkWwInlineRule id expr arity
- = mkCoreUnfolding True (InlineWrapper id)
- (simpleOptExpr expr) arity
- (UnfWhen unSaturatedOk boringCxtNotOk)
-
-mkCompulsoryUnfolding :: CoreExpr -> Unfolding
-mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded
- = mkCoreUnfolding True InlineCompulsory
- expr 0 -- Arity of unfolding doesn't matter
- (UnfWhen unSaturatedOk boringCxtOk)
-
-mkInlineRule :: Bool -> CoreExpr -> Arity -> Unfolding
-mkInlineRule unsat_ok expr arity
- = mkCoreUnfolding True InlineRule -- Note [Top-level flag on inline rules]
- expr' arity
- (UnfWhen unsat_ok boring_ok)
- where
- expr' = simpleOptExpr expr
- boring_ok = case calcUnfoldingGuidance True -- Treat as cheap
- False -- But not bottoming
- (arity+1) expr' of
- (_, UnfWhen _ boring_ok) -> boring_ok
- _other -> boringCxtNotOk
- -- See Note [INLINE for small functions]
\end{code}
-
%************************************************************************
%* *
\subsection{The UnfoldingGuidance type}
%************************************************************************
\begin{code}
+inlineBoringOk :: CoreExpr -> Bool
+-- See Note [INLINE for small functions]
+-- True => the result of inlining the expression is
+-- no bigger than the expression itself
+-- eg (\x y -> f y x)
+-- This is a quick and dirty version. It doesn't attempt
+-- to deal with (\x y z -> x (y z))
+-- The really important one is (x `cast` c)
+inlineBoringOk e
+ = go 0 e
+ where
+ go :: Int -> CoreExpr -> Bool
+ go credit (Lam x e) | isId x = go (credit+1) e
+ | otherwise = go credit e
+ go credit (App f (Type {})) = go credit f
+ go credit (App f a) | credit > 0
+ , exprIsTrivial a = go (credit-1) f
+ go credit (Note _ e) = go credit e
+ go credit (Cast e _) = go credit e
+ go _ (Var {}) = boringCxtOk
+ go _ _ = boringCxtNotOk
+
calcUnfoldingGuidance
:: Bool -- True <=> the rhs is cheap, or we want to treat it
-- as cheap (INLINE things)
- -> Bool -- True <=> this is a top-level unfolding for a
- -- diverging function; don't inline this
-> Int -- Bomb out if size gets bigger than this
-> CoreExpr -- Expression to look at
-> (Arity, UnfoldingGuidance)
-calcUnfoldingGuidance expr_is_cheap top_bot bOMB_OUT_SIZE expr
+calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
= case collectBinders expr of { (bndrs, body) ->
let
val_bndrs = filter isId bndrs
| uncondInline n_val_bndrs (iBox size)
, expr_is_cheap
-> UnfWhen unSaturatedOk boringCxtOk -- Note [INLINE for small functions]
-
- | top_bot -- See Note [Do not inline top-level bottoming functions]
- -> UnfNever
-
| otherwise
-> UnfIfGoodArgs { ug_args = map (discount cased_bndrs) val_bndrs
, ug_size = iBox size
a function call to account for. Notice also that constructor applications
are very cheap, because exposing them to a caller is so valuable.
+[25/5/11] All sizes are now multiplied by 10, except for primops.
+This makes primops look cheap, and seems to be almost unversally
+beneficial. Done partly as a result of #4978.
Note [Do not inline top-level bottoming functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- See Note [INLINE for small functions]
uncondInline arity size
| arity == 0 = size == 0
- | otherwise = size <= arity + 1
+ | otherwise = size <= 10 * (arity + 1)
\end{code}
size_up (Cast e _) = size_up e
size_up (Note _ e) = size_up e
size_up (Type _) = sizeZero -- Types cost nothing
+ size_up (Coercion _) = sizeZero
size_up (Lit lit) = sizeN (litSize lit)
size_up (Var f) = size_up_call f [] -- Make sure we get constructor
-- discounts even on nullary constructors
size_up (App fun (Type _)) = size_up fun
+ size_up (App fun (Coercion _)) = size_up fun
size_up (App fun arg) = size_up arg `addSizeNSD`
size_up_app fun [arg]
- size_up (Lam b e) | isId b = lamScrutDiscount (size_up e `addSizeN` 1)
+ size_up (Lam b e) | isId b = lamScrutDiscount (size_up e `addSizeN` 10)
| otherwise = size_up e
size_up (Let (NonRec binder rhs) body)
= size_up rhs `addSizeNSD`
size_up body `addSizeN`
- (if isUnLiftedType (idType binder) then 0 else 1)
+ (if isUnLiftedType (idType binder) then 0 else 10)
-- For the allocation
-- If the binder has an unlifted type there is no allocation
size_up (Let (Rec pairs) body)
= foldr (addSizeNSD . size_up . snd)
- (size_up body `addSizeN` length pairs) -- (length pairs) for the allocation
+ (size_up body `addSizeN` (10 * length pairs)) -- (length pairs) for the allocation
pairs
size_up (Case (Var v) _ _ alts)
-- the case when we are scrutinising an argument variable
alts_size (SizeIs tot tot_disc tot_scrut) -- Size of all alternatives
(SizeIs max _ _) -- Size of biggest alternative
- = SizeIs tot (unitBag (v, iBox (_ILIT(2) +# tot -# max)) `unionBags` tot_disc) tot_scrut
+ = SizeIs tot (unitBag (v, iBox (_ILIT(20) +# tot -# max)) `unionBags` tot_disc) tot_scrut
-- If the variable is known, we produce a discount that
-- will take us back to 'max', the size of the largest alternative
-- The 1+ is a little discount for reduced allocation in the caller
alts_size tot_size _ = tot_size
- size_up (Case e _ _ alts) = size_up e `addSizeNSD`
- foldr (addAltSize . size_up_alt) sizeZero alts
- -- We don't charge for the case itself
- -- It's a strict thing, and the price of the call
- -- is paid by scrut. Also consider
- -- case f x of DEFAULT -> e
- -- This is just ';'! Don't charge for it.
- --
- -- Moreover, we charge one per alternative.
+ size_up (Case e _ _ alts) = size_up e `addSizeNSD`
+ foldr (addAltSize . size_up_alt) case_size alts
+ where
+ case_size
+ | is_inline_scrut e, not (lengthExceeds alts 1) = sizeN (-10)
+ | otherwise = sizeZero
+ -- Normally we don't charge for the case itself, but
+ -- we charge one per alternative (see size_up_alt,
+ -- below) to account for the cost of the info table
+ -- and comparisons.
+ --
+ -- However, in certain cases (see is_inline_scrut
+ -- below), no code is generated for the case unless
+ -- there are multiple alts. In these cases we
+ -- subtract one, making the first alt free.
+ -- e.g. case x# +# y# of _ -> ... should cost 1
+ -- case touch# x# of _ -> ... should cost 0
+ -- (see #4978)
+ --
+ -- I would like to not have the "not (lengthExceeds alts 1)"
+ -- condition above, but without that some programs got worse
+ -- (spectral/hartel/event and spectral/para). I don't fully
+ -- understand why. (SDM 24/5/11)
+
+ -- unboxed variables, inline primops and unsafe foreign calls
+ -- are all "inline" things:
+ is_inline_scrut (Var v) = isUnLiftedType (idType v)
+ is_inline_scrut scrut
+ | (Var f, _) <- collectArgs scrut
+ = case idDetails f of
+ FCallId fc -> not (isSafeForeignCall fc)
+ PrimOpId op -> not (primOpOutOfLine op)
+ _other -> False
+ | otherwise
+ = False
------------
-- size_up_app is used when there's ONE OR MORE value args
size_up_app (App fun arg) args
- | isTypeArg arg = size_up_app fun args
+ | isTyCoArg arg = size_up_app fun args
| otherwise = size_up arg `addSizeNSD`
size_up_app fun (arg:args)
size_up_app (Var fun) args = size_up_call fun args
size_up_call :: Id -> [CoreExpr] -> ExprSize
size_up_call fun val_args
= case idDetails fun of
- FCallId _ -> sizeN opt_UF_DearOp
+ FCallId _ -> sizeN (10 * (1 + length val_args))
DataConWorkId dc -> conSize dc (length val_args)
PrimOpId op -> primOpSize op (length val_args)
ClassOpId _ -> classOpSize top_args val_args
_ -> funSize top_args fun (length val_args)
------------
- size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 1
+ size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10
-- Don't charge for args, so that wrappers look cheap
-- (See comments about wrappers with Case)
--
-- | Finds a nominal size of a string literal.
litSize :: Literal -> Int
-- Used by CoreUnfold.sizeExpr
-litSize (MachStr str) = 1 + ((lengthFS str + 3) `div` 4)
+litSize (MachStr str) = 10 + 10 * ((lengthFS str + 3) `div` 4)
-- If size could be 0 then @f "x"@ might be too small
-- [Sept03: make literal strings a bit bigger to avoid fruitless
-- duplication of little strings]
classOpSize top_args (arg1 : other_args)
= SizeIs (iUnbox size) arg_discount (_ILIT(0))
where
- size = 2 + length other_args
+ size = 20 + (10 * length other_args)
-- If the class op is scrutinising a lambda bound dictionary then
-- give it a discount, to encourage the inlining of this function
-- The actual discount is rather arbitrarily chosen
res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount
| otherwise = 0
-- If the function is partially applied, show a result discount
-
- size | some_val_args = 1 + n_val_args
+ size | some_val_args = 10 * (1 + n_val_args)
| otherwise = 0
-- The 1+ is for the function itself
-- Add 1 for each non-trivial arg;
conSize :: DataCon -> Int -> ExprSize
conSize dc n_val_args
- | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables
- | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
- | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
- -- Treat a constructors application as size 1, regardless of how
- -- many arguments it has; we are keen to expose them
- -- (and we charge separately for their args). We can't treat
- -- them as size zero, else we find that (Just x) has size 0,
- -- which is the same as a lone variable; and hence 'v' will
- -- always be replaced by (Just x), where v is bound to Just x.
- --
- -- However, unboxed tuples count as size zero
- -- I found occasions where we had
- -- f x y z = case op# x y z of { s -> (# s, () #) }
- -- and f wasn't getting inlined
+ | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(10)) -- Like variables
+-- See Note [Unboxed tuple result discount]
+ | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox (10 * (1 + n_val_args)))
+
+-- See Note [Constructor size]
+ | otherwise = SizeIs (_ILIT(10)) emptyBag (iUnbox (10 * (10 + n_val_args)))
+ -- discont was (10 * (1 + n_val_args)), but it turns out that
+ -- adding a bigger constant here is an unambiguous win. We
+ -- REALLY like unfolding constructors that get scrutinised.
+ -- [SDM, 25/5/11]
+\end{code}
+
+Note [Constructor size]
+~~~~~~~~~~~~~~~~~~~~~~~
+Treat a constructors application as size 1, regardless of how many
+arguments it has; we are keen to expose them (and we charge separately
+for their args). We can't treat them as size zero, else we find that
+(Just x) has size 0, which is the same as a lone variable; and hence
+'v' will always be replaced by (Just x), where v is bound to Just x.
+
+However, unboxed tuples count as size zero. I found occasions where we had
+ f x y z = case op# x y z of { s -> (# s, () #) }
+and f wasn't getting inlined.
+
+Note [Unboxed tuple result discount]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+I tried giving unboxed tuples a *result discount* of zero (see the
+commented-out line). Why? When returned as a result they do not
+allocate, so maybe we don't want to charge so much for them If you
+have a non-zero discount here, we find that workers often get inlined
+back into wrappers, because it look like
+ f x = case $wf x of (# a,b #) -> (a,b)
+and we are keener because of the case. However while this change
+shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
+more. All other changes were very small. So it's not a big deal but I
+didn't adopt the idea.
+
+\begin{code}
primOpSize :: PrimOp -> Int -> ExprSize
primOpSize op n_val_args
- | not (primOpIsDupable op) = sizeN opt_UF_DearOp
- | not (primOpOutOfLine op) = sizeN 1
- -- Be very keen to inline simple primops.
- -- We give a discount of 1 for each arg so that (op# x y z) costs 2.
- -- We can't make it cost 1, else we'll inline let v = (op# x y z)
- -- at every use of v, which is excessive.
- --
- -- A good example is:
- -- let x = +# p q in C {x}
- -- Even though x get's an occurrence of 'many', its RHS looks cheap,
- -- and there's a good chance it'll get inlined back into C's RHS. Urgh!
-
- | otherwise = sizeN n_val_args
+ = if primOpOutOfLine op
+ then sizeN (op_size + n_val_args)
+ else sizeN op_size
+ where
+ op_size = primOpCodeSize op
buildSize :: ExprSize
-buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
+buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40))
-- We really want to inline applications of build
-- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
-- Indeed, we should add a result_discount becuause build is
-- The "4" is rather arbitrary.
augmentSize :: ExprSize
-augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
+augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40))
-- Ditto (augment t (\cn -> e) ys) should cost only the cost of
-- e plus ys. The -2 accounts for the \cn
\begin{code}
couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
couldBeSmallEnoughToInline threshold rhs
- = case calcUnfoldingGuidance False False threshold rhs of
- (_, UnfNever) -> False
- _ -> True
+ = case sizeExpr (iUnbox threshold) [] body of
+ TooBig -> False
+ _ -> True
+ where
+ (_, body) = collectBinders rhs
----------------
smallEnoughToInline :: Unfolding -> Bool
UnfNever -> False
UnfWhen {} -> True
UnfIfGoodArgs { ug_size = size}
- -> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
+ -> is_cheap && size - (10 * (n_vals +1)) <= opt_UF_UseThreshold
certainlyWillInline _
= False
\begin{code}
callSiteInline :: DynFlags
-> Id -- The Id
- -> Unfolding -- Its unfolding (if active)
+ -> Bool -- True <=> unfolding is active
-> Bool -- True if there are are no arguments at all (incl type args)
-> [ArgSummary] -- One for each value arg; True if it is interesting
-> CallCtxt -- True <=> continuation is interesting
-> Maybe CoreExpr -- Unfolding, if any
-
instance Outputable ArgSummary where
ppr TrivArg = ptext (sLit "TrivArg")
ppr NonTrivArg = ptext (sLit "NonTrivArg")
ppr CaseCtxt = ptext (sLit "CaseCtxt")
ppr ValAppCtxt = ptext (sLit "ValAppCtxt")
-callSiteInline dflags id unfolding lone_variable arg_infos cont_info
- = case unfolding of {
- NoUnfolding -> Nothing ;
- OtherCon _ -> Nothing ;
- DFunUnfolding {} -> Nothing ; -- Never unfold a DFun
- CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value,
- uf_is_cheap = is_cheap, uf_arity = uf_arity, uf_guidance = guidance } ->
+callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
+ = case idUnfolding id of
+ -- idUnfolding checks for loop-breakers, returning NoUnfolding
+ -- Things with an INLINE pragma may have an unfolding *and*
+ -- be a loop breaker (maybe the knot is not yet untied)
+ CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top
+ , uf_is_cheap = is_cheap, uf_arity = uf_arity
+ , uf_guidance = guidance, uf_expandable = is_exp }
+ | active_unfolding -> tryUnfolding dflags id lone_variable
+ arg_infos cont_info unf_template is_top
+ is_cheap is_exp uf_arity guidance
+ | otherwise -> Nothing
+ NoUnfolding -> Nothing
+ OtherCon {} -> Nothing
+ DFunUnfolding {} -> Nothing -- Never unfold a DFun
+
+tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt
+ -> CoreExpr -> Bool -> Bool -> Bool -> Arity -> UnfoldingGuidance
+ -> Maybe CoreExpr
+tryUnfolding dflags id lone_variable
+ arg_infos cont_info unf_template is_top
+ is_cheap is_exp uf_arity guidance
-- uf_arity will typically be equal to (idArity id),
-- but may be less for InlineRules
- let
- n_val_args = length arg_infos
- saturated = n_val_args >= uf_arity
-
- result | yes_or_no = Just unf_template
- | otherwise = Nothing
-
- interesting_args = any nonTriv arg_infos
- -- NB: (any nonTriv arg_infos) looks at the
- -- over-saturated args too which is "wrong";
- -- but if over-saturated we inline anyway.
-
- -- some_benefit is used when the RHS is small enough
- -- and the call has enough (or too many) value
- -- arguments (ie n_val_args >= arity). But there must
- -- be *something* interesting about some argument, or the
- -- result context, to make it worth inlining
- some_benefit
- | not saturated = interesting_args -- Under-saturated
- -- Note [Unsaturated applications]
- | n_val_args > uf_arity = True -- Over-saturated
- | otherwise = interesting_args -- Saturated
- || interesting_saturated_call
-
- interesting_saturated_call
- = case cont_info of
- BoringCtxt -> not is_top && uf_arity > 0 -- Note [Nested functions]
- CaseCtxt -> not (lone_variable && is_value) -- Note [Lone variables]
- ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt]
- ValAppCtxt -> True -- Note [Cast then apply]
-
- (yes_or_no, extra_doc)
- = case guidance of
- UnfNever -> (False, empty)
-
- UnfWhen unsat_ok boring_ok
- -> (enough_args && (boring_ok || some_benefit), empty )
- where -- See Note [INLINE for small functions]
- enough_args = saturated || (unsat_ok && n_val_args > 0)
-
- UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
- -> ( is_cheap && some_benefit && small_enough
- , (text "discounted size =" <+> int discounted_size) )
- where
- discounted_size = size - discount
- small_enough = discounted_size <= opt_UF_UseThreshold
- discount = computeDiscount uf_arity arg_discounts
- res_discount arg_infos cont_info
-
- in
- if (dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags) then
- pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
+ | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags
+ = pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
(vcat [text "arg infos" <+> ppr arg_infos,
text "uf arity" <+> ppr uf_arity,
text "interesting continuation" <+> ppr cont_info,
text "some_benefit" <+> ppr some_benefit,
- text "is value:" <+> ppr is_value,
+ text "is exp:" <+> ppr is_exp,
text "is cheap:" <+> ppr is_cheap,
text "guidance" <+> ppr guidance,
extra_doc,
text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
- result
- else
- result
- }
+ result
+ | otherwise = result
+
+ where
+ n_val_args = length arg_infos
+ saturated = n_val_args >= uf_arity
+
+ result | yes_or_no = Just unf_template
+ | otherwise = Nothing
+
+ interesting_args = any nonTriv arg_infos
+ -- NB: (any nonTriv arg_infos) looks at the
+ -- over-saturated args too which is "wrong";
+ -- but if over-saturated we inline anyway.
+
+ -- some_benefit is used when the RHS is small enough
+ -- and the call has enough (or too many) value
+ -- arguments (ie n_val_args >= arity). But there must
+ -- be *something* interesting about some argument, or the
+ -- result context, to make it worth inlining
+ some_benefit
+ | not saturated = interesting_args -- Under-saturated
+ -- Note [Unsaturated applications]
+ | n_val_args > uf_arity = True -- Over-saturated
+ | otherwise = interesting_args -- Saturated
+ || interesting_saturated_call
+
+ interesting_saturated_call
+ = case cont_info of
+ BoringCtxt -> not is_top && uf_arity > 0 -- Note [Nested functions]
+ CaseCtxt -> not (lone_variable && is_cheap) -- Note [Lone variables]
+ ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt]
+ ValAppCtxt -> True -- Note [Cast then apply]
+
+ (yes_or_no, extra_doc)
+ = case guidance of
+ UnfNever -> (False, empty)
+
+ UnfWhen unsat_ok boring_ok
+ -> (enough_args && (boring_ok || some_benefit), empty )
+ where -- See Note [INLINE for small functions]
+ enough_args = saturated || (unsat_ok && n_val_args > 0)
+
+ UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
+ -> ( is_cheap && some_benefit && small_enough
+ , (text "discounted size =" <+> int discounted_size) )
+ where
+ discounted_size = size - discount
+ small_enough = discounted_size <= opt_UF_UseThreshold
+ discount = computeDiscount uf_arity arg_discounts
+ res_discount arg_infos cont_info
\end{code}
Note [RHS of lets]
{- Arity: 3, HasNoCafRefs, Strictness: SLL,
Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
case @ a GHC.Classes.<= @ a $dOrd x y of wild {
- GHC.Bool.False -> y GHC.Bool.True -> x }) -}
+ GHC.Types.False -> y GHC.Types.True -> x }) -}
We *really* want to inline $dmmin, even though it has arity 3, in
order to unravel the recursion.
for the RHS of a 'let', we only profit from the inlining if we get a
CONLIKE thing (modulo lets).
-Note [Lone variables]
-~~~~~~~~~~~~~~~~~~~~~
+Note [Lone variables] See also Note [Interaction of exprIsCheap and lone variables]
+~~~~~~~~~~~~~~~~~~~~~ which appears below
The "lone-variable" case is important. I spent ages messing about
with unsatisfactory varaints, but this is nice. The idea is that if a
variable appears all alone
as scrutinee of a case CaseCtxt
as arg of a fn ArgCtxt
AND
- it is bound to a value
+ it is bound to a cheap expression
then we should not inline it (unless there is some other reason,
e.g. is is the sole occurrence). That is what is happening at
There's no advantage in inlining f here, and perhaps
a significant disadvantage. Hence some_val_args in the Stop case
+Note [Interaction of exprIsCheap and lone variables]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The lone-variable test says "don't inline if a case expression
+scrutines a lone variable whose unfolding is cheap". It's very
+important that, under these circumstances, exprIsConApp_maybe
+can spot a constructor application. So, for example, we don't
+consider
+ let x = e in (x,x)
+to be cheap, and that's good because exprIsConApp_maybe doesn't
+think that expression is a constructor application.
+
+I used to test is_value rather than is_cheap, which was utterly
+wrong, because the above expression responds True to exprIsHNF.
+
+This kind of thing can occur if you have
+
+ {-# INLINE foo #-}
+ foo = let x = e in (x,x)
+
+which Roman did.
+
\begin{code}
computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
-- *efficiency* to be gained (e.g. beta reductions, case reductions)
-- by inlining.
- = 1 -- Discount of 1 because the result replaces the call
+ = 10 -- Discount of 1 because the result replaces the call
-- so we count 1 for the function itself
- + length (take n_vals_wanted arg_infos)
+ + 10 * length (take n_vals_wanted arg_infos)
-- Discount of (un-scaled) 1 for each arg supplied,
-- because the result replaces the call
arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
mk_arg_discount _ TrivArg = 0
- mk_arg_discount _ NonTrivArg = 1
+ mk_arg_discount _ NonTrivArg = 10
mk_arg_discount discount ValueArg = discount
res_discount' = case cont_info of
BoringCtxt -> 0
CaseCtxt -> res_discount
- _other -> 4 `min` res_discount
+ _other -> 40 `min` res_discount
-- res_discount can be very large when a function returns
-- constructors; but we only want to invoke that large discount
-- when there's a case continuation.
conlike_unfolding = isConLikeUnfolding (idUnfolding v)
go (Type _) _ = TrivArg
- go (App fn (Type _)) n = go fn n
+ go (Coercion _) _ = TrivArg
+ go (App fn (Type _)) n = go fn n
+ go (App fn (Coercion _)) n = go fn n
go (App fn _) n = go fn (n+1)
go (Note _ a) n = go a n
go (Cast e _) n = go e n
-- where t1..tk are the *universally-qantified* type args of 'dc'
exprIsConApp_maybe :: IdUnfoldingFun -> CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
-exprIsConApp_maybe id_unf (Note _ expr)
+exprIsConApp_maybe id_unf (Note note expr)
+ | notSccNote note
= exprIsConApp_maybe id_unf expr
- -- We ignore all notes. For example,
+ -- We ignore all notes except SCCs. For example,
-- case _scc_ "foo" (C a b) of
-- C a b -> e
- -- should be optimised away, but it will be only if we look
- -- through the SCC note.
+ -- should not be optimised away, because we'll lose the
+ -- entry count on 'foo'; see Trac #4414
exprIsConApp_maybe id_unf (Cast expr co)
= -- Here we do the KPush reduction rule as described in the FC paper
Nothing -> Nothing ;
Just (dc, _dc_univ_args, dc_args) ->
- let (_from_ty, to_ty) = coercionKind co
+ let Pair _from_ty to_ty = coercionKind co
dc_tc = dataConTyCon dc
in
case splitTyConApp_maybe to_ty of {
dc_ex_tyvars = dataConExTyVars dc
arg_tys = dataConRepArgTys dc
- dc_eqs :: [(Type,Type)] -- All equalities from the DataCon
- dc_eqs = [(mkTyVarTy tv, ty) | (tv,ty) <- dataConEqSpec dc] ++
- [getEqPredTys eq_pred | eq_pred <- dataConEqTheta dc]
-
- (ex_args, rest1) = splitAtList dc_ex_tyvars dc_args
- (co_args, val_args) = splitAtList dc_eqs rest1
+ (ex_args, val_args) = splitAtList dc_ex_tyvars dc_args
-- Make the "theta" from Fig 3 of the paper
gammas = decomposeCo tc_arity co
- theta = zipOpenTvSubst (dc_univ_tyvars ++ dc_ex_tyvars)
- (gammas ++ stripTypeArgs ex_args)
-
- -- Cast the existential coercion arguments
- cast_co (ty1, ty2) (Type co)
- = Type $ mkSymCoercion (substTy theta ty1)
- `mkTransCoercion` co
- `mkTransCoercion` (substTy theta ty2)
- cast_co _ other_arg = pprPanic "cast_co" (ppr other_arg)
- new_co_args = zipWith cast_co dc_eqs co_args
-
+ theta = zipOpenCvSubst (dc_univ_tyvars ++ dc_ex_tyvars)
+ (gammas ++ map mkReflCo (stripTypeArgs ex_args))
+
-- Cast the value arguments (which include dictionaries)
new_val_args = zipWith cast_arg arg_tys val_args
- cast_arg arg_ty arg = mkCoerce (substTy theta arg_ty) arg
+ cast_arg arg_ty arg = mkCoerce (liftCoSubst theta arg_ty) arg
in
#ifdef DEBUG
let dump_doc = vcat [ppr dc, ppr dc_univ_tyvars, ppr dc_ex_tyvars,
ppr arg_tys, ppr dc_args, ppr _dc_univ_args,
ppr ex_args, ppr val_args]
in
- ASSERT2( coreEqType _from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
- ASSERT2( all isTypeArg (ex_args ++ co_args), dump_doc )
+ ASSERT2( eqType _from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
+ ASSERT2( all isTypeArg ex_args, dump_doc )
ASSERT2( equalLength val_args arg_tys, dump_doc )
#endif
- Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args)
+ Just (dc, to_tc_arg_tys, ex_args ++ new_val_args)
}}
exprIsConApp_maybe id_unf expr
analyse (Var fun) args
| Just con <- isDataConWorkId_maybe fun
- , is_saturated
+ , count isValArg args == idArity fun
, let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args
= Just (con, stripTypeArgs univ_ty_args, rest_args)
-- Look through dictionary functions; see Note [Unfolding DFuns]
- | DFunUnfolding con ops <- unfolding
- , is_saturated
- , let (dfun_tvs, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun)
- subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args))
- = Just (con, substTys subst dfun_res_tys,
- [mkApps op args | op <- ops])
+ | DFunUnfolding dfun_nargs con ops <- unfolding
+ , let sat = length args == dfun_nargs -- See Note [DFun arity check]
+ in if sat then True else
+ pprTrace "Unsaturated dfun" (ppr fun <+> int dfun_nargs $$ ppr args) False
+ , let (dfun_tvs, _n_theta, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun)
+ subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args))
+ mk_arg (DFunConstArg e) = e
+ mk_arg (DFunLamArg i) = args !! i
+ mk_arg (DFunPolyArg e) = mkApps e args
+ = Just (con, substTys subst dfun_res_tys, map mk_arg ops)
-- Look through unfoldings, but only cheap ones, because
-- we are effectively duplicating the unfolding
= -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
analyse rhs args
where
- is_saturated = count isValArg args == idArity fun
unfolding = id_unf fun
analyse _ _ = Nothing
-----------
beta (Lam v body) pairs (arg : args)
- | isTypeArg arg
+ | isTyCoArg arg
= beta body ((v,arg):pairs) args
beta (Lam {}) _ _ -- Un-saturated, or not a type lambda
= Nothing
beta fun pairs args
- = case analyse (substExpr subst fun) args of
- Nothing -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $
- Nothing
- Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $
- Just ans
+ = analyse (substExpr (text "subst-expr-is-con-app") subst fun) args
where
subst = mkOpenSubst (mkInScopeSet (exprFreeVars fun)) pairs
-- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args]
-
stripTypeArgs :: [CoreExpr] -> [Type]
stripTypeArgs args = ASSERT2( all isTypeArg args, ppr args )
[ty | Type ty <- args]
+ -- We really do want isTypeArg here, not isTyCoArg!
\end{code}
Note [Unfolding DFuns]
to the very same args as the dfun. It takes a little more work
to compute the type arguments to the dictionary constructor.
+Note [DFun arity check]
+~~~~~~~~~~~~~~~~~~~~~~~
+Here we check that the total number of supplied arguments (inclding
+type args) matches what the dfun is expecting. This may be *less*
+than the ordinary arity of the dfun: see Note [DFun unfoldings] in CoreSyn