import DynFlags
import CoreSyn
import PprCore () -- Instances
+import TcType ( tcSplitSigmaTy, tcSplitDFunHead )
import OccurAnal
import CoreSubst hiding( substTy )
import CoreFVs ( exprFreeVars )
+import CoreArity ( manifestArity )
import CoreUtils
import Id
import DataCon
%************************************************************************
\begin{code}
-mkTopUnfolding :: CoreExpr -> Unfolding
-mkTopUnfolding expr = mkUnfolding True {- Top level -} expr
+mkTopUnfolding :: Bool -> CoreExpr -> Unfolding
+mkTopUnfolding is_bottoming expr
+ = mkUnfolding True {- Top level -} is_bottoming expr
mkImplicitUnfolding :: CoreExpr -> Unfolding
-- For implicit Ids, do a tiny bit of optimising first
-mkImplicitUnfolding expr = mkTopUnfolding (simpleOptExpr expr)
+mkImplicitUnfolding expr = mkTopUnfolding False (simpleOptExpr expr)
-- Note [Top-level flag on inline rules]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- top-level flag to True. It gets set more accurately by the simplifier
-- Simplify.simplUnfolding.
-mkUnfolding :: Bool -> CoreExpr -> Unfolding
-mkUnfolding top_lvl expr
- = mkCoreUnfolding top_lvl expr arity guidance
+mkUnfolding :: Bool -> Bool -> CoreExpr -> Unfolding
+mkUnfolding top_lvl is_bottoming expr
+ = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
+ uf_src = InlineRhs,
+ uf_arity = arity,
+ uf_is_top = top_lvl,
+ uf_is_value = exprIsHNF expr,
+ uf_is_conlike = exprIsConLike expr,
+ uf_expandable = exprIsExpandable expr,
+ uf_is_cheap = is_cheap,
+ uf_guidance = guidance }
where
- (arity, guidance) = calcUnfoldingGuidance opt_UF_CreationThreshold expr
+ is_cheap = exprIsCheap expr
+ (arity, guidance) = calcUnfoldingGuidance is_cheap (top_lvl && is_bottoming)
+ opt_UF_CreationThreshold expr
-- Sometimes during simplification, there's a large let-bound thing
-- which has been substituted, and so is now dead; so 'expr' contains
-- two copies of the thing while the occurrence-analysed expression doesn't
-- it gets fixed up next round. And it should be rare, because large
-- let-bound things that are dead are usually caught by preInlineUnconditionally
-mkCoreUnfolding :: Bool -> CoreExpr -> Arity -> UnfoldingGuidance -> Unfolding
+mkCoreUnfolding :: Bool -> UnfoldingSource -> CoreExpr
+ -> Arity -> UnfoldingGuidance -> Unfolding
-- Occurrence-analyses the expression before capturing it
-mkCoreUnfolding top_lvl expr arity guidance
+mkCoreUnfolding top_lvl src expr arity guidance
= CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
+ uf_src = src,
uf_arity = arity,
uf_is_top = top_lvl,
uf_is_value = exprIsHNF expr,
uf_expandable = exprIsExpandable expr,
uf_guidance = guidance }
-mkDFunUnfolding :: DataCon -> [Id] -> Unfolding
-mkDFunUnfolding con ops = DFunUnfolding con (map Var ops)
+mkDFunUnfolding :: Type -> [CoreExpr] -> Unfolding
+mkDFunUnfolding dfun_ty ops
+ = DFunUnfolding dfun_nargs data_con ops
+ where
+ (tvs, theta, head_ty) = tcSplitSigmaTy dfun_ty
+ -- NB: tcSplitSigmaTy: do not look through a newtype
+ -- when the dictionary type is a newtype
+ (cls, _) = tcSplitDFunHead head_ty
+ dfun_nargs = length tvs + length theta
+ data_con = classDataCon cls
mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
mkWwInlineRule id expr arity
- = mkCoreUnfolding True (simpleOptExpr expr) arity
- (InlineRule { ir_sat = InlUnSat, ir_info = InlWrapper id })
+ = mkCoreUnfolding True (InlineWrapper id)
+ (simpleOptExpr expr) arity
+ (UnfWhen unSaturatedOk boringCxtNotOk)
mkCompulsoryUnfolding :: CoreExpr -> Unfolding
mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded
- = mkCoreUnfolding True expr
- 0 -- Arity of unfolding doesn't matter
- (InlineRule { ir_info = InlAlways, ir_sat = InlUnSat })
+ = mkCoreUnfolding True InlineCompulsory
+ expr 0 -- Arity of unfolding doesn't matter
+ (UnfWhen unSaturatedOk boringCxtOk)
-mkInlineRule :: InlSatFlag -> CoreExpr -> Arity -> Unfolding
-mkInlineRule sat expr arity
- = mkCoreUnfolding True -- Note [Top-level flag on inline rules]
+mkInlineRule :: CoreExpr -> Maybe Arity -> Unfolding
+mkInlineRule expr mb_arity
+ = mkCoreUnfolding True InlineRule -- Note [Top-level flag on inline rules]
expr' arity
- (InlineRule { ir_sat = sat, ir_info = info })
+ (UnfWhen unsat_ok boring_ok)
where
expr' = simpleOptExpr expr
- info = if small then InlSmall else InlVanilla
- small = case calcUnfoldingGuidance (arity+1) expr' of
- (arity_e, UnfoldIfGoodArgs { ug_size = size_e })
- -> uncondInline arity_e size_e
- _other {- actually UnfoldNever -} -> False
+ (unsat_ok, arity) = case mb_arity of
+ Nothing -> (unSaturatedOk, manifestArity expr')
+ Just ar -> (needSaturated, ar)
+
+ boring_ok = case calcUnfoldingGuidance True -- Treat as cheap
+ False -- But not bottoming
+ (arity+1) expr' of
+ (_, UnfWhen _ boring_ok) -> boring_ok
+ _other -> boringCxtNotOk
+ -- See Note [INLINE for small functions]
\end{code}
\begin{code}
calcUnfoldingGuidance
- :: Int -- bomb out if size gets bigger than this
- -> CoreExpr -- expression to look at
+ :: Bool -- True <=> the rhs is cheap, or we want to treat it
+ -- as cheap (INLINE things)
+ -> Bool -- True <=> this is a top-level unfolding for a
+ -- diverging function; don't inline this
+ -> Int -- Bomb out if size gets bigger than this
+ -> CoreExpr -- Expression to look at
-> (Arity, UnfoldingGuidance)
-calcUnfoldingGuidance bOMB_OUT_SIZE expr
- = case collectBinders expr of { (binders, body) ->
+calcUnfoldingGuidance expr_is_cheap top_bot bOMB_OUT_SIZE expr
+ = case collectBinders expr of { (bndrs, body) ->
let
- val_binders = filter isId binders
- n_val_binders = length val_binders
+ val_bndrs = filter isId bndrs
+ n_val_bndrs = length val_bndrs
+
+ guidance
+ = case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
+ TooBig -> UnfNever
+ SizeIs size cased_bndrs scrut_discount
+ | uncondInline n_val_bndrs (iBox size)
+ , expr_is_cheap
+ -> UnfWhen unSaturatedOk boringCxtOk -- Note [INLINE for small functions]
+ | top_bot -- See Note [Do not inline top-level bottoming functions]
+ -> UnfNever
+
+ | otherwise
+ -> UnfIfGoodArgs { ug_args = map (discount cased_bndrs) val_bndrs
+ , ug_size = iBox size
+ , ug_res = iBox scrut_discount }
+
+ discount cbs bndr
+ = foldlBag (\acc (b',n) -> if bndr==b' then acc+n else acc)
+ 0 cbs
in
- case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of
- TooBig -> (n_val_binders, UnfoldNever)
- SizeIs size cased_args scrut_discount
- -> (n_val_binders, UnfoldIfGoodArgs { ug_args = map discount_for val_binders
- , ug_size = iBox size
- , ug_res = iBox scrut_discount })
- where
- discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc)
- 0 cased_args
- }
+ (n_val_bndrs, guidance) }
\end{code}
Note [Computing the size of an expression]
a function call to account for. Notice also that constructor applications
are very cheap, because exposing them to a caller is so valuable.
-Note [Unconditional inlining]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
-than the thing it's replacing. Notice that
+
+Note [Do not inline top-level bottoming functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The FloatOut pass has gone to some trouble to float out calls to 'error'
+and similar friends. See Note [Bottoming floats] in SetLevels.
+Do not re-inline them! But we *do* still inline if they are very small
+(the uncondInline stuff).
+
+
+Note [INLINE for small functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider {-# INLINE f #-}
+ f x = Just x
+ g y = f y
+Then f's RHS is no larger than its LHS, so we should inline it into
+even the most boring context. In general, f the function is
+sufficiently small that its body is as small as the call itself, the
+inline unconditionally, regardless of how boring the context is.
+
+Things to note:
+
+ * We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
+ than the thing it's replacing. Notice that
(f x) --> (g 3) -- YES, unconditionally
(f x) --> x : [] -- YES, *even though* there are two
-- arguments to the cons
x --> g 3 -- NO
x --> Just v -- NO
-It's very important not to unconditionally replace a variable by
-a non-atomic term.
+ It's very important not to unconditionally replace a variable by
+ a non-atomic term.
+
+* We do this even if the thing isn't saturated, else we end up with the
+ silly situation that
+ f x y = x
+ ...map (f 3)...
+ doesn't inline. Even in a boring context, inlining without being
+ saturated will give a lambda instead of a PAP, and will be more
+ efficient at runtime.
+
+* However, when the function's arity > 0, we do insist that it
+ has at least one value argument at the call site. Otherwise we find this:
+ f = /\a \x:a. x
+ d = /\b. MkD (f b)
+ If we inline f here we get
+ d = /\b. MkD (\x:b. x)
+ and then prepareRhs floats out the argument, abstracting the type
+ variables, so we end up with the original again!
+
\begin{code}
uncondInline :: Arity -> Int -> Bool
-- Inline unconditionally if there no size increase
-- Size of call is arity (+1 for the function)
--- See Note [Unconditional inlining]
+-- See Note [INLINE for small functions]
uncondInline arity size
| arity == 0 = size == 0
| otherwise = size <= arity + 1
-- discounts even on nullary constructors
size_up (App fun (Type _)) = size_up fun
- size_up (App fun arg) = size_up_app fun [arg]
- `addSize` nukeScrutDiscount (size_up arg)
+ size_up (App fun arg) = size_up arg `addSizeNSD`
+ size_up_app fun [arg]
size_up (Lam b e) | isId b = lamScrutDiscount (size_up e `addSizeN` 1)
| otherwise = size_up e
size_up (Let (NonRec binder rhs) body)
- = nukeScrutDiscount (size_up rhs) `addSize`
- size_up body `addSizeN`
+ = size_up rhs `addSizeNSD`
+ size_up body `addSizeN`
(if isUnLiftedType (idType binder) then 0 else 1)
-- For the allocation
-- If the binder has an unlifted type there is no allocation
size_up (Let (Rec pairs) body)
- = nukeScrutDiscount rhs_size `addSize`
- size_up body `addSizeN`
- length pairs -- For the allocation
- where
- rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
+ = foldr (addSizeNSD . size_up . snd)
+ (size_up body `addSizeN` length pairs) -- (length pairs) for the allocation
+ pairs
size_up (Case (Var v) _ _ alts)
| v `elem` top_args -- We are scrutinising an argument variable
- = alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the case itself
+ = alts_size (foldr1 addAltSize alt_sizes)
(foldr1 maxSize alt_sizes)
-- Good to inline if an arg is scrutinised, because
-- that may eliminate allocation in the caller
-- alts_size tries to compute a good discount for
-- the case when we are scrutinising an argument variable
- alts_size (SizeIs tot tot_disc _tot_scrut) -- Size of all alternatives
- (SizeIs max _max_disc max_scrut) -- Size of biggest alternative
- = SizeIs tot (unitBag (v, iBox (_ILIT(1) +# tot -# max)) `unionBags` tot_disc) max_scrut
+ alts_size (SizeIs tot tot_disc tot_scrut) -- Size of all alternatives
+ (SizeIs max _ _) -- Size of biggest alternative
+ = SizeIs tot (unitBag (v, iBox (_ILIT(2) +# tot -# max)) `unionBags` tot_disc) tot_scrut
-- If the variable is known, we produce a discount that
-- will take us back to 'max', the size of the largest alternative
-- The 1+ is a little discount for reduced allocation in the caller
alts_size tot_size _ = tot_size
- size_up (Case e _ _ alts) = foldr (addSize . size_up_alt)
- (nukeScrutDiscount (size_up e))
- alts
- `addSizeN` 1 -- Add 1 for the case itself
+ size_up (Case e _ _ alts) = size_up e `addSizeNSD`
+ foldr (addAltSize . size_up_alt) sizeZero alts
-- We don't charge for the case itself
-- It's a strict thing, and the price of the call
-- is paid by scrut. Also consider
-- case f x of DEFAULT -> e
-- This is just ';'! Don't charge for it.
+ --
+ -- Moreover, we charge one per alternative.
------------
-- size_up_app is used when there's ONE OR MORE value args
size_up_app (App fun arg) args
| isTypeArg arg = size_up_app fun args
- | otherwise = size_up_app fun (arg:args)
- `addSize` nukeScrutDiscount (size_up arg)
+ | otherwise = size_up arg `addSizeNSD`
+ size_up_app fun (arg:args)
size_up_app (Var fun) args = size_up_call fun args
size_up_app other args = size_up other `addSizeN` length args
addSizeN TooBig _ = TooBig
addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
- addSize TooBig _ = TooBig
- addSize _ TooBig = TooBig
- addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
- = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2)
+ -- addAltSize is used to add the sizes of case alternatives
+ addAltSize TooBig _ = TooBig
+ addAltSize _ TooBig = TooBig
+ addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
+ = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
+ (xs `unionBags` ys)
+ (d1 +# d2) -- Note [addAltSize result discounts]
+
+ -- This variant ignores the result discount from its LEFT argument
+ -- It's used when the second argument isn't part of the result
+ addSizeNSD TooBig _ = TooBig
+ addSizeNSD _ TooBig = TooBig
+ addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
+ = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
+ (xs `unionBags` ys)
+ d2 -- Ignore d1
\end{code}
\begin{code}
conSize :: DataCon -> Int -> ExprSize
conSize dc n_val_args
- | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables
+ | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables
+
+-- See Note [Constructor size]
| isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
- | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
- -- Treat a constructors application as size 1, regardless of how
- -- many arguments it has; we are keen to expose them
- -- (and we charge separately for their args). We can't treat
- -- them as size zero, else we find that (Just x) has size 0,
- -- which is the same as a lone variable; and hence 'v' will
- -- always be replaced by (Just x), where v is bound to Just x.
- --
- -- However, unboxed tuples count as size zero
- -- I found occasions where we had
- -- f x y z = case op# x y z of { s -> (# s, () #) }
- -- and f wasn't getting inlined
+-- See Note [Unboxed tuple result discount]
+-- | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (_ILIT(0))
+
+-- See Note [Constructor size]
+ | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
+\end{code}
+
+Note [Constructor size]
+~~~~~~~~~~~~~~~~~~~~~~~
+Treat a constructors application as size 1, regardless of how many
+arguments it has; we are keen to expose them (and we charge separately
+for their args). We can't treat them as size zero, else we find that
+(Just x) has size 0, which is the same as a lone variable; and hence
+'v' will always be replaced by (Just x), where v is bound to Just x.
+
+However, unboxed tuples count as size zero. I found occasions where we had
+ f x y z = case op# x y z of { s -> (# s, () #) }
+and f wasn't getting inlined.
+
+Note [Unboxed tuple result discount]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+I tried giving unboxed tuples a *result discount* of zero (see the
+commented-out line). Why? When returned as a result they do not
+allocate, so maybe we don't want to charge so much for them If you
+have a non-zero discount here, we find that workers often get inlined
+back into wrappers, because it look like
+ f x = case $wf x of (# a,b #) -> (a,b)
+and we are keener because of the case. However while this change
+shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
+more. All other changes were very small. So it's not a big deal but I
+didn't adopt the idea.
+
+\begin{code}
primOpSize :: PrimOp -> Int -> ExprSize
primOpSize op n_val_args
| not (primOpIsDupable op) = sizeN opt_UF_DearOp
-- Ditto (augment t (\cn -> e) ys) should cost only the cost of
-- e plus ys. The -2 accounts for the \cn
-nukeScrutDiscount :: ExprSize -> ExprSize
-nukeScrutDiscount (SizeIs n vs _) = SizeIs n vs (_ILIT(0))
-nukeScrutDiscount TooBig = TooBig
-
-- When we return a lambda, give a discount if it's used (applied)
lamScrutDiscount :: ExprSize -> ExprSize
lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount)
lamScrutDiscount TooBig = TooBig
\end{code}
+Note [addAltSize result discounts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When adding the size of alternatives, we *add* the result discounts
+too, rather than take the *maximum*. For a multi-branch case, this
+gives a discount for each branch that returns a constructor, making us
+keener to inline. I did try using 'max' instead, but it makes nofib
+'rewrite' and 'puzzle' allocate significantly more, and didn't make
+binary sizes shrink significantly either.
+
Note [Discounts and thresholds]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Constants for discounts and thesholds are defined in main/StaticFlags,
maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1
| otherwise = s2
-sizeZero, sizeOne :: ExprSize
+sizeZero :: ExprSize
sizeN :: Int -> ExprSize
sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0))
-sizeOne = SizeIs (_ILIT(1)) emptyBag (_ILIT(0))
sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0))
\end{code}
-
-
%************************************************************************
%* *
\subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
\begin{code}
couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
couldBeSmallEnoughToInline threshold rhs
- = case calcUnfoldingGuidance threshold rhs of
- (_, UnfoldNever) -> False
- _ -> True
+ = case sizeExpr (iUnbox threshold) [] body of
+ TooBig -> False
+ _ -> True
+ where
+ (_, body) = collectBinders rhs
----------------
smallEnoughToInline :: Unfolding -> Bool
-smallEnoughToInline (CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_size = size}})
+smallEnoughToInline (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}})
= size <= opt_UF_UseThreshold
smallEnoughToInline _
= False
-- Sees if the unfolding is pretty certain to inline
certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
= case guidance of
- UnfoldNever -> False
- InlineRule {} -> True
- UnfoldIfGoodArgs { ug_size = size}
+ UnfNever -> False
+ UnfWhen {} -> True
+ UnfIfGoodArgs { ug_size = size}
-> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
certainlyWillInline _
\begin{code}
callSiteInline :: DynFlags
- -> Bool -- True <=> the Id can be inlined
-> Id -- The Id
+ -> Unfolding -- Its unfolding (if active)
-> Bool -- True if there are are no arguments at all (incl type args)
-> [ArgSummary] -- One for each value arg; True if it is interesting
-> CallCtxt -- True <=> continuation is interesting
ppr CaseCtxt = ptext (sLit "CaseCtxt")
ppr ValAppCtxt = ptext (sLit "ValAppCtxt")
-callSiteInline dflags active_inline id lone_variable arg_infos cont_info
- = let
- n_val_args = length arg_infos
- in
- case idUnfolding id of {
+callSiteInline dflags id unfolding lone_variable arg_infos cont_info
+ = case unfolding of {
NoUnfolding -> Nothing ;
OtherCon _ -> Nothing ;
DFunUnfolding {} -> Nothing ; -- Never unfold a DFun
- CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value,
+ CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top,
uf_is_cheap = is_cheap, uf_arity = uf_arity, uf_guidance = guidance } ->
-- uf_arity will typically be equal to (idArity id),
-- but may be less for InlineRules
let
+ n_val_args = length arg_infos
+ saturated = n_val_args >= uf_arity
+
result | yes_or_no = Just unf_template
| otherwise = Nothing
-- arguments (ie n_val_args >= arity). But there must
-- be *something* interesting about some argument, or the
-- result context, to make it worth inlining
- some_benefit = interesting_args
- || n_val_args > uf_arity -- Over-saturated
- || interesting_saturated_call -- Exactly saturated
+ some_benefit
+ | not saturated = interesting_args -- Under-saturated
+ -- Note [Unsaturated applications]
+ | n_val_args > uf_arity = True -- Over-saturated
+ | otherwise = interesting_args -- Saturated
+ || interesting_saturated_call
interesting_saturated_call
= case cont_info of
- BoringCtxt -> not is_top && uf_arity > 0 -- Note [Nested functions]
- CaseCtxt -> not (lone_variable && is_value) -- Note [Lone variables]
- ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt]
- ValAppCtxt -> True -- Note [Cast then apply]
+ BoringCtxt -> not is_top && uf_arity > 0 -- Note [Nested functions]
+ CaseCtxt -> not (lone_variable && is_cheap) -- Note [Lone variables]
+ ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt]
+ ValAppCtxt -> True -- Note [Cast then apply]
- yes_or_no
+ (yes_or_no, extra_doc)
= case guidance of
- UnfoldNever -> False
-
- InlineRule { ir_info = inl_info, ir_sat = sat }
- | InlAlways <- inl_info -> True -- No top-level binding, so inline!
- -- Ignore is_active because we want to
- -- inline even if SimplGently is on.
- | not active_inline -> False
- | n_val_args < uf_arity -> yes_unsat -- Not enough value args
- | InlSmall <- inl_info -> True -- Note [INLINE for small functions]
- | otherwise -> some_benefit -- Saturated or over-saturated
- where
- -- See Note [Inlining an InlineRule]
- yes_unsat = case sat of
- InlSat -> False
- InlUnSat -> interesting_args
-
- UnfoldIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
- | not active_inline -> False
- | not is_cheap -> False
- | n_val_args < uf_arity -> interesting_args && small_enough
- -- Note [Unsaturated applications]
- | uncondInline uf_arity size -> True
- | otherwise -> some_benefit && small_enough
+ UnfNever -> (False, empty)
+
+ UnfWhen unsat_ok boring_ok
+ -> (enough_args && (boring_ok || some_benefit), empty )
+ where -- See Note [INLINE for small functions]
+ enough_args = saturated || (unsat_ok && n_val_args > 0)
+ UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
+ -> ( is_cheap && some_benefit && small_enough
+ , (text "discounted size =" <+> int discounted_size) )
where
- small_enough = (size - discount) <= opt_UF_UseThreshold
+ discounted_size = size - discount
+ small_enough = discounted_size <= opt_UF_UseThreshold
discount = computeDiscount uf_arity arg_discounts
res_discount arg_infos cont_info
in
- if dopt Opt_D_dump_inlinings dflags then
+ if (dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags) then
pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
- (vcat [text "active:" <+> ppr active_inline,
- text "arg infos" <+> ppr arg_infos,
+ (vcat [text "arg infos" <+> ppr arg_infos,
+ text "uf arity" <+> ppr uf_arity,
text "interesting continuation" <+> ppr cont_info,
- text "is value:" <+> ppr is_value,
+ text "some_benefit" <+> ppr some_benefit,
text "is cheap:" <+> ppr is_cheap,
text "guidance" <+> ppr guidance,
+ extra_doc,
text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
result
else
order to unravel the recursion.
-Note [INLINE for small functions]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider {-# INLINE f #-}
- f x = Just x
- g y = f y
-Then f's RHS is no larger than its LHS, so we should inline it
-into even the most boring context. (We do so if there is no INLINE
-pragma!) That's the reason for the 'ug_small' flag on an InlineRule.
-
-
Note [Things to watch]
~~~~~~~~~~~~~~~~~~~~~~
* { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
Note [Inlining an InlineRule]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An InlineRules is used for
- (a) pogrammer INLINE pragmas
+ (a) programmer INLINE pragmas
(b) inlinings from worker/wrapper
For (a) the RHS may be large, and our contract is that we *only* inline
for the RHS of a 'let', we only profit from the inlining if we get a
CONLIKE thing (modulo lets).
-Note [Lone variables]
-~~~~~~~~~~~~~~~~~~~~~
+Note [Lone variables] See also Note [Interaction of exprIsCheap and lone variables]
+~~~~~~~~~~~~~~~~~~~~~ which appears below
The "lone-variable" case is important. I spent ages messing about
with unsatisfactory varaints, but this is nice. The idea is that if a
variable appears all alone
as scrutinee of a case CaseCtxt
as arg of a fn ArgCtxt
AND
- it is bound to a value
+ it is bound to a cheap expression
then we should not inline it (unless there is some other reason,
e.g. is is the sole occurrence). That is what is happening at
There's no advantage in inlining f here, and perhaps
a significant disadvantage. Hence some_val_args in the Stop case
+Note [Interaction of exprIsCheap and lone variables]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The lone-variable test says "don't inline if a case expression
+scrutines a lone variable whose unfolding is cheap". It's very
+important that, under these circumstances, exprIsConApp_maybe
+can spot a constructor application. So, for example, we don't
+consider
+ let x = e in (x,x)
+to be cheap, and that's good because exprIsConApp_maybe doesn't
+think that expression is a constructor application.
+
+I used to test is_value rather than is_cheap, which was utterly
+wrong, because the above expression responds True to exprIsHNF.
+
+This kind of thing can occur if you have
+
+ {-# INLINE foo #-}
+ foo = let x = e in (x,x)
+
+which Roman did.
+
\begin{code}
computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
-- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is
-- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@,
-- where t1..tk are the *universally-qantified* type args of 'dc'
-exprIsConApp_maybe :: CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
+exprIsConApp_maybe :: IdUnfoldingFun -> CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
-exprIsConApp_maybe (Note _ expr)
- = exprIsConApp_maybe expr
+exprIsConApp_maybe id_unf (Note _ expr)
+ = exprIsConApp_maybe id_unf expr
-- We ignore all notes. For example,
-- case _scc_ "foo" (C a b) of
-- C a b -> e
-- should be optimised away, but it will be only if we look
-- through the SCC note.
-exprIsConApp_maybe (Cast expr co)
+exprIsConApp_maybe id_unf (Cast expr co)
= -- Here we do the KPush reduction rule as described in the FC paper
-- The transformation applies iff we have
-- (C e1 ... en) `cast` co
-- The left-hand one must be a T, because exprIsConApp returned True
-- but the right-hand one might not be. (Though it usually will.)
- case exprIsConApp_maybe expr of {
+ case exprIsConApp_maybe id_unf expr of {
Nothing -> Nothing ;
Just (dc, _dc_univ_args, dc_args) ->
Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args)
}}
-exprIsConApp_maybe expr
+exprIsConApp_maybe id_unf expr
= analyse expr []
where
analyse (App fun arg) args = analyse fun (arg:args)
analyse (Var fun) args
| Just con <- isDataConWorkId_maybe fun
- , is_saturated
+ , count isValArg args == idArity fun
, let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args
= Just (con, stripTypeArgs univ_ty_args, rest_args)
-- Look through dictionary functions; see Note [Unfolding DFuns]
- | DFunUnfolding con ops <- unfolding
- , is_saturated
+ | DFunUnfolding dfun_nargs con ops <- unfolding
+ , let sat = length args == dfun_nargs -- See Note [DFun arity check]
+ in if sat then True else
+ pprTrace "Unsaturated dfun" (ppr fun <+> int dfun_nargs $$ ppr args) False
, let (dfun_tvs, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun)
subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args))
= Just (con, substTys subst dfun_res_tys,
-- Look through unfoldings, but only cheap ones, because
-- we are effectively duplicating the unfolding
- | CoreUnfolding { uf_expandable = expand_me, uf_tmpl = rhs } <- unfolding
- , expand_me = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
- analyse rhs args
+ | Just rhs <- expandUnfolding_maybe unfolding
+ = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
+ analyse rhs args
where
- is_saturated = count isValArg args == idArity fun
- unfolding = idUnfolding fun
+ unfolding = id_unf fun
analyse _ _ = Nothing
-----------
- in_scope = mkInScopeSet (exprFreeVars expr)
-
- -----------
beta (Lam v body) pairs (arg : args)
| isTypeArg arg
= beta body ((v,arg):pairs) args
= Nothing
beta fun pairs args
- = case analyse (substExpr subst fun) args of
- Nothing -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $
- Nothing
- Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $
- Just ans
+ = analyse (substExpr (text "subst-expr-is-con-app") subst fun) args
where
- subst = mkOpenSubst in_scope pairs
+ subst = mkOpenSubst (mkInScopeSet (exprFreeVars fun)) pairs
-- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args]
to the very same args as the dfun. It takes a little more work
to compute the type arguments to the dictionary constructor.
+Note [DFun arity check]
+~~~~~~~~~~~~~~~~~~~~~~~
+Here we check that the total number of supplied arguments (inclding
+type args) matches what the dfun is expecting. This may be *less*
+than the ordinary arity of the dfun: see Note [DFun unfoldings] in CoreSyn
\ No newline at end of file