\begin{code}
module CoreUnfold (
- Unfolding(..), UnfoldingGuidance, -- types
+ Unfolding, UnfoldingGuidance, -- types
- noUnfolding, mkUnfolding, getUnfoldingTemplate,
- isEvaldUnfolding, hasUnfolding,
+ noUnfolding, mkUnfolding,
+ mkOtherCon, otherCons,
+ unfoldingTemplate, maybeUnfoldingTemplate,
+ isEvaldUnfolding, isCheapUnfolding,
+ hasUnfolding, hasSomeUnfolding,
couldBeSmallEnoughToInline,
certainlySmallEnoughToInline,
)
import CoreSyn
import PprCore ( pprCoreExpr )
-import CoreUtils ( whnfOrBottom )
import OccurAnal ( occurAnalyseGlobalExpr )
import BinderInfo ( )
-import CoreUtils ( coreExprType, exprIsTrivial, mkFormSummary,
- FormSummary(..) )
+import CoreUtils ( coreExprType, exprIsTrivial, exprIsValue, exprIsCheap )
import Id ( Id, idType, idUnique, isId,
getIdSpecialisation, getInlinePragma, getIdUnfolding
)
import VarSet
+import Name ( isLocallyDefined )
import Const ( Con(..), isLitLitLit, isWHNFCon )
import PrimOp ( PrimOp(..), primOpIsDupable )
import IdInfo ( ArityInfo(..), InlinePragInfo(..), OccInfo(..) )
import TyCon ( tyConFamilySize )
-import Type ( splitAlgTyConApp_maybe, splitFunTy_maybe )
+import Type ( splitAlgTyConApp_maybe, splitFunTy_maybe, isUnLiftedType )
import Const ( isNoRepLit )
import Unique ( Unique, buildIdKey, augmentIdKey, runSTRepIdKey )
import Maybes ( maybeToBool )
-- Here, f gets an OtherCon [] unfolding.
| CoreUnfolding -- An unfolding with redundant cached information
- FormSummary -- Tells whether the template is a WHNF or bottom
- UnfoldingGuidance -- Tells about the *size* of the template.
CoreExpr -- Template; binder-info is correct
+ Bool -- exprIsCheap template (cached); it won't duplicate (much) work
+ -- if you inline this in more than one place
+ Bool -- exprIsValue template (cached); it is ok to discard a `seq` on
+ -- this variable
+ UnfoldingGuidance -- Tells about the *size* of the template.
\end{code}
\begin{code}
noUnfolding = NoUnfolding
+mkOtherCon = OtherCon
mkUnfolding expr
- = let
- -- strictness mangling (depends on there being no CSE)
- ufg = calcUnfoldingGuidance opt_UF_CreationThreshold expr
- occ = occurAnalyseGlobalExpr expr
- in
- CoreUnfolding (mkFormSummary expr) ufg occ
+ = CoreUnfolding (occurAnalyseGlobalExpr expr)
+ (exprIsCheap expr)
+ (exprIsValue expr)
+ (calcUnfoldingGuidance opt_UF_CreationThreshold expr)
+
+unfoldingTemplate :: Unfolding -> CoreExpr
+unfoldingTemplate (CoreUnfolding expr _ _ _) = expr
+unfoldingTemplate other = panic "getUnfoldingTemplate"
-getUnfoldingTemplate :: Unfolding -> CoreExpr
-getUnfoldingTemplate (CoreUnfolding _ _ expr) = expr
-getUnfoldingTemplate other = panic "getUnfoldingTemplate"
+maybeUnfoldingTemplate :: Unfolding -> Maybe CoreExpr
+maybeUnfoldingTemplate (CoreUnfolding expr _ _ _) = Just expr
+maybeUnfoldingTemplate other = Nothing
+
+otherCons (OtherCon cons) = cons
+otherCons other = []
isEvaldUnfolding :: Unfolding -> Bool
-isEvaldUnfolding (OtherCon _) = True
-isEvaldUnfolding (CoreUnfolding ValueForm _ expr) = True
-isEvaldUnfolding other = False
+isEvaldUnfolding (OtherCon _) = True
+isEvaldUnfolding (CoreUnfolding _ _ is_evald _) = is_evald
+isEvaldUnfolding other = False
+
+isCheapUnfolding :: Unfolding -> Bool
+isCheapUnfolding (CoreUnfolding _ is_cheap _ _) = is_cheap
+isCheapUnfolding other = False
hasUnfolding :: Unfolding -> Bool
-hasUnfolding NoUnfolding = False
-hasUnfolding other = True
+hasUnfolding (CoreUnfolding _ _ _ _) = True
+hasUnfolding other = False
+
+hasSomeUnfolding :: Unfolding -> Bool
+hasSomeUnfolding NoUnfolding = False
+hasSomeUnfolding other = True
data UnfoldingGuidance
= UnfoldNever
discount_for b
| num_cases == 0 = 0
| is_fun_ty = num_cases * opt_UF_FunAppDiscount
- | is_data_ty = num_cases * tyConFamilySize tycon * opt_UF_ScrutConDiscount
+ | is_data_ty = num_cases * opt_UF_ScrutConDiscount
| otherwise = num_cases * opt_UF_PrimArgDiscount
where
num_cases = foldlBag (\n b' -> if b==b' then n+1 else n) 0 cased_args
size_up (Note _ body) = size_up body -- Notes cost nothing
- size_up (App fun (Type t)) = size_up fun
- size_up (App fun arg) = size_up_app fun `addSize` size_up arg
+ size_up (App fun (Type t)) = size_up fun
+ size_up (App fun arg) = size_up_app fun [arg]
size_up (Con con args) = foldr (addSize . size_up)
(size_up_con con args)
size_up (Let (NonRec binder rhs) body)
= nukeScrutDiscount (size_up rhs) `addSize`
size_up body `addSizeN`
- 1 -- For the allocation
+ (if isUnLiftedType (idType binder) then 0 else 1)
+ -- For the allocation
+ -- If the binder has an unlifted type there is no allocation
size_up (Let (Rec pairs) body)
= nukeScrutDiscount rhs_size `addSize`
size_up (Case scrut _ alts)
= nukeScrutDiscount (size_up scrut) `addSize`
arg_discount scrut `addSize`
- foldr (addSize . size_up_alt) sizeZero alts `addSizeN`
- case (splitAlgTyConApp_maybe (coreExprType scrut)) of
- Nothing -> 1
- Just (tc,_,_) -> tyConFamilySize tc
+ foldr (addSize . size_up_alt) sizeZero alts
+ `addSizeN` 1 -- charge one for the case itself.
+
+-- Just charge for the alts that exist, not the ones that might exist
+-- `addSizeN`
+-- case (splitAlgTyConApp_maybe (coreExprType scrut)) of
+-- Nothing -> 1
+-- Just (tc,_,_) -> tyConFamilySize tc
------------
+ size_up_app (App fun arg) args = size_up_app fun (arg:args)
+ size_up_app fun args = foldr (addSize . size_up) (fun_discount fun) args
+
-- A function application with at least one value argument
-- so if the function is an argument give it an arg-discount
- size_up_app (App fun arg) = size_up_app fun `addSize` size_up arg
- size_up_app fun = arg_discount fun `addSize` size_up fun
+ -- Also behave specially if the function is a build
+ fun_discount (Var fun) | idUnique fun == buildIdKey = buildSize
+ | idUnique fun == augmentIdKey = augmentSize
+ | fun `is_elem` args = scrutArg fun
+ fun_discount other = sizeZero
------------
size_up_alt (con, bndrs, rhs) = size_up rhs
op_cost | primOpIsDupable op = opt_UF_CheapOp
| otherwise = opt_UF_DearOp
- ------------
-- We want to record if we're case'ing, or applying, an argument
arg_discount (Var v) | v `is_elem` args = scrutArg v
arg_discount other = sizeZero
+ ------------
is_elem :: Id -> [Id] -> Bool
is_elem = isIn "size_up_scrut"
-- when asked about 'x' when x is bound to (C 3#).
-- This avoids gratuitous 'ticks' when x itself appears as an
-- atomic constructor argument.
+
+buildSize = SizeIs (-2#) emptyBag 4#
+ -- We really want to inline applications of build
+ -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
+ -- Indeed, we should add a result_discount becuause build is
+ -- very like a constructor. We don't bother to check that the
+ -- build is saturated (it usually is). The "-2" discounts for the \c n,
+ -- The "4" is rather arbitrary.
+
+augmentSize = SizeIs (-2#) emptyBag 4#
+ -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
+ -- e plus ys. The -2 accounts for the \cn
scrutArg v = SizeIs 0# (unitBag v) 0#
callSiteInline :: Bool -- True <=> the Id is black listed
-> Bool -- 'inline' note at call site
-> Id -- The Id
- -> [CoreExpr] -- Arguments
+ -> [Bool] -- One for each value arg; True if it is interesting
-> Bool -- True <=> continuation is interesting
-> Maybe CoreExpr -- Unfolding, if any
-callSiteInline black_listed inline_call id args interesting_cont
+callSiteInline black_listed inline_call id arg_infos interesting_cont
= case getIdUnfolding id of {
NoUnfolding -> Nothing ;
OtherCon _ -> Nothing ;
- CoreUnfolding form guidance unf_template ->
+ CoreUnfolding unf_template is_cheap _ guidance ->
let
result | yes_or_no = Just unf_template
| otherwise = Nothing
inline_prag = getInlinePragma id
- arg_infos = map interestingArg val_args
- val_args = filter isValArg args
- whnf = whnfOrBottom form
+ n_val_args = length arg_infos
yes_or_no =
case inline_prag of
IMustNotBeINLINEd -> False
IAmALoopBreaker -> False
IMustBeINLINEd -> True -- Overrides absolutely everything, including the black list
- ICanSafelyBeINLINEd in_lam one_br -> consider in_lam one_br
- NoInlinePragInfo -> consider InsideLam False
+ ICanSafelyBeINLINEd in_lam one_br -> consider in_lam True one_br
+ NoInlinePragInfo -> consider InsideLam False False
- consider in_lam one_branch
+ consider in_lam once once_in_one_branch
| black_listed = False
| inline_call = True
- | one_branch -- Be very keen to inline something if this is its unique occurrence; that
- -- gives a good chance of eliminating the original binding for the thing.
- -- The only time we hold back is when substituting inside a lambda;
- -- then if the context is totally uninteresting (not applied, not scrutinised)
- -- there is no point in substituting because it might just increase allocation.
- = case in_lam of
- NotInsideLam -> True
- InsideLam -> whnf && (not (null args) || interesting_cont)
+ | once_in_one_branch -- Be very keen to inline something if this is its unique occurrence; that
+ -- gives a good chance of eliminating the original binding for the thing.
+ -- The only time we hold back is when substituting inside a lambda;
+ -- then if the context is totally uninteresting (not applied, not scrutinised)
+ -- there is no point in substituting because it might just increase allocation.
+ = WARN( case in_lam of { NotInsideLam -> True; other -> False },
+ text "callSiteInline:oneOcc" <+> ppr id )
+ -- If it has one occurrence, not inside a lambda, PreInlineUnconditionally
+ -- should have zapped it already
+ is_cheap && (not (null arg_infos) || interesting_cont)
| otherwise -- Occurs (textually) more than once, so look at its size
= case guidance of
-- Size of call is n_vals_wanted (+1 for the function)
-> case in_lam of
NotInsideLam -> True
- InsideLam -> whnf
+ InsideLam -> is_cheap
- | not (or arg_infos || really_interesting_cont)
+ | not (or arg_infos || really_interesting_cont || once)
-- If it occurs more than once, there must be something interesting
-- about some argument, or the result, to make it worth inlining
+ -- We also drop this case if the thing occurs once, although perhaps in
+ -- several branches. In this case we are keener about inlining in the hope
+ -- that we'll be able to drop the allocation for the function altogether.
-> False
| otherwise
-> case in_lam of
NotInsideLam -> small_enough
- InsideLam -> whnf && small_enough
+ InsideLam -> is_cheap && small_enough
where
- n_args = length arg_infos
- enough_args = n_args >= n_vals_wanted
- really_interesting_cont | n_args < n_vals_wanted = False -- Too few args
- | n_args == n_vals_wanted = interesting_cont
- | otherwise = True -- Extra args
+ enough_args = n_val_args >= n_vals_wanted
+ really_interesting_cont | n_val_args < n_vals_wanted = False -- Too few args
+ | n_val_args == n_vals_wanted = interesting_cont
+ | otherwise = True -- Extra args
-- This rather elaborate defn for really_interesting_cont is important
-- Consider an I# = INLINE (\x -> I# {x})
-- The unfolding guidance deems it to have size 2, and no arguments.
-- So in an application (I# y) we must take the extra arg 'y' as
- -- evidene of an interesting context!
+ -- evidence of an interesting context!
small_enough = (size - discount) <= opt_UF_UseThreshold
discount = computeDiscount n_vals_wanted arg_discounts res_discount
text "inline prag:" <+> ppr inline_prag,
text "arg infos" <+> ppr arg_infos,
text "interesting continuation" <+> ppr interesting_cont,
- text "whnf" <+> ppr whnf,
+ text "is cheap" <+> ppr is_cheap,
text "guidance" <+> ppr guidance,
text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO",
if yes_or_no then
result
}
--- An argument is interesting if it has *some* structure
--- We are here trying to avoid unfolding a function that
--- is applied only to variables that have no unfolding
--- (i.e. they are probably lambda bound): f x y z
--- There is little point in inlining f here.
-interestingArg (Type _) = False
-interestingArg (App fn (Type _)) = interestingArg fn
-interestingArg (Var v) = hasUnfolding (getIdUnfolding v)
-interestingArg other = True
-
-
computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Bool -> Int
computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
-- We multiple the raw discounts (args_discount and result_discount)
-- inlined because of the inline phase we are in. This is the sole
-- place that the inline phase number is looked at.
--- Phase 0: used for 'no inlinings please'
+-- Phase 0: used for 'no imported inlinings please'
+-- This prevents wrappers getting inlined which in turn is bad for full laziness
blackListed rule_vars (Just 0)
- = \v -> True
+ = \v -> not (isLocallyDefined v)
-- Phase 1: don't inline any rule-y things or things with specialisations
blackListed rule_vars (Just 1)