InsideLam, insideLam, notInsideLam,
OneBranch, oneBranch, notOneBranch,
+ InterestingCxt,
EP(..),
| IAmDead -- Marks unused variables. Sometimes useful for
-- lambda and case-bound variables.
- | OneOcc InsideLam
-
- OneBranch
+ | OneOcc !InsideLam
+ !OneBranch
+ !InterestingCxt
| IAmALoopBreaker -- Used by the occurrence analyser to mark loop-breakers
-- in a group of recursive definitions
seqOccInfo :: OccInfo -> ()
-seqOccInfo (OneOcc in_lam once) = in_lam `seq` once `seq` ()
-seqOccInfo occ = ()
+seqOccInfo occ = occ `seq` ()
+
+-----------------
+type InterestingCxt = Bool -- True <=> Function: is applied
+ -- Data value: scrutinised by a case with
+ -- at least one non-DEFAULT branch
+-----------------
type InsideLam = Bool -- True <=> Occurs inside a non-linear lambda
-- Substituting a redex for this occurrence is
-- dangerous because it might duplicate work.
insideLam = True
notInsideLam = False
+-----------------
type OneBranch = Bool -- True <=> Occurs in only one case branch
-- so no code-duplication issue to worry about
oneBranch = True
isDeadOcc IAmDead = True
isDeadOcc other = False
-isOneOcc (OneOcc _ _) = True
-isOneOcc other = False
+isOneOcc (OneOcc _ _ _) = True
+isOneOcc other = False
isFragileOcc :: OccInfo -> Bool
-isFragileOcc (OneOcc _ _) = True
-isFragileOcc other = False
+isFragileOcc (OneOcc _ _ _) = True
+isFragileOcc other = False
\end{code}
\begin{code}
ppr NoOccInfo = empty
ppr IAmALoopBreaker = ptext SLIT("LoopBreaker")
ppr IAmDead = ptext SLIT("Dead")
- ppr (OneOcc inside_lam one_branch) | inside_lam = ptext SLIT("OnceInLam")
- | one_branch = ptext SLIT("Once")
- | otherwise = ptext SLIT("OnceEachBranch")
+ ppr (OneOcc inside_lam one_branch int_cxt)
+ = ptext SLIT("Once") <> pp_lam <> pp_br <> pp_args
+ where
+ pp_lam | inside_lam = char 'L'
+ | otherwise = empty
+ pp_br | one_branch = empty
+ | otherwise = char '*'
+ pp_args | int_cxt = char '!'
+ | otherwise = empty
instance Show OccInfo where
showsPrec p occ = showsPrecSDoc p (ppr occ)
where
-- The "unsafe" occ info is the ones that say I'm not in a lambda
-- because that might not be true for an unsaturated lambda
- is_safe_occ (OneOcc in_lam _) = in_lam
- is_safe_occ other = True
+ is_safe_occ (OneOcc in_lam _ _) = in_lam
+ is_safe_occ other = True
safe_occ = case occ of
- OneOcc _ once -> OneOcc insideLam once
- other -> occ
+ OneOcc _ once int_cxt -> OneOcc insideLam once int_cxt
+ other -> occ
is_safe_dmd Nothing = True
is_safe_dmd (Just dmd) = not (isStrictDmd dmd)
| otherwise = case occ of
IAmDead -> pprTrace "callSiteInline: dead" (ppr id) False
IAmALoopBreaker -> False
- OneOcc in_lam one_br -> (not in_lam || is_cheap) && consider_safe in_lam True one_br
- NoOccInfo -> is_cheap && consider_safe True False False
- consider_safe in_lam once once_in_one_branch
- -- consider_safe decides whether it's a good idea to inline something,
- -- given that there's no work-duplication issue (the caller checks that).
- -- once_in_one_branch = True means there's a unique textual occurrence
- | inline_call = True
+ -- Occurs once in one branch. These are deal with by
+ -- preInlineUnconditionally, so we ignore them here:
+ OneOcc _ True _ -> False
- | once_in_one_branch
- -- Be very keen to inline something if this is its unique occurrence:
- --
- -- a) Inlining gives a good chance of eliminating the original
- -- binding (and hence the allocation) for the thing.
- -- (Provided it's not a top level binding, in which case the
- -- allocation costs nothing.)
- --
- -- b) Inlining a function that is called only once exposes the
- -- body function to the call site.
- --
- -- The only time we hold back is when substituting inside a lambda;
- -- then if the context is totally uninteresting (not applied, not scrutinised)
- -- there is no point in substituting because it might just increase allocation,
- -- by allocating the function itself many times
- -- Note [Jan 2002]: this comment looks out of date. The actual code
- -- doesn't inline *ever* in an uninteresting context. Why not? I
- -- think it's just because we don't want to inline top-level constants
- -- into uninteresting contexts, lest we (for example) re-nest top-level
- -- literal lists.
- --
- -- Note: there used to be a '&& not top_level' in the guard above,
- -- but that stopped us inlining top-level functions used only once,
- -- which is stupid
- = WARN( not is_top && not in_lam, ppr id )
- -- If (not in_lam) && one_br then PreInlineUnconditionally
- -- should have caught it, shouldn't it? Unless it's a top
- -- level thing.
- notNull arg_infos || interesting_cont
+ OneOcc in_lam False _ -> (not in_lam || is_cheap) && consider_safe True
+ other -> is_cheap && consider_safe False
+
+ consider_safe once
+ -- consider_safe decides whether it's a good idea to
+ -- inline something, given that there's no
+ -- work-duplication issue (the caller checks that).
+ | inline_call = True
| otherwise
= case guidance of
where
some_benefit = or arg_infos || really_interesting_cont ||
(not is_top && (once || (n_vals_wanted > 0 && enough_args)))
- -- If it occurs more than once, there must be something interesting
- -- about some argument, or the result context, to make it worth inlining
- --
- -- If a function has a nested defn we also record some-benefit,
- -- on the grounds that we are often able to eliminate the binding,
- -- and hence the allocation, for the function altogether; this is good
- -- for join points. But this only makes sense for *functions*;
- -- inlining a constructor doesn't help allocation unless the result is
- -- scrutinised. UNLESS the constructor occurs just once, albeit possibly
- -- in multiple case branches. Then inlining it doesn't increase allocation,
- -- but it does increase the chance that the constructor won't be allocated at all
- -- in the branches that don't use it.
-
+ -- If it occurs more than once, there must be
+ -- something interesting about some argument, or the
+ -- result context, to make it worth inlining
+ --
+ -- If a function has a nested defn we also record
+ -- some-benefit, on the grounds that we are often able
+ -- to eliminate the binding, and hence the allocation,
+ -- for the function altogether; this is good for join
+ -- points. But this only makes sense for *functions*;
+ -- inlining a constructor doesn't help allocation
+ -- unless the result is scrutinised. UNLESS the
+ -- constructor occurs just once, albeit possibly in
+ -- multiple case branches. Then inlining it doesn't
+ -- increase allocation, but it does increase the
+ -- chance that the constructor won't be allocated at
+ -- all in the branches that don't use it.
+
enough_args = n_val_args >= n_vals_wanted
really_interesting_cont | n_val_args < n_vals_wanted = False -- Too few args
| n_val_args == n_vals_wanted = interesting_cont
mkIfThenElse, mkAltExpr, mkPiType, mkPiTypes,
-- Taking expressions apart
- findDefault, findAlt,
+ findDefault, findAlt, isDefaultAlt,
-- Properties of expressions
exprType, coreAltType,
LT -> deflt -- Missed it already; the alts are in increasing order
EQ -> alt
GT -> ASSERT( not (con1 == DEFAULT) ) go alts deflt
+
+isDefaultAlt :: CoreAlt -> Bool
+isDefaultAlt (DEFAULT, _, _) = True
+isDefaultAlt other = False
\end{code}
import CoreSyn
import CoreFVs ( idRuleVars )
-import CoreUtils ( exprIsTrivial )
+import CoreUtils ( exprIsTrivial, isDefaultAlt )
import Id ( isDataConWorkId, isOneShotBndr, setOneShotLambda,
idOccInfo, setIdOccInfo, isLocalId,
isExportedId, idArity, idSpecialisation,
idType, idUnique, Id
)
+import BasicTypes ( OccInfo(..), isOneOcc, InterestingCxt )
import IdInfo ( isEmptySpecInfo )
-import BasicTypes ( OccInfo(..), isOneOcc )
import VarSet
import VarEnv
occAnalBind env (Rec pairs) body_usage
= foldr (_scc_ "occAnalBind.dofinal" do_final_bind) (body_usage, []) sccs
where
- binders = map fst pairs
-
analysed_pairs :: [Details1]
analysed_pairs = [ (bndr, rhs_usage, rhs')
| (bndr, rhs) <- pairs,
-- Crude solution: use rhsCtxt for things that occur just once...
certainly_inline id = case idOccInfo id of
- OneOcc in_lam one_br -> not in_lam && one_br
- other -> False
+ OneOcc in_lam one_br _ -> not in_lam && one_br
+ other -> False
-- [March 98] A new wrinkle is that if the binder has specialisations inside
-- it then we count the specialised Ids as "extra rhs's". That way
CoreExpr)
occAnal env (Type t) = (emptyDetails, Type t)
-
-occAnal env (Var v)
- = (var_uds, Var v)
- where
- var_uds | isLocalId v = unitVarEnv v oneOcc
- | otherwise = emptyDetails
-
+occAnal env (Var v) = (mkOneOcc env v False, Var v)
-- At one stage, I gathered the idRuleVars for v here too,
-- which in a way is the right thing to do.
-- Btu that went wrong right after specialisation, when
is_one_shot b = isId b && isOneShotBndr b
occAnal env (Case scrut bndr ty alts)
- = case mapAndUnzip (occAnalAlt env bndr) alts of { (alts_usage_s, alts') ->
- case occAnal vanillaCtxt scrut of { (scrut_usage, scrut') ->
- -- No need for rhsCtxt
+ = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') ->
+ case mapAndUnzip (occAnalAlt env bndr) alts of { (alts_usage_s, alts') ->
let
alts_usage = foldr1 combineAltsUsageDetails alts_usage_s
alts_usage' = addCaseBndrUsage alts_usage
Nothing -> usage
Just occ -> extendVarEnv usage bndr (markMany occ)
+ occ_anal_scrut (Var v) (alt1 : other_alts)
+ | not (null other_alts) || not (isDefaultAlt alt1)
+ = (mkOneOcc env v True, Var v)
+ occ_anal_scrut scrut alts = occAnal vanillaCtxt scrut
+ -- No need for rhsCtxt
+
occAnal env (Let bind body)
= case occAnal env body of { (body_usage, body') ->
case occAnalBind env bind body_usage of { (final_usage, new_binds) ->
(fun_uds `combineUsageDetails` final_args_uds, mkApps (Var fun) args') }
where
fun_uniq = idUnique fun
-
- fun_uds | isLocalId fun = unitVarEnv fun oneOcc
- | otherwise = emptyDetails
-
+ fun_uds = mkOneOcc env fun (valArgCount args > 0)
args_stuff | fun_uniq == buildIdKey = appSpecial env 2 [True,True] args
| fun_uniq == augmentIdKey = appSpecial env 2 [True,True] args
| fun_uniq == foldrIdKey = appSpecial env 3 [False,True] args
%************************************************************************
\begin{code}
-oneOcc :: OccInfo
-oneOcc = OneOcc False True
+mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails
+mkOneOcc env id int_cxt
+ | isLocalId id = unitVarEnv id (OneOcc False True int_cxt)
+ | otherwise = emptyDetails
markMany, markInsideLam, markInsideSCC :: OccInfo -> OccInfo
markInsideSCC occ = markMany occ
-markInsideLam (OneOcc _ one_br) = OneOcc True one_br
-markInsideLam occ = occ
+markInsideLam (OneOcc _ one_br int_cxt) = OneOcc True one_br int_cxt
+markInsideLam occ = occ
addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
orOccInfo IAmDead info2 = info2
orOccInfo info1 IAmDead = info1
-orOccInfo (OneOcc in_lam1 one_branch1)
- (OneOcc in_lam2 one_branch2)
+orOccInfo (OneOcc in_lam1 one_branch1 int_cxt1)
+ (OneOcc in_lam2 one_branch2 int_cxt2)
= OneOcc (in_lam1 || in_lam2)
False -- False, because it occurs in both branches
+ (int_cxt1 && int_cxt2)
orOccInfo info1 info2 = NoOccInfo
\end{code}
etaExpand, exprEtaExpandArity, bindNonRec, mkCoerce2,
findDefault, exprOkForSpeculation, exprIsValue
)
-import Id ( idType, isDataConWorkId, idOccInfo, isDictId,
+import Id ( idType, isDataConWorkId, idOccInfo, isDictId, idArity,
mkSysLocal, isDeadBinder, idNewDemandInfo, isExportedId,
idUnfolding, idNewStrictness, idInlinePragma,
)
import DataCon ( dataConRepArity, dataConTyVars, dataConArgTys, isVanillaDataCon )
import Var ( tyVarKind, mkTyVar )
import VarSet
-import BasicTypes ( TopLevelFlag(..), isTopLevel, OccInfo(..), isLoopBreaker, isOneOcc,
+import BasicTypes ( TopLevelFlag(..), isTopLevel, isNotTopLevel, OccInfo(..), isLoopBreaker, isOneOcc,
Activation, isAlwaysActive, isActive )
import Util ( lengthExceeds )
import Outputable
xN = eN[xN-1]
We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc.
+This can happen with cascades of functions too:
+
+ f1 = \x1.e1
+ f2 = \xs.e2[f1]
+ f3 = \xs.e3[f3]
+ ...etc...
+
+THE MAIN INVARIANT is this:
+
+ ---- preInlineUnconditionally invariant -----
+ IF preInlineUnconditionally chooses to inline x = <rhs>
+ THEN doing the inlining should not change the occurrence
+ info for the free vars of <rhs>
+ ----------------------------------------------
+
+For example, it's tempting to look at trivial binding like
+ x = y
+and inline it unconditionally. But suppose x is used many times,
+but this is the unique occurrence of y. Then inlining x would change
+y's occurrence info, which breaks the invariant. It matters: y
+might have a BIG rhs, which will now be dup'd at every occurrenc of x.
-NB: we don't even look at the RHS to see if it's trivial
-We might have
- x = y
-where x is used many times, but this is the unique occurrence of y.
-We should NOT inline x at all its uses, because then we'd do the same
-for y -- aargh! So we must base this pre-rhs-simplification decision
-solely on x's occurrences, not on its rhs.
Evne RHSs labelled InlineMe aren't caught here, because there might be
no benefit from inlining at the call site.
phase), at which point don't.
\begin{code}
-preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> Bool
-preInlineUnconditionally env top_lvl bndr
- | isTopLevel top_lvl, SimplPhase 0 <- phase = False
--- If we don't have this test, consider
+preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
+preInlineUnconditionally env top_lvl bndr rhs
+ | not active = False
+ | opt_SimplNoPreInlining = False
+ | otherwise = case idOccInfo bndr of
+ IAmDead -> True -- Happens in ((\x.1) v)
+ OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
+ other -> False
+ where
+ phase = getMode env
+ active = case phase of
+ SimplGently -> isAlwaysActive prag
+ SimplPhase n -> isActive n prag
+ prag = idInlinePragma bndr
+
+ try_once in_lam int_cxt -- There's one textual occurrence
+ = not in_lam && (isNotTopLevel top_lvl || early_phase)
+ || (exprIsValue rhs && int_cxt)
+ -- exprIsValue => free vars of rhs are (Once in_lam) or Many,
+ -- so substituting rhs inside a lambda doesn't change the occ info
+ -- Caveat: except the fn of a PAP, but since it has arity > 0, it
+ -- must be a HNF, so it doesn't matter if we push it inside
+ -- a lambda
+ --
+ -- int_cxt The context isn't totally boring
+ -- E.g. let f = \ab.BIG in \y. map f xs
+ -- Don't want to substitute for f, because then we allocate
+ -- its closure every time the \y is called
+ -- But: let f = \ab.BIG in \y. map (f y) xs
+ -- Now we do want to substitute for f, even though it's not
+ -- saturated, because we're going to allocate a closure for
+ -- (f y) every time round the loop anyhow.
+
+ early_phase = case phase of
+ SimplPhase 0 -> False
+ other -> True
+-- If we don't have this early_phase test, consider
-- x = length [1,2,3]
-- The full laziness pass carefully floats all the cons cells to
-- top level, and preInlineUnconditionally floats them all back in.
-- top level things, but then we become more leery about inlining
-- them.
- | not active = False
- | opt_SimplNoPreInlining = False
- | otherwise = case idOccInfo bndr of
- IAmDead -> True -- Happens in ((\x.1) v)
- OneOcc in_lam once -> not in_lam && once
- -- Not inside a lambda, one occurrence ==> safe!
- other -> False
- where
- phase = getMode env
- active = case phase of
- SimplGently -> isAlwaysActive prag
- SimplPhase n -> isActive n prag
- prag = idInlinePragma bndr
\end{code}
postInlineUnconditionally
\begin{code}
postInlineUnconditionally :: SimplEnv -> OutId -> OccInfo -> OutExpr -> Bool
-postInlineUnconditionally env bndr occ_info rhs
- = exprIsTrivial rhs
- && active
- && not (isLoopBreaker occ_info)
- && not (isExportedId bndr)
- -- We used to have (isOneOcc occ_info) instead of
- -- not (isLoopBreaker occ_info) && not (isExportedId bndr)
- -- That was because a rather fragile use of rules got confused
- -- if you inlined even a binding f=g e.g. We used to have
- -- map = mapList
- -- But now a more precise use of phases has eliminated this problem,
- -- so the is_active test will do the job. I think.
- --
- -- OLD COMMENT: (delete soon)
- -- Indeed, you might suppose that
- -- there is nothing wrong with substituting for a trivial RHS, even
- -- if it occurs many times. But consider
- -- x = y
- -- h = _inline_me_ (...x...)
- -- Here we do *not* want to have x inlined, even though the RHS is
- -- trivial, becuase the contract for an INLINE pragma is "no inlining".
- -- This is important in the rules for the Prelude
+postInlineUnconditionally env bndr occ_info rhs
+ | not active = False
+ | isLoopBreaker occ_info = False
+ | isExportedId bndr = False
+ | exprIsTrivial rhs = True
+ | otherwise = False
where
active = case getMode env of
SimplGently -> isAlwaysActive prag
#endif
simplNonRecBind env bndr rhs rhs_se cont_ty thing_inside
- | preInlineUnconditionally env NotTopLevel bndr
+ = simplNonRecBind' env bndr rhs rhs_se cont_ty thing_inside
+
+simplNonRecBind' env bndr rhs rhs_se cont_ty thing_inside
+ | preInlineUnconditionally env NotTopLevel bndr rhs
= tick (PreInlineUnconditionally bndr) `thenSmpl_`
thing_inside (extendIdSubst env bndr (mkContEx rhs_se rhs))
-
- | isStrictDmd (idNewDemandInfo bndr) || isStrictType (idType bndr) -- A strict let
+ | isStrictDmd (idNewDemandInfo bndr) || isStrictType bndr_ty -- A strict let
= -- Don't use simplBinder because that doesn't keep
-- fragile occurrence info in the substitution
simplLetBndr env bndr `thenSmpl` \ (env, bndr1) ->
bndr2 = bndr1 `setIdInfo` simplIdInfo env (idInfo bndr)
env2 = modifyInScope env1 bndr2 bndr2
in
- completeNonRecX env2 True {- strict -} bndr bndr2 rhs1 thing_inside
+ if needsCaseBinding bndr_ty rhs1
+ then
+ thing_inside env2 `thenSmpl` \ (floats, body) ->
+ returnSmpl (emptyFloats env2, Case rhs1 bndr2 (exprType body)
+ [(DEFAULT, [], wrapFloats floats body)])
+ else
+ completeNonRecX env2 True {- strict -} bndr bndr2 rhs1 thing_inside
| otherwise -- Normal, lazy case
= -- Don't use simplBinder because that doesn't keep
simplLazyBind env NotTopLevel NonRecursive
bndr bndr' rhs rhs_se `thenSmpl` \ (floats, env) ->
addFloats env floats thing_inside
+
+ where
+ bndr_ty = idType bndr
\end{code}
A specialised variant of simplNonRec used when the RHS is already simplified, notably
let body' = wrapFloats floats body in
returnSmpl (emptyFloats env, Case new_rhs bndr' (exprType body') [(DEFAULT, [], body')])
- | preInlineUnconditionally env NotTopLevel bndr
+ | preInlineUnconditionally env NotTopLevel bndr new_rhs
-- This happens; for example, the case_bndr during case of
-- known constructor: case (a,b) of x { (p,q) -> ... }
-- Here x isn't mentioned in the RHS, so we don't want to
-> SimplM (FloatsWith SimplEnv)
simplRecOrTopPair env top_lvl bndr bndr' rhs
- | preInlineUnconditionally env top_lvl bndr -- Check for unconditional inline
- = tick (PreInlineUnconditionally bndr) `thenSmpl_`
+ | preInlineUnconditionally env top_lvl bndr rhs -- Check for unconditional inline
+ = tick (PreInlineUnconditionally bndr) `thenSmpl_`
returnSmpl (emptyFloats env, extendIdSubst env bndr (mkContEx env rhs))
| otherwise
-- After inling f at some of its call sites the original binding may
-- (for example) be no longer strictly demanded.
-- The solution here is a bit ad hoc...
- unfolding = mkUnfolding (isTopLevel top_lvl) new_rhs
info_w_unf = new_bndr_info `setUnfoldingInfo` unfolding
final_info | loop_breaker = new_bndr_info
| isEvaldUnfolding unfolding = zapDemandInfo info_w_unf `orElse` info_w_unf
returnSmpl (unitFloat env final_id new_rhs, env)
where
+ unfolding = mkUnfolding (isTopLevel top_lvl) new_rhs
loop_breaker = isLoopBreaker occ_info
old_info = idInfo old_bndr
occ_info = occInfo old_info