import OccurAnal
import CoreSubst hiding( substTy )
import CoreFVs ( exprFreeVars )
+import CoreArity ( manifestArity )
import CoreUtils
import Id
import DataCon
expr 0 -- Arity of unfolding doesn't matter
(UnfWhen unSaturatedOk boringCxtOk)
-mkInlineRule :: Bool -> CoreExpr -> Arity -> Unfolding
-mkInlineRule unsat_ok expr arity
+mkInlineRule :: CoreExpr -> Maybe Arity -> Unfolding
+mkInlineRule expr mb_arity
= mkCoreUnfolding True InlineRule -- Note [Top-level flag on inline rules]
expr' arity
(UnfWhen unsat_ok boring_ok)
where
expr' = simpleOptExpr expr
+ (unsat_ok, arity) = case mb_arity of
+ Nothing -> (unSaturatedOk, manifestArity expr')
+ Just ar -> (needSaturated, ar)
+
boring_ok = case calcUnfoldingGuidance True -- Treat as cheap
False -- But not bottoming
(arity+1) expr' of
= case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
TooBig -> UnfNever
SizeIs size cased_bndrs scrut_discount
- | uncondInline n_val_bndrs (iBox size) && expr_is_cheap
- -> UnfWhen needSaturated boringCxtOk
-
+ | uncondInline n_val_bndrs (iBox size)
+ , expr_is_cheap
+ -> UnfWhen unSaturatedOk boringCxtOk -- Note [INLINE for small functions]
| top_bot -- See Note [Do not inline top-level bottoming functions]
-> UnfNever
(the uncondInline stuff).
-Note [Unconditional inlining]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
-than the thing it's replacing. Notice that
+Note [INLINE for small functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider {-# INLINE f #-}
+ f x = Just x
+ g y = f y
+Then f's RHS is no larger than its LHS, so we should inline it into
+even the most boring context. In general, f the function is
+sufficiently small that its body is as small as the call itself, the
+inline unconditionally, regardless of how boring the context is.
+
+Things to note:
+
+ * We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
+ than the thing it's replacing. Notice that
(f x) --> (g 3) -- YES, unconditionally
(f x) --> x : [] -- YES, *even though* there are two
-- arguments to the cons
x --> g 3 -- NO
x --> Just v -- NO
-It's very important not to unconditionally replace a variable by
-a non-atomic term.
+ It's very important not to unconditionally replace a variable by
+ a non-atomic term.
+
+* We do this even if the thing isn't saturated, else we end up with the
+ silly situation that
+ f x y = x
+ ...map (f 3)...
+ doesn't inline. Even in a boring context, inlining without being
+ saturated will give a lambda instead of a PAP, and will be more
+ efficient at runtime.
+
+* However, when the function's arity > 0, we do insist that it
+ has at least one value argument at the call site. Otherwise we find this:
+ f = /\a \x:a. x
+ d = /\b. MkD (f b)
+ If we inline f here we get
+ d = /\b. MkD (\x:b. x)
+ and then prepareRhs floats out the argument, abstracting the type
+ variables, so we end up with the original again!
+
\begin{code}
uncondInline :: Arity -> Int -> Bool
-- Inline unconditionally if there no size increase
-- Size of call is arity (+1 for the function)
--- See Note [Unconditional inlining]
+-- See Note [INLINE for small functions]
uncondInline arity size
| arity == 0 = size == 0
| otherwise = size <= arity + 1
-- discounts even on nullary constructors
size_up (App fun (Type _)) = size_up fun
- size_up (App fun arg) = size_up_app fun [arg]
- `addSize` nukeScrutDiscount (size_up arg)
+ size_up (App fun arg) = size_up arg `addSizeNSD`
+ size_up_app fun [arg]
size_up (Lam b e) | isId b = lamScrutDiscount (size_up e `addSizeN` 1)
| otherwise = size_up e
size_up (Let (NonRec binder rhs) body)
- = nukeScrutDiscount (size_up rhs) `addSize`
- size_up body `addSizeN`
+ = size_up rhs `addSizeNSD`
+ size_up body `addSizeN`
(if isUnLiftedType (idType binder) then 0 else 1)
-- For the allocation
-- If the binder has an unlifted type there is no allocation
size_up (Let (Rec pairs) body)
- = nukeScrutDiscount rhs_size `addSize`
- size_up body `addSizeN`
- length pairs -- For the allocation
- where
- rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
+ = foldr (addSizeNSD . size_up . snd)
+ (size_up body `addSizeN` length pairs) -- (length pairs) for the allocation
+ pairs
size_up (Case (Var v) _ _ alts)
| v `elem` top_args -- We are scrutinising an argument variable
- = alts_size (foldr1 addSize alt_sizes) -- The 1 is for the case itself
+ = alts_size (foldr1 addAltSize alt_sizes)
(foldr1 maxSize alt_sizes)
-- Good to inline if an arg is scrutinised, because
-- that may eliminate allocation in the caller
-- alts_size tries to compute a good discount for
-- the case when we are scrutinising an argument variable
- alts_size (SizeIs tot tot_disc _tot_scrut) -- Size of all alternatives
- (SizeIs max _max_disc max_scrut) -- Size of biggest alternative
- = SizeIs tot (unitBag (v, iBox (_ILIT(2) +# tot -# max)) `unionBags` tot_disc) max_scrut
+ alts_size (SizeIs tot tot_disc tot_scrut) -- Size of all alternatives
+ (SizeIs max _ _) -- Size of biggest alternative
+ = SizeIs tot (unitBag (v, iBox (_ILIT(2) +# tot -# max)) `unionBags` tot_disc) tot_scrut
-- If the variable is known, we produce a discount that
-- will take us back to 'max', the size of the largest alternative
-- The 1+ is a little discount for reduced allocation in the caller
alts_size tot_size _ = tot_size
- size_up (Case e _ _ alts) = foldr (addSize . size_up_alt)
- (nukeScrutDiscount (size_up e))
- alts
+ size_up (Case e _ _ alts) = size_up e `addSizeNSD`
+ foldr (addAltSize . size_up_alt) sizeZero alts
-- We don't charge for the case itself
-- It's a strict thing, and the price of the call
-- is paid by scrut. Also consider
-- size_up_app is used when there's ONE OR MORE value args
size_up_app (App fun arg) args
| isTypeArg arg = size_up_app fun args
- | otherwise = size_up_app fun (arg:args)
- `addSize` nukeScrutDiscount (size_up arg)
+ | otherwise = size_up arg `addSizeNSD`
+ size_up_app fun (arg:args)
size_up_app (Var fun) args = size_up_call fun args
size_up_app other args = size_up other `addSizeN` length args
addSizeN TooBig _ = TooBig
addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
- addSize TooBig _ = TooBig
- addSize _ TooBig = TooBig
- addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
- = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2)
+ -- addAltSize is used to add the sizes of case alternatives
+ addAltSize TooBig _ = TooBig
+ addAltSize _ TooBig = TooBig
+ addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
+ = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
+ (xs `unionBags` ys)
+ (d1 +# d2) -- Note [addAltSize result discounts]
+
+ -- This variant ignores the result discount from its LEFT argument
+ -- It's used when the second argument isn't part of the result
+ addSizeNSD TooBig _ = TooBig
+ addSizeNSD _ TooBig = TooBig
+ addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
+ = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
+ (xs `unionBags` ys)
+ d2 -- Ignore d1
\end{code}
\begin{code}
conSize :: DataCon -> Int -> ExprSize
conSize dc n_val_args
- | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables
+ | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables
+
+-- See Note [Constructor size]
| isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
- | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
- -- Treat a constructors application as size 1, regardless of how
- -- many arguments it has; we are keen to expose them
- -- (and we charge separately for their args). We can't treat
- -- them as size zero, else we find that (Just x) has size 0,
- -- which is the same as a lone variable; and hence 'v' will
- -- always be replaced by (Just x), where v is bound to Just x.
- --
- -- However, unboxed tuples count as size zero
- -- I found occasions where we had
- -- f x y z = case op# x y z of { s -> (# s, () #) }
- -- and f wasn't getting inlined
+-- See Note [Unboxed tuple result discount]
+-- | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (_ILIT(0))
+
+-- See Note [Constructor size]
+ | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
+\end{code}
+
+Note [Constructor size]
+~~~~~~~~~~~~~~~~~~~~~~~
+Treat a constructors application as size 1, regardless of how many
+arguments it has; we are keen to expose them (and we charge separately
+for their args). We can't treat them as size zero, else we find that
+(Just x) has size 0, which is the same as a lone variable; and hence
+'v' will always be replaced by (Just x), where v is bound to Just x.
+
+However, unboxed tuples count as size zero. I found occasions where we had
+ f x y z = case op# x y z of { s -> (# s, () #) }
+and f wasn't getting inlined.
+
+Note [Unboxed tuple result discount]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+I tried giving unboxed tuples a *result discount* of zero (see the
+commented-out line). Why? When returned as a result they do not
+allocate, so maybe we don't want to charge so much for them If you
+have a non-zero discount here, we find that workers often get inlined
+back into wrappers, because it look like
+ f x = case $wf x of (# a,b #) -> (a,b)
+and we are keener because of the case. However while this change
+shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
+more. All other changes were very small. So it's not a big deal but I
+didn't adopt the idea.
+
+\begin{code}
primOpSize :: PrimOp -> Int -> ExprSize
primOpSize op n_val_args
| not (primOpIsDupable op) = sizeN opt_UF_DearOp
-- Ditto (augment t (\cn -> e) ys) should cost only the cost of
-- e plus ys. The -2 accounts for the \cn
-nukeScrutDiscount :: ExprSize -> ExprSize
-nukeScrutDiscount (SizeIs n vs _) = SizeIs n vs (_ILIT(0))
-nukeScrutDiscount TooBig = TooBig
-
-- When we return a lambda, give a discount if it's used (applied)
lamScrutDiscount :: ExprSize -> ExprSize
lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount)
lamScrutDiscount TooBig = TooBig
\end{code}
+Note [addAltSize result discounts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When adding the size of alternatives, we *add* the result discounts
+too, rather than take the *maximum*. For a multi-branch case, this
+gives a discount for each branch that returns a constructor, making us
+keener to inline. I did try using 'max' instead, but it makes nofib
+'rewrite' and 'puzzle' allocate significantly more, and didn't make
+binary sizes shrink significantly either.
+
Note [Discounts and thresholds]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Constants for discounts and thesholds are defined in main/StaticFlags,
\begin{code}
couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
couldBeSmallEnoughToInline threshold rhs
- = case calcUnfoldingGuidance False False threshold rhs of
- (_, UnfNever) -> False
- _ -> True
+ = case sizeExpr (iUnbox threshold) [] body of
+ TooBig -> False
+ _ -> True
+ where
+ (_, body) = collectBinders rhs
----------------
smallEnoughToInline :: Unfolding -> Bool
= case guidance of
UnfNever -> (False, empty)
- UnfWhen unsat_ok boring_ok -> ( (unsat_ok || saturated)
- && (boring_ok || some_benefit)
- , empty )
- -- For the boring_ok part see Note [INLINE for small functions]
+ UnfWhen unsat_ok boring_ok
+ -> (enough_args && (boring_ok || some_benefit), empty )
+ where -- See Note [INLINE for small functions]
+ enough_args = saturated || (unsat_ok && n_val_args > 0)
UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
-> ( is_cheap && some_benefit && small_enough
res_discount arg_infos cont_info
in
- if dopt Opt_D_dump_inlinings dflags then
+ if (dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags) then
pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
(vcat [text "arg infos" <+> ppr arg_infos,
text "uf arity" <+> ppr uf_arity,
order to unravel the recursion.
-Note [INLINE for small functions]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider {-# INLINE f #-}
- f x = Just x
- g y = f y
-Then f's RHS is no larger than its LHS, so we should inline it
-into even the most boring context. (We do so if there is no INLINE
-pragma!)
-
-
Note [Things to watch]
~~~~~~~~~~~~~~~~~~~~~~
* { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
-- Look through unfoldings, but only cheap ones, because
-- we are effectively duplicating the unfolding
- | CoreUnfolding { uf_expandable = expand_me, uf_tmpl = rhs } <- unfolding
- , expand_me = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
- analyse rhs args
+ | Just rhs <- expandUnfolding_maybe unfolding
+ = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
+ analyse rhs args
where
is_saturated = count isValArg args == idArity fun
- unfolding = id_unf fun -- Does not look through loop breakers
- -- ToDo: we *may* look through variables that are NOINLINE
- -- in this phase, and that is really not right
+ unfolding = id_unf fun
analyse _ _ = Nothing
-----------
- in_scope = mkInScopeSet (exprFreeVars expr)
-
- -----------
beta (Lam v body) pairs (arg : args)
| isTypeArg arg
= beta body ((v,arg):pairs) args
= Nothing
beta fun pairs args
- = case analyse (substExpr subst fun) args of
+ = case analyse (substExpr (text "subst-expr-is-con-app") subst fun) args of
Nothing -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $
Nothing
Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $
Just ans
where
- subst = mkOpenSubst in_scope pairs
+ subst = mkOpenSubst (mkInScopeSet (exprFreeVars fun)) pairs
-- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args]