import OccurAnal
import CoreSubst hiding( substTy )
import CoreFVs ( exprFreeVars )
+import CoreArity ( manifestArity )
import CoreUtils
import Id
import DataCon
expr 0 -- Arity of unfolding doesn't matter
(UnfWhen unSaturatedOk boringCxtOk)
-mkInlineRule :: Bool -> CoreExpr -> Arity -> Unfolding
-mkInlineRule unsat_ok expr arity
+mkInlineRule :: CoreExpr -> Maybe Arity -> Unfolding
+mkInlineRule expr mb_arity
= mkCoreUnfolding True InlineRule -- Note [Top-level flag on inline rules]
expr' arity
(UnfWhen unsat_ok boring_ok)
where
expr' = simpleOptExpr expr
+ (unsat_ok, arity) = case mb_arity of
+ Nothing -> (unSaturatedOk, manifestArity expr')
+ Just ar -> (needSaturated, ar)
+
boring_ok = case calcUnfoldingGuidance True -- Treat as cheap
False -- But not bottoming
(arity+1) expr' of
= case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
TooBig -> UnfNever
SizeIs size cased_bndrs scrut_discount
- | uncondInline n_val_bndrs (iBox size) && expr_is_cheap
- -> UnfWhen needSaturated boringCxtOk
-
+ | uncondInline n_val_bndrs (iBox size)
+ , expr_is_cheap
+ -> UnfWhen unSaturatedOk boringCxtOk -- Note [INLINE for small functions]
| top_bot -- See Note [Do not inline top-level bottoming functions]
-> UnfNever
(the uncondInline stuff).
-Note [Unconditional inlining]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
-than the thing it's replacing. Notice that
+Note [INLINE for small functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider {-# INLINE f #-}
+ f x = Just x
+ g y = f y
+Then f's RHS is no larger than its LHS, so we should inline it into
+even the most boring context. In general, f the function is
+sufficiently small that its body is as small as the call itself, the
+inline unconditionally, regardless of how boring the context is.
+
+Things to note:
+
+ * We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
+ than the thing it's replacing. Notice that
(f x) --> (g 3) -- YES, unconditionally
(f x) --> x : [] -- YES, *even though* there are two
-- arguments to the cons
x --> g 3 -- NO
x --> Just v -- NO
-It's very important not to unconditionally replace a variable by
-a non-atomic term.
+ It's very important not to unconditionally replace a variable by
+ a non-atomic term.
+
+* We do this even if the thing isn't saturated, else we end up with the
+ silly situation that
+ f x y = x
+ ...map (f 3)...
+ doesn't inline. Even in a boring context, inlining without being
+ saturated will give a lambda instead of a PAP, and will be more
+ efficient at runtime.
+
+* However, when the function's arity > 0, we do insist that it
+ has at least one value argument at the call site. Otherwise we find this:
+ f = /\a \x:a. x
+ d = /\b. MkD (f b)
+ If we inline f here we get
+ d = /\b. MkD (\x:b. x)
+ and then prepareRhs floats out the argument, abstracting the type
+ variables, so we end up with the original again!
+
\begin{code}
uncondInline :: Arity -> Int -> Bool
-- Inline unconditionally if there no size increase
-- Size of call is arity (+1 for the function)
--- See Note [Unconditional inlining]
+-- See Note [INLINE for small functions]
uncondInline arity size
| arity == 0 = size == 0
| otherwise = size <= arity + 1
conSize :: DataCon -> Int -> ExprSize
conSize dc n_val_args
- | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables
+ | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables
+
+-- See Note [Constructor size]
| isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
- | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
- -- Treat a constructors application as size 1, regardless of how
- -- many arguments it has; we are keen to expose them
- -- (and we charge separately for their args). We can't treat
- -- them as size zero, else we find that (Just x) has size 0,
- -- which is the same as a lone variable; and hence 'v' will
- -- always be replaced by (Just x), where v is bound to Just x.
- --
- -- However, unboxed tuples count as size zero
- -- I found occasions where we had
- -- f x y z = case op# x y z of { s -> (# s, () #) }
- -- and f wasn't getting inlined
+-- See Note [Unboxed tuple result discount]
+-- | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (_ILIT(0))
+
+-- See Note [Constructor size]
+ | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
+\end{code}
+
+Note [Constructor size]
+~~~~~~~~~~~~~~~~~~~~~~~
+Treat a constructors application as size 1, regardless of how many
+arguments it has; we are keen to expose them (and we charge separately
+for their args). We can't treat them as size zero, else we find that
+(Just x) has size 0, which is the same as a lone variable; and hence
+'v' will always be replaced by (Just x), where v is bound to Just x.
+
+However, unboxed tuples count as size zero. I found occasions where we had
+ f x y z = case op# x y z of { s -> (# s, () #) }
+and f wasn't getting inlined.
+
+Note [Unboxed tuple result discount]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+I tried giving unboxed tuples a *result discount* of zero (see the
+commented-out line). Why? When returned as a result they do not
+allocate, so maybe we don't want to charge so much for them If you
+have a non-zero discount here, we find that workers often get inlined
+back into wrappers, because it look like
+ f x = case $wf x of (# a,b #) -> (a,b)
+and we are keener because of the case. However while this change
+shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
+more. All other changes were very small. So it's not a big deal but I
+didn't adopt the idea.
+
+\begin{code}
primOpSize :: PrimOp -> Int -> ExprSize
primOpSize op n_val_args
| not (primOpIsDupable op) = sizeN opt_UF_DearOp
\begin{code}
couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
couldBeSmallEnoughToInline threshold rhs
- = case calcUnfoldingGuidance False False threshold rhs of
- (_, UnfNever) -> False
- _ -> True
+ = case sizeExpr (iUnbox threshold) [] body of
+ TooBig -> False
+ _ -> True
+ where
+ (_, body) = collectBinders rhs
----------------
smallEnoughToInline :: Unfolding -> Bool
= case guidance of
UnfNever -> (False, empty)
- UnfWhen unsat_ok boring_ok -> ( (unsat_ok || saturated)
- && (boring_ok || some_benefit)
- , empty )
- -- For the boring_ok part see Note [INLINE for small functions]
+ UnfWhen unsat_ok boring_ok
+ -> (enough_args && (boring_ok || some_benefit), empty )
+ where -- See Note [INLINE for small functions]
+ enough_args = saturated || (unsat_ok && n_val_args > 0)
UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
-> ( is_cheap && some_benefit && small_enough
res_discount arg_infos cont_info
in
- if dopt Opt_D_dump_inlinings dflags then
+ if (dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags) then
pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
(vcat [text "arg infos" <+> ppr arg_infos,
text "uf arity" <+> ppr uf_arity,
order to unravel the recursion.
-Note [INLINE for small functions]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider {-# INLINE f #-}
- f x = Just x
- g y = f y
-Then f's RHS is no larger than its LHS, so we should inline it
-into even the most boring context. (We do so if there is no INLINE
-pragma!)
-
-
Note [Things to watch]
~~~~~~~~~~~~~~~~~~~~~~
* { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
= Nothing
beta fun pairs args
- = case analyse (substExpr subst fun) args of
+ = case analyse (substExpr (text "subst-expr-is-con-app") subst fun) args of
Nothing -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $
Nothing
Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $