2 % (c) The University of Glasgow 2006
3 % (c) The AQUA Project, Glasgow University, 1994-1998
8 Unfoldings (which can travel across module boundaries) are in Core
9 syntax (namely @CoreExpr@s).
11 The type @Unfolding@ sits ``above'' simply-Core-expressions
12 unfoldings, capturing ``higher-level'' things we know about a binding,
13 usually things that the simplifier found out (e.g., ``it's a
14 literal''). In the corner of a @CoreUnfolding@ unfolding, you will
15 find, unsurprisingly, a Core expression.
19 Unfolding, UnfoldingGuidance, -- Abstract types
21 noUnfolding, mkImplicitUnfolding,
22 mkTopUnfolding, mkUnfolding, mkCoreUnfolding,
23 mkInlineRule, mkWwInlineRule,
24 mkCompulsoryUnfolding, mkDFunUnfolding,
26 interestingArg, ArgSummary(..),
28 couldBeSmallEnoughToInline,
29 certainlyWillInline, smallEnoughToInline,
31 callSiteInline, CallCtxt(..),
37 #include "HsVersions.h"
42 import PprCore () -- Instances
44 import CoreSubst hiding( substTy )
45 import CoreFVs ( exprFreeVars )
53 import BasicTypes ( Arity )
54 import TcType ( tcSplitDFunTy )
58 import VarEnv ( mkInScopeSet )
68 %************************************************************************
70 \subsection{Making unfoldings}
72 %************************************************************************
-- | Build the unfolding for a top-level binding: this is 'mkUnfolding'
-- with its top-level flag fixed to 'True'.
mkTopUnfolding :: CoreExpr -> Unfolding
mkTopUnfolding = mkUnfolding True   -- True <=> top level
-- | Unfolding for an implicit Id.  The expression gets a tiny bit of
-- optimisation ('simpleOptExpr') first, and the result is then treated
-- as a top-level unfolding.
mkImplicitUnfolding :: CoreExpr -> Unfolding
mkImplicitUnfolding = mkTopUnfolding . simpleOptExpr
82 -- Note [Top-level flag on inline rules]
83 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
84 -- Slight hack: note that mk_inline_rules conservatively sets the
85 -- top-level flag to True. It gets set more accurately by the simplifier
86 -- Simplify.simplUnfolding.
88 mkUnfolding :: Bool -> CoreExpr -> Unfolding
89 mkUnfolding top_lvl expr
90 = mkCoreUnfolding top_lvl expr arity guidance
92 (arity, guidance) = calcUnfoldingGuidance opt_UF_CreationThreshold expr
93 -- Sometimes during simplification, there's a large let-bound thing
94 -- which has been substituted, and so is now dead; so 'expr' contains
95 -- two copies of the thing while the occurrence-analysed expression doesn't
96 -- Nevertheless, we don't occ-analyse before computing the size because the
97 -- size computation bales out after a while, whereas occurrence analysis does not.
99 -- This can occasionally mean that the guidance is very pessimistic;
100 -- it gets fixed up next round
102 mkCoreUnfolding :: Bool -> CoreExpr -> Arity -> UnfoldingGuidance -> Unfolding
103 -- Occurrence-analyses the expression before capturing it
104 mkCoreUnfolding top_lvl expr arity guidance
105 = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
108 uf_is_value = exprIsHNF expr,
109 uf_is_conlike = exprIsConLike expr,
110 uf_is_cheap = exprIsCheap expr,
111 uf_expandable = exprIsExpandable expr,
112 uf_guidance = guidance }
-- | Build a 'DFunUnfolding' from a data constructor and a list of Ids;
-- each Id is wrapped up as a 'Var' expression.
mkDFunUnfolding :: DataCon -> [Id] -> Unfolding
mkDFunUnfolding con ops = DFunUnfolding con [Var op | op <- ops]
-- | Build an inline-rule unfolding tagged 'InlWrapper'.
--
-- The expression is run through 'simpleOptExpr' first; the unfolding is
-- flagged as top level and its guidance allows unsaturated use
-- ('InlUnSat').  The given Id is recorded in the 'InlWrapper' info
-- (presumably the worker the wrapper calls -- confirm against callers).
mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
mkWwInlineRule id expr arity
  = mkCoreUnfolding True (simpleOptExpr expr) arity wrapper_rule
  where
    wrapper_rule = InlineRule { ir_info = InlWrapper id, ir_sat = InlUnSat }
-- | Unfolding for things that absolutely must be unfolded: the guidance
-- is an 'InlineRule' marked 'InlAlways'.
mkCompulsoryUnfolding :: CoreExpr -> Unfolding
mkCompulsoryUnfolding expr
  = mkCoreUnfolding True expr 0 always_inline
        -- The arity (0) of the unfolding doesn't matter here
  where
    always_inline = InlineRule { ir_sat = InlUnSat, ir_info = InlAlways }
127 mkInlineRule :: InlSatFlag -> CoreExpr -> Arity -> Unfolding
128 mkInlineRule sat expr arity
129 = mkCoreUnfolding True -- Note [Top-level flag on inline rules]
131 (InlineRule { ir_sat = sat, ir_info = info })
133 expr' = simpleOptExpr expr
134 info = if small then InlSmall else InlVanilla
135 small = case calcUnfoldingGuidance (arity+1) expr' of
136 (arity_e, UnfoldIfGoodArgs { ug_size = size_e })
137 -> uncondInline arity_e size_e
138 _other {- actually UnfoldNever -} -> False
142 %************************************************************************
144 \subsection{The UnfoldingGuidance type}
146 %************************************************************************
149 calcUnfoldingGuidance
150 :: Int -- bomb out if size gets bigger than this
151 -> CoreExpr -- expression to look at
152 -> (Arity, UnfoldingGuidance)
153 calcUnfoldingGuidance bOMB_OUT_SIZE expr
154 = case collectBinders expr of { (binders, body) ->
156 val_binders = filter isId binders
157 n_val_binders = length val_binders
159 case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of
160 TooBig -> (n_val_binders, UnfoldNever)
161 SizeIs size cased_args scrut_discount
162 -> (n_val_binders, UnfoldIfGoodArgs { ug_args = map discount_for val_binders
163 , ug_size = iBox size
164 , ug_res = iBox scrut_discount })
166 discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc)
171 Note [Computing the size of an expression]
172 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
173 The basic idea of sizeExpr is obvious enough: count nodes. But getting the
174 heuristics right has taken a long time. Here's the basic strategy:
176 * Variables, literals: 0
177 (Exception for string literals, see litSize.)
179 * Function applications (f e1 .. en): 1 + #value args
181 * Constructor applications: 1, regardless of #args
183 * Let(rec): 1 + size of components
197 Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's
198 a function call to account for. Notice also that constructor applications
199 are very cheap, because exposing them to a caller is so valuable.
201 Note [Unconditional inlining]
202 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
203 We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
204 than the thing it's replacing. Notice that
205 (f x) --> (g 3) -- YES, unconditionally
206 (f x) --> x : [] -- YES, *even though* there are two
207 -- arguments to the cons
211 It's very important not to unconditionally replace a variable by
uncondInline :: Arity -> Int -> Bool
-- True when inlining cannot increase the size of the program;
-- see Note [Unconditional inlining].
-- A call is reckoned to cost its arity, plus 1 for the function itself.
uncondInline arity size
  = if arity == 0
      then size == 0            -- Nothing is applied: only a zero-size body qualifies
      else size <= arity + 1    -- Body is no bigger than the call it replaces
226 sizeExpr :: FastInt -- Bomb out if it gets bigger than this
227 -> [Id] -- Arguments; we're interested in which of these
232 -- Note [Computing the size of an expression]
234 sizeExpr bOMB_OUT_SIZE top_args expr
237 size_up (Cast e _) = size_up e
238 size_up (Note _ e) = size_up e
239 size_up (Type _) = sizeZero -- Types cost nothing
240 size_up (Lit lit) = sizeN (litSize lit)
241 size_up (Var f) = size_up_call f [] -- Make sure we get constructor
242 -- discounts even on nullary constructors
244 size_up (App fun (Type _)) = size_up fun
245 size_up (App fun arg) = size_up_app fun [arg]
246 `addSize` nukeScrutDiscount (size_up arg)
248 size_up (Lam b e) | isId b = lamScrutDiscount (size_up e `addSizeN` 1)
249 | otherwise = size_up e
251 size_up (Let (NonRec binder rhs) body)
252 = nukeScrutDiscount (size_up rhs) `addSize`
253 size_up body `addSizeN`
254 (if isUnLiftedType (idType binder) then 0 else 1)
255 -- For the allocation
256 -- If the binder has an unlifted type there is no allocation
258 size_up (Let (Rec pairs) body)
259 = nukeScrutDiscount rhs_size `addSize`
260 size_up body `addSizeN`
261 length pairs -- For the allocation
263 rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
265 size_up (Case (Var v) _ _ alts)
266 | v `elem` top_args -- We are scrutinising an argument variable
267 = alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the case itself
268 (foldr1 maxSize alt_sizes)
269 -- Good to inline if an arg is scrutinised, because
270 -- that may eliminate allocation in the caller
271 -- And it eliminates the case itself
273 alt_sizes = map size_up_alt alts
275 -- alts_size tries to compute a good discount for
276 -- the case when we are scrutinising an argument variable
277 alts_size (SizeIs tot tot_disc _tot_scrut) -- Size of all alternatives
278 (SizeIs max _max_disc max_scrut) -- Size of biggest alternative
279 = SizeIs tot (unitBag (v, iBox (_ILIT(1) +# tot -# max)) `unionBags` tot_disc) max_scrut
280 -- If the variable is known, we produce a discount that
281 -- will take us back to 'max', the size of the largest alternative
282 -- The 1+ is a little discount for reduced allocation in the caller
284 -- Notice though, that we return tot_disc, the total discount from
285 -- all branches. I think that's right.
287 alts_size tot_size _ = tot_size
289 size_up (Case e _ _ alts) = foldr (addSize . size_up_alt)
290 (nukeScrutDiscount (size_up e))
292 `addSizeN` 1 -- Add 1 for the case itself
293 -- We don't charge for the case itself
294 -- It's a strict thing, and the price of the call
295 -- is paid by scrut. Also consider
296 -- case f x of DEFAULT -> e
297 -- This is just ';'! Don't charge for it.
300 -- size_up_app is used when there's ONE OR MORE value args
301 size_up_app (App fun arg) args
302 | isTypeArg arg = size_up_app fun args
303 | otherwise = size_up_app fun (arg:args)
304 `addSize` nukeScrutDiscount (size_up arg)
305 size_up_app (Var fun) args = size_up_call fun args
306 size_up_app other args = size_up other `addSizeN` length args
309 size_up_call :: Id -> [CoreExpr] -> ExprSize
310 size_up_call fun val_args
311 = case idDetails fun of
312 FCallId _ -> sizeN opt_UF_DearOp
313 DataConWorkId dc -> conSize dc (length val_args)
314 PrimOpId op -> primOpSize op (length val_args)
315 ClassOpId _ -> classOpSize top_args val_args
316 _ -> funSize top_args fun (length val_args)
319 size_up_alt (_con, _bndrs, rhs) = size_up rhs
320 -- Don't charge for args, so that wrappers look cheap
321 -- (See comments about wrappers with Case)
324 -- These addSize things have to be here because
325 -- I don't want to give them bOMB_OUT_SIZE as an argument
326 addSizeN TooBig _ = TooBig
327 addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
329 addSize TooBig _ = TooBig
330 addSize _ TooBig = TooBig
331 addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
332 = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2)
-- | Finds a nominal size of a string literal.
litSize :: Literal -> Int
-- Used by CoreUnfold.sizeExpr
litSize lit
  = case lit of
      -- Strings are never size 0, otherwise @f "x"@ might be too small.
      -- [Sept03: literal strings were made a bit bigger to avoid
      --  fruitless duplication of little strings]
      MachStr str -> 1 + ((lengthFS str + 3) `div` 4)

      -- Every other literal costs 0, which must match the size of
      -- nullary constructors.  Key point: if x |-> 4, then x must
      -- inline unconditionally (eg via case binding)
      _other -> 0
347 classOpSize :: [Id] -> [CoreExpr] -> ExprSize
348 -- See Note [Conlike is interesting]
351 classOpSize top_args (arg1 : other_args)
352 = SizeIs (iUnbox size) arg_discount (_ILIT(0))
354 size = 2 + length other_args
355 -- If the class op is scrutinising a lambda bound dictionary then
356 -- give it a discount, to encourage the inlining of this function
357 -- The actual discount is rather arbitrarily chosen
358 arg_discount = case arg1 of
359 Var dict | dict `elem` top_args
360 -> unitBag (dict, opt_UF_DictDiscount)
363 funSize :: [Id] -> Id -> Int -> ExprSize
364 -- Size for functions that are not constructors or primops
365 -- Note [Function applications]
366 funSize top_args fun n_val_args
367 | fun `hasKey` buildIdKey = buildSize
368 | fun `hasKey` augmentIdKey = augmentSize
369 | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount)
371 some_val_args = n_val_args > 0
373 arg_discount | some_val_args && fun `elem` top_args
374 = unitBag (fun, opt_UF_FunAppDiscount)
375 | otherwise = emptyBag
376 -- If the function is an argument and is applied
377 -- to some values, give it an arg-discount
379 res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount
381 -- If the function is partially applied, show a result discount
383 size | some_val_args = 1 + n_val_args
385 -- The 1+ is for the function itself
386 -- Add 1 for each non-trivial arg;
387 -- the allocation cost, as in let(rec)
conSize :: DataCon -> Int -> ExprSize
-- Size of a data constructor applied to n_val_args value arguments.
--
-- A constructor application has size 1 however many arguments it has,
-- because we are keen to expose them (the arguments are charged for
-- separately).  It cannot be size 0: then (Just x) would have the same
-- size as a lone variable, and 'v' would always be replaced by (Just x)
-- wherever v is bound to Just x.
--
-- Unboxed tuples, however, do count as size zero.  There were occasions
-- where we had
--      f x y z = case op# x y z of { s -> (# s, () #) }
-- and f wasn't getting inlined.
conSize dc n_val_args
  = case n_val_args of
      0 -> SizeIs (_ILIT(0)) emptyBag (_ILIT(1))      -- Nullary constructor
      _ -> if isUnboxedTupleCon dc
             then SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
             else SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
primOpSize :: PrimOp -> Int -> ExprSize
-- Size charged for a primop applied to n_val_args value arguments:
--   * not dupable      -> opt_UF_DearOp
--   * not out-of-line  -> 1 (be very keen to inline simple primops)
--   * otherwise        -> one per value argument
--
-- NOTE(review): earlier commentary on this function talked about a
-- per-argument discount making (op# x y z) cost 2, so that in
--      let x = +# p q in C {x}
-- x's cheap-looking RHS is not inlined at every use.  The code charges
-- a flat 1 for such primops -- confirm which is intended.
primOpSize op n_val_args
  = sizeN cost
  where
    cost | not (primOpIsDupable op) = opt_UF_DearOp
         | not (primOpOutOfLine op) = 1
         | otherwise                = n_val_args
buildSize :: ExprSize
-- Size used for applications of 'build'.
-- We really want to inline these: build t (\cn -> e) should cost only
-- the cost of e (because build will be inlined later), so the size
-- component is 0.  A result discount is added as well, because build is
-- very like a constructor; its value, 4, is rather arbitrary.  We don't
-- bother to check that the build is saturated (it usually is).
-- NOTE(review): older commentary mentioned a "-2" discount for the \c n
-- binders; no such term appears in the value below.
buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
augmentSize :: ExprSize
-- Size used for applications of 'augment'.
-- (augment t (\cn -> e) ys) should cost only the cost of e plus ys, so
-- the size component is 0; the result discount is 4.
-- NOTE(review): older commentary mentioned a "-2" accounting for the
-- \cn binders; no such term appears in the value below.
augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
-- Reset the scrutinee (result) discount of a size to zero, keeping the
-- size and the per-argument discounts; TooBig stays TooBig.
nukeScrutDiscount :: ExprSize -> ExprSize
nukeScrutDiscount sz
  = case sz of
      TooBig        -> TooBig
      SizeIs n vs _ -> SizeIs n vs (_ILIT(0))
-- When we return a lambda, give a discount if it's used (applied):
-- the result discount is replaced by opt_UF_FunAppDiscount.
lamScrutDiscount :: ExprSize -> ExprSize
lamScrutDiscount sz
  = case sz of
      TooBig        -> TooBig
      SizeIs n vs _ -> SizeIs n vs (iUnbox opt_UF_FunAppDiscount)
448 Note [Discounts and thresholds]
449 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
450 Constants for discounts and thresholds are defined in main/StaticFlags,
451 all of form opt_UF_xxxx. They are:
453 opt_UF_CreationThreshold (45)
454 At a definition site, if the unfolding is bigger than this, we
455 may discard it altogether
457 opt_UF_UseThreshold (6)
458 At a call site, if the unfolding, less discounts, is smaller than
459 this, then it's small enough to inline
461 opt_UF_KeenessFactor (1.5)
462 Factor by which the discounts are multiplied before
463 subtracting from size
465 opt_UF_DictDiscount (1)
466 The discount for each occurrence of a dictionary argument
467 as an argument of a class method. Should be pretty small
468 else big functions may get inlined
470 opt_UF_FunAppDiscount (6)
471 Discount for a function argument that is applied. Quite
472 large, because if we inline we avoid the higher-order call.
475 The size of a foreign call or not-dupable PrimOp
478 Note [Function applications]
479 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
480 In a function application (f a b)
482 - If 'f' is an argument to the function being analysed,
483 and there's at least one value arg, record a FunAppDiscount for f
485 - If the application is a PAP (arity > 2 in this example)
486 record a *result* discount (because inlining
487 with "extra" args in the call may mean that we now
488 get a saturated application)
490 Code for manipulating sizes
-- The outcome of sizing an expression: either it is too big to be
-- worth measuring further, or we have a size plus discounts.
data ExprSize
  = TooBig
  | SizeIs
      FastInt           -- Size found
      (Bag (Id,Int))    -- Arguments cased herein, and discount for each such
      FastInt           -- Size to subtract if result is scrutinised
                        -- by a case expression
-- Prints TooBig by name; otherwise prints the size and the result
-- discount in brackets (the per-argument discounts are not shown).
instance Outputable ExprSize where
  ppr sz
    = case sz of
        TooBig       -> ptext (sLit "TooBig")
        SizeIs a _ c -> brackets (int (iBox a) <+> int (iBox c))
-- Build a SizeIs, baling out with TooBig once the limit is exceeded.
-- The discount is subtracted before deciding whether to bale out, e.g.
-- we want to inline a large constructor application into a selector:
--      tup = (a_1, ..., a_99)
--      x   = case tup of ...
mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
mkSizeIs max n xs d
  = if (n -# d) ># max
      then TooBig
      else SizeIs n xs d
512 maxSize :: ExprSize -> ExprSize -> ExprSize
513 maxSize TooBig _ = TooBig
514 maxSize _ TooBig = TooBig
515 maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1
-- Plain sizes: no argument discounts and no result discount.
sizeZero, sizeOne :: ExprSize
sizeN :: Int -> ExprSize

sizeN n  = SizeIs (iUnbox n) emptyBag (_ILIT(0))
sizeZero = sizeN 0
sizeOne  = sizeN 1
529 %************************************************************************
531 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
533 %************************************************************************
535 We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
536 we ``couldn't possibly use'' on the other side. Can be overridden w/
537 flaggery. Just the same as smallEnoughToInline, except that it has no
541 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
542 couldBeSmallEnoughToInline threshold rhs
543 = case calcUnfoldingGuidance threshold rhs of
544 (_, UnfoldNever) -> False
548 smallEnoughToInline :: Unfolding -> Bool
549 smallEnoughToInline (CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_size = size}})
550 = size <= opt_UF_UseThreshold
551 smallEnoughToInline _
555 certainlyWillInline :: Unfolding -> Bool
556 -- Sees if the unfolding is pretty certain to inline
557 certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
560 InlineRule {} -> True
561 UnfoldIfGoodArgs { ug_size = size}
562 -> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
564 certainlyWillInline _
568 %************************************************************************
570 \subsection{callSiteInline}
572 %************************************************************************
574 This is the key function. It decides whether to inline a variable at a call site
576 callSiteInline is used at call sites, so it is a bit more generous.
577 It's a very important function that embodies lots of heuristics.
578 A non-WHNF can be inlined if it doesn't occur inside a lambda,
579 and occurs exactly once or
580 occurs once in each branch of a case and is small
582 If the thing is in WHNF, there's no danger of duplicating work,
583 so we can inline if it occurs once, or is small
585 NOTE: we don't want to inline top-level functions that always diverge.
586 It just makes the code bigger. It turns out that the convenient way to prevent
587 them inlining is to give them a NOINLINE pragma, which we do in
588 StrictAnal.addStrictnessInfoToTopId
591 callSiteInline :: DynFlags
592 -> Bool -- True <=> the Id can be inlined
594 -> Bool -- True if there are no arguments at all (incl type args)
595 -> [ArgSummary] -- One for each value arg; True if it is interesting
596 -> CallCtxt -- True <=> continuation is interesting
597 -> Maybe CoreExpr -- Unfolding, if any
-- Each summary prints as its constructor name.
instance Outputable ArgSummary where
  ppr summary
    = case summary of
        TrivArg    -> ptext (sLit "TrivArg")
        NonTrivArg -> ptext (sLit "NonTrivArg")
        ValueArg   -> ptext (sLit "ValueArg")
605 data CallCtxt = BoringCtxt
607 | ArgCtxt -- We are somewhere in the argument of a function
608 Bool -- True <=> we're somewhere in the RHS of function with rules
609 -- False <=> we *are* the argument of a function with non-zero
612 -- we *are* the RHS of a let Note [RHS of lets]
613 -- In both cases, be a little keener to inline
615 | ValAppCtxt -- We're applied to at least one value arg
616 -- This arises when we have ((f x |> co) y)
617 -- Then the (f x) has argument 'x' but in a ValAppCtxt
619 | CaseCtxt -- We're the scrutinee of a case
620 -- that decomposes its scrutinee
-- Each context prints as its constructor name; ArgCtxt also shows its flag.
instance Outputable CallCtxt where
  ppr ctxt
    = case ctxt of
        BoringCtxt    -> ptext (sLit "BoringCtxt")
        ArgCtxt rules -> ptext (sLit "ArgCtxt") <+> ppr rules
        CaseCtxt      -> ptext (sLit "CaseCtxt")
        ValAppCtxt    -> ptext (sLit "ValAppCtxt")
628 callSiteInline dflags active_inline id lone_variable arg_infos cont_info
630 n_val_args = length arg_infos
632 case idUnfolding id of {
633 NoUnfolding -> Nothing ;
634 OtherCon _ -> Nothing ;
635 DFunUnfolding {} -> Nothing ; -- Never unfold a DFun
636 CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value,
637 uf_is_cheap = is_cheap, uf_arity = uf_arity, uf_guidance = guidance } ->
638 -- uf_arity will typically be equal to (idArity id),
639 -- but may be less for InlineRules
641 result | yes_or_no = Just unf_template
642 | otherwise = Nothing
644 interesting_args = any nonTriv arg_infos
645 -- NB: (any nonTriv arg_infos) looks at the
646 -- over-saturated args too which is "wrong";
647 -- but if over-saturated we inline anyway.
649 -- some_benefit is used when the RHS is small enough
650 -- and the call has enough (or too many) value
651 -- arguments (ie n_val_args >= arity). But there must
652 -- be *something* interesting about some argument, or the
653 -- result context, to make it worth inlining
654 some_benefit = interesting_args
655 || n_val_args > uf_arity -- Over-saturated
656 || interesting_saturated_call -- Exactly saturated
658 interesting_saturated_call
660 BoringCtxt -> not is_top && uf_arity > 0 -- Note [Nested functions]
661 CaseCtxt -> not (lone_variable && is_value) -- Note [Lone variables]
662 ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt]
663 ValAppCtxt -> True -- Note [Cast then apply]
669 InlineRule { ir_info = inl_info, ir_sat = sat }
670 | InlAlways <- inl_info -> True -- No top-level binding, so inline!
671 -- Ignore is_active because we want to
672 -- inline even if SimplGently is on.
673 | not active_inline -> False
674 | n_val_args < uf_arity -> yes_unsat -- Not enough value args
675 | InlSmall <- inl_info -> True -- Note [INLINE for small functions]
676 | otherwise -> some_benefit -- Saturated or over-saturated
678 -- See Note [Inlining an InlineRule]
679 yes_unsat = case sat of
681 InlUnSat -> interesting_args
683 UnfoldIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
684 | not active_inline -> False
685 | not is_cheap -> False
686 | n_val_args < uf_arity -> interesting_args && small_enough
687 -- Note [Unsaturated applications]
688 | uncondInline uf_arity size -> True
689 | otherwise -> some_benefit && small_enough
692 small_enough = (size - discount) <= opt_UF_UseThreshold
693 discount = computeDiscount uf_arity arg_discounts
694 res_discount arg_infos cont_info
697 if dopt Opt_D_dump_inlinings dflags then
698 pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
699 (vcat [text "active:" <+> ppr active_inline,
700 text "arg infos" <+> ppr arg_infos,
701 text "interesting continuation" <+> ppr cont_info,
702 text "is value:" <+> ppr is_value,
703 text "is cheap:" <+> ppr is_cheap,
704 text "guidance" <+> ppr guidance,
705 text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
714 Be a tiny bit keener to inline in the RHS of a let, because that might
715 lead to good thing later
717 g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
718 We'd inline 'f' if the call was in a case context, and it kind-of-is,
719 only we can't see it. So we treat the RHS of a let as not-totally-boring.
721 Note [Unsaturated applications]
722 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
723 When a call is not saturated, we *still* inline if one of the
724 arguments has interesting structure. That's sometimes very important.
725 A good example is the Ord instance for Bool in Base:
728 $fOrdBool =GHC.Classes.D:Ord
733 $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
734 $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
737 But the defn of GHC.Classes.$dmmin is:
739 $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
740 {- Arity: 3, HasNoCafRefs, Strictness: SLL,
741 Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
742 case @ a GHC.Classes.<= @ a $dOrd x y of wild {
743 GHC.Bool.False -> y GHC.Bool.True -> x }) -}
745 We *really* want to inline $dmmin, even though it has arity 3, in
746 order to unravel the recursion.
749 Note [INLINE for small functions]
750 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
751 Consider {-# INLINE f #-}
754 Then f's RHS is no larger than its LHS, so we should inline it
755 into even the most boring context. (We do so if there is no INLINE
756 pragma!) That's the reason for the 'ug_small' flag on an InlineRule.
759 Note [Things to watch]
760 ~~~~~~~~~~~~~~~~~~~~~~
761 * { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
762 Assume x is exported, so not inlined unconditionally.
763 Then we want x to inline unconditionally; no reason for it
764 not to, and doing so avoids an indirection.
766 * { x = I# 3; ....f x.... }
767 Make sure that x does not inline unconditionally!
768 Lest we get extra allocation.
770 Note [Inlining an InlineRule]
771 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
772 An InlineRule is used for
773 (a) programmer INLINE pragmas
774 (b) inlinings from worker/wrapper
776 For (a) the RHS may be large, and our contract is that we *only* inline
777 when the function is applied to all the arguments on the LHS of the
778 source-code defn. (The uf_arity in the rule.)
780 However for worker/wrapper it may be worth inlining even if the
781 arity is not satisfied (as we do in the CoreUnfolding case) so we don't
785 Note [Nested functions]
786 ~~~~~~~~~~~~~~~~~~~~~~~
787 If a function has a nested defn we also record some-benefit, on the
788 grounds that we are often able to eliminate the binding, and hence the
789 allocation, for the function altogether; this is good for join points.
790 But this only makes sense for *functions*; inlining a constructor
791 doesn't help allocation unless the result is scrutinised. UNLESS the
792 constructor occurs just once, albeit possibly in multiple case
793 branches. Then inlining it doesn't increase allocation, but it does
794 increase the chance that the constructor won't be allocated at all in
795 the branches that don't use it.
797 Note [Cast then apply]
798 ~~~~~~~~~~~~~~~~~~~~~~
800 myIndex = __inline_me ( (/\a. <blah>) |> co )
801 co :: (forall a. a -> a) ~ (forall a. T a)
802 ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
804 We need to inline myIndex to unravel this; but the actual call (myIndex a) has
805 no value arguments. The ValAppCtxt gives it enough incentive to inline.
807 Note [Inlining in ArgCtxt]
808 ~~~~~~~~~~~~~~~~~~~~~~~~~~
809 The condition (arity > 0) here is very important, because otherwise
810 we end up inlining top-level stuff into useless places; eg
813 This can make a very big difference: it adds 16% to nofib 'integer' allocs,
816 At one stage I replaced this condition by 'True' (leading to the above
817 slow-down). The motivation was test eyeball/inline1.hs; but that seems
820 NOTE: arguably, we should inline in ArgCtxt only if the result of the
821 call is at least CONLIKE. At least for the cases where we use ArgCtxt
822 for the RHS of a 'let', we only profit from the inlining if we get a
823 CONLIKE thing (modulo lets).
825 Note [Lone variables]
826 ~~~~~~~~~~~~~~~~~~~~~
827 The "lone-variable" case is important. I spent ages messing about
828 with unsatisfactory variants, but this is nice. The idea is that if a
829 variable appears all alone
831 as an arg of lazy fn, or rhs BoringCtxt
832 as scrutinee of a case CaseCtxt
833 as arg of a fn ArgCtxt
835 it is bound to a value
837 then we should not inline it (unless there is some other reason,
838 e.g. it is the sole occurrence). That is what is happening at
839 the use of 'lone_variable' in 'interesting_saturated_call'.
841 Why? At least in the case-scrutinee situation, turning
842 let x = (a,b) in case x of y -> ...
844 let x = (a,b) in case (a,b) of y -> ...
846 let x = (a,b) in let y = (a,b) in ...
847 is bad if the binding for x will remain.
849 Another example: I discovered that strings
850 were getting inlined straight back into applications of 'error'
851 because the latter is strict.
853 f = \x -> ...(error s)...
855 Fundamentally such contexts should not encourage inlining because the
856 context can ``see'' the unfolding of the variable (e.g. case or a
857 RULE) so there's no gain. If the thing is bound to a value.
862 foo = _inline_ (\n. [n])
863 bar = _inline_ (foo 20)
864 baz = \n. case bar of { (m:_) -> m + n }
865 Here we really want to inline 'bar' so that we can inline 'foo'
866 and the whole thing unravels as it should obviously do. This is
867 important: in the NDP project, 'bar' generates a closure data
868 structure rather than a list.
870 So the non-inlining of lone_variables should only apply if the
871 unfolding is regarded as cheap; because that is when exprIsConApp_maybe
872 looks through the unfolding. Hence the "&& is_cheap" in the
875 * Even a type application or coercion isn't a lone variable.
877 case $fMonadST @ RealWorld of { :DMonad a b c -> c }
878 We had better inline that sucker! The case won't see through it.
880 For now, I'm treating a variable applied to types
881 in a *lazy* context "lone". The motivating example was
884 There's no advantage in inlining f here, and perhaps
885 a significant disadvantage. Hence some_val_args in the Stop case
888 computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
889 computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
890 -- We multiply the raw discounts (args_discount and result_discount)
891 -- by opt_UF_KeenessFactor because the former have to do with
892 -- *size* whereas the discounts imply that there's some extra
893 -- *efficiency* to be gained (e.g. beta reductions, case reductions)
896 = 1 -- Discount of 1 because the result replaces the call
897 -- so we count 1 for the function itself
899 + length (take n_vals_wanted arg_infos)
900 -- Discount of (un-scaled) 1 for each arg supplied,
901 -- because the result replaces the call
903 + round (opt_UF_KeenessFactor *
904 fromIntegral (arg_discount + res_discount'))
906 arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
908 mk_arg_discount _ TrivArg = 0
909 mk_arg_discount _ NonTrivArg = 1
910 mk_arg_discount discount ValueArg = discount
912 res_discount' = case cont_info of
914 CaseCtxt -> res_discount
915 _other -> 4 `min` res_discount
916 -- res_discount can be very large when a function returns
917 -- constructors; but we only want to invoke that large discount
918 -- when there's a case continuation.
919 -- Otherwise we, rather arbitrarily, threshold it. Yuk.
920 -- But we want to avoid inlining large functions that return
921 -- constructors into contexts that are simply "interesting"
924 %************************************************************************
926 Interesting arguments
928 %************************************************************************
930 Note [Interesting arguments]
931 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
932 An argument is interesting if it deserves a discount for unfoldings
933 with a discount in that argument position. The idea is to avoid
934 unfolding a function that is applied only to variables that have no
935 unfolding (i.e. they are probably lambda bound): f x y z There is
936 little point in inlining f here.
938 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
939 we must look through lets, eg (let x = e in C a b), because the let will
940 float, exposing the value, if we inline. That makes it different to
943 Before 2009 we said it was interesting if the argument had *any* structure
944 at all; i.e. (hasSomeUnfolding v). But that does too much inlining; see Trac #3016.
946 But we don't regard (f x y) as interesting, unless f is unsaturated.
947 If it's saturated and f hasn't inlined, then it's probably not going
950 Note [Conlike is interesting]
951 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
953 f d = ...((*) d x y)...
955 where df is con-like. Then we'd really like to inline 'f' so that the
956 rule for (*) (df d) can fire. To do this
957 a) we give a discount for being an argument of a class-op (eg (*) d)
958 b) we say that a con-like argument (eg (df d)) is interesting
961 data ArgSummary = TrivArg -- Nothing interesting about the arg
962 | NonTrivArg -- Arg has some structure
963 | ValueArg -- Arg is a con-app or PAP
964 -- ..or con-like; see Note [Conlike is interesting]
966 interestingArg :: CoreExpr -> ArgSummary
967 -- Summarise an argument expression; see Note [Interesting arguments]
968 interestingArg e = go e 0
970 -- n is the number of value args to which the expression is applied
971 go (Lit {}) _ = ValueArg
973 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather than
974 -- data constructors here
975 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
976 | n > 0 = NonTrivArg -- Saturated or unknown call
977 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
978 -- See Note [Conlike is interesting]
979 | otherwise = TrivArg -- n==0, no useful unfolding
981 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
983 go (Type _) _ = TrivArg
984 go (App fn (Type _)) n = go fn n -- type args do not count towards n
985 go (App fn _) n = go fn (n+1) -- one more value arg
986 go (Note _ a) n = go a n -- look through notes
987 go (Cast e _) n = go e n -- look through casts
991 | otherwise = ValueArg
992 go (Let _ e) n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg } -- look through the let; anything short of a value is NonTrivArg
993 go (Case {}) _ = NonTrivArg
995 nonTriv :: ArgSummary -> Bool -- False for TrivArg
996 nonTriv TrivArg = False
1000 %************************************************************************
1004 %************************************************************************
1006 Note [exprIsConApp_maybe]
1007 ~~~~~~~~~~~~~~~~~~~~~~~~~
1008 exprIsConApp_maybe is a very important function. There are two principal
1010 * case e of { .... }
1011 * cls_op e, where cls_op is a class operation
1013 In both cases you want to know if e is of form (C e1..en) where C is
1016 However e might not *look* as if
1019 -- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is
1020 -- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@,
1021 -- where t1..tk are the *universally-quantified* type args of 'dc'
1022 exprIsConApp_maybe :: CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
1024 exprIsConApp_maybe (Note _ expr)
1025 = exprIsConApp_maybe expr
1026 -- We ignore all notes. For example,
1027 -- case _scc_ "foo" (C a b) of
1029 -- should be optimised away, but it will be only if we look
1030 -- through the SCC note.
1032 exprIsConApp_maybe (Cast expr co)
1033 = -- Here we do the KPush reduction rule as described in the FC paper
1034 -- The transformation applies iff we have
1035 -- (C e1 ... en) `cast` co
1036 -- where co :: (T t1 .. tn) ~ to_ty
1037 -- The left-hand one must be a T, because exprIsConApp_maybe returned Just
1038 -- but the right-hand one might not be. (Though it usually will.)
1040 case exprIsConApp_maybe expr of {
1041 Nothing -> Nothing ;
1042 Just (dc, _dc_univ_args, dc_args) ->
1044 let (_from_ty, to_ty) = coercionKind co
1045 dc_tc = dataConTyCon dc
1047 case splitTyConApp_maybe to_ty of {
1048 Nothing -> Nothing ;
1049 Just (to_tc, to_tc_arg_tys)
1050 | dc_tc /= to_tc -> Nothing
1051 -- These two Nothing cases are possible; we might see
1052 -- (C x y) `cast` (g :: T a ~ S [a]),
1053 -- where S is a type function. In fact, exprIsConApp_maybe
1054 -- will probably not be called in such circumstances,
1055 -- but there's nothing wrong with it
1059 tc_arity = tyConArity dc_tc
1060 dc_univ_tyvars = dataConUnivTyVars dc
1061 dc_ex_tyvars = dataConExTyVars dc
1062 arg_tys = dataConRepArgTys dc
1064 dc_eqs :: [(Type,Type)] -- All equalities from the DataCon
1065 dc_eqs = [(mkTyVarTy tv, ty) | (tv,ty) <- dataConEqSpec dc] ++
1066 [getEqPredTys eq_pred | eq_pred <- dataConEqTheta dc]
1068 (ex_args, rest1) = splitAtList dc_ex_tyvars dc_args
1069 (co_args, val_args) = splitAtList dc_eqs rest1
1071 -- Make the "theta" from Fig 3 of the paper
1072 gammas = decomposeCo tc_arity co
1073 theta = zipOpenTvSubst (dc_univ_tyvars ++ dc_ex_tyvars)
1074 (gammas ++ stripTypeArgs ex_args)
1076 -- Cast the existential coercion arguments
1077 cast_co (ty1, ty2) (Type co)
1078 = Type $ mkSymCoercion (substTy theta ty1)
1079 `mkTransCoercion` co
1080 `mkTransCoercion` (substTy theta ty2)
1081 cast_co _ other_arg = pprPanic "cast_co" (ppr other_arg)
1082 new_co_args = zipWith cast_co dc_eqs co_args
1084 -- Cast the value arguments (which include dictionaries)
1085 new_val_args = zipWith cast_arg arg_tys val_args
1086 cast_arg arg_ty arg = mkCoerce (substTy theta arg_ty) arg
1089 let dump_doc = vcat [ppr dc, ppr dc_univ_tyvars, ppr dc_ex_tyvars,
1090 ppr arg_tys, ppr dc_args, ppr _dc_univ_args,
1091 ppr ex_args, ppr val_args]
1093 ASSERT2( coreEqType _from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
1094 ASSERT2( all isTypeArg (ex_args ++ co_args), dump_doc )
1095 ASSERT2( equalLength val_args arg_tys, dump_doc )
1098 Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args) -- type args are those of to_ty
1101 exprIsConApp_maybe expr
1104 analyse (App fun arg) args = analyse fun (arg:args) -- peel off applications, collecting the args
1105 analyse fun@(Lam {}) args = beta fun [] args -- lambda in head position: hand over to beta
1107 analyse (Var fun) args
1108 | Just con <- isDataConWorkId_maybe fun
1110 , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args
1111 = Just (con, stripTypeArgs univ_ty_args, rest_args)
1113 -- Look through dictionary functions; see Note [Unfolding DFuns]
1114 | DFunUnfolding con ops <- unfolding
1116 , let (dfun_tvs, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun)
1117 subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args))
1118 = Just (con, substTys subst dfun_res_tys,
1119 [mkApps op args | op <- ops])
1121 -- Look through unfoldings, but only cheap ones, because
1122 -- we are effectively duplicating the unfolding
1123 | CoreUnfolding { uf_expandable = expand_me, uf_tmpl = rhs } <- unfolding
1124 , expand_me = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
1127 is_saturated = count isValArg args == idArity fun -- as many value args as the Id's arity
1128 unfolding = idUnfolding fun
1130 analyse _ _ = Nothing -- any other head: not a constructor application
1133 in_scope = mkInScopeSet (exprFreeVars expr)
1136 beta (Lam v body) pairs (arg : args)
1138 = beta body ((v,arg):pairs) args -- pair the binder with its argument and carry on
1140 beta (Lam {}) _ _ -- Un-saturated, or not a type lambda
1144 = case analyse (substExpr subst fun) args of
1145 Nothing -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $
1147 Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $
1150 subst = mkOpenSubst in_scope pairs
1151 -- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args]
1154 stripTypeArgs :: [CoreExpr] -> [Type] -- Project the types out of a list of type arguments
1155 stripTypeArgs args = ASSERT2( all isTypeArg args, ppr args )
1156 [ty | Type ty <- args] -- the pattern would silently drop any non-Type arg, hence the assertion
1159 Note [Unfolding DFuns]
1160 ~~~~~~~~~~~~~~~~~~~~~~
1163 df :: forall a b. (Eq a, Eq b) -> Eq (a,b)
1164 df a b d_a d_b = MkEqD (a,b) ($c1 a b d_a d_b)
1167 So to split it up we just need to apply the ops $c1, $c2 etc
1168 to the very same args as the dfun. It takes a little more work
1169 to compute the type arguments to the dictionary constructor.