2 % (c) The University of Glasgow 2006
3 % (c) The AQUA Project, Glasgow University, 1994-1998
8 Unfoldings (which can travel across module boundaries) are in Core
9 syntax (namely @CoreExpr@s).
11 The type @Unfolding@ sits ``above'' simply-Core-expressions
12 unfoldings, capturing ``higher-level'' things we know about a binding,
13 usually things that the simplifier found out (e.g., ``it's a
14 literal''). In the corner of a @CoreUnfolding@ unfolding, you will
15 find, unsurprisingly, a Core expression.
19 Unfolding, UnfoldingGuidance, -- Abstract types
21 noUnfolding, mkImplicitUnfolding,
22 mkTopUnfolding, mkUnfolding, mkCoreUnfolding,
23 mkInlineRule, mkWwInlineRule,
24 mkCompulsoryUnfolding, mkDFunUnfolding,
26 interestingArg, ArgSummary(..),
28 couldBeSmallEnoughToInline,
29 certainlyWillInline, smallEnoughToInline,
31 callSiteInline, CallCtxt(..),
37 #include "HsVersions.h"
42 import PprCore () -- Instances
44 import CoreSubst hiding( substTy )
45 import CoreFVs ( exprFreeVars )
53 import BasicTypes ( Arity )
54 import TcType ( tcSplitDFunTy )
58 import VarEnv ( mkInScopeSet )
68 %************************************************************************
70 \subsection{Making unfoldings}
72 %************************************************************************
75 mkTopUnfolding :: CoreExpr -> Unfolding
76 mkTopUnfolding expr = mkUnfolding True {- Top level -} expr
78 mkImplicitUnfolding :: CoreExpr -> Unfolding
79 -- For implicit Ids, do a tiny bit of optimising first
80 mkImplicitUnfolding expr = mkTopUnfolding (simpleOptExpr expr)
82 -- Note [Top-level flag on inline rules]
83 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
84 -- Slight hack: note that mk_inline_rules conservatively sets the
85 -- top-level flag to True. It gets set more accurately by the simplifier
86 -- Simplify.simplUnfolding.
88 mkUnfolding :: Bool -> CoreExpr -> Unfolding
89 mkUnfolding top_lvl expr
90 = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
94 uf_is_value = exprIsHNF expr,
95 uf_is_conlike = exprIsConLike expr,
96 uf_expandable = exprIsExpandable expr,
97 uf_is_cheap = is_cheap,
98 uf_guidance = guidance }
100 is_cheap = exprIsCheap expr
101 (arity, guidance) = calcUnfoldingGuidance is_cheap opt_UF_CreationThreshold expr
102 -- Sometimes during simplification, there's a large let-bound thing
103 -- which has been substituted, and so is now dead; so 'expr' contains
104 -- two copies of the thing while the occurrence-analysed expression doesn't
105 -- Nevertheless, we *don't* occ-analyse before computing the size because the
106 -- size computation bales out after a while, whereas occurrence analysis does not.
108 -- This can occasionally mean that the guidance is very pessimistic;
109 -- it gets fixed up next round. And it should be rare, because large
110 -- let-bound things that are dead are usually caught by preInlineUnconditionally
112 mkCoreUnfolding :: Bool -> UnfoldingSource -> CoreExpr
113 -> Arity -> UnfoldingGuidance -> Unfolding
114 -- Occurrence-analyses the expression before capturing it
115 mkCoreUnfolding top_lvl src expr arity guidance
116 = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
120 uf_is_value = exprIsHNF expr,
121 uf_is_conlike = exprIsConLike expr,
122 uf_is_cheap = exprIsCheap expr,
123 uf_expandable = exprIsExpandable expr,
124 uf_guidance = guidance }
126 mkDFunUnfolding :: DataCon -> [Id] -> Unfolding
127 mkDFunUnfolding con ops = DFunUnfolding con (map Var ops)
129 mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
130 mkWwInlineRule id expr arity
131 = mkCoreUnfolding True (InlineWrapper id)
132 (simpleOptExpr expr) arity
133 (UnfWhen unSaturatedOk boringCxtNotOk)
135 mkCompulsoryUnfolding :: CoreExpr -> Unfolding
136 mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded
137 = mkCoreUnfolding True InlineCompulsory
138 expr 0 -- Arity of unfolding doesn't matter
139 (UnfWhen unSaturatedOk boringCxtOk)
141 mkInlineRule :: Bool -> CoreExpr -> Arity -> Unfolding
142 mkInlineRule unsat_ok expr arity
143 = mkCoreUnfolding True InlineRule -- Note [Top-level flag on inline rules]
145 (UnfWhen unsat_ok boring_ok)
147 expr' = simpleOptExpr expr
148 boring_ok = case calcUnfoldingGuidance True -- Treat as cheap
150 (_, UnfWhen _ boring_ok) -> boring_ok
151 _other -> boringCxtNotOk
152 -- See Note [INLINE for small functions]
156 %************************************************************************
158 \subsection{The UnfoldingGuidance type}
160 %************************************************************************
163 calcUnfoldingGuidance
164 :: Bool -- True <=> the rhs is cheap, or we want to treat it
165 -- as cheap (INLINE things)
166 -> Int -- Bomb out if size gets bigger than this
167 -> CoreExpr -- Expression to look at
168 -> (Arity, UnfoldingGuidance)
169 calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
170 = case collectBinders expr of { (bndrs, body) ->
172 val_bndrs = filter isId bndrs
173 n_val_bndrs = length val_bndrs
176 = case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
178 SizeIs size cased_bndrs scrut_discount
179 | uncondInline n_val_bndrs (iBox size) && expr_is_cheap
180 -> UnfWhen needSaturated boringCxtOk
183 -> UnfIfGoodArgs { ug_args = map (discount cased_bndrs) val_bndrs
184 , ug_size = iBox size
185 , ug_res = iBox scrut_discount }
188 = foldlBag (\acc (b',n) -> if bndr==b' then acc+n else acc)
191 (n_val_bndrs, guidance) }
194 Note [Computing the size of an expression]
195 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 The basic idea of sizeExpr is obvious enough: count nodes. But getting the
197 heuristics right has taken a long time. Here's the basic strategy:
199 * Variables, literals: 0
200 (Exception for string literals, see litSize.)
202 * Function applications (f e1 .. en): 1 + #value args
204 * Constructor applications: 1, regardless of #args
206 * Let(rec): 1 + size of components
221 Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's
222 a function call to account for. Notice also that constructor applications
223 are very cheap, because exposing them to a caller is so valuable.
225 Note [Unconditional inlining]
226 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
227 We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
228 than the thing it's replacing. Notice that
229 (f x) --> (g 3) -- YES, unconditionally
230 (f x) --> x : [] -- YES, *even though* there are two
231 -- arguments to the cons
235 It's very important not to unconditionally replace a variable by
239 uncondInline :: Arity -> Int -> Bool
240 -- Inline unconditionally if there no size increase
241 -- Size of call is arity (+1 for the function)
242 -- See Note [Unconditional inlining]
243 uncondInline arity size
244 | arity == 0 = size == 0
245 | otherwise = size <= arity + 1
250 sizeExpr :: FastInt -- Bomb out if it gets bigger than this
251 -> [Id] -- Arguments; we're interested in which of these
256 -- Note [Computing the size of an expression]
258 sizeExpr bOMB_OUT_SIZE top_args expr
261 size_up (Cast e _) = size_up e
262 size_up (Note _ e) = size_up e
263 size_up (Type _) = sizeZero -- Types cost nothing
264 size_up (Lit lit) = sizeN (litSize lit)
265 size_up (Var f) = size_up_call f [] -- Make sure we get constructor
266 -- discounts even on nullary constructors
268 size_up (App fun (Type _)) = size_up fun
269 size_up (App fun arg) = size_up_app fun [arg]
270 `addSize` nukeScrutDiscount (size_up arg)
272 size_up (Lam b e) | isId b = lamScrutDiscount (size_up e `addSizeN` 1)
273 | otherwise = size_up e
275 size_up (Let (NonRec binder rhs) body)
276 = nukeScrutDiscount (size_up rhs) `addSize`
277 size_up body `addSizeN`
278 (if isUnLiftedType (idType binder) then 0 else 1)
279 -- For the allocation
280 -- If the binder has an unlifted type there is no allocation
282 size_up (Let (Rec pairs) body)
283 = nukeScrutDiscount rhs_size `addSize`
284 size_up body `addSizeN`
285 length pairs -- For the allocation
287 rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
289 size_up (Case (Var v) _ _ alts)
290 | v `elem` top_args -- We are scrutinising an argument variable
291 = alts_size (foldr1 addSize alt_sizes) -- The 1 is for the case itself
292 (foldr1 maxSize alt_sizes)
293 -- Good to inline if an arg is scrutinised, because
294 -- that may eliminate allocation in the caller
295 -- And it eliminates the case itself
297 alt_sizes = map size_up_alt alts
299 -- alts_size tries to compute a good discount for
300 -- the case when we are scrutinising an argument variable
301 alts_size (SizeIs tot tot_disc _tot_scrut) -- Size of all alternatives
302 (SizeIs max _max_disc max_scrut) -- Size of biggest alternative
303 = SizeIs tot (unitBag (v, iBox (_ILIT(2) +# tot -# max)) `unionBags` tot_disc) max_scrut
304 -- If the variable is known, we produce a discount that
305 -- will take us back to 'max', the size of the largest alternative
306 -- The 1+ is a little discount for reduced allocation in the caller
308 -- Notice though, that we return tot_disc, the total discount from
309 -- all branches. I think that's right.
311 alts_size tot_size _ = tot_size
313 size_up (Case e _ _ alts) = foldr (addSize . size_up_alt)
314 (nukeScrutDiscount (size_up e))
316 -- We don't charge for the case itself
317 -- It's a strict thing, and the price of the call
318 -- is paid by scrut. Also consider
319 -- case f x of DEFAULT -> e
320 -- This is just ';'! Don't charge for it.
322 -- Moreover, we charge one per alternative.
325 -- size_up_app is used when there's ONE OR MORE value args
326 size_up_app (App fun arg) args
327 | isTypeArg arg = size_up_app fun args
328 | otherwise = size_up_app fun (arg:args)
329 `addSize` nukeScrutDiscount (size_up arg)
330 size_up_app (Var fun) args = size_up_call fun args
331 size_up_app other args = size_up other `addSizeN` length args
334 size_up_call :: Id -> [CoreExpr] -> ExprSize
335 size_up_call fun val_args
336 = case idDetails fun of
337 FCallId _ -> sizeN opt_UF_DearOp
338 DataConWorkId dc -> conSize dc (length val_args)
339 PrimOpId op -> primOpSize op (length val_args)
340 ClassOpId _ -> classOpSize top_args val_args
341 _ -> funSize top_args fun (length val_args)
344 size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 1
345 -- Don't charge for args, so that wrappers look cheap
346 -- (See comments about wrappers with Case)
348 -- IMPORATANT: *do* charge 1 for the alternative, else we
349 -- find that giant case nests are treated as practically free
350 -- A good example is Foreign.C.Error.errrnoToIOError
353 -- These addSize things have to be here because
354 -- I don't want to give them bOMB_OUT_SIZE as an argument
355 addSizeN TooBig _ = TooBig
356 addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
358 addSize TooBig _ = TooBig
359 addSize _ TooBig = TooBig
360 addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
361 = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2)
365 -- | Finds a nominal size of a string literal.
366 litSize :: Literal -> Int
367 -- Used by CoreUnfold.sizeExpr
368 litSize (MachStr str) = 1 + ((lengthFS str + 3) `div` 4)
369 -- If size could be 0 then @f "x"@ might be too small
370 -- [Sept03: make literal strings a bit bigger to avoid fruitless
371 -- duplication of little strings]
372 litSize _other = 0 -- Must match size of nullary constructors
373 -- Key point: if x |-> 4, then x must inline unconditionally
374 -- (eg via case binding)
376 classOpSize :: [Id] -> [CoreExpr] -> ExprSize
377 -- See Note [Conlike is interesting]
380 classOpSize top_args (arg1 : other_args)
381 = SizeIs (iUnbox size) arg_discount (_ILIT(0))
383 size = 2 + length other_args
384 -- If the class op is scrutinising a lambda bound dictionary then
385 -- give it a discount, to encourage the inlining of this function
386 -- The actual discount is rather arbitrarily chosen
387 arg_discount = case arg1 of
388 Var dict | dict `elem` top_args
389 -> unitBag (dict, opt_UF_DictDiscount)
392 funSize :: [Id] -> Id -> Int -> ExprSize
393 -- Size for functions that are not constructors or primops
394 -- Note [Function applications]
395 funSize top_args fun n_val_args
396 | fun `hasKey` buildIdKey = buildSize
397 | fun `hasKey` augmentIdKey = augmentSize
398 | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount)
400 some_val_args = n_val_args > 0
402 arg_discount | some_val_args && fun `elem` top_args
403 = unitBag (fun, opt_UF_FunAppDiscount)
404 | otherwise = emptyBag
405 -- If the function is an argument and is applied
406 -- to some values, give it an arg-discount
408 res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount
410 -- If the function is partially applied, show a result discount
412 size | some_val_args = 1 + n_val_args
414 -- The 1+ is for the function itself
415 -- Add 1 for each non-trivial arg;
416 -- the allocation cost, as in let(rec)
419 conSize :: DataCon -> Int -> ExprSize
420 conSize dc n_val_args
421 | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) -- Like variables
422 | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
423 | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
424 -- Treat a constructors application as size 1, regardless of how
425 -- many arguments it has; we are keen to expose them
426 -- (and we charge separately for their args). We can't treat
427 -- them as size zero, else we find that (Just x) has size 0,
428 -- which is the same as a lone variable; and hence 'v' will
429 -- always be replaced by (Just x), where v is bound to Just x.
431 -- However, unboxed tuples count as size zero
432 -- I found occasions where we had
433 -- f x y z = case op# x y z of { s -> (# s, () #) }
434 -- and f wasn't getting inlined
436 primOpSize :: PrimOp -> Int -> ExprSize
437 primOpSize op n_val_args
438 | not (primOpIsDupable op) = sizeN opt_UF_DearOp
439 | not (primOpOutOfLine op) = sizeN 1
440 -- Be very keen to inline simple primops.
441 -- We give a discount of 1 for each arg so that (op# x y z) costs 2.
442 -- We can't make it cost 1, else we'll inline let v = (op# x y z)
443 -- at every use of v, which is excessive.
445 -- A good example is:
446 -- let x = +# p q in C {x}
447 -- Even though x get's an occurrence of 'many', its RHS looks cheap,
448 -- and there's a good chance it'll get inlined back into C's RHS. Urgh!
450 | otherwise = sizeN n_val_args
453 buildSize :: ExprSize
454 buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
455 -- We really want to inline applications of build
456 -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
457 -- Indeed, we should add a result_discount becuause build is
458 -- very like a constructor. We don't bother to check that the
459 -- build is saturated (it usually is). The "-2" discounts for the \c n,
460 -- The "4" is rather arbitrary.
462 augmentSize :: ExprSize
463 augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
464 -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
465 -- e plus ys. The -2 accounts for the \cn
467 nukeScrutDiscount :: ExprSize -> ExprSize
468 nukeScrutDiscount (SizeIs n vs _) = SizeIs n vs (_ILIT(0))
469 nukeScrutDiscount TooBig = TooBig
471 -- When we return a lambda, give a discount if it's used (applied)
472 lamScrutDiscount :: ExprSize -> ExprSize
473 lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount)
474 lamScrutDiscount TooBig = TooBig
477 Note [Discounts and thresholds]
478 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
479 Constants for discounts and thesholds are defined in main/StaticFlags,
480 all of form opt_UF_xxxx. They are:
482 opt_UF_CreationThreshold (45)
483 At a definition site, if the unfolding is bigger than this, we
484 may discard it altogether
486 opt_UF_UseThreshold (6)
487 At a call site, if the unfolding, less discounts, is smaller than
488 this, then it's small enough inline
490 opt_UF_KeennessFactor (1.5)
491 Factor by which the discounts are multiplied before
492 subtracting from size
494 opt_UF_DictDiscount (1)
495 The discount for each occurrence of a dictionary argument
496 as an argument of a class method. Should be pretty small
497 else big functions may get inlined
499 opt_UF_FunAppDiscount (6)
500 Discount for a function argument that is applied. Quite
501 large, because if we inline we avoid the higher-order call.
504 The size of a foreign call or not-dupable PrimOp
507 Note [Function applications]
508 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
509 In a function application (f a b)
511 - If 'f' is an argument to the function being analysed,
512 and there's at least one value arg, record a FunAppDiscount for f
514 - If the application if a PAP (arity > 2 in this example)
515 record a *result* discount (because inlining
516 with "extra" args in the call may mean that we now
517 get a saturated application)
519 Code for manipulating sizes
522 data ExprSize = TooBig
523 | SizeIs FastInt -- Size found
524 (Bag (Id,Int)) -- Arguments cased herein, and discount for each such
525 FastInt -- Size to subtract if result is scrutinised
526 -- by a case expression
528 instance Outputable ExprSize where
529 ppr TooBig = ptext (sLit "TooBig")
530 ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c))
532 -- subtract the discount before deciding whether to bale out. eg. we
533 -- want to inline a large constructor application into a selector:
534 -- tup = (a_1, ..., a_99)
535 -- x = case tup of ...
537 mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
538 mkSizeIs max n xs d | (n -# d) ># max = TooBig
539 | otherwise = SizeIs n xs d
541 maxSize :: ExprSize -> ExprSize -> ExprSize
542 maxSize TooBig _ = TooBig
543 maxSize _ TooBig = TooBig
544 maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1
548 sizeN :: Int -> ExprSize
550 sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0))
551 sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0))
555 %************************************************************************
557 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
559 %************************************************************************
561 We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
562 we ``couldn't possibly use'' on the other side. Can be overridden w/
563 flaggery. Just the same as smallEnoughToInline, except that it has no
567 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
568 couldBeSmallEnoughToInline threshold rhs
569 = case calcUnfoldingGuidance False threshold rhs of
570 (_, UnfNever) -> False
574 smallEnoughToInline :: Unfolding -> Bool
575 smallEnoughToInline (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}})
576 = size <= opt_UF_UseThreshold
577 smallEnoughToInline _
581 certainlyWillInline :: Unfolding -> Bool
582 -- Sees if the unfolding is pretty certain to inline
583 certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
587 UnfIfGoodArgs { ug_size = size}
588 -> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
590 certainlyWillInline _
594 %************************************************************************
596 \subsection{callSiteInline}
598 %************************************************************************
600 This is the key function. It decides whether to inline a variable at a call site
602 callSiteInline is used at call sites, so it is a bit more generous.
603 It's a very important function that embodies lots of heuristics.
604 A non-WHNF can be inlined if it doesn't occur inside a lambda,
605 and occurs exactly once or
606 occurs once in each branch of a case and is small
608 If the thing is in WHNF, there's no danger of duplicating work,
609 so we can inline if it occurs once, or is small
611 NOTE: we don't want to inline top-level functions that always diverge.
612 It just makes the code bigger. Tt turns out that the convenient way to prevent
613 them inlining is to give them a NOINLINE pragma, which we do in
614 StrictAnal.addStrictnessInfoToTopId
617 callSiteInline :: DynFlags
619 -> Unfolding -- Its unfolding (if active)
620 -> Bool -- True if there are are no arguments at all (incl type args)
621 -> [ArgSummary] -- One for each value arg; True if it is interesting
622 -> CallCtxt -- True <=> continuation is interesting
623 -> Maybe CoreExpr -- Unfolding, if any
626 instance Outputable ArgSummary where
627 ppr TrivArg = ptext (sLit "TrivArg")
628 ppr NonTrivArg = ptext (sLit "NonTrivArg")
629 ppr ValueArg = ptext (sLit "ValueArg")
631 data CallCtxt = BoringCtxt
633 | ArgCtxt -- We are somewhere in the argument of a function
634 Bool -- True <=> we're somewhere in the RHS of function with rules
635 -- False <=> we *are* the argument of a function with non-zero
638 -- we *are* the RHS of a let Note [RHS of lets]
639 -- In both cases, be a little keener to inline
641 | ValAppCtxt -- We're applied to at least one value arg
642 -- This arises when we have ((f x |> co) y)
643 -- Then the (f x) has argument 'x' but in a ValAppCtxt
645 | CaseCtxt -- We're the scrutinee of a case
646 -- that decomposes its scrutinee
648 instance Outputable CallCtxt where
649 ppr BoringCtxt = ptext (sLit "BoringCtxt")
650 ppr (ArgCtxt rules) = ptext (sLit "ArgCtxt") <+> ppr rules
651 ppr CaseCtxt = ptext (sLit "CaseCtxt")
652 ppr ValAppCtxt = ptext (sLit "ValAppCtxt")
654 callSiteInline dflags id unfolding lone_variable arg_infos cont_info
655 = case unfolding of {
656 NoUnfolding -> Nothing ;
657 OtherCon _ -> Nothing ;
658 DFunUnfolding {} -> Nothing ; -- Never unfold a DFun
659 CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value,
660 uf_is_cheap = is_cheap, uf_arity = uf_arity, uf_guidance = guidance } ->
661 -- uf_arity will typically be equal to (idArity id),
662 -- but may be less for InlineRules
664 n_val_args = length arg_infos
665 saturated = n_val_args >= uf_arity
667 result | yes_or_no = Just unf_template
668 | otherwise = Nothing
670 interesting_args = any nonTriv arg_infos
671 -- NB: (any nonTriv arg_infos) looks at the
672 -- over-saturated args too which is "wrong";
673 -- but if over-saturated we inline anyway.
675 -- some_benefit is used when the RHS is small enough
676 -- and the call has enough (or too many) value
677 -- arguments (ie n_val_args >= arity). But there must
678 -- be *something* interesting about some argument, or the
679 -- result context, to make it worth inlining
681 | not saturated = interesting_args -- Under-saturated
682 -- Note [Unsaturated applications]
683 | n_val_args > uf_arity = True -- Over-saturated
684 | otherwise = interesting_args -- Saturated
685 || interesting_saturated_call
687 interesting_saturated_call
689 BoringCtxt -> not is_top && uf_arity > 0 -- Note [Nested functions]
690 CaseCtxt -> not (lone_variable && is_value) -- Note [Lone variables]
691 ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt]
692 ValAppCtxt -> True -- Note [Cast then apply]
694 (yes_or_no, extra_doc)
696 UnfNever -> (False, empty)
698 UnfWhen unsat_ok boring_ok -> ( (unsat_ok || saturated)
699 && (boring_ok || some_benefit)
701 -- For the boring_ok part see Note [INLINE for small functions]
703 UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
704 -> ( is_cheap && some_benefit && small_enough
705 , (text "discounted size =" <+> int discounted_size) )
707 discounted_size = size - discount
708 small_enough = discounted_size <= opt_UF_UseThreshold
709 discount = computeDiscount uf_arity arg_discounts
710 res_discount arg_infos cont_info
713 if dopt Opt_D_dump_inlinings dflags then
714 pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
715 (vcat [text "arg infos" <+> ppr arg_infos,
716 text "uf arity" <+> ppr uf_arity,
717 text "interesting continuation" <+> ppr cont_info,
718 text "some_benefit" <+> ppr some_benefit,
719 text "is value:" <+> ppr is_value,
720 text "is cheap:" <+> ppr is_cheap,
721 text "guidance" <+> ppr guidance,
723 text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
732 Be a tiny bit keener to inline in the RHS of a let, because that might
733 lead to good thing later
735 g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
736 We'd inline 'f' if the call was in a case context, and it kind-of-is,
737 only we can't see it. So we treat the RHS of a let as not-totally-boring.
739 Note [Unsaturated applications]
740 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
741 When a call is not saturated, we *still* inline if one of the
742 arguments has interesting structure. That's sometimes very important.
743 A good example is the Ord instance for Bool in Base:
746 $fOrdBool =GHC.Classes.D:Ord
751 $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
752 $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
755 But the defn of GHC.Classes.$dmmin is:
757 $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
758 {- Arity: 3, HasNoCafRefs, Strictness: SLL,
759 Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
760 case @ a GHC.Classes.<= @ a $dOrd x y of wild {
761 GHC.Bool.False -> y GHC.Bool.True -> x }) -}
763 We *really* want to inline $dmmin, even though it has arity 3, in
764 order to unravel the recursion.
767 Note [INLINE for small functions]
768 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
769 Consider {-# INLINE f #-}
772 Then f's RHS is no larger than its LHS, so we should inline it
773 into even the most boring context. (We do so if there is no INLINE
777 Note [Things to watch]
778 ~~~~~~~~~~~~~~~~~~~~~~
779 * { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
780 Assume x is exported, so not inlined unconditionally.
781 Then we want x to inline unconditionally; no reason for it
782 not to, and doing so avoids an indirection.
784 * { x = I# 3; ....f x.... }
785 Make sure that x does not inline unconditionally!
786 Lest we get extra allocation.
788 Note [Inlining an InlineRule]
789 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
790 An InlineRules is used for
791 (a) programmer INLINE pragmas
792 (b) inlinings from worker/wrapper
794 For (a) the RHS may be large, and our contract is that we *only* inline
795 when the function is applied to all the arguments on the LHS of the
796 source-code defn. (The uf_arity in the rule.)
798 However for worker/wrapper it may be worth inlining even if the
799 arity is not satisfied (as we do in the CoreUnfolding case) so we don't
803 Note [Nested functions]
804 ~~~~~~~~~~~~~~~~~~~~~~~
805 If a function has a nested defn we also record some-benefit, on the
806 grounds that we are often able to eliminate the binding, and hence the
807 allocation, for the function altogether; this is good for join points.
808 But this only makes sense for *functions*; inlining a constructor
809 doesn't help allocation unless the result is scrutinised. UNLESS the
810 constructor occurs just once, albeit possibly in multiple case
811 branches. Then inlining it doesn't increase allocation, but it does
812 increase the chance that the constructor won't be allocated at all in
813 the branches that don't use it.
815 Note [Cast then apply]
816 ~~~~~~~~~~~~~~~~~~~~~~
818 myIndex = __inline_me ( (/\a. <blah>) |> co )
819 co :: (forall a. a -> a) ~ (forall a. T a)
820 ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
822 We need to inline myIndex to unravel this; but the actual call (myIndex a) has
823 no value arguments. The ValAppCtxt gives it enough incentive to inline.
825 Note [Inlining in ArgCtxt]
826 ~~~~~~~~~~~~~~~~~~~~~~~~~~
827 The condition (arity > 0) here is very important, because otherwise
828 we end up inlining top-level stuff into useless places; eg
831 This can make a very big difference: it adds 16% to nofib 'integer' allocs,
834 At one stage I replaced this condition by 'True' (leading to the above
835 slow-down). The motivation was test eyeball/inline1.hs; but that seems
838 NOTE: arguably, we should inline in ArgCtxt only if the result of the
839 call is at least CONLIKE. At least for the cases where we use ArgCtxt
840 for the RHS of a 'let', we only profit from the inlining if we get a
841 CONLIKE thing (modulo lets).
843 Note [Lone variables]
844 ~~~~~~~~~~~~~~~~~~~~~
845 The "lone-variable" case is important. I spent ages messing about
846 with unsatisfactory varaints, but this is nice. The idea is that if a
847 variable appears all alone
849 as an arg of lazy fn, or rhs BoringCtxt
850 as scrutinee of a case CaseCtxt
851 as arg of a fn ArgCtxt
853 it is bound to a value
855 then we should not inline it (unless there is some other reason,
856 e.g. is is the sole occurrence). That is what is happening at
857 the use of 'lone_variable' in 'interesting_saturated_call'.
859 Why? At least in the case-scrutinee situation, turning
860 let x = (a,b) in case x of y -> ...
862 let x = (a,b) in case (a,b) of y -> ...
864 let x = (a,b) in let y = (a,b) in ...
865 is bad if the binding for x will remain.
867 Another example: I discovered that strings
868 were getting inlined straight back into applications of 'error'
869 because the latter is strict.
871 f = \x -> ...(error s)...
873 Fundamentally such contexts should not encourage inlining because the
874 context can ``see'' the unfolding of the variable (e.g. case or a
875 RULE) so there's no gain. If the thing is bound to a value.
880 foo = _inline_ (\n. [n])
881 bar = _inline_ (foo 20)
882 baz = \n. case bar of { (m:_) -> m + n }
883 Here we really want to inline 'bar' so that we can inline 'foo'
884 and the whole thing unravels as it should obviously do. This is
885 important: in the NDP project, 'bar' generates a closure data
886 structure rather than a list.
888 So the non-inlining of lone_variables should only apply if the
889 unfolding is regarded as cheap; because that is when exprIsConApp_maybe
890 looks through the unfolding. Hence the "&& is_cheap" in the
893 * Even a type application or coercion isn't a lone variable.
895 case $fMonadST @ RealWorld of { :DMonad a b c -> c }
896 We had better inline that sucker! The case won't see through it.
898 For now, I'm treating treating a variable applied to types
899 in a *lazy* context "lone". The motivating example was
902 There's no advantage in inlining f here, and perhaps
903 a significant disadvantage. Hence some_val_args in the Stop case
906 computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
907 computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
908 -- We multiple the raw discounts (args_discount and result_discount)
909 -- ty opt_UnfoldingKeenessFactor because the former have to do with
910 -- *size* whereas the discounts imply that there's some extra
911 -- *efficiency* to be gained (e.g. beta reductions, case reductions)
914 = 1 -- Discount of 1 because the result replaces the call
915 -- so we count 1 for the function itself
917 + length (take n_vals_wanted arg_infos)
918 -- Discount of (un-scaled) 1 for each arg supplied,
919 -- because the result replaces the call
921 + round (opt_UF_KeenessFactor *
922 fromIntegral (arg_discount + res_discount'))
924 arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
926 mk_arg_discount _ TrivArg = 0
927 mk_arg_discount _ NonTrivArg = 1
928 mk_arg_discount discount ValueArg = discount
930 res_discount' = case cont_info of
932 CaseCtxt -> res_discount
933 _other -> 4 `min` res_discount
934 -- res_discount can be very large when a function returns
935 -- constructors; but we only want to invoke that large discount
936 -- when there's a case continuation.
937 -- Otherwise we, rather arbitrarily, threshold it. Yuk.
938 -- But we want to aovid inlining large functions that return
939 -- constructors into contexts that are simply "interesting"
942 %************************************************************************
944 Interesting arguments
946 %************************************************************************
948 Note [Interesting arguments]
949 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
950 An argument is interesting if it deserves a discount for unfoldings
951 with a discount in that argument position. The idea is to avoid
952 unfolding a function that is applied only to variables that have no
953 unfolding (i.e. they are probably lambda bound): f x y z There is
954 little point in inlining f here.
956 Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But
957 we must look through lets, eg (let x = e in C a b), because the let will
958 float, exposing the value, if we inline. That makes it different to
961 Before 2009 we said it was interesting if the argument had *any* structure
962 at all; i.e. (hasSomeUnfolding v). But does too much inlining; see Trac #3016.
964 But we don't regard (f x y) as interesting, unless f is unsaturated.
965 If it's saturated and f hasn't inlined, then it's probably not going
968 Note [Conlike is interesting]
969 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
971 f d = ...((*) d x y)...
973 where df is con-like. Then we'd really like to inline 'f' so that the
974 rule for (*) (df d) can fire. To do this
975 a) we give a discount for being an argument of a class-op (eg (*) d)
976 b) we say that a con-like argument (eg (df d)) is interesting
979 data ArgSummary = TrivArg -- Nothing interesting
980 | NonTrivArg -- Arg has structure
981 | ValueArg -- Arg is a con-app or PAP
982 -- ..or con-like. Note [Conlike is interesting]
984 interestingArg :: CoreExpr -> ArgSummary
985 -- See Note [Interesting arguments]
986 interestingArg e = go e 0
988 -- n is # value args to which the expression is applied
989 go (Lit {}) _ = ValueArg
991 | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that
992 -- data constructors here
993 | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding
994 | n > 0 = NonTrivArg -- Saturated or unknown call
995 | conlike_unfolding = ValueArg -- n==0; look for an interesting unfolding
996 -- See Note [Conlike is interesting]
997 | otherwise = TrivArg -- n==0, no useful unfolding
999 conlike_unfolding = isConLikeUnfolding (idUnfolding v)
1001 go (Type _) _ = TrivArg
1002 go (App fn (Type _)) n = go fn n
1003 go (App fn _) n = go fn (n+1)
1004 go (Note _ a) n = go a n
1005 go (Cast e _) n = go e n
1007 | isTyVar v = go e n
1009 | otherwise = ValueArg
1010 go (Let _ e) n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg }
1011 go (Case {}) _ = NonTrivArg
1013 nonTriv :: ArgSummary -> Bool
1014 nonTriv TrivArg = False
1018 %************************************************************************
1022 %************************************************************************
1024 Note [exprIsConApp_maybe]
1025 ~~~~~~~~~~~~~~~~~~~~~~~~~
1026 exprIsConApp_maybe is a very important function. There are two principal
1028 * case e of { .... }
1029 * cls_op e, where cls_op is a class operation
1031 In both cases you want to know if e is of form (C e1..en) where C is
1034 However e might not *look* as if
1037 -- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is
1038 -- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@,
1039 -- where t1..tk are the *universally-qantified* type args of 'dc'
1040 exprIsConApp_maybe :: IdUnfoldingFun -> CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
1042 exprIsConApp_maybe id_unf (Note _ expr)
1043 = exprIsConApp_maybe id_unf expr
1044 -- We ignore all notes. For example,
1045 -- case _scc_ "foo" (C a b) of
1047 -- should be optimised away, but it will be only if we look
1048 -- through the SCC note.
1050 exprIsConApp_maybe id_unf (Cast expr co)
1051 = -- Here we do the KPush reduction rule as described in the FC paper
1052 -- The transformation applies iff we have
1053 -- (C e1 ... en) `cast` co
1054 -- where co :: (T t1 .. tn) ~ to_ty
1055 -- The left-hand one must be a T, because exprIsConApp returned True
1056 -- but the right-hand one might not be. (Though it usually will.)
1058 case exprIsConApp_maybe id_unf expr of {
1059 Nothing -> Nothing ;
1060 Just (dc, _dc_univ_args, dc_args) ->
1062 let (_from_ty, to_ty) = coercionKind co
1063 dc_tc = dataConTyCon dc
1065 case splitTyConApp_maybe to_ty of {
1066 Nothing -> Nothing ;
1067 Just (to_tc, to_tc_arg_tys)
1068 | dc_tc /= to_tc -> Nothing
1069 -- These two Nothing cases are possible; we might see
1070 -- (C x y) `cast` (g :: T a ~ S [a]),
1071 -- where S is a type function. In fact, exprIsConApp
1072 -- will probably not be called in such circumstances,
1073 -- but there't nothing wrong with it
1077 tc_arity = tyConArity dc_tc
1078 dc_univ_tyvars = dataConUnivTyVars dc
1079 dc_ex_tyvars = dataConExTyVars dc
1080 arg_tys = dataConRepArgTys dc
1082 dc_eqs :: [(Type,Type)] -- All equalities from the DataCon
1083 dc_eqs = [(mkTyVarTy tv, ty) | (tv,ty) <- dataConEqSpec dc] ++
1084 [getEqPredTys eq_pred | eq_pred <- dataConEqTheta dc]
1086 (ex_args, rest1) = splitAtList dc_ex_tyvars dc_args
1087 (co_args, val_args) = splitAtList dc_eqs rest1
1089 -- Make the "theta" from Fig 3 of the paper
1090 gammas = decomposeCo tc_arity co
1091 theta = zipOpenTvSubst (dc_univ_tyvars ++ dc_ex_tyvars)
1092 (gammas ++ stripTypeArgs ex_args)
1094 -- Cast the existential coercion arguments
1095 cast_co (ty1, ty2) (Type co)
1096 = Type $ mkSymCoercion (substTy theta ty1)
1097 `mkTransCoercion` co
1098 `mkTransCoercion` (substTy theta ty2)
1099 cast_co _ other_arg = pprPanic "cast_co" (ppr other_arg)
1100 new_co_args = zipWith cast_co dc_eqs co_args
1102 -- Cast the value arguments (which include dictionaries)
1103 new_val_args = zipWith cast_arg arg_tys val_args
1104 cast_arg arg_ty arg = mkCoerce (substTy theta arg_ty) arg
1107 let dump_doc = vcat [ppr dc, ppr dc_univ_tyvars, ppr dc_ex_tyvars,
1108 ppr arg_tys, ppr dc_args, ppr _dc_univ_args,
1109 ppr ex_args, ppr val_args]
1111 ASSERT2( coreEqType _from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
1112 ASSERT2( all isTypeArg (ex_args ++ co_args), dump_doc )
1113 ASSERT2( equalLength val_args arg_tys, dump_doc )
1116 Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args)
1119 exprIsConApp_maybe id_unf expr
1122 analyse (App fun arg) args = analyse fun (arg:args)
1123 analyse fun@(Lam {}) args = beta fun [] args
1125 analyse (Var fun) args
1126 | Just con <- isDataConWorkId_maybe fun
1128 , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args
1129 = Just (con, stripTypeArgs univ_ty_args, rest_args)
1131 -- Look through dictionary functions; see Note [Unfolding DFuns]
1132 | DFunUnfolding con ops <- unfolding
1134 , let (dfun_tvs, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun)
1135 subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args))
1136 = Just (con, substTys subst dfun_res_tys,
1137 [mkApps op args | op <- ops])
1139 -- Look through unfoldings, but only cheap ones, because
1140 -- we are effectively duplicating the unfolding
1141 | CoreUnfolding { uf_expandable = expand_me, uf_tmpl = rhs } <- unfolding
1142 , expand_me = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
1145 is_saturated = count isValArg args == idArity fun
1146 unfolding = id_unf fun -- Does not look through loop breakers
1147 -- ToDo: we *may* look through variables that are NOINLINE
1148 -- in this phase, and that is really not right
1150 analyse _ _ = Nothing
1153 in_scope = mkInScopeSet (exprFreeVars expr)
1156 beta (Lam v body) pairs (arg : args)
1158 = beta body ((v,arg):pairs) args
1160 beta (Lam {}) _ _ -- Un-saturated, or not a type lambda
1164 = case analyse (substExpr subst fun) args of
1165 Nothing -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $
1167 Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $
1170 subst = mkOpenSubst in_scope pairs
1171 -- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args]
1174 stripTypeArgs :: [CoreExpr] -> [Type]
1175 stripTypeArgs args = ASSERT2( all isTypeArg args, ppr args )
1176 [ty | Type ty <- args]
1179 Note [Unfolding DFuns]
1180 ~~~~~~~~~~~~~~~~~~~~~~
1183 df :: forall a b. (Eq a, Eq b) -> Eq (a,b)
1184 df a b d_a d_b = MkEqD (a,b) ($c1 a b d_a d_b)
1187 So to split it up we just need to apply the ops $c1, $c2 etc
1188 to the very same args as the dfun. It takes a little more work
1189 to compute the type arguments to the dictionary constructor.