compiler/coreSyn/CoreUnfold.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The AQUA Project, Glasgow University, 1994-1998
   4 %
   5
   6 Core-syntax unfoldings
   7
   8 Unfoldings (which can travel across module boundaries) are in Core
   9 syntax (namely @CoreExpr@s).
  10
  11 The type @Unfolding@ sits ``above'' simply-Core-expressions
  12 unfoldings, capturing ``higher-level'' things we know about a binding,
  13 usually things that the simplifier found out (e.g., ``it's a
  14 literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
  15 find, unsurprisingly, a Core expression.
  16
  17 \begin{code}
  18 module CoreUnfold (
  19         Unfolding, UnfoldingGuidance,   -- Abstract types
  20
  21         noUnfolding, mkImplicitUnfolding,
  22         mkTopUnfolding, mkUnfolding,
  23         mkInlineRule, mkWwInlineRule,
  24         mkCompulsoryUnfolding,
  25
  26         couldBeSmallEnoughToInline,
  27         certainlyWillInline, smallEnoughToInline,
  28
  29         callSiteInline, CallCtxt(..)
  30
  31     ) where
  32
  33 import StaticFlags
  34 import DynFlags
  35 import CoreSyn
  36 import PprCore          ()      -- Instances
  37 import OccurAnal
  38 import CoreSubst
  39 import CoreUtils
  40 import Id
  41 import DataCon
  42 import Literal
  43 import PrimOp
  44 import IdInfo
  45 import BasicTypes       ( Arity )
  46 import Type hiding( substTy, extendTvSubst )
  47 import Maybes
  48 import PrelNames
  49 import Bag
  50 import FastTypes
  51 import FastString
  52 import Outputable
  53
  54 \end{code}
  55
  56
  57 %************************************************************************
  58 %*                                                                      *
  59 \subsection{Making unfoldings}
  60 %*                                                                      *
  61 %************************************************************************
  62
  63 \begin{code}
  64 mkTopUnfolding :: CoreExpr -> Unfolding
  65 mkTopUnfolding expr = mkUnfolding True {- Top level -} expr
  66
  67 mkImplicitUnfolding :: CoreExpr -> Unfolding
  68 -- For implicit Ids, do a tiny bit of optimising first
  69 mkImplicitUnfolding expr
  70   = CoreUnfolding (simpleOptExpr expr)
  71                   True
  72                   (exprIsHNF expr)
  73                   (exprIsCheap expr)
  74                   (calcUnfoldingGuidance opt_UF_CreationThreshold expr)
  75
  76 mkInlineRule :: CoreExpr -> Arity -> Unfolding
  77 mkInlineRule expr arity
  78   = InlineRule { uf_tmpl = simpleOptExpr expr,
  79                  uf_is_top = True,       -- Conservative; this gets set more
  80                                          -- accuately by the simplifier (slight hack)
  81                                          -- in SimplEnv.substUnfolding
  82                  uf_arity = arity,
  83                  uf_is_value = exprIsHNF expr,
  84                  uf_worker = Nothing }
  85
  86 mkWwInlineRule :: CoreExpr -> Arity -> Id -> Unfolding
  87 mkWwInlineRule expr arity wkr
  88   = InlineRule { uf_tmpl = simpleOptExpr expr,
  89                  uf_is_top = True,       -- Conservative; see mkInlineRule
  90                  uf_arity = arity,
  91                  uf_is_value = exprIsHNF expr,
  92                  uf_worker = Just wkr }
  93
  94 mkUnfolding :: Bool -> CoreExpr -> Unfolding
  95 mkUnfolding top_lvl expr
  96   = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr,
  97                     uf_is_top = top_lvl,
  98                     uf_is_value = exprIsHNF expr,
  99                     uf_is_cheap = exprIsCheap expr,
 100                     uf_guidance = calcUnfoldingGuidance opt_UF_CreationThreshold expr }
 101         -- Sometimes during simplification, there's a large let-bound thing
 102         -- which has been substituted, and so is now dead; so 'expr' contains
 103         -- two copies of the thing while the occurrence-analysed expression doesn't
 104         -- Nevertheless, we don't occ-analyse before computing the size because the
 105         -- size computation bales out after a while, whereas occurrence analysis does not.
 106         --
 107         -- This can occasionally mean that the guidance is very pessimistic;
 108         -- it gets fixed up next round
 109
 110 mkCompulsoryUnfolding :: CoreExpr -> Unfolding
 111 mkCompulsoryUnfolding expr      -- Used for things that absolutely must be unfolded
 112   = CompulsoryUnfolding (occurAnalyseExpr expr)
 113 \end{code}
 114
 115
 116 %************************************************************************
 117 %*                                                                      *
 118 \subsection{The UnfoldingGuidance type}
 119 %*                                                                      *
 120 %************************************************************************
 121
 122 \begin{code}
 123 calcUnfoldingGuidance
 124         :: Int                  -- bomb out if size gets bigger than this
 125         -> CoreExpr             -- expression to look at
 126         -> UnfoldingGuidance
 127 calcUnfoldingGuidance bOMB_OUT_SIZE expr
 128   = case collectBinders expr of { (binders, body) ->
 129     let
 130         val_binders = filter isId binders
 131         n_val_binders = length val_binders
 132     in
 133     case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of
 134       TooBig -> UnfoldNever
 135       SizeIs size cased_args scrut_discount
 136         -> UnfoldIfGoodArgs { ug_arity = n_val_binders
 137                             , ug_args  = map discount_for val_binders
 138                             , ug_size  = iBox size
 139                             , ug_res   = iBox scrut_discount }
 140         where
 141             discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc)
 142                                       0 cased_args
 143         }
 144 \end{code}
 145
 146 \begin{code}
 147 sizeExpr :: FastInt         -- Bomb out if it gets bigger than this
 148          -> [Id]            -- Arguments; we're interested in which of these
 149                             -- get case'd
 150          -> CoreExpr
 151          -> ExprSize
 152
 153 sizeExpr bOMB_OUT_SIZE top_args expr
 154   = size_up expr
 155   where
 156     size_up (Type _)           = sizeZero      -- Types cost nothing
 157     size_up (Var _)            = sizeOne
 158     size_up (Note _ body)      = size_up body  -- Notes cost nothing
 159     size_up (Cast e _)         = size_up e
 160     size_up (App fun (Type _)) = size_up fun
 161     size_up (App fun arg)      = size_up_app fun [arg]
 162
 163     size_up (Lit lit)          = sizeN (litSize lit)
 164
 165     size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 1)
 166                       | otherwise = size_up e
 167
 168     size_up (Let (NonRec binder rhs) body)
 169       = nukeScrutDiscount (size_up rhs)         `addSize`
 170         size_up body                            `addSizeN`
 171         (if isUnLiftedType (idType binder) then 0 else 1)
 172                 -- For the allocation
 173                 -- If the binder has an unlifted type there is no allocation
 174
 175     size_up (Let (Rec pairs) body)
 176       = nukeScrutDiscount rhs_size              `addSize`
 177         size_up body                            `addSizeN`
 178         length pairs            -- For the allocation
 179       where
 180         rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
 181
 182     size_up (Case (Var v) _ _ alts)
 183         | v `elem` top_args             -- We are scrutinising an argument variable
 184         =
 185 {-      I'm nuking this special case; BUT see the comment with case alternatives.
 186
 187         (a) It's too eager.  We don't want to inline a wrapper into a
 188             context with no benefit.
 189             E.g.  \ x. f (x+x)          no point in inlining (+) here!
 190
 191         (b) It's ineffective. Once g's wrapper is inlined, its case-expressions
 192             aren't scrutinising arguments any more
 193
 194             case alts of
 195
 196                 [alt] -> size_up_alt alt `addSize` SizeIs (_ILIT(0)) (unitBag (v, 1)) (_ILIT(0))
 197                 -- We want to make wrapper-style evaluation look cheap, so that
 198                 -- when we inline a wrapper it doesn't make call site (much) bigger
 199                 -- Otherwise we get nasty phase ordering stuff:
 200                 --      f x = g x x
 201                 --      h y = ...(f e)...
 202                 -- If we inline g's wrapper, f looks big, and doesn't get inlined
 203                 -- into h; if we inline f first, while it looks small, then g's
 204                 -- wrapper will get inlined later anyway.  To avoid this nasty
 205                 -- ordering difference, we make (case a of (x,y) -> ...),
 206                 --  *where a is one of the arguments* look free.
 207
 208                 other ->
 209 -}
 210                          alts_size (foldr addSize sizeOne alt_sizes)    -- The 1 is for the scrutinee
 211                                    (foldr1 maxSize alt_sizes)
 212
 213                 -- Good to inline if an arg is scrutinised, because
 214                 -- that may eliminate allocation in the caller
 215                 -- And it eliminates the case itself
 216
 217         where
 218           alt_sizes = map size_up_alt alts
 219
 220                 -- alts_size tries to compute a good discount for
 221                 -- the case when we are scrutinising an argument variable
 222           alts_size (SizeIs tot _tot_disc _tot_scrut)           -- Size of all alternatives
 223                     (SizeIs max  max_disc  max_scrut)           -- Size of biggest alternative
 224                 = SizeIs tot (unitBag (v, iBox (_ILIT(1) +# tot -# max)) `unionBags` max_disc) max_scrut
 225                         -- If the variable is known, we produce a discount that
 226                         -- will take us back to 'max', the size of rh largest alternative
 227                         -- The 1+ is a little discount for reduced allocation in the caller
 228           alts_size tot_size _ = tot_size
 229
 230     size_up (Case e _ _ alts) = nukeScrutDiscount (size_up e) `addSize`
 231                                  foldr (addSize . size_up_alt) sizeZero alts
 232                 -- We don't charge for the case itself
 233                 -- It's a strict thing, and the price of the call
 234                 -- is paid by scrut.  Also consider
 235                 --      case f x of DEFAULT -> e
 236                 -- This is just ';'!  Don't charge for it.
 237
 238     ------------
 239     size_up_app (App fun arg) args
 240         | isTypeArg arg              = size_up_app fun args
 241         | otherwise                  = size_up_app fun (arg:args)
 242     size_up_app fun           args   = foldr (addSize . nukeScrutDiscount . size_up)
 243                                              (size_up_fun fun args)
 244                                              args
 245
 246         -- A function application with at least one value argument
 247         -- so if the function is an argument give it an arg-discount
 248         --
 249         -- Also behave specially if the function is a build
 250         --
 251         -- Also if the function is a constant Id (constr or primop)
 252         -- compute discounts specially
 253     size_up_fun (Var fun) args
 254       | fun `hasKey` buildIdKey   = buildSize
 255       | fun `hasKey` augmentIdKey = augmentSize
 256       | otherwise
 257       = case globalIdDetails fun of
 258           DataConWorkId dc -> conSizeN dc (valArgCount args)
 259
 260           FCallId _    -> sizeN opt_UF_DearOp
 261           PrimOpId op  -> primOpSize op (valArgCount args)
 262                           -- foldr addSize (primOpSize op) (map arg_discount args)
 263                           -- At one time I tried giving an arg-discount if a primop
 264                           -- is applied to one of the function's arguments, but it's
 265                           -- not good.  At the moment, any unlifted-type arg gets a
 266                           -- 'True' for 'yes I'm evald', so we collect the discount even
 267                           -- if we know nothing about it.  And just having it in a primop
 268                           -- doesn't help at all if we don't know something more.
 269
 270           _            -> fun_discount fun `addSizeN`
 271                           (1 + length (filter (not . exprIsTrivial) args))
 272                                 -- The 1+ is for the function itself
 273                                 -- Add 1 for each non-trivial arg;
 274                                 -- the allocation cost, as in let(rec)
 275                                 -- Slight hack here: for constructors the args are almost always
 276                                 --      trivial; and for primops they are almost always prim typed
 277                                 --      We should really only count for non-prim-typed args in the
 278                                 --      general case, but that seems too much like hard work
 279
 280     size_up_fun other _ = size_up other
 281
 282     ------------
 283     size_up_alt (_con, _bndrs, rhs) = size_up rhs
 284         -- Don't charge for args, so that wrappers look cheap
 285         -- (See comments about wrappers with Case)
 286
 287     ------------
 288         -- We want to record if we're case'ing, or applying, an argument
 289     fun_discount v | v `elem` top_args = SizeIs (_ILIT(0)) (unitBag (v, opt_UF_FunAppDiscount)) (_ILIT(0))
 290     fun_discount _                     = sizeZero
 291
 292     ------------
 293         -- These addSize things have to be here because
 294         -- I don't want to give them bOMB_OUT_SIZE as an argument
 295
 296     addSizeN TooBig          _  = TooBig
 297     addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
 298
 299     addSize TooBig            _                 = TooBig
 300     addSize _                 TooBig            = TooBig
 301     addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
 302         = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2)
 303 \end{code}
 304
 305 Code for manipulating sizes
 306
 307 \begin{code}
 308 data ExprSize = TooBig
 309               | SizeIs FastInt          -- Size found
 310                        (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
 311                        FastInt          -- Size to subtract if result is scrutinised
 312                                         -- by a case expression
 313
 314 -- subtract the discount before deciding whether to bale out. eg. we
 315 -- want to inline a large constructor application into a selector:
 316 --      tup = (a_1, ..., a_99)
 317 --      x = case tup of ...
 318 --
 319 mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
 320 mkSizeIs max n xs d | (n -# d) ># max = TooBig
 321                     | otherwise       = SizeIs n xs d
 322
 323 maxSize :: ExprSize -> ExprSize -> ExprSize
 324 maxSize TooBig         _                                  = TooBig
 325 maxSize _              TooBig                             = TooBig
 326 maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2  = s1
 327                                               | otherwise = s2
 328
 329 sizeZero, sizeOne :: ExprSize
 330 sizeN :: Int -> ExprSize
 331 conSizeN :: DataCon ->Int -> ExprSize
 332
 333 sizeZero        = SizeIs (_ILIT(0))  emptyBag (_ILIT(0))
 334 sizeOne         = SizeIs (_ILIT(1))  emptyBag (_ILIT(0))
 335 sizeN n         = SizeIs (iUnbox n) emptyBag (_ILIT(0))
 336 conSizeN dc n
 337   | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n +# _ILIT(1))
 338   | otherwise            = SizeIs (_ILIT(1)) emptyBag (iUnbox n +# _ILIT(1))
 339         -- Treat constructors as size 1; we are keen to expose them
 340         -- (and we charge separately for their args).  We can't treat
 341         -- them as size zero, else we find that (iBox x) has size 1,
 342         -- which is the same as a lone variable; and hence 'v' will
 343         -- always be replaced by (iBox x), where v is bound to iBox x.
 344         --
 345         -- However, unboxed tuples count as size zero
 346         -- I found occasions where we had
 347         --      f x y z = case op# x y z of { s -> (# s, () #) }
 348         -- and f wasn't getting inlined
 349
 350 primOpSize :: PrimOp -> Int -> ExprSize
 351 primOpSize op n_args
 352  | not (primOpIsDupable op) = sizeN opt_UF_DearOp
 353  | not (primOpOutOfLine op) = sizeN (2 - n_args)
 354         -- Be very keen to inline simple primops.
 355         -- We give a discount of 1 for each arg so that (op# x y z) costs 2.
 356         -- We can't make it cost 1, else we'll inline let v = (op# x y z)
 357         -- at every use of v, which is excessive.
 358         --
 359         -- A good example is:
 360         --      let x = +# p q in C {x}
 361         -- Even though x get's an occurrence of 'many', its RHS looks cheap,
 362         -- and there's a good chance it'll get inlined back into C's RHS. Urgh!
 363  | otherwise                = sizeOne
 364
 365 buildSize :: ExprSize
 366 buildSize = SizeIs (_ILIT(-2)) emptyBag (_ILIT(4))
 367         -- We really want to inline applications of build
 368         -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
 369         -- Indeed, we should add a result_discount becuause build is
 370         -- very like a constructor.  We don't bother to check that the
 371         -- build is saturated (it usually is).  The "-2" discounts for the \c n,
 372         -- The "4" is rather arbitrary.
 373
 374 augmentSize :: ExprSize
 375 augmentSize = SizeIs (_ILIT(-2)) emptyBag (_ILIT(4))
 376         -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
 377         -- e plus ys. The -2 accounts for the \cn
 378
 379 nukeScrutDiscount :: ExprSize -> ExprSize
 380 nukeScrutDiscount (SizeIs n vs _) = SizeIs n vs (_ILIT(0))
 381 nukeScrutDiscount TooBig          = TooBig
 382
 383 -- When we return a lambda, give a discount if it's used (applied)
 384 lamScrutDiscount :: ExprSize -> ExprSize
 385 lamScrutDiscount (SizeIs n vs _) = case opt_UF_FunAppDiscount of { d -> SizeIs n vs (iUnbox d) }
 386 lamScrutDiscount TooBig          = TooBig
 387 \end{code}
 388
 389
 390 %************************************************************************
 391 %*                                                                      *
 392 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
 393 %*                                                                      *
 394 %************************************************************************
 395
 396 We have very limited information about an unfolding expression: (1)~so
 397 many type arguments and so many value arguments expected---for our
 398 purposes here, we assume we've got those.  (2)~A ``size'' or ``cost,''
 399 a single integer.  (3)~An ``argument info'' vector.  For this, what we
 400 have at the moment is a Boolean per argument position that says, ``I
 401 will look with great favour on an explicit constructor in this
 402 position.'' (4)~The ``discount'' to subtract if the expression
 403 is being scrutinised.
 404
 405 Assuming we have enough type- and value arguments (if not, we give up
 406 immediately), then we see if the ``discounted size'' is below some
 407 (semi-arbitrary) threshold.  It works like this: for every argument
 408 position where we're looking for a constructor AND WE HAVE ONE in our
 409 hands, we get a (again, semi-arbitrary) discount [proportion to the
 410 number of constructors in the type being scrutinized].
 411
 412 If we're in the context of a scrutinee ( \tr{(case <expr > of A .. -> ...;.. )})
 413 and the expression in question will evaluate to a constructor, we use
 414 the computed discount size *for the result only* rather than
 415 computing the argument discounts. Since we know the result of
 416 the expression is going to be taken apart, discounting its size
 417 is more accurate (see @sizeExpr@ above for how this discount size
 418 is computed).
 419
 420 We use this one to avoid exporting inlinings that we ``couldn't possibly
 421 use'' on the other side.  Can be overridden w/ flaggery.
 422 Just the same as smallEnoughToInline, except that it has no actual arguments.
 423
 424 \begin{code}
 425 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
 426 couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold rhs of
 427                                                 UnfoldNever -> False
 428                                                 _           -> True
 429
 430 certainlyWillInline :: Unfolding -> Bool
 431   -- Sees if the unfolding is pretty certain to inline
 432 certainlyWillInline (CompulsoryUnfolding {}) = True
 433 certainlyWillInline (InlineRule {})          = True
 434 certainlyWillInline (CoreUnfolding
 435     { uf_is_cheap = is_cheap
 436     , uf_guidance = UnfoldIfGoodArgs {ug_arity = n_vals, ug_size = size}})
 437   = is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
 438 certainlyWillInline _
 439   = False
 440
 441 smallEnoughToInline :: Unfolding -> Bool
 442 smallEnoughToInline (CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_size = size}})
 443   = size <= opt_UF_UseThreshold
 444 smallEnoughToInline _
 445   = False
 446 \end{code}
 447
 448 %************************************************************************
 449 %*                                                                      *
 450 \subsection{callSiteInline}
 451 %*                                                                      *
 452 %************************************************************************
 453
 454 This is the key function.  It decides whether to inline a variable at a call site
 455
 456 callSiteInline is used at call sites, so it is a bit more generous.
 457 It's a very important function that embodies lots of heuristics.
 458 A non-WHNF can be inlined if it doesn't occur inside a lambda,
 459 and occurs exactly once or
 460     occurs once in each branch of a case and is small
 461
 462 If the thing is in WHNF, there's no danger of duplicating work,
 463 so we can inline if it occurs once, or is small
 464
 465 NOTE: we don't want to inline top-level functions that always diverge.
 466 It just makes the code bigger.  Tt turns out that the convenient way to prevent
 467 them inlining is to give them a NOINLINE pragma, which we do in
 468 StrictAnal.addStrictnessInfoToTopId
 469
 470 \begin{code}
 471 callSiteInline :: DynFlags
 472                -> Bool                  -- True <=> the Id can be inlined
 473                -> Id                    -- The Id
 474                -> Bool                  -- True if there are are no arguments at all (incl type args)
 475                -> [Bool]                -- One for each value arg; True if it is interesting
 476                -> CallCtxt              -- True <=> continuation is interesting
 477                -> Maybe CoreExpr        -- Unfolding, if any
 478
 479
 480 data CallCtxt = BoringCtxt
 481
 482               | ArgCtxt Bool    -- We're somewhere in the RHS of function with rules
 483                                 --      => be keener to inline
 484                         Int     -- We *are* the argument of a function with this arg discount
 485                                 --      => be keener to inline
 486                 -- INVARIANT: ArgCtxt False 0 ==> BoringCtxt
 487
 488               | ValAppCtxt      -- We're applied to at least one value arg
 489                                 -- This arises when we have ((f x |> co) y)
 490                                 -- Then the (f x) has argument 'x' but in a ValAppCtxt
 491
 492               | CaseCtxt        -- We're the scrutinee of a case
 493                                 -- that decomposes its scrutinee
 494
 495 instance Outputable CallCtxt where
 496   ppr BoringCtxt    = ptext (sLit "BoringCtxt")
 497   ppr (ArgCtxt _ _) = ptext (sLit "ArgCtxt")
 498   ppr CaseCtxt      = ptext (sLit "CaseCtxt")
 499   ppr ValAppCtxt    = ptext (sLit "ValAppCtxt")
 500
 501 callSiteInline dflags active_inline id lone_variable arg_infos cont_info
 502   = let
 503         n_val_args  = length arg_infos
 504     in
 505     case idUnfolding id of {
 506         NoUnfolding -> Nothing ;
 507         OtherCon _  -> Nothing ;
 508
 509         CompulsoryUnfolding unf_template -> Just unf_template ;
 510                 -- CompulsoryUnfolding => there is no top-level binding
 511                 -- for these things, so we must inline it.
 512                 -- Only a couple of primop-like things have
 513                 -- compulsory unfoldings (see MkId.lhs).
 514                 -- We don't allow them to be inactive
 515
 516         InlineRule { uf_tmpl = unf_template, uf_arity = arity, uf_is_top = is_top
 517                    , uf_is_value = is_value, uf_worker = mb_worker }
 518             -> let yes_or_no | not active_inline   = False
 519                              | n_val_args <  arity = yes_unsat  -- Not enough value args
 520                              | n_val_args == arity = yes_exact  -- Exactly saturated
 521                              | otherwise           = True       -- Over-saturated
 522                    result | yes_or_no = Just unf_template
 523                           | otherwise = Nothing
 524
 525                    -- See Note [Inlining an InlineRule]
 526                    is_wrapper = isJust mb_worker
 527                    yes_unsat | is_wrapper  = or arg_infos
 528                              | otherwise   = False
 529
 530                    yes_exact = or arg_infos || interesting_saturated_call
 531                    interesting_saturated_call
 532                         = case cont_info of
 533                             BoringCtxt -> not is_top                            -- Note [Nested functions]
 534                             CaseCtxt   -> not lone_variable || not is_value     -- Note [Lone variables]
 535                             ArgCtxt {} -> arity > 0                             -- Note [Inlining in ArgCtxt]
 536                             ValAppCtxt -> True                                  -- Note [Cast then apply]
 537                in
 538                if dopt Opt_D_dump_inlinings dflags then
 539                 pprTrace ("Considering InlineRule for: " ++ showSDoc (ppr id))
 540                          (vcat [text "active:" <+> ppr active_inline,
 541                                 text "arg infos" <+> ppr arg_infos,
 542                                 text "interesting call" <+> ppr interesting_saturated_call,
 543                                 text "is value:" <+> ppr is_value,
 544                                 text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
 545                           result
 546                 else result ;
 547
 548         CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value,
 549                         uf_is_cheap = is_cheap, uf_guidance = guidance } ->
 550
 551     let
 552         result | yes_or_no = Just unf_template
 553                | otherwise = Nothing
 554
 555         yes_or_no = active_inline && is_cheap && consider_safe
 556                 -- We consider even the once-in-one-branch
 557                 -- occurrences, because they won't all have been
 558                 -- caught by preInlineUnconditionally.  In particular,
 559                 -- if the occurrence is once inside a lambda, and the
 560                 -- rhs is cheap but not a manifest lambda, then
 561                 -- pre-inline will not have inlined it for fear of
 562                 -- invalidating the occurrence info in the rhs.
 563
 564         consider_safe
 565                 -- consider_safe decides whether it's a good idea to
 566                 -- inline something, given that there's no
 567                 -- work-duplication issue (the caller checks that).
 568           = case guidance of
 569               UnfoldNever  -> False
 570               UnfoldIfGoodArgs { ug_arity = n_vals_wanted, ug_args = arg_discounts
 571                                , ug_res = res_discount, ug_size = size }
 572                   | enough_args && size <= (n_vals_wanted + 1)
 573                         -- Inline unconditionally if there no size increase
 574                         -- Size of call is n_vals_wanted (+1 for the function)
 575                   -> True
 576
 577                   | otherwise
 578                   -> some_benefit && small_enough && inline_enough_args
 579
 580                   where
 581                     enough_args = n_val_args >= n_vals_wanted
 582                     inline_enough_args =
 583                       not (dopt Opt_InlineIfEnoughArgs dflags) || enough_args
 584
 585
 586                     some_benefit = or arg_infos || really_interesting_cont
 587                                 -- There must be something interesting
 588                                 -- about some argument, or the result
 589                                 -- context, to make it worth inlining
 590
 591                     really_interesting_cont
 592                         | n_val_args <  n_vals_wanted = False   -- Too few args
 593                         | n_val_args == n_vals_wanted = interesting_saturated_call
 594                         | otherwise                   = True    -- Extra args
 595                         -- really_interesting_cont tells if the result of the
 596                         -- call is in an interesting context.
 597
 598                     interesting_saturated_call
 599                         = case cont_info of
 600                             BoringCtxt -> not is_top && n_vals_wanted > 0       -- Note [Nested functions]
 601                             CaseCtxt   -> not lone_variable || not is_value     -- Note [Lone variables]
 602                             ArgCtxt {} -> n_vals_wanted > 0                     -- Note [Inlining in ArgCtxt]
 603                             ValAppCtxt -> True                                  -- Note [Cast then apply]
 604
 605                     small_enough = (size - discount) <= opt_UF_UseThreshold
 606                     discount = computeDiscount n_vals_wanted arg_discounts
 607                                                res_discount' arg_infos
 608                     res_discount' = case cont_info of
 609                                         BoringCtxt  -> 0
 610                                         CaseCtxt    -> res_discount
 611                                         _other      -> 4 `min` res_discount
 612                         -- res_discount can be very large when a function returns
 613                         -- construtors; but we only want to invoke that large discount
 614                         -- when there's a case continuation.
 615                         -- Otherwise we, rather arbitrarily, threshold it.  Yuk.
 616                         -- But we want to aovid inlining large functions that return
 617                         -- constructors into contexts that are simply "interesting"
 618
 619     in
 620     if dopt Opt_D_dump_inlinings dflags then
 621         pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
 622                  (vcat [text "active:" <+> ppr active_inline,
 623                         text "arg infos" <+> ppr arg_infos,
 624                         text "interesting continuation" <+> ppr cont_info,
 625                         text "is value:" <+> ppr is_value,
 626                         text "is cheap:" <+> ppr is_cheap,
 627                         text "guidance" <+> ppr guidance,
 628                         text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
 629                   result
 630     else
 631     result
 632     }
 633 \end{code}
 634
 635 Note [Inlining an InlineRule]
 636 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 637 An InlineRules is used for
 638   (a) pogrammer INLINE pragmas
 639   (b) inlinings from worker/wrapper
 640
 641 For (a) the RHS may be large, and our contract is that we *only* inline
 642 when the function is applied to all the arguments on the LHS of the
 643 source-code defn.  (The uf_arity in the rule.)
 644
 645 However for worker/wrapper it may be worth inlining even if the
 646 arity is not satisfied (as we do in the CoreUnfolding case) so we don't
 647 require saturation.
 648
 649
 650 Note [Nested functions]
 651 ~~~~~~~~~~~~~~~~~~~~~~~
 652 If a function has a nested defn we also record some-benefit, on the
 653 grounds that we are often able to eliminate the binding, and hence the
 654 allocation, for the function altogether; this is good for join points.
 655 But this only makes sense for *functions*; inlining a constructor
 656 doesn't help allocation unless the result is scrutinised.  UNLESS the
 657 constructor occurs just once, albeit possibly in multiple case
 658 branches.  Then inlining it doesn't increase allocation, but it does
 659 increase the chance that the constructor won't be allocated at all in
 660 the branches that don't use it.
 661
 662 Note [Cast then apply]
 663 ~~~~~~~~~~~~~~~~~~~~~~
 664 Consider
 665    myIndex = __inline_me ( (/\a. <blah>) |> co )
 666    co :: (forall a. a -> a) ~ (forall a. T a)
 667      ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
 668
 669 We need to inline myIndex to unravel this; but the actual call (myIndex a) has
 670 no value arguments.  The ValAppCtxt gives it enough incentive to inline.
 671
 672 Note [Inlining in ArgCtxt]
 673 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 674 The condition (n_vals_wanted > 0) here is very important, because otherwise
 675 we end up inlining top-level stuff into useless places; eg
 676    x = I# 3#
 677    f = \y.  g x
 678 This can make a very big difference: it adds 16% to nofib 'integer' allocs,
 679 and 20% to 'power'.
 680
 681 At one stage I replaced this condition by 'True' (leading to the above
 682 slow-down).  The motivation was test eyeball/inline1.hs; but that seems
 683 to work ok now.
 684
 685 Note [Lone variables]
 686 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 687 The "lone-variable" case is important.  I spent ages messing about
 688 with unsatisfactory varaints, but this is nice.  The idea is that if a
 689 variable appears all alone
 690         as an arg of lazy fn, or rhs    Stop
 691         as scrutinee of a case          Select
 692         as arg of a strict fn           ArgOf
 693 AND
 694         it is bound to a value
 695 then we should not inline it (unless there is some other reason,
 696 e.g. is is the sole occurrence).  That is what is happening at
 697 the use of 'lone_variable' in 'interesting_saturated_call'.
 698
 699 Why?  At least in the case-scrutinee situation, turning
 700         let x = (a,b) in case x of y -> ...
 701 into
 702         let x = (a,b) in case (a,b) of y -> ...
 703 and thence to
 704         let x = (a,b) in let y = (a,b) in ...
 705 is bad if the binding for x will remain.
 706
 707 Another example: I discovered that strings
 708 were getting inlined straight back into applications of 'error'
 709 because the latter is strict.
 710         s = "foo"
 711         f = \x -> ...(error s)...
 712
 713 Fundamentally such contexts should not encourage inlining because the
 714 context can ``see'' the unfolding of the variable (e.g. case or a
 715 RULE) so there's no gain.  If the thing is bound to a value.
 716
 717 However, watch out:
 718
 719  * Consider this:
 720         foo = _inline_ (\n. [n])
 721         bar = _inline_ (foo 20)
 722         baz = \n. case bar of { (m:_) -> m + n }
 723    Here we really want to inline 'bar' so that we can inline 'foo'
 724    and the whole thing unravels as it should obviously do.  This is
 725    important: in the NDP project, 'bar' generates a closure data
 726    structure rather than a list.
 727
 728  * Even a type application or coercion isn't a lone variable.
 729    Consider
 730         case $fMonadST @ RealWorld of { :DMonad a b c -> c }
 731    We had better inline that sucker!  The case won't see through it.
 732
 733    For now, I'm treating treating a variable applied to types
 734    in a *lazy* context "lone". The motivating example was
 735         f = /\a. \x. BIG
 736         g = /\a. \y.  h (f a)
 737    There's no advantage in inlining f here, and perhaps
 738    a significant disadvantage.  Hence some_val_args in the Stop case
 739
 740 \begin{code}
 741 computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Int
 742 computeDiscount n_vals_wanted arg_discounts result_discount arg_infos
 743         -- We multiple the raw discounts (args_discount and result_discount)
 744         -- ty opt_UnfoldingKeenessFactor because the former have to do with
 745         --  *size* whereas the discounts imply that there's some extra
 746         --  *efficiency* to be gained (e.g. beta reductions, case reductions)
 747         -- by inlining.
 748
 749         -- we also discount 1 for each argument passed, because these will
 750         -- reduce with the lambdas in the function (we count 1 for a lambda
 751         -- in size_up).
 752   = 1 +                 -- Discount of 1 because the result replaces the call
 753                         -- so we count 1 for the function itself
 754     length (take n_vals_wanted arg_infos) +
 755                         -- Discount of 1 for each arg supplied, because the
 756                         -- result replaces the call
 757     round (opt_UF_KeenessFactor *
 758            fromIntegral (arg_discount + result_discount))
 759   where
 760     arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
 761
 762     mk_arg_discount discount is_evald | is_evald  = discount
 763                                       | otherwise = 0
 764 \end{code}
 765