ghc/compiler/coreSyn/CoreUnfold.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1994-1998
   3 %
   4 \section[CoreUnfold]{Core-syntax unfoldings}
   5
   6 Unfoldings (which can travel across module boundaries) are in Core
   7 syntax (namely @CoreExpr@s).
   8
   9 The type @Unfolding@ sits ``above'' simply-Core-expressions
  10 unfoldings, capturing ``higher-level'' things we know about a binding,
  11 usually things that the simplifier found out (e.g., ``it's a
  12 literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
  13 find, unsurprisingly, a Core expression.
  14
  15 \begin{code}
  16 module CoreUnfold (
  17         Unfolding, UnfoldingGuidance,   -- Abstract types
  18
  19         noUnfolding, mkTopUnfolding, mkUnfolding, mkCompulsoryUnfolding, seqUnfolding,
  20         mkOtherCon, otherCons,
  21         unfoldingTemplate, maybeUnfoldingTemplate,
  22         isEvaldUnfolding, isValueUnfolding, isCheapUnfolding, isCompulsoryUnfolding,
  23         hasUnfolding, hasSomeUnfolding,
  24
  25         couldBeSmallEnoughToInline,
  26         certainlyWillInline,
  27         okToUnfoldInHiFile,
  28
  29         callSiteInline, blackListed
  30     ) where
  31
  32 #include "HsVersions.h"
  33
  34 import CmdLineOpts      ( opt_UF_CreationThreshold,
  35                           opt_UF_UseThreshold,
  36                           opt_UF_ScrutConDiscount,
  37                           opt_UF_FunAppDiscount,
  38                           opt_UF_PrimArgDiscount,
  39                           opt_UF_KeenessFactor,
  40                           opt_UF_CheapOp, opt_UF_DearOp,
  41                           opt_UnfoldCasms, opt_PprStyle_Debug,
  42                           opt_D_dump_inlinings
  43                         )
  44 import CoreSyn
  45 import PprCore          ( pprCoreExpr )
  46 import OccurAnal        ( occurAnalyseGlobalExpr )
  47 import CoreUtils        ( exprIsValue, exprIsCheap, exprIsBottom, exprIsTrivial )
  48 import Id               ( Id, idType, idFlavour, isId, idWorkerInfo,
  49                           idSpecialisation, idInlinePragma, idUnfolding,
  50                           isPrimOpId_maybe
  51                         )
  52 import VarSet
  53 import Literal          ( isLitLitLit )
  54 import PrimOp           ( PrimOp(..), primOpIsDupable, primOpOutOfLine, ccallIsCasm )
  55 import IdInfo           ( ArityInfo(..), InlinePragInfo(..), OccInfo(..), IdFlavour(..), CprInfo(..),
  56                           insideLam, workerExists, isNeverInlinePrag
  57                         )
  58 import Type             ( splitFunTy_maybe, isUnLiftedType )
  59 import Unique           ( Unique, buildIdKey, augmentIdKey, hasKey )
  60 import Bag
  61 import Outputable
  62
  63 #if __GLASGOW_HASKELL__ >= 404
  64 import GlaExts          ( fromInt )
  65 #endif
  66 \end{code}
  67
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection{Making unfoldings}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 \begin{code}
  76 mkTopUnfolding expr = mkUnfolding True {- Top level -} expr
             -- Shorthand for mkUnfolding with the top-level flag fixed to True
  77
  78 mkUnfolding top_lvl expr
             -- Builds a CoreUnfolding for 'expr'.  The fields are, in order:
             -- the occurrence-analysed template, the top-level flag, the results of
             -- exprIsCheap / exprIsValue / exprIsBottom on 'expr', and the guidance
             -- computed from 'expr' with opt_UF_CreationThreshold as the size limit.
  79   = CoreUnfolding (occurAnalyseGlobalExpr expr)
  80                   top_lvl
  81                   (exprIsCheap expr)
  82                   (exprIsValue expr)
  83                   (exprIsBottom expr)
  84                   (calcUnfoldingGuidance opt_UF_CreationThreshold expr)
  85         -- Sometimes during simplification, there's a large let-bound thing
  86         -- which has been substituted, and so is now dead; so 'expr' contains
  87         -- two copies of the thing while the occurrence-analysed expression doesn't.
  88         -- Nevertheless, we don't occ-analyse before computing the size because the
  89         -- size computation bails out after a while, whereas occurrence analysis does not.
  90         --
  91         -- This can occasionally mean that the guidance is very pessimistic;
  92         -- it gets fixed up next round
  93
  94 mkCompulsoryUnfolding expr      -- Used for things that absolutely must be unfolded
  95   = CompulsoryUnfolding (occurAnalyseGlobalExpr expr)
             -- Only the occurrence-analysed expression is stored; unlike
             -- mkUnfolding, no cheapness flags or size guidance are computed here
  96 \end{code}
  97
  98
  99 %************************************************************************
 100 %*                                                                      *
 101 \subsection{The UnfoldingGuidance type}
 102 %*                                                                      *
 103 %************************************************************************
 104
 105 \begin{code}
 106 instance Outputable UnfoldingGuidance where
         -- Prints either NEVER, or IF_ARGS followed by the four fields of
         -- UnfoldIfGoodArgs in order: the number of value args, the bracketed
         -- per-argument discounts, the size, and the result discount
 107     ppr UnfoldNever     = ptext SLIT("NEVER")
 108     ppr (UnfoldIfGoodArgs v cs size discount)
 109       = hsep [ ptext SLIT("IF_ARGS"), int v,
 110                brackets (hsep (map int cs)),
 111                int size,
 112                int discount ]
 113 \end{code}
 114
 115
 116 \begin{code}
 117 calcUnfoldingGuidance
 118         :: Int                  -- bomb out if size gets bigger than this
 119         -> CoreExpr             -- expression to look at
 120         -> UnfoldingGuidance
             -- Peels the leading lambdas (and any InlineMe note) off the expression,
             -- sizes the remaining body with sizeExpr, and packages the result:
             --   * UnfoldNever if the body is TooBig and no InlineMe note was seen
             --   * otherwise UnfoldIfGoodArgs carrying the value-binder count, one
             --     discount per value binder, the size, and the result discount
 121 calcUnfoldingGuidance bOMB_OUT_SIZE expr
 122   = case collect_val_bndrs expr of { (inline, val_binders, body) ->
 123     let
 124         n_val_binders = length val_binders
 125
 126         max_inline_size = n_val_binders+2
 127         -- The idea is that if there is an INLINE pragma (inline is True)
 128         -- and there's a big body, we give a size of n_val_binders+2.
 129         -- This is just enough to fail the no-size-increase test in callSiteInline,
 130         --   so that INLINE things don't get inlined into entirely boring contexts,
 131         --   but no more.
 132
 133     in
 134     case (sizeExpr bOMB_OUT_SIZE val_binders body) of
 135
 136       TooBig
 137         | not inline -> UnfoldNever
 138                 -- A big function with an INLINE pragma must
 139                 -- have an UnfoldIfGoodArgs guidance
 140         | inline     -> UnfoldIfGoodArgs n_val_binders
 141                                          (map (const 0) val_binders)
 142                                          max_inline_size 0
 143
 144       SizeIs size cased_args scrut_discount
 145         -> UnfoldIfGoodArgs
 146                         n_val_binders
 147                         (map discount_for val_binders)
 148                         final_size
 149                         (I# scrut_discount)
 150         where
 151             boxed_size    = I# size
 152
 153             final_size | inline     = boxed_size `min` max_inline_size
 154                        | otherwise  = boxed_size
 155
 156                 -- Sometimes an INLINE thing is smaller than n_val_binders+2.
 157                 -- A particular case in point is a constructor, which has size 1.
 158                 -- We want to inline this regardless, hence the `min`
 159
                     -- Total of every discount recorded in cased_args for binder b;
                     -- a binder may appear in the bag more than once
 160             discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc)
 161                                       0 cased_args
 162         }
 163   where
 164     collect_val_bndrs e = go False [] e
 165         -- We need to be a bit careful about how we collect the
 166         -- value binders.  In particular, if we see
 167         --      __inline_me (\x y -> e)
 168         -- We want to say "2 value binders".  Why?  So that
 169         -- we take account of information given for the arguments
 170
             -- go threads a "saw InlineMe" flag and a reversed list of the Id binders
             -- seen so far; binders that are not Ids are skipped.  It stops at the
             -- first expression that is neither an InlineMe note nor a lambda.
 171     go inline rev_vbs (Note InlineMe e)     = go True   rev_vbs     e
 172     go inline rev_vbs (Lam b e) | isId b    = go inline (b:rev_vbs) e
 173                                 | otherwise = go inline rev_vbs     e
 174     go inline rev_vbs e                     = (inline, reverse rev_vbs, e)
 175 \end{code}
 176
 177 \begin{code}
 178 sizeExpr :: Int             -- Bomb out if it gets bigger than this
 179          -> [Id]            -- Arguments; we're interested in which of these
 180                             -- get case'd
 181          -> CoreExpr
 182          -> ExprSize
             -- Walks the expression once, adding up a size and collecting, per
             -- top-level argument, the discounts earned where that argument is
             -- scrutinised or applied.  Returns TooBig as soon as the running
             -- total exceeds the bomb-out limit (see addSize / addSizeN below).
 183
 184 sizeExpr (I# bOMB_OUT_SIZE) top_args expr
 185   = size_up expr
 186   where
 187     size_up (Type t)          = sizeZero        -- Types cost nothing
 188     size_up (Var v)           = sizeOne
 189
 190     size_up (Note _ body)     = size_up body    -- Notes cost nothing
 191
 192     size_up (App fun (Type t))  = size_up fun
 193     size_up (App fun arg)     = size_up_app fun [arg]
 194
 195     size_up (Lit lit) = sizeOne
 196
 197     size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 1)
 198                       | otherwise = size_up e
 199
 200     size_up (Let (NonRec binder rhs) body)
 201       = nukeScrutDiscount (size_up rhs)         `addSize`
 202         size_up body                            `addSizeN`
 203         (if isUnLiftedType (idType binder) then 0 else 1)
 204                 -- For the allocation
 205                 -- If the binder has an unlifted type there is no allocation
 206
 207     size_up (Let (Rec pairs) body)
 208       = nukeScrutDiscount rhs_size              `addSize`
 209         size_up body                            `addSizeN`
 210         length pairs            -- For the allocation
 211       where
 212         rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
 213
 214         -- We want to make wrapper-style evaluation look cheap, so that
 215         -- when we inline a wrapper it doesn't make call site (much) bigger
 216         -- Otherwise we get nasty phase ordering stuff:
 217         --      f x = g x x
 218         --      h y = ...(f e)...
 219         -- If we inline g's wrapper, f looks big, and doesn't get inlined
 220         -- into h; if we inline f first, while it looks small, then g's
 221         -- wrapper will get inlined later anyway.  To avoid this nasty
 222         -- ordering difference, we make (case a of (x,y) -> ...) look free.
 223     size_up (Case (Var v) _ [alt])
 224         | v `elem` top_args
 225         = size_up_alt alt `addSize` SizeIs 0# (unitBag (v, 1)) 0#
 226                 -- Good to inline if an arg is scrutinised, because
 227                 -- that may eliminate allocation in the caller
 228                 -- And it eliminates the case itself
 229         | otherwise
 230         = size_up_alt alt
 231
 232         -- Scrutinising one of the argument variables,
 233         -- with more than one alternative
             -- (There is no 'otherwise' guard: when v is not a top-level argument
             --  this equation fails and the general Case equation below applies.)
 234     size_up (Case (Var v) _ alts)
 235         | v `elem` top_args
 236         = alts_size (foldr addSize sizeOne alt_sizes)   -- The 1 is for the scrutinee
 237                     (foldr1 maxSize alt_sizes)
 238         where
 239           alt_sizes = map size_up_alt alts
 240
 241           alts_size (SizeIs tot tot_disc tot_scrut)             -- Size of all alternatives
 242                     (SizeIs max max_disc max_scrut)             -- Size of biggest alternative
 243                 = SizeIs tot (unitBag (v, I# (1# +# tot -# max)) `unionBags` max_disc) max_scrut
 244                         -- If the variable is known, we produce a discount that
 245                         -- will take us back to 'max', the size of the largest alternative
 246                         -- The 1+ is a little discount for reduced allocation in the caller
 247
                   -- Reached when either size is TooBig; the total is returned unchanged
 248           alts_size tot_size _ = tot_size
 249
 250
 251     size_up (Case e _ alts) = nukeScrutDiscount (size_up e) `addSize`
 252                               foldr (addSize . size_up_alt) sizeZero alts
 253                 -- We don't charge for the case itself
 254                 -- It's a strict thing, and the price of the call
 255                 -- is paid by scrut.  Also consider
 256                 --      case f x of DEFAULT -> e
 257                 -- This is just ';'!  Don't charge for it.
 258
 259     ------------
             -- size_up_app walks down the application spine collecting the value
             -- arguments (type arguments are dropped), then adds the size of each
             -- collected argument to the size of the function head
 260     size_up_app (App fun arg) args
 261         | isTypeArg arg              = size_up_app fun args
 262         | otherwise                  = size_up_app fun (arg:args)
 263     size_up_app fun           args   = foldr (addSize . nukeScrutDiscount . size_up)
 264                                              (size_up_fun fun args)
 265                                              args
 266
 267         -- A function application with at least one value argument
 268         -- so if the function is an argument give it an arg-discount
 269         --
 270         -- Also behave specially if the function is a build
 271         --
 272         -- Also if the function is a constant Id (constr or primop)
 273         -- compute discounts specially
 274     size_up_fun (Var fun) args
 275       | fun `hasKey` buildIdKey   = buildSize
 276       | fun `hasKey` augmentIdKey = augmentSize
 277       | otherwise
 278       = case idFlavour fun of
 279           DataConId dc -> conSizeN (valArgCount args)
 280
 281           PrimOpId op  -> primOpSize op (valArgCount args)
 282                           -- foldr addSize (primOpSize op) (map arg_discount args)
 283                           -- At one time I tried giving an arg-discount if a primop
 284                           -- is applied to one of the function's arguments, but it's
 285                           -- not good.  At the moment, any unlifted-type arg gets a
 286                           -- 'True' for 'yes I'm evald', so we collect the discount even
 287                           -- if we know nothing about it.  And just having it in a primop
 288                           -- doesn't help at all if we don't know something more.
 289
 290           other        -> fun_discount fun `addSizeN`
 291                           (1 + length (filter (not . exprIsTrivial) args))
 292                                 -- The 1+ is for the function itself
 293                                 -- Add 1 for each non-trivial arg;
 294                                 -- the allocation cost, as in let(rec)
 295                                 -- Slight hack here: for constructors the args are almost always
 296                                 --      trivial; and for primops they are almost always prim typed
 297                                 --      We should really only count for non-prim-typed args in the
 298                                 --      general case, but that seems too much like hard work
 299
 300     size_up_fun other args = size_up other
 301
 302     ------------
 303     size_up_alt (con, bndrs, rhs) = size_up rhs
 304             -- Don't charge for args, so that wrappers look cheap
 305
 306     ------------
 307         -- We want to record if we're case'ing, or applying, an argument
 308     fun_discount v | v `elem` top_args = SizeIs 0# (unitBag (v, opt_UF_FunAppDiscount)) 0#
 309     fun_discount other                    = sizeZero
 310
 311     ------------
 312         -- These addSize things have to be here because
 313         -- I don't want to give them bOMB_OUT_SIZE as an argument
 314
             -- addSizeN adds a plain Int to a size, leaving the discounts untouched
 315     addSizeN TooBig          _      = TooBig
 316     addSizeN (SizeIs n xs d) (I# m)
 317       | n_tot ># bOMB_OUT_SIZE      = TooBig
 318       | otherwise                   = SizeIs n_tot xs d
 319       where
 320         n_tot = n +# m
 321
             -- addSize combines two sizes: sizes and result discounts are summed,
             -- and the two bags of argument discounts are unioned
 322     addSize TooBig _ = TooBig
 323     addSize _ TooBig = TooBig
 324     addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
 325       | n_tot ># bOMB_OUT_SIZE = TooBig
 326       | otherwise              = SizeIs n_tot xys d_tot
 327       where
 328         n_tot = n1 +# n2
 329         d_tot = d1 +# d2
 330         xys   = xs `unionBags` ys
 331 \end{code}
 332
 333 Code for manipulating sizes
 334
 335 \begin{code}
 336
 337 data ExprSize = TooBig                  -- Size went over the bomb-out limit; nothing else is kept
 338               | SizeIs Int#             -- Size found
 339                        (Bag (Id,Int))   -- Arguments cased or applied herein, and discount for each such
 340                        Int#             -- Size to subtract if result is scrutinised
 341                                         -- by a case expression
 342
 343 isTooBig sz = case sz of { TooBig -> True ; SizeIs _ _ _ -> False }
 344         -- True exactly when the size computation gave up
 345
 346 maxSize TooBig _      = TooBig          -- TooBig on either side wins
 347 maxSize _      TooBig = TooBig
 348 maxSize lhs@(SizeIs nl _ _) rhs@(SizeIs nr _ _)
 349   = if nl ># nr then lhs else rhs       -- on a tie the second argument is returned
 350
 351 sizeZero        = SizeIs 0# emptyBag 0#
 352 sizeOne         = SizeIs 1# emptyBag 0#
 353 sizeTwo         = SizeIs 2# emptyBag 0#
 354 sizeN (I# n)    = SizeIs n  emptyBag 0#
             -- Plain sizes: the given size, no argument discounts, and
             -- no discount for the result being scrutinised
 355 conSizeN (I# n) = SizeIs 1# emptyBag (n +# 1#)
 356         -- Treat constructors as size 1; we are keen to expose them
 357         -- (and we charge separately for their args).  We can't treat
 358         -- them as size zero, else we find that (I# x) has size 1,
 359         -- which is the same as a lone variable; and hence 'v' will
 360         -- always be replaced by (I# x), where v is bound to I# x.
             --
             -- The last field, n+1, is the discount given when the result
             -- is scrutinised; n is the argument count passed in by the caller.
 361
 362 primOpSize op n_args
 363   = if not (primOpIsDupable op)      then sizeN opt_UF_DearOp   -- charged the "dear op" cost
 364     else if not (primOpOutOfLine op) then sizeZero              -- These are good to inline
 365     else sizeOne
 366
 367 buildSize = SizeIs (-2#) emptyBag 4#
 368         -- We really want to inline applications of build
 369         -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
 370         -- Indeed, we should add a result_discount because build is
 371         -- very like a constructor.  We don't bother to check that the
 372         -- build is saturated (it usually is).  The "-2" discounts for the \c n.
 373         -- The "4" is rather arbitrary.
 374
 375 augmentSize = SizeIs (-2#) emptyBag 4#
 376         -- Ditto: (augment t (\cn -> e) ys) should cost only the cost of
 377         -- e plus ys. The -2 accounts for the \cn; the 4 is the result discount
 378
 379 nukeScrutDiscount TooBig                  = TooBig
 380 nukeScrutDiscount (SizeIs sz arg_discs _) = SizeIs sz arg_discs 0#   -- zero the result discount only
 381
 382 -- A lambda result gets opt_UF_FunAppDiscount as its result discount, for when it is applied
 383 lamScrutDiscount TooBig                  = TooBig
 384 lamScrutDiscount (SizeIs sz arg_discs _) = case opt_UF_FunAppDiscount of { I# app_disc -> SizeIs sz arg_discs app_disc }
 385 \end{code}
 386
 387
 388 %************************************************************************
 389 %*                                                                      *
 390 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
 391 %*                                                                      *
 392 %************************************************************************
 393
 394 We have very limited information about an unfolding expression: (1)~so
 395 many type arguments and so many value arguments expected---for our
 396 purposes here, we assume we've got those.  (2)~A ``size'' or ``cost,''
 397 a single integer.  (3)~An ``argument info'' vector.  For this, what we
 398 have at the moment is a Boolean per argument position that says, ``I
 399 will look with great favour on an explicit constructor in this
 400 position.'' (4)~The ``discount'' to subtract if the expression
 401 is being scrutinised.
 402
 403 Assuming we have enough type- and value arguments (if not, we give up
 404 immediately), then we see if the ``discounted size'' is below some
 405 (semi-arbitrary) threshold.  It works like this: for every argument
 406 position where we're looking for a constructor AND WE HAVE ONE in our
 407 hands, we get a (again, semi-arbitrary) discount [proportional to the
 408 number of constructors in the type being scrutinized].
 409
 410 If we're in the context of a scrutinee ( \tr{(case <expr > of A .. -> ...;.. )})
 411 and the expression in question will evaluate to a constructor, we use
 412 the computed discount size *for the result only* rather than
 413 computing the argument discounts. Since we know the result of
 414 the expression is going to be taken apart, discounting its size
 415 is more accurate (see @sizeExpr@ above for how this discount size
 416 is computed).
 417
 418 We use this one to avoid exporting inlinings that we ``couldn't possibly
 419 use'' on the other side.  Can be overridden w/ flaggery.
 420 Just the same as smallEnoughToInline, except that it has no actual arguments.
 421
 422 \begin{code}
 423 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
 424 couldBeSmallEnoughToInline threshold rhs = may_unfold (calcUnfoldingGuidance threshold rhs)
 425   where may_unfold UnfoldNever = False      -- the guidance rules inlining out entirely
 426         may_unfold _           = True
 427
 428 certainlyWillInline :: Id -> Bool
 429         -- True when the Id's unfolding is a value whose size, less the
 430         -- n_vals+1 charged for the call itself, fits opt_UF_UseThreshold
 431 certainlyWillInline v
 432   = case idUnfolding v of
 433       CoreUnfolding _ _ _ is_value _ (UnfoldIfGoodArgs n_vals _ size _)
 434         | is_value
 435         -> size - (n_vals + 1) <= opt_UF_UseThreshold
 436         -- A non-value, or any other kind of unfolding, falls through to here
 437       _ -> False
 438 \end{code}
 439
 440 @okToUnfoldInHiFile@ is used when emitting unfolding info into an interface
 441 file to determine whether an unfolding candidate really should be unfolded.
 442 The predicate is needed to prevent @_casm_@s (+ lit-lits) from being emitted
 443 into interface files.
 444
 445 The reason for not inlining expressions containing _casm_s into interface files
 446 is that these fragments of C are likely to mention functions/#defines that
 447 will be out-of-scope when inlined into another module. This is not an
 448 unfixable problem for the user (just need to -#include the approp. header
 449 file), but turning it off seems to be the simplest thing to do.
 450
 451 \begin{code}
 452 okToUnfoldInHiFile :: CoreExpr -> Bool
 453         -- An expression is fit for an interface file when casm unfolding is
 454         -- switched on, or when it mentions no _casm_ call and no lit-lit literal
 455 okToUnfoldInHiFile e = opt_UnfoldCasms || casm_free e
 456  where
 457     -- Walk the whole expression; every sub-expression must pass
 458     casm_free (Type _)            = True
 459     casm_free (Note _ body)       = casm_free body
 460     casm_free (Lam _ body)        = casm_free body
 461     casm_free (Lit lit)           = not (isLitLitLit lit)
 462     casm_free (Var v)             = maybe True prim_op_ok (isPrimOpId_maybe v)
 463     casm_free (App fun arg)       = casm_free fun && casm_free arg
 464     casm_free (Let binds body)    = all casm_free (body : rhssOfBind binds)
 465     casm_free (Case scrut _ alts) = all casm_free (scrut : rhssOfAlts alts) &&
 466                                     all (not . isLitLitLit) [ lit | (LitAlt lit, _, _) <- alts ]
 467
 468     -- Any primop is fine except a ccall that is really a _casm_
 469     prim_op_ok (CCallOp ccall) = not (ccallIsCasm ccall)
 470     prim_op_ok _               = True
 471 \end{code}
 472
 473
 474 %************************************************************************
 475 %*                                                                      *
 476 \subsection{callSiteInline}
 477 %*                                                                      *
 478 %************************************************************************
 479
 480 This is the key function.  It decides whether to inline a variable at a call site
 481
 482 callSiteInline is used at call sites, so it is a bit more generous.
 483 It's a very important function that embodies lots of heuristics.
 484 A non-WHNF can be inlined if it doesn't occur inside a lambda,
 485 and occurs exactly once or
 486     occurs once in each branch of a case and is small
 487
 488 If the thing is in WHNF, there's no danger of duplicating work,
 489 so we can inline if it occurs once, or is small
 490
 491 NOTE: we don't want to inline top-level functions that always diverge.
 492 It just makes the code bigger.  It turns out that the convenient way to prevent
 493 them inlining is to give them a NOINLINE pragma, which we do in
 494 StrictAnal.addStrictnessInfoToTopId
 495
 496 \begin{code}
 497 callSiteInline :: Bool                  -- True <=> the Id is black listed
 498                -> Bool                  -- 'inline' note at call site
 499                -> OccInfo
 500                -> Id                    -- The Id
 501                -> [Bool]                -- One for each value arg; True if it is interesting
 502                -> Bool                  -- True <=> continuation is interesting
 503                -> Maybe CoreExpr        -- Unfolding, if any
             -- Returns Just the unfolding template when the Id should be inlined at
             -- this call site, and Nothing otherwise:
             --   * NoUnfolding and OtherCon never inline
             --   * a CompulsoryUnfolding inlines unless the Id is black listed
             --   * a CoreUnfolding goes through the occurrence-info and guidance
             --     checks below, and never inlines when black listed
 504
 505
 506 callSiteInline black_listed inline_call occ id arg_infos interesting_cont
 507   = case idUnfolding id of {
 508         NoUnfolding -> Nothing ;
 509         OtherCon cs -> Nothing ;
 510         CompulsoryUnfolding unf_template | black_listed -> Nothing
 511                                          | otherwise    -> Just unf_template ;
 512                 -- Constructors have compulsory unfoldings, but
 513                 -- may have rules, in which case they are
 514                 -- black listed till later
 515         CoreUnfolding unf_template is_top is_cheap is_value is_bot guidance ->
 516
 517     let
 518         result | yes_or_no = Just unf_template
 519                | otherwise = Nothing
 520
 521         n_val_args  = length arg_infos
 522
 523         ok_inside_lam = is_value || is_bot || (is_cheap && not is_top)
 524                                 -- I'm experimenting with is_cheap && not is_top
 525
                 -- yes_or_no first rules out work duplication (the in_lam /
                 -- ok_inside_lam test) and only then asks consider_safe whether
                 -- inlining is worthwhile
 526         yes_or_no
 527           | black_listed = False
 528           | otherwise    = case occ of
 529                                 IAmDead              -> pprTrace "callSiteInline: dead" (ppr id) False
 530                                 IAmALoopBreaker      -> False
 531                                 OneOcc in_lam one_br -> (not in_lam || ok_inside_lam) && consider_safe in_lam True  one_br
 532                                 NoOccInfo            -> ok_inside_lam                 && consider_safe True   False False
 533
 534         consider_safe in_lam once once_in_one_branch
 535                 -- consider_safe decides whether it's a good idea to inline something,
 536                 -- given that there's no work-duplication issue (the caller checks that).
 537                 -- once_in_one_branch = True means there's a unique textual occurrence
 538           | inline_call  = True
 539
 540           | once_in_one_branch
 541                 -- Be very keen to inline something if this is its unique occurrence:
 542                 --
 543                 --   a) Inlining gives a good chance of eliminating the original
 544                 --      binding (and hence the allocation) for the thing.
 545                 --      (Provided it's not a top level binding, in which case the
 546                 --       allocation costs nothing.)
 547                 --
 548                 --   b) Inlining a function that is called only once exposes the
 549                 --      body function to the call site.
 550                 --
 551                 -- The only time we hold back is when substituting inside a lambda;
 552                 -- then if the context is totally uninteresting (not applied, not scrutinised)
 553                 -- there is no point in substituting because it might just increase allocation,
 554                 -- by allocating the function itself many times
 555                 --
 556                 -- Note: there used to be a '&& not top_level' in the guard above,
 557                 --       but that stopped us inlining top-level functions used only once,
 558                 --       which is stupid
 559           = not in_lam || not (null arg_infos) || interesting_cont
 560
 561           | otherwise
 562           = case guidance of
 563               UnfoldNever  -> False ;
 564               UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount
 565
 566                   | enough_args && size <= (n_vals_wanted + 1)
 567                         -- No size increase
 568                         -- Size of call is n_vals_wanted (+1 for the function)
 569                   -> True
 570
 571                   | otherwise
 572                   -> some_benefit && small_enough
 573
 574                   where
 575                     some_benefit = or arg_infos || really_interesting_cont ||
 576                                    (not is_top && (once || (n_vals_wanted > 0 && enough_args)))
 577                         -- If it occurs more than once, there must be something interesting
 578                         -- about some argument, or the result context, to make it worth inlining
 579                         --
 580                         -- If a function has a nested defn we also record some-benefit,
 581                         -- on the grounds that we are often able to eliminate the binding,
 582                         -- and hence the allocation, for the function altogether; this is good
 583                         -- for join points.  But this only makes sense for *functions*;
 584                         -- inlining a constructor doesn't help allocation unless the result is
 585                         -- scrutinised.  UNLESS the constructor occurs just once, albeit possibly
 586                         -- in multiple case branches.  Then inlining it doesn't increase allocation,
 587                         -- but it does increase the chance that the constructor won't be allocated at all
 588                         -- in the branches that don't use it.
 589
 590                     enough_args           = n_val_args >= n_vals_wanted
 591                     really_interesting_cont | n_val_args <  n_vals_wanted = False       -- Too few args
 592                                             | n_val_args == n_vals_wanted = interesting_cont
 593                                             | otherwise                   = True        -- Extra args
 594                         -- really_interesting_cont tells if the result of the
 595                         -- call is in an interesting context.
 596
 597                     small_enough = (size - discount) <= opt_UF_UseThreshold
 598                     discount     = computeDiscount n_vals_wanted arg_discounts res_discount
 599                                                  arg_infos really_interesting_cont
 600
 601     in
             -- With DEBUG compiled in and opt_D_dump_inlinings set, trace every
             -- decision together with the inputs that led to it; the value
             -- returned is 'result' in either case
 602 #ifdef DEBUG
 603     if opt_D_dump_inlinings then
 604         pprTrace "Considering inlining"
 605                  (ppr id <+> vcat [text "black listed" <+> ppr black_listed,
 606                                    text "occ info:" <+> ppr occ,
 607                                    text "arg infos" <+> ppr arg_infos,
 608                                    text "interesting continuation" <+> ppr interesting_cont,
 609                                    text "is value:" <+> ppr is_value,
 610                                    text "is cheap:" <+> ppr is_cheap,
 611                                    text "is bottom:" <+> ppr is_bot,
 612                                    text "is top-level:"    <+> ppr is_top,
 613                                    text "guidance" <+> ppr guidance,
 614                                    text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO",
 615                                    if yes_or_no then
 616                                         text "Unfolding =" <+> pprCoreExpr unf_template
 617                                    else empty])
 618                   result
 619     else
 620 #endif
 621     result
 622     }
 623
 624 computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Bool -> Int
             -- Arguments, in order: the number of value args the unfolding wants,
             -- the per-argument discounts, the result discount, the per-argument
             -- "interesting" flags from the call site, and whether the result is used
 625 computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
 626         -- We multiply the raw discounts (arg_discount and result_discount)
 627         -- by opt_UF_KeenessFactor because the former have to do with
 628         -- *size* whereas the discounts imply that there's some extra
 629         -- *efficiency* to be gained (e.g. beta reductions, case reductions)
 630         -- by inlining.
 631
 632         -- we also discount 1 for each argument passed, because these will
 633         -- reduce with the lambdas in the function (we count 1 for a lambda
 634         -- in size_up).
 635   = 1 +                 -- Discount of 1 because the result replaces the call
 636                         -- so we count 1 for the function itself
 637     length (take n_vals_wanted arg_infos) +
 638                         -- Discount of 1 for each arg supplied, because the
 639                         -- result replaces the call
 640     round (opt_UF_KeenessFactor *
 641            fromInt (arg_discount + result_discount))
 642   where
             -- zipWith stops at the shorter list, so arguments beyond the
             -- recorded discounts (or discounts beyond the arguments) add nothing
 643     arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
 644
 645     mk_arg_discount discount is_evald | is_evald  = discount
 646                                       | otherwise = 0
 647
 648         -- Don't give a result discount unless there are enough args
 649     result_discount | result_used = res_discount        -- Over-applied, or case scrut
 650                     | otherwise   = 0
 651 \end{code}
 652
 653
 654 %************************************************************************
 655 %*                                                                      *
 656 \subsection{Black-listing}
 657 %*                                                                      *
 658 %************************************************************************
 659
 660 Inlining is controlled by the "Inline phase" number, which is set
 661 by the per-simplification-pass '-finline-phase' flag.
 662
 663 For optimisation we use phase 1, then phase 2, and finally no phase at all
 664 (i.e. no -finline-phase flag), in that order.  The meanings of these are
 665 determined by the @blackListed@ function here.
 666
 667 The final simplification doesn't have a phase number.
 668
 669 Pragmas
 670 ~~~~~~~
 671         Pragma          Black list if
 672
 673 (least black listing, most inlining)
 674         INLINE n foo    phase is Just p *and* p<n *and* foo appears on LHS of rule
 675         INLINE foo      phase is Just p *and*           foo appears on LHS of rule
 676         NOINLINE n foo  phase is Just p *and* (p<n *or* foo appears on LHS of rule)
 677         NOINLINE foo    always
 678 (most black listing, least inlining)
 679
 680 \begin{code}
 681 blackListed :: IdSet            -- Ids used in transformation rules
 682             -> Maybe Int        -- Inline phase; Nothing <=> last phase
 683             -> Id -> Bool       -- True <=> blacklisted
 684
 685 -- The blackListed function sees whether a variable should *not* be
 686 -- inlined because of the inline phase we are in.  This is the sole
 687 -- place that the inline phase number is looked at.
 688 -- In the last phase only the inline pragma matters; rule_vars is not consulted.
 689 blackListed rule_vars Nothing           -- Last phase
 690   = \v -> isNeverInlinePrag (idInlinePragma v)
 691 -- In a numbered phase the answer also depends on rules; normal_case decides.
 692 blackListed rule_vars (Just phase)
 693   = \v -> normal_case rule_vars phase v
 694
 695 normal_case rule_vars phase v   -- Black-listing decision for v in numbered phase `phase`
 696   = case idInlinePragma v of
 697         NoInlinePragInfo -> has_rules   -- No pragma: black list only if rules are involved
 698         -- Pragma without a phase number
 699         IMustNotBeINLINEd from_INLINE Nothing
 700           | from_INLINE -> has_rules    -- "INLINE foo" row: black list only if rules are involved
 701           | otherwise   -> True         -- "NOINLINE foo" row: always blacklisted
 702         -- Pragma with a phase number (threshold is the n of the pragma table)
 703         IMustNotBeINLINEd from_inline (Just threshold)
 704           | from_inline -> (phase < threshold && has_rules)     -- "INLINE n foo" row
 705           | otherwise   -> (phase < threshold || has_rules)     -- "NOINLINE n foo" row
 706   where
 707     has_rules =  v `elemVarSet` rule_vars      -- v is in the rule_vars set,
 708               || not (isEmptyCoreRules (idSpecialisation v))   -- or v's idSpecialisation is non-empty
 709 \end{code}
 710
 711
 712 SLPJ 95/04: Why @runST@ must be inlined very late:
 713 \begin{verbatim}
 714 f x =
 715   runST ( \ s -> let
 716                     (a, s')  = newArray# 100 [] s
 717                     (_, s'') = fill_in_array_or_something a x s'
 718                   in
 719                   freezeArray# a s'' )
 720 \end{verbatim}
 721 If we inline @runST@, we'll get:
 722 \begin{verbatim}
 723 f x = let
 724         (a, s')  = newArray# 100 [] realWorld#{-NB-}
 725         (_, s'') = fill_in_array_or_something a x s'
 726       in
 727       freezeArray# a s''
 728 \end{verbatim}
 729 And now the @newArray#@ binding can be floated to become a CAF, which
 730 is totally and utterly wrong:
 731 \begin{verbatim}
 732 f = let
 733     (a, s')  = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
 734     in
 735     \ x ->
 736         let (_, s'') = fill_in_array_or_something a x s' in
 737         freezeArray# a s''
 738 \end{verbatim}
 739 All calls to @f@ will share a {\em single} array!
 740
 741 Yet we do want to inline runST sometime, so we can avoid
 742 needless code.  Solution: black list it until the last moment.
 743