ghc/compiler/coreSyn/CoreUnfold.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1994-1998
   3 %
   4 \section[CoreUnfold]{Core-syntax unfoldings}
   5
   6 Unfoldings (which can travel across module boundaries) are in Core
   7 syntax (namely @CoreExpr@s).
   8
   9 The type @Unfolding@ sits ``above'' simply-Core-expressions
  10 unfoldings, capturing ``higher-level'' things we know about a binding,
  11 usually things that the simplifier found out (e.g., ``it's a
  12 literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
  13 find, unsurprisingly, a Core expression.
  14
  15 \begin{code}
  16 module CoreUnfold (
  17         Unfolding, UnfoldingGuidance,   -- Abstract types
  18
  19         noUnfolding, mkTopUnfolding, mkUnfolding, mkCompulsoryUnfolding, seqUnfolding,
  20         mkOtherCon, otherCons,
  21         unfoldingTemplate, maybeUnfoldingTemplate,
  22         isEvaldUnfolding, isValueUnfolding, isCheapUnfolding, isCompulsoryUnfolding,
  23         hasUnfolding, hasSomeUnfolding,
  24
  25         couldBeSmallEnoughToInline,
  26         certainlyWillInline,
  27         okToUnfoldInHiFile,
  28
  29         callSiteInline, blackListed
  30     ) where
  31
  32 #include "HsVersions.h"
  33
  34 import CmdLineOpts      ( opt_UF_CreationThreshold,
  35                           opt_UF_UseThreshold,
  36                           opt_UF_ScrutConDiscount,
  37                           opt_UF_FunAppDiscount,
  38                           opt_UF_PrimArgDiscount,
  39                           opt_UF_KeenessFactor,
  40                           opt_UF_CheapOp, opt_UF_DearOp,
  41                           opt_UnfoldCasms, opt_PprStyle_Debug,
  42                           opt_D_dump_inlinings
  43                         )
  44 import CoreSyn
  45 import PprCore          ( pprCoreExpr )
  46 import OccurAnal        ( occurAnalyseGlobalExpr )
  47 import CoreUtils        ( exprIsValue, exprIsCheap, exprIsBottom, exprIsTrivial )
  48 import Id               ( Id, idType, idFlavour, isId, idWorkerInfo,
  49                           idSpecialisation, idInlinePragma, idUnfolding,
  50                           isPrimOpId_maybe
  51                         )
  52 import VarSet
  53 import Literal          ( isLitLitLit )
  54 import PrimOp           ( PrimOp(..), primOpIsDupable, primOpOutOfLine, ccallIsCasm )
  55 import IdInfo           ( ArityInfo(..), InlinePragInfo(..), OccInfo(..), IdFlavour(..), CprInfo(..),
  56                           insideLam, workerExists, isNeverInlinePrag
  57                         )
  58 import Type             ( splitFunTy_maybe, isUnLiftedType )
  59 import Unique           ( Unique, buildIdKey, augmentIdKey, hasKey )
  60 import Bag
  61 import Outputable
  62
  63 #if __GLASGOW_HASKELL__ >= 404
  64 import GlaExts          ( fromInt )
  65 #endif
  66 \end{code}
  67
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection{Making unfoldings}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 \begin{code}
  76 mkTopUnfolding expr = mkUnfolding True {- Top level -} expr
  77
  78 mkUnfolding top_lvl expr
  79   = CoreUnfolding (occurAnalyseGlobalExpr expr)
  80                   top_lvl
  81                   (exprIsCheap expr)
  82                   (exprIsValue expr)
  83                   (exprIsBottom expr)
  84                   (calcUnfoldingGuidance opt_UF_CreationThreshold expr)
  85         -- Sometimes during simplification, there's a large let-bound thing
  86         -- which has been substituted, and so is now dead; so 'expr' contains
  87         -- two copies of the thing while the occurrence-analysed expression doesn't
  88         -- Nevertheless, we don't occ-analyse before computing the size because the
  89         -- size computation bales out after a while, whereas occurrence analysis does not.
  90         --
  91         -- This can occasionally mean that the guidance is very pessimistic;
  92         -- it gets fixed up next round
  93
  94 mkCompulsoryUnfolding expr      -- Used for things that absolutely must be unfolded
  95   = CompulsoryUnfolding (occurAnalyseGlobalExpr expr)
  96 \end{code}
  97
  98
  99 %************************************************************************
 100 %*                                                                      *
 101 \subsection{The UnfoldingGuidance type}
 102 %*                                                                      *
 103 %************************************************************************
 104
 105 \begin{code}
 106 instance Outputable UnfoldingGuidance where
 107     ppr UnfoldNever     = ptext SLIT("NEVER")
 108     ppr (UnfoldIfGoodArgs v cs size discount)
 109       = hsep [ ptext SLIT("IF_ARGS"), int v,
 110                brackets (hsep (map int cs)),
 111                int size,
 112                int discount ]
 113 \end{code}
 114
 115
 116 \begin{code}
  117 calcUnfoldingGuidance
  118         :: Int                  -- bomb out if size gets bigger than this
  119         -> CoreExpr             -- expression to look at
  120         -> UnfoldingGuidance
      -- Strips the leading lambdas (and any InlineMe notes) off the expression,
      -- sizes the remaining body with sizeExpr, and packages the result as
      -- guidance: UnfoldNever for a too-big body without an INLINE note,
      -- otherwise UnfoldIfGoodArgs with one discount per value binder.
  121 calcUnfoldingGuidance bOMB_OUT_SIZE expr
  122   = case collect_val_bndrs expr of { (inline, val_binders, body) ->
  123     let
  124         n_val_binders = length val_binders
  125
  126         max_inline_size = n_val_binders+2
  127         -- The idea is that if there is an INLINE pragma (inline is True)
  128         -- and there's a big body, we give a size of n_val_binders+2.
  129         -- This is just enough to fail the no-size-increase test in callSiteInline,
  130         --   so that INLINE things don't get inlined into entirely boring contexts,
  131         --   but no more.
  132
  133     in
  134     case (sizeExpr bOMB_OUT_SIZE val_binders body) of
  135
  136       TooBig
  137         | not inline -> UnfoldNever
  138                 -- A big function with an INLINE pragma must
  139                 -- have an UnfoldIfGoodArgs guidance
  140         | inline     -> UnfoldIfGoodArgs n_val_binders
  141                                          (map (const 0) val_binders)
  142                                          max_inline_size 0
  143
  144       SizeIs size cased_args scrut_discount
  145         -> UnfoldIfGoodArgs
  146                         n_val_binders
  147                         (map discount_for val_binders)
  148                         final_size
  149                         (I# scrut_discount)
  150         where
  151             boxed_size    = I# size
  152
  153             final_size | inline     = boxed_size `min` max_inline_size
  154                        | otherwise  = boxed_size
  155
  156                 -- Sometimes an INLINE thing is smaller than n_val_binders+2.
  157                 -- A particular case in point is a constructor, which has size 1.
  158                 -- We want to inline this regardless, hence the `min`
  159
                -- Total discount recorded for binder b: the sum of every
                -- entry for b in the bag of cased arguments
  160             discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc)
  161                                       0 cased_args
  162         }
  163   where
  164     collect_val_bndrs e = go False [] e
  165         -- We need to be a bit careful about how we collect the
  166         -- value binders.  In particular, if we see
  167         --      __inline_me (\x y -> e)
  168         -- We want to say "2 value binders".  Why?  So that
  169         -- we take account of information given for the arguments
  170
        -- go returns (saw an InlineMe note?, value binders in order, body).
        -- Type lambdas are skipped; binders are accumulated in reverse.
  171     go inline rev_vbs (Note InlineMe e)     = go True   rev_vbs     e
  172     go inline rev_vbs (Lam b e) | isId b    = go inline (b:rev_vbs) e
  173                                 | otherwise = go inline rev_vbs     e
  174     go inline rev_vbs e                     = (inline, reverse rev_vbs, e)
 175 \end{code}
 176
 177 \begin{code}
 178 sizeExpr :: Int             -- Bomb out if it gets bigger than this
 179          -> [Id]            -- Arguments; we're interested in which of these
 180                             -- get case'd
 181          -> CoreExpr
 182          -> ExprSize
 183
 184 sizeExpr (I# bOMB_OUT_SIZE) top_args expr
 185   = size_up expr
 186   where
 187     size_up (Type t)          = sizeZero        -- Types cost nothing
 188     size_up (Var v)           = sizeOne
 189
 190     size_up (Note _ body)     = size_up body    -- Notes cost nothing
 191
 192     size_up (App fun (Type t))  = size_up fun
 193     size_up (App fun arg)     = size_up_app fun [arg]
 194
 195     size_up (Lit lit) = sizeOne
 196
 197     size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 1)
 198                       | otherwise = size_up e
 199
 200     size_up (Let (NonRec binder rhs) body)
 201       = nukeScrutDiscount (size_up rhs)         `addSize`
 202         size_up body                            `addSizeN`
 203         (if isUnLiftedType (idType binder) then 0 else 1)
 204                 -- For the allocation
 205                 -- If the binder has an unlifted type there is no allocation
 206
 207     size_up (Let (Rec pairs) body)
 208       = nukeScrutDiscount rhs_size              `addSize`
 209         size_up body                            `addSizeN`
 210         length pairs            -- For the allocation
 211       where
 212         rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
 213
 214         -- We want to make wrapper-style evaluation look cheap, so that
 215         -- when we inline a wrapper it doesn't make call site (much) bigger
 216         -- Otherwise we get nasty phase ordering stuff:
 217         --      f x = g x x
 218         --      h y = ...(f e)...
 219         -- If we inline g's wrapper, f looks big, and doesn't get inlined
 220         -- into h; if we inline f first, while it looks small, then g's
 221         -- wrapper will get inlined later anyway.  To avoid this nasty
 222         -- ordering difference, we make (case a of (x,y) -> ...) look free.
 223     size_up (Case (Var v) _ [alt])
 224         | v `elem` top_args
 225         = size_up_alt alt `addSize` SizeIs 0# (unitBag (v, 1)) 0#
 226                 -- Good to inline if an arg is scrutinised, because
 227                 -- that may eliminate allocation in the caller
 228                 -- And it eliminates the case itself
 229         | otherwise
 230         = size_up_alt alt
 231
 232         -- Scrutinising one of the argument variables,
 233         -- with more than one alternative
 234     size_up (Case (Var v) _ alts)
 235         | v `elem` top_args
 236         = alts_size (foldr addSize sizeOne alt_sizes)   -- The 1 is for the scrutinee
 237                     (foldr1 maxSize alt_sizes)
 238         where
 239           v_in_args = v `elem` top_args
 240           alt_sizes = map size_up_alt alts
 241
 242           alts_size (SizeIs tot tot_disc tot_scrut)             -- Size of all alternatives
 243                     (SizeIs max max_disc max_scrut)             -- Size of biggest alternative
 244                 = SizeIs tot (unitBag (v, I# (1# +# tot -# max)) `unionBags` max_disc) max_scrut
 245                         -- If the variable is known, we produce a discount that
 246                         -- will take us back to 'max', the size of rh largest alternative
 247                         -- The 1+ is a little discount for reduced allocation in the caller
 248
 249           alts_size tot_size _ = tot_size
 250
 251
 252     size_up (Case e _ alts) = nukeScrutDiscount (size_up e) `addSize`
 253                               foldr (addSize . size_up_alt) sizeZero alts
 254                 -- We don't charge for the case itself
 255                 -- It's a strict thing, and the price of the call
 256                 -- is paid by scrut.  Also consider
 257                 --      case f x of DEFAULT -> e
 258                 -- This is just ';'!  Don't charge for it.
 259
 260     ------------
 261     size_up_app (App fun arg) args
 262         | isTypeArg arg              = size_up_app fun args
 263         | otherwise                  = size_up_app fun (arg:args)
 264     size_up_app fun           args   = foldr (addSize . nukeScrutDiscount . size_up)
 265                                              (size_up_fun fun args)
 266                                              args
 267
 268         -- A function application with at least one value argument
 269         -- so if the function is an argument give it an arg-discount
 270         --
 271         -- Also behave specially if the function is a build
 272         --
 273         -- Also if the function is a constant Id (constr or primop)
 274         -- compute discounts specially
 275     size_up_fun (Var fun) args
 276       | fun `hasKey` buildIdKey   = buildSize
 277       | fun `hasKey` augmentIdKey = augmentSize
 278       | otherwise
 279       = case idFlavour fun of
 280           DataConId dc -> conSizeN (valArgCount args)
 281
 282           PrimOpId op  -> primOpSize op (valArgCount args)
 283                           -- foldr addSize (primOpSize op) (map arg_discount args)
 284                           -- At one time I tried giving an arg-discount if a primop
 285                           -- is applied to one of the function's arguments, but it's
 286                           -- not good.  At the moment, any unlifted-type arg gets a
 287                           -- 'True' for 'yes I'm evald', so we collect the discount even
 288                           -- if we know nothing about it.  And just having it in a primop
 289                           -- doesn't help at all if we don't know something more.
 290
 291           other        -> fun_discount fun `addSizeN`
 292                           (1 + length (filter (not . exprIsTrivial) args))
 293                                 -- The 1+ is for the function itself
 294                                 -- Add 1 for each non-trivial arg;
 295                                 -- the allocation cost, as in let(rec)
 296                                 -- Slight hack here: for constructors the args are almost always
 297                                 --      trivial; and for primops they are almost always prim typed
 298                                 --      We should really only count for non-prim-typed args in the
 299                                 --      general case, but that seems too much like hard work
 300
 301     size_up_fun other args = size_up other
 302
 303     ------------
 304     size_up_alt (con, bndrs, rhs) = size_up rhs
 305             -- Don't charge for args, so that wrappers look cheap
 306
 307     ------------
 308         -- We want to record if we're case'ing, or applying, an argument
 309     fun_discount v | v `elem` top_args = SizeIs 0# (unitBag (v, opt_UF_FunAppDiscount)) 0#
 310     fun_discount other                    = sizeZero
 311
 312     ------------
 313         -- These addSize things have to be here because
 314         -- I don't want to give them bOMB_OUT_SIZE as an argument
 315
 316     addSizeN TooBig          _      = TooBig
 317     addSizeN (SizeIs n xs d) (I# m)
 318       | n_tot ># bOMB_OUT_SIZE      = TooBig
 319       | otherwise                   = SizeIs n_tot xs d
 320       where
 321         n_tot = n +# m
 322
 323     addSize TooBig _ = TooBig
 324     addSize _ TooBig = TooBig
 325     addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
 326       | n_tot ># bOMB_OUT_SIZE = TooBig
 327       | otherwise              = SizeIs n_tot xys d_tot
 328       where
 329         n_tot = n1 +# n2
 330         d_tot = d1 +# d2
 331         xys   = xs `unionBags` ys
 332 \end{code}
 333
 334 Code for manipulating sizes
 335
 336 \begin{code}
 337
  338 data ExprSize = TooBig                  -- Size went over the bomb-out threshold
  339               | SizeIs Int#             -- Size found
  340                        (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
  341                        Int#             -- Size to subtract if result is scrutinised
  342                                         -- by a case expression
 343
 344 isTooBig TooBig = True
 345 isTooBig _      = False
 346
 347 maxSize TooBig         _                                  = TooBig
 348 maxSize _              TooBig                             = TooBig
 349 maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2  = s1
 350                                               | otherwise = s2
 351
 352 sizeZero        = SizeIs 0# emptyBag 0#
 353 sizeOne         = SizeIs 1# emptyBag 0#
 354 sizeTwo         = SizeIs 2# emptyBag 0#
 355 sizeN (I# n)    = SizeIs n  emptyBag 0#
 356 conSizeN (I# n) = SizeIs 1# emptyBag (n +# 1#)
 357         -- Treat constructors as size 1; we are keen to expose them
 358         -- (and we charge separately for their args).  We can't treat
 359         -- them as size zero, else we find that (I# x) has size 1,
 360         -- which is the same as a lone variable; and hence 'v' will
 361         -- always be replaced by (I# x), where v is bound to I# x.
 362
 363 primOpSize op n_args
 364  | not (primOpIsDupable op) = sizeN opt_UF_DearOp
 365  | not (primOpOutOfLine op) = sizeZero                  -- These are good to inline
 366  | otherwise                = sizeOne
 367
  368 buildSize = SizeIs (-2#) emptyBag 4#
  369         -- We really want to inline applications of build
  370         -- build t (\c n -> e) should cost only the cost of e (because build will be inlined later)
  371         -- Indeed, we should add a result_discount because build is
  372         -- very like a constructor.  We don't bother to check that the
  373         -- build is saturated (it usually is).  The "-2" discounts for the \c n,
  374         -- The "4" is rather arbitrary.
 375
  376 augmentSize = SizeIs (-2#) emptyBag 4#
  377         -- Like build: (augment t (\c n -> e) ys) should cost only the cost of
  378         -- e plus ys. The -2 accounts for the \c n; the 4 is the result discount
 379
 380 nukeScrutDiscount (SizeIs n vs d) = SizeIs n vs 0#
 381 nukeScrutDiscount TooBig          = TooBig
 382
 383 -- When we return a lambda, give a discount if it's used (applied)
 384 lamScrutDiscount  (SizeIs n vs d) = case opt_UF_FunAppDiscount of { I# d -> SizeIs n vs d }
 385 lamScrutDiscount TooBig           = TooBig
 386 \end{code}
 387
 388
 389 %************************************************************************
 390 %*                                                                      *
 391 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
 392 %*                                                                      *
 393 %************************************************************************
 394
 395 We have very limited information about an unfolding expression: (1)~so
 396 many type arguments and so many value arguments expected---for our
 397 purposes here, we assume we've got those.  (2)~A ``size'' or ``cost,''
 398 a single integer.  (3)~An ``argument info'' vector.  For this, what we
 399 have at the moment is a Boolean per argument position that says, ``I
 400 will look with great favour on an explicit constructor in this
 401 position.'' (4)~The ``discount'' to subtract if the expression
 402 is being scrutinised.
 403
 404 Assuming we have enough type- and value arguments (if not, we give up
 405 immediately), then we see if the ``discounted size'' is below some
 406 (semi-arbitrary) threshold.  It works like this: for every argument
 407 position where we're looking for a constructor AND WE HAVE ONE in our
  408 hands, we get a (again, semi-arbitrary) discount [proportional to the
 409 number of constructors in the type being scrutinized].
 410
 411 If we're in the context of a scrutinee ( \tr{(case <expr > of A .. -> ...;.. )})
 412 and the expression in question will evaluate to a constructor, we use
 413 the computed discount size *for the result only* rather than
 414 computing the argument discounts. Since we know the result of
 415 the expression is going to be taken apart, discounting its size
 416 is more accurate (see @sizeExpr@ above for how this discount size
 417 is computed).
 418
 419 We use this one to avoid exporting inlinings that we ``couldn't possibly
 420 use'' on the other side.  Can be overridden w/ flaggery.
 421 Just the same as smallEnoughToInline, except that it has no actual arguments.
 422
 423 \begin{code}
 424 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
 425 couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold rhs of
 426                                                 UnfoldNever -> False
 427                                                 other       -> True
 428
 429 certainlyWillInline :: Id -> Bool
 430         -- Sees if the Id is pretty certain to inline
 431 certainlyWillInline v
 432   = case idUnfolding v of
 433
 434         CoreUnfolding _ _ _ is_value _ g@(UnfoldIfGoodArgs n_vals _ size _)
 435            ->    is_value
 436               && size - (n_vals +1) <= opt_UF_UseThreshold
 437
 438         other -> False
 439 \end{code}
 440
  441 @okToUnfoldInHiFile@ is used when emitting unfolding info into an interface
 442 file to determine whether an unfolding candidate really should be unfolded.
 443 The predicate is needed to prevent @_casm_@s (+ lit-lits) from being emitted
 444 into interface files.
 445
  446 The reason for not inlining expressions containing _casm_s into interface files
  447 is that these fragments of C are likely to mention functions/#defines that
  448 will be out-of-scope when inlined into another module. This is not an
  449 unfixable problem for the user (just need to -#include the approp. header
  450 file), but turning it off seems to be the simplest thing to do.
 451
 452 \begin{code}
 453 okToUnfoldInHiFile :: CoreExpr -> Bool
 454 okToUnfoldInHiFile e = opt_UnfoldCasms || go e
 455  where
 456     -- Race over an expression looking for CCalls..
 457     go (Var v)                = case isPrimOpId_maybe v of
 458                                   Just op -> okToUnfoldPrimOp op
 459                                   Nothing -> True
 460     go (Lit lit)              = not (isLitLitLit lit)
 461     go (App fun arg)          = go fun && go arg
 462     go (Lam _ body)           = go body
 463     go (Let binds body)       = and (map go (body :rhssOfBind binds))
 464     go (Case scrut bndr alts) = and (map go (scrut:rhssOfAlts alts)) &&
 465                                 not (any isLitLitLit [ lit | (LitAlt lit, _, _) <- alts ])
 466     go (Note _ body)          = go body
 467     go (Type _)               = True
 468
 469     -- ok to unfold a PrimOp as long as it's not a _casm_
 470     okToUnfoldPrimOp (CCallOp ccall) = not (ccallIsCasm ccall)
 471     okToUnfoldPrimOp _               = True
 472 \end{code}
 473
 474
 475 %************************************************************************
 476 %*                                                                      *
 477 \subsection{callSiteInline}
 478 %*                                                                      *
 479 %************************************************************************
 480
 481 This is the key function.  It decides whether to inline a variable at a call site
 482
 483 callSiteInline is used at call sites, so it is a bit more generous.
 484 It's a very important function that embodies lots of heuristics.
 485 A non-WHNF can be inlined if it doesn't occur inside a lambda,
 486 and occurs exactly once or
 487     occurs once in each branch of a case and is small
 488
 489 If the thing is in WHNF, there's no danger of duplicating work,
 490 so we can inline if it occurs once, or is small
 491
 492 NOTE: we don't want to inline top-level functions that always diverge.
  493 It just makes the code bigger.  It turns out that the convenient way to prevent
 494 them inlining is to give them a NOINLINE pragma, which we do in
 495 StrictAnal.addStrictnessInfoToTopId
 496
 497 \begin{code}
  498 callSiteInline :: Bool                  -- True <=> the Id is black listed
  499                -> Bool                  -- 'inline' note at call site
  500                -> OccInfo               -- Occurrence info for the Id
  501                -> Id                    -- The Id
  502                -> [Bool]                -- One for each value arg; True if it is interesting
  503                -> Bool                  -- True <=> continuation is interesting
  504                -> Maybe CoreExpr        -- Unfolding, if any
  505
      -- Returns Just the unfolding template if the Id should be inlined at
      -- this call site, Nothing otherwise.  An Id with no unfolding (or only
      -- OtherCon info) is never inlined; a compulsory unfolding is always
      -- inlined unless the Id is black listed; for an ordinary CoreUnfolding
      -- the decision is 'yes_or_no' below.
  506
  507 callSiteInline black_listed inline_call occ id arg_infos interesting_cont
  508   = case idUnfolding id of {
  509         NoUnfolding -> Nothing ;
  510         OtherCon cs -> Nothing ;
  511         CompulsoryUnfolding unf_template | black_listed -> Nothing
  512                                          | otherwise    -> Just unf_template ;
  513                 -- Constructors have compulsory unfoldings, but
  514                 -- may have rules, in which case they are
  515                 -- black listed till later
  516         CoreUnfolding unf_template is_top is_cheap is_value is_bot guidance ->
  517
  518     let
  519         result | yes_or_no = Just unf_template
  520                | otherwise = Nothing
  521
  522         n_val_args  = length arg_infos
  523
  524         ok_inside_lam = is_value || is_bot || (is_cheap && not is_top)
  525                                 -- I'm experimenting with is_cheap && not is_top
  526
                -- The work-duplication check is made here, per occurrence
                -- kind; consider_safe then decides whether inlining is
                -- actually worthwhile
  527         yes_or_no
  528           | black_listed = False
  529           | otherwise    = case occ of
  530                                 IAmDead              -> pprTrace "callSiteInline: dead" (ppr id) False
  531                                 IAmALoopBreaker      -> False
  532                                 OneOcc in_lam one_br -> (not in_lam || ok_inside_lam) && consider_safe in_lam True  one_br
  533                                 NoOccInfo            -> ok_inside_lam                 && consider_safe True   False False
  534
  535         consider_safe in_lam once once_in_one_branch
  536                 -- consider_safe decides whether it's a good idea to inline something,
  537                 -- given that there's no work-duplication issue (the caller checks that).
  538                 -- once_in_one_branch = True means there's a unique textual occurrence
  539           | inline_call  = True
  540
  541           | once_in_one_branch
  542                 -- Be very keen to inline something if this is its unique occurrence:
  543                 --
  544                 --   a) Inlining gives a good chance of eliminating the original
  545                 --      binding (and hence the allocation) for the thing.
  546                 --      (Provided it's not a top level binding, in which case the
  547                 --       allocation costs nothing.)
  548                 --
  549                 --   b) Inlining a function that is called only once exposes the
  550                 --      body function to the call site.
  551                 --
  552                 -- The only time we hold back is when substituting inside a lambda;
  553                 -- then if the context is totally uninteresting (not applied, not scrutinised)
  554                 -- there is no point in substituting because it might just increase allocation,
  555                 -- by allocating the function itself many times
  556                 --
  557                 -- Note: there used to be a '&& not top_level' in the guard above,
  558                 --       but that stopped us inlining top-level functions used only once,
  559                 --       which is stupid
  560           = not in_lam || not (null arg_infos) || interesting_cont
  561
  562           | otherwise
  563           = case guidance of
  564               UnfoldNever  -> False ;
  565               UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount
  566
  567                   | enough_args && size <= (n_vals_wanted + 1)
  568                         -- No size increase
  569                         -- Size of call is n_vals_wanted (+1 for the function)
  570                   -> True
  571
  572                   | otherwise
  573                   -> some_benefit && small_enough
  574
  575                   where
  576                     some_benefit = or arg_infos || really_interesting_cont ||
  577                                    (not is_top && (once || (n_vals_wanted > 0 && enough_args)))
  578                         -- If it occurs more than once, there must be something interesting
  579                         -- about some argument, or the result context, to make it worth inlining
  580                         --
  581                         -- If a function has a nested defn we also record some-benefit,
  582                         -- on the grounds that we are often able to eliminate the binding,
  583                         -- and hence the allocation, for the function altogether; this is good
  584                         -- for join points.  But this only makes sense for *functions*;
  585                         -- inlining a constructor doesn't help allocation unless the result is
  586                         -- scrutinised.  UNLESS the constructor occurs just once, albeit possibly
  587                         -- in multiple case branches.  Then inlining it doesn't increase allocation,
  588                         -- but it does increase the chance that the constructor won't be allocated at all
  589                         -- in the branches that don't use it.
  590
  591                     enough_args           = n_val_args >= n_vals_wanted
  592                     really_interesting_cont | n_val_args <  n_vals_wanted = False       -- Too few args
  593                                             | n_val_args == n_vals_wanted = interesting_cont
  594                                             | otherwise                   = True        -- Extra args
  595                         -- really_interesting_cont tells if the result of the
  596                         -- call is in an interesting context.
  597
                        -- Small enough if the size, less the discounts earned at
                        -- this particular call site, is within the use threshold
  598                     small_enough = (size - discount) <= opt_UF_UseThreshold
  599                     discount     = computeDiscount n_vals_wanted arg_discounts res_discount
  600                                                  arg_infos really_interesting_cont
  601
  602     in
  603 #ifdef DEBUG
  604     if opt_D_dump_inlinings then
  605         pprTrace "Considering inlining"
  606                  (ppr id <+> vcat [text "black listed" <+> ppr black_listed,
  607                                    text "occ info:" <+> ppr occ,
  608                                    text "arg infos" <+> ppr arg_infos,
  609                                    text "interesting continuation" <+> ppr interesting_cont,
  610                                    text "is value:" <+> ppr is_value,
  611                                    text "is cheap:" <+> ppr is_cheap,
  612                                    text "is bottom:" <+> ppr is_bot,
  613                                    text "is top-level:"    <+> ppr is_top,
  614                                    text "guidance" <+> ppr guidance,
  615                                    text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO",
  616                                    if yes_or_no then
  617                                         text "Unfolding =" <+> pprCoreExpr unf_template
  618                                    else empty])
  619                   result
  620     else
  621 #endif
  622     result
  623     }
 624
 625 computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Bool -> Int
 626 computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
 627         -- Compute the discount for one call site (the caller subtracts it from
 628         -- the unfolding's size).  We multiply the raw discounts (arg_discount
 629         -- and result_discount) by opt_UF_KeenessFactor because the former have
 630         -- to do with *size* whereas the discounts imply that there's some extra
 631         -- *efficiency* to be gained (e.g. beta reductions, case reductions)
 632         -- by inlining.
 633         -- We also discount 1 for each argument passed, because these will
 634         -- reduce with the lambdas in the function (we count 1 for a lambda
 635         -- in size_up).
 636   = 1 +                 -- Discount of 1 because the result replaces the call
 637                         -- so we count 1 for the function itself
 638     length (take n_vals_wanted arg_infos) +
 639                         -- Discount of 1 for each arg supplied, counting at
 640                         -- most n_vals_wanted of them
 641     round (opt_UF_KeenessFactor *
 642            fromInt (arg_discount + result_discount))
 643   where
 644     arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
 645         -- An argument earns its discount only if its arg_infos flag is True
 646     mk_arg_discount discount is_evald | is_evald  = discount
 647                                       | otherwise = 0
 648
 649         -- Give the result discount only when the caller says the result is used
 650     result_discount | result_used = res_discount        -- Over-applied, or case scrut
 651                     | otherwise   = 0
 652 \end{code}
 653
 654
 655 %************************************************************************
 656 %*                                                                      *
 657 \subsection{Black-listing}
 658 %*                                                                      *
 659 %************************************************************************
 660
 661 Inlining is controlled by the "Inline phase" number, which is set
 662 by the per-simplification-pass '-finline-phase' flag.
 663
 664 For optimisation we use phases 1, 2 and then no phase at all (i.e. no
 665 -finline-phase flag), in that order.  The meanings of these are determined
 666 by the @blackListed@ function here.
 667
 668 The final simplification doesn't have a phase number.
 669
 670 Pragmas
 671 ~~~~~~~
 672         Pragma          Black list if
 673
 674 (least black listing, most inlining)
 675         INLINE n foo    phase is Just p *and* p<n *and* foo appears on LHS of rule
 676         INLINE foo      phase is Just p *and*           foo appears on LHS of rule
 677         NOINLINE n foo  phase is Just p *and* (p<n *or* foo appears on LHS of rule)
 678         NOINLINE foo    always
 679 (most black listing, least inlining)
 680
 681 \begin{code}
 682 blackListed :: IdSet            -- Ids used in transformation rules
 683             -> Maybe Int        -- Inline phase; Nothing in the last phase
 684             -> Id -> Bool       -- True <=> blacklisted
 685
 686 -- The blackListed function sees whether a variable should *not* be
 687 -- inlined because of the inline phase we are in.  This (together with
 688 -- normal_case) is the sole place that the inline phase number is looked at.
 689 -- Each equation returns a predicate on Ids for the given phase.
 690 blackListed rule_vars Nothing           -- Last phase
 691   = \v -> isNeverInlinePrag (idInlinePragma v)
 692         -- (rule_vars plays no part in the last phase)
 693 blackListed rule_vars (Just phase)      -- A numbered phase
 694   = \v -> normal_case rule_vars phase v
 695 -- normal_case: is v black listed in numbered inline phase 'phase'?
 696 normal_case rule_vars phase v
 697   = case idInlinePragma v of
 698         NoInlinePragInfo -> has_rules   -- No pragma: black list iff v has rules
 699         -- Pragma without a phase number
 700         IMustNotBeINLINEd from_INLINE Nothing
 701           | from_INLINE -> has_rules    -- Black list until final phase
 702           | otherwise   -> True         -- Always blacklisted
 703         -- Pragma with a phase number (presumably from_inline <=> INLINE n; confirm)
 704         IMustNotBeINLINEd from_inline (Just threshold)
 705           | from_inline -> (phase < threshold && has_rules)
 706           | otherwise   -> (phase < threshold || has_rules)
 707   where         -- has_rules: v is in rule_vars, or idSpecialisation v is non-empty
 708     has_rules =  v `elemVarSet` rule_vars
 709               || not (isEmptyCoreRules (idSpecialisation v))
 710 \end{code}
 711
 712
 713 SLPJ 95/04: Why @runST@ must be inlined very late:
 714 \begin{verbatim}
 715 f x =
 716   runST ( \ s -> let
 717                     (a, s')  = newArray# 100 [] s
 718                     (_, s'') = fill_in_array_or_something a x s'
 719                   in
 720                   freezeArray# a s'' )
 721 \end{verbatim}
 722 If we inline @runST@, we'll get:
 723 \begin{verbatim}
 724 f x = let
 725         (a, s')  = newArray# 100 [] realWorld#{-NB-}
 726         (_, s'') = fill_in_array_or_something a x s'
 727       in
 728       freezeArray# a s''
 729 \end{verbatim}
 730 And now the @newArray#@ binding can be floated to become a CAF, which
 731 is totally and utterly wrong:
 732 \begin{verbatim}
 733 f = let
 734     (a, s')  = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
 735     in
 736     \ x ->
 737         let (_, s'') = fill_in_array_or_something a x s' in
 738         freezeArray# a s''
 739 \end{verbatim}
 740 All calls to @f@ will share a {\em single} array!
 741
 742 Yet we do want to inline runST sometime, so we can avoid
 743 needless code.  Solution: black list it until the last moment.
 744