compiler/coreSyn/CoreUnfold.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The AQUA Project, Glasgow University, 1994-1998
   4 %
   5
   6 Core-syntax unfoldings
   7
   8 Unfoldings (which can travel across module boundaries) are in Core
   9 syntax (namely @CoreExpr@s).
  10
  11 The type @Unfolding@ sits ``above'' simply-Core-expressions
  12 unfoldings, capturing ``higher-level'' things we know about a binding,
  13 usually things that the simplifier found out (e.g., ``it's a
  14 literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
  15 find, unsurprisingly, a Core expression.
  16
  17 \begin{code}
  18 module CoreUnfold (
  19         Unfolding, UnfoldingGuidance,   -- Abstract types
  20
  21         noUnfolding, mkTopUnfolding, mkImplicitUnfolding, mkUnfolding,
  22         mkCompulsoryUnfolding, seqUnfolding,
  23         evaldUnfolding, mkOtherCon, otherCons,
  24         unfoldingTemplate, maybeUnfoldingTemplate,
  25         isEvaldUnfolding, isValueUnfolding, isExpandableUnfolding, isCompulsoryUnfolding,
  26         hasUnfolding, hasSomeUnfolding, neverUnfold,
  27
  28         interestingArg, ArgSummary(..),
  29
  30         couldBeSmallEnoughToInline,
  31         certainlyWillInline, smallEnoughToInline,
  32
  33         callSiteInline, CallCtxt(..),
  34
  35     ) where
  36
  37 import StaticFlags
  38 import DynFlags
  39 import CoreSyn
  40 import PprCore          ()      -- Instances
  41 import OccurAnal
  42 import CoreSubst        ( Subst, emptySubst, substTy, extendIdSubst, extendTvSubst
  43                         , lookupIdSubst, substBndr, substBndrs, substRecBndrs )
  44 import CoreUtils
  45 import Id
  46 import DataCon
  47 import Literal
  48 import PrimOp
  49 import IdInfo
  50 import Type hiding( substTy, extendTvSubst )
  51 import PrelNames
  52 import Bag
  53 import FastTypes
  54 import FastString
  55 import Outputable
  56
  57 \end{code}
  58
  59
  60 %************************************************************************
  61 %*                                                                      *
  62 \subsection{Making unfoldings}
  63 %*                                                                      *
  64 %************************************************************************
  65
  66 \begin{code}
  67 mkTopUnfolding :: CoreExpr -> Unfolding
  68 mkTopUnfolding expr = mkUnfolding True {- Top level -} expr
  69
  70 mkImplicitUnfolding :: CoreExpr -> Unfolding
  71 -- For implicit Ids, do a tiny bit of optimising first
  72 mkImplicitUnfolding expr
  73   = CoreUnfolding (simpleOptExpr emptySubst expr)
  74                   True
  75                   (exprIsHNF expr)
  76                   (exprIsCheap expr)
  77                   (exprIsExpandable expr)
  78                   (calcUnfoldingGuidance opt_UF_CreationThreshold expr)
  79
  80 mkUnfolding :: Bool -> CoreExpr -> Unfolding
  81 mkUnfolding top_lvl expr
  82   = CoreUnfolding (occurAnalyseExpr expr)
  83                   top_lvl
  84
  85                   (exprIsHNF expr)
  86                         -- Already evaluated
  87
  88                   (exprIsCheap expr)
  89                         -- OK to inline inside a lambda
  90
  91                   (exprIsExpandable expr)
  92
  93                   (calcUnfoldingGuidance opt_UF_CreationThreshold expr)
  94         -- Sometimes during simplification, there's a large let-bound thing
  95         -- which has been substituted, and so is now dead; so 'expr' contains
  96         -- two copies of the thing while the occurrence-analysed expression doesn't
  97         -- Nevertheless, we don't occ-analyse before computing the size because the
  98         -- size computation bales out after a while, whereas occurrence analysis does not.
  99         --
 100         -- This can occasionally mean that the guidance is very pessimistic;
 101         -- it gets fixed up next round
 102
 103 instance Outputable Unfolding where
 104   ppr NoUnfolding = ptext (sLit "No unfolding")
 105   ppr (OtherCon cs) = ptext (sLit "OtherCon") <+> ppr cs
 106   ppr (CompulsoryUnfolding e) = ptext (sLit "Compulsory") <+> ppr e
 107   ppr (CoreUnfolding e top hnf cheap expable g)
 108         = ptext (sLit "Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr expable <+> ppr g,
 109                                      ppr e]
 110
 111 mkCompulsoryUnfolding :: CoreExpr -> Unfolding
 112 mkCompulsoryUnfolding expr      -- Used for things that absolutely must be unfolded
 113   = CompulsoryUnfolding (occurAnalyseExpr expr)
 114 \end{code}
 115
 116
 117 %************************************************************************
 118 %*                                                                      *
 119 \subsection{The UnfoldingGuidance type}
 120 %*                                                                      *
 121 %************************************************************************
 122
 123 \begin{code}
 124 instance Outputable UnfoldingGuidance where
 125     ppr UnfoldNever     = ptext (sLit "NEVER")
 126     ppr (UnfoldIfGoodArgs v cs size discount)
 127       = hsep [ ptext (sLit "IF_ARGS"), int v,
 128                brackets (hsep (map int cs)),
 129                int size,
 130                int discount ]
 131 \end{code}
 132
 133
 134 \begin{code}
  135 calcUnfoldingGuidance
  136         :: Int                  -- bomb out if size gets bigger than this
  137         -> CoreExpr             -- expression to look at
  138         -> UnfoldingGuidance    -- UnfoldNever, or UnfoldIfGoodArgs (arity, per-arg discounts, size, result discount)
  139 calcUnfoldingGuidance bOMB_OUT_SIZE expr
  140   = case collect_val_bndrs expr of { (inline, val_binders, body) ->
  141     let
  142         n_val_binders = length val_binders
  143
  144         max_inline_size = n_val_binders+2
  145         -- The idea is that if there is an INLINE pragma (inline is True)
  146         -- and there's a big body, we give a size of n_val_binders+2.
  147         -- This is just enough to fail the no-size-increase test in callSiteInline,
  148         --   so that INLINE things don't get inlined into entirely boring contexts,
  149         --   but no more.
  150
  151     in
  152     case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of
  153
  154       TooBig
  155         | not inline -> UnfoldNever
  156                 -- A big function with an INLINE pragma must
  157                 -- have an UnfoldIfGoodArgs guidance
  158         | otherwise  -> UnfoldIfGoodArgs n_val_binders
  159                                          (map (const 0) val_binders)
  160                                          max_inline_size 0
  161
  162       SizeIs size cased_args scrut_discount
  163         -> UnfoldIfGoodArgs
  164                         n_val_binders
  165                         (map discount_for val_binders)
  166                         final_size
  167                         (iBox scrut_discount)
  168         where
  169             boxed_size    = iBox size
  170
  171             final_size | inline     = boxed_size `min` max_inline_size
  172                        | otherwise  = boxed_size
  173
  174                 -- Sometimes an INLINE thing is smaller than n_val_binders+2.
  175                 -- A particular case in point is a constructor, which has size 1.
  176                 -- We want to inline this regardless, hence the `min`
  177
  178             discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc)
  179                                       0 cased_args
  180         }       -- discount_for sums every discount recorded for binder b in cased_args
  181   where
  182     collect_val_bndrs e = go False [] e
  183         -- We need to be a bit careful about how we collect the
  184         -- value binders.  In particular, if we see
  185         --      __inline_me (\x y -> e)
  186         -- We want to say "2 value binders".  Why?  So that
  187         -- we take account of information given for the arguments
  188
  189     go _      rev_vbs (Note InlineMe e)     = go True   rev_vbs     e
  190     go inline rev_vbs (Lam b e) | isId b    = go inline (b:rev_vbs) e
  191                                 | otherwise = go inline rev_vbs     e
  192     go inline rev_vbs e                     = (inline, reverse rev_vbs, e)
 193 \end{code}
 194
 195 Note [Computing the size of an expression]
 196 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 197 The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
 198 heuristics right has taken a long time.  Here's the basic strategy:
 199
 200     * Variables, literals: 0
 201       (Exception for string literals, see litSize.)
 202
 203     * Function applications (f e1 .. en): 1 + #value args
 204
 205     * Constructor applications: 1, regardless of #args
 206
 207     * Let(rec): 1 + size of components
 208
 209     * Note, cast: 0
 210
 211 Examples
 212
 213   Size  Term
 214   --------------
 215     0     42#
 216     0     x
 217     2     f x
 218     1     Just x
 219     4     f (g x)
 220
 221 Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
 222 a function call to account for.  Notice also that constructor applications
 223 are very cheap, because exposing them to a caller is so valuable.
 224
  225 Things to watch out for
 226
 227 * We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
 228   than the thing it's replacing.  Notice that
 229       (f x) --> (g 3)             -- YES, unconditionally
 230       (f x) --> x : []            -- YES, *even though* there are two
 231                                   --      arguments to the cons
 232       x     --> g 3               -- NO
 233       x     --> Just v            -- NO
 234
 235   It's very important not to unconditionally replace a variable by
 236   a non-atomic term.
 237
 238
 239 \begin{code}
  240 sizeExpr :: FastInt         -- Bomb out if it gets bigger than this
  241          -> [Id]            -- Arguments; we're interested in which of these
  242                             -- get case'd
  243          -> CoreExpr
  244          -> ExprSize        -- TooBig, or the size together with its discounts
  245
  246 -- See Note [Computing the size of an expression]
  247
  248 sizeExpr bOMB_OUT_SIZE top_args expr
  249   = size_up expr
  250   where
  251     size_up (Type _)   = sizeZero           -- Types cost nothing
  252     size_up (Lit lit)  = sizeN (litSize lit)
  253     size_up (Var f)    = size_up_call f 0   -- Make sure we get constructor
  254                                             -- discounts even on nullary constructors
  255     size_up (Cast e _) = size_up e
  256
  257     size_up (Note InlineMe _)  = sizeOne         -- Inline notes make it look very small
  258         -- This can be important.  If you have an instance decl like this:
  259         --      instance Foo a => Foo [a] where
  260         --         {-# INLINE op1, op2 #-}
  261         --         op1 = ...
  262         --         op2 = ...
  263         -- then we'll get a dfun which is a pair of two INLINE lambdas
  264     size_up (Note _      body) = size_up body  -- Other notes cost nothing
  265
  266     size_up (App fun (Type _)) = size_up fun
  267     size_up (App fun arg)      = size_up_app fun [arg]
  268                                   `addSize` nukeScrutDiscount (size_up arg)
  269
  270     size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 1)
  271                       | otherwise = size_up e
  272
  273     size_up (Let (NonRec binder rhs) body)
  274       = nukeScrutDiscount (size_up rhs)         `addSize`
  275         size_up body                            `addSizeN`
  276         (if isUnLiftedType (idType binder) then 0 else 1)
  277                 -- For the allocation
  278                 -- If the binder has an unlifted type there is no allocation
  279
  280     size_up (Let (Rec pairs) body)
  281       = nukeScrutDiscount rhs_size              `addSize`
  282         size_up body                            `addSizeN`
  283         length pairs            -- For the allocation
  284       where
  285         rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
  286
  287     size_up (Case (Var v) _ _ alts)
  288         | v `elem` top_args             -- We are scrutinising an argument variable
  289         = alts_size (foldr addSize sizeOne alt_sizes)   -- The 1 is for the case itself
  290                     (foldr1 maxSize alt_sizes)
  291                 -- Good to inline if an arg is scrutinised, because
  292                 -- that may eliminate allocation in the caller
  293                 -- And it eliminates the case itself
  294         where
  295           alt_sizes = map size_up_alt alts
  296
  297                 -- alts_size tries to compute a good discount for
  298                 -- the case when we are scrutinising an argument variable
  299           alts_size (SizeIs tot tot_disc _tot_scrut)           -- Size of all alternatives
  300                     (SizeIs max _max_disc  max_scrut)           -- Size of biggest alternative
  301                 = SizeIs tot (unitBag (v, iBox (_ILIT(1) +# tot -# max)) `unionBags` tot_disc) max_scrut
  302                         -- If the variable is known, we produce a discount that
  303                         -- will take us back to 'max', the size of the largest alternative
  304                         -- The 1+ is a little discount for reduced allocation in the caller
  305                         --
  306                         -- Notice though, that we return tot_disc, the total discount from
  307                         -- all branches.  I think that's right.
  308
  309           alts_size tot_size _ = tot_size
  310
  311     size_up (Case e _ _ alts) = foldr (addSize . size_up_alt)
  312                                       (nukeScrutDiscount (size_up e))
  313                                       alts
  314                                 `addSizeN` 1    -- Add 1 for the case itself
  315                 -- NB(review): an older comment here said "We don't charge for
  316                 -- the case itself" (it's a strict thing, the price of the call
  317                 -- is paid by scrut, and 'case f x of DEFAULT -> e' is just ';').
  318                 -- That contradicts the `addSizeN` 1 above, which DOES charge 1;
  319                 -- the code is what actually happens.  Confirm which is intended.
  320
  321     ------------
  322     -- size_up_app is used when there's ONE OR MORE value args
  323     size_up_app (App fun arg) args
  324         | isTypeArg arg            = size_up_app fun args
  325         | otherwise                = size_up_app fun (arg:args)
  326                                      `addSize` nukeScrutDiscount (size_up arg)
  327     size_up_app (Var fun)     args = size_up_call fun (length args)
  328     size_up_app other         args = size_up other `addSizeN` length args
  329
  330     ------------
  331     size_up_call :: Id -> Int -> ExprSize
  332     size_up_call fun n_val_args
  333        = case idDetails fun of
  334            FCallId _        -> sizeN opt_UF_DearOp
  335            DataConWorkId dc -> conSize    dc n_val_args
  336            PrimOpId op      -> primOpSize op n_val_args
  337            _                -> funSize top_args fun n_val_args
  338
  339     ------------
  340     size_up_alt (_con, _bndrs, rhs) = size_up rhs
  341         -- Don't charge for args, so that wrappers look cheap
  342         -- (See comments about wrappers with Case)
  343
  344     ------------
  345         -- These addSize things have to be here because
  346         -- I don't want to give them bOMB_OUT_SIZE as an argument
  347     addSizeN TooBig          _  = TooBig
  348     addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
  349
  350     addSize TooBig            _                 = TooBig
  351     addSize _                 TooBig            = TooBig
  352     addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
  353         = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2)
 354 \end{code}
 355
 356 \begin{code}
 357 -- | Finds a nominal size of a string literal.
 358 litSize :: Literal -> Int
 359 -- Used by CoreUnfold.sizeExpr
 360 litSize (MachStr str) = 1 + ((lengthFS str + 3) `div` 4)
 361         -- If size could be 0 then @f "x"@ might be too small
 362         -- [Sept03: make literal strings a bit bigger to avoid fruitless
 363         --  duplication of little strings]
 364 litSize _other = 0    -- Must match size of nullary constructors
 365                       -- Key point: if  x |-> 4, then x must inline unconditionally
 366                       --            (eg via case binding)
 367
 368 funSize :: [Id] -> Id -> Int -> ExprSize
 369 -- Size for functions that are not constructors or primops
 370 -- Note [Function applications]
 371 funSize top_args fun n_val_args
 372   | fun `hasKey` buildIdKey   = buildSize
 373   | fun `hasKey` augmentIdKey = augmentSize
 374   | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount)
 375   where
 376     some_val_args = n_val_args > 0
 377
 378     arg_discount | some_val_args && fun `elem` top_args
 379                  = unitBag (fun, opt_UF_FunAppDiscount)
 380                  | otherwise = emptyBag
 381         -- If the function is an argument and is applied
 382         -- to some values, give it an arg-discount
 383
 384     res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount
 385                  | otherwise                = 0
 386         -- If the function is partially applied, show a result discount
 387
 388     size | some_val_args = 1 + n_val_args
 389          | otherwise     = 0
 390         -- The 1+ is for the function itself
 391         -- Add 1 for each non-trivial arg;
 392         -- the allocation cost, as in let(rec)
 393
 394
 395 conSize :: DataCon -> Int -> ExprSize
 396 conSize dc n_val_args
 397   | n_val_args == 0      = SizeIs (_ILIT(0)) emptyBag (_ILIT(1))
 398   | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
 399   | otherwise            = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
 400         -- Treat a constructors application as size 1, regardless of how
 401         -- many arguments it has; we are keen to expose them
 402         -- (and we charge separately for their args).  We can't treat
 403         -- them as size zero, else we find that (Just x) has size 0,
 404         -- which is the same as a lone variable; and hence 'v' will
 405         -- always be replaced by (Just x), where v is bound to Just x.
 406         --
 407         -- However, unboxed tuples count as size zero
 408         -- I found occasions where we had
 409         --      f x y z = case op# x y z of { s -> (# s, () #) }
 410         -- and f wasn't getting inlined
 411
 412 primOpSize :: PrimOp -> Int -> ExprSize
 413 primOpSize op n_val_args
 414  | not (primOpIsDupable op) = sizeN opt_UF_DearOp
 415  | not (primOpOutOfLine op) = sizeN 1
 416         -- Be very keen to inline simple primops.
 417         -- We give a discount of 1 for each arg so that (op# x y z) costs 2.
 418         -- We can't make it cost 1, else we'll inline let v = (op# x y z)
 419         -- at every use of v, which is excessive.
 420         --
 421         -- A good example is:
 422         --      let x = +# p q in C {x}
 423         -- Even though x get's an occurrence of 'many', its RHS looks cheap,
 424         -- and there's a good chance it'll get inlined back into C's RHS. Urgh!
 425
 426  | otherwise = sizeN n_val_args
 427
 428
  429 buildSize :: ExprSize
  430 buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
  431         -- We really want to inline applications of build
  432         -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
  433         -- Indeed, we should add a result_discount because build is
  434         -- very like a constructor.  We don't bother to check that the
  435         -- build is saturated (it usually is).  The result discount of "4" is rather arbitrary.
  436         -- NB(review): this note used to mention a "-2" size discounting the \c n; the size here is 0.
 437
  438 augmentSize :: ExprSize
  439 augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
  440         -- As for build: (augment t (\cn -> e) ys) should cost only the cost of
  441         -- e plus ys. NB(review): the old note cited a "-2" for the \cn, but the size here is 0.
 442
 443 nukeScrutDiscount :: ExprSize -> ExprSize
 444 nukeScrutDiscount (SizeIs n vs _) = SizeIs n vs (_ILIT(0))
 445 nukeScrutDiscount TooBig          = TooBig
 446
 447 -- When we return a lambda, give a discount if it's used (applied)
 448 lamScrutDiscount :: ExprSize -> ExprSize
 449 lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount)
 450 lamScrutDiscount TooBig          = TooBig
 451 \end{code}
 452
 453
 454 Note [Function applications]
 455 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 456 In a function application (f a b)
 457
 458   - If 'f' is an argument to the function being analysed,
 459     and there's at least one value arg, record a FunAppDiscount for f
 460
  461   - If the application is a PAP (arity > 2 in this example)
 462     record a *result* discount (because inlining
 463     with "extra" args in the call may mean that we now
 464     get a saturated application)
 465
 466 Code for manipulating sizes
 467
 468 \begin{code}
  469 data ExprSize = TooBig                    -- Size (net of result discount) exceeded the limit; see mkSizeIs
  470               | SizeIs FastInt          -- Size found
  471                        (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
  472                        FastInt          -- Size to subtract if result is scrutinised
  473                                         -- by a case expression
 474
 475 instance Outputable ExprSize where
 476   ppr TooBig         = ptext (sLit "TooBig")
 477   ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c))
 478
 479 -- subtract the discount before deciding whether to bale out. eg. we
 480 -- want to inline a large constructor application into a selector:
 481 --      tup = (a_1, ..., a_99)
 482 --      x = case tup of ...
 483 --
 484 mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
 485 mkSizeIs max n xs d | (n -# d) ># max = TooBig
 486                     | otherwise       = SizeIs n xs d
 487
 488 maxSize :: ExprSize -> ExprSize -> ExprSize
 489 maxSize TooBig         _                                  = TooBig
 490 maxSize _              TooBig                             = TooBig
 491 maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2  = s1
 492                                               | otherwise = s2
 493
 494 sizeZero, sizeOne :: ExprSize
 495 sizeN :: Int -> ExprSize
 496
 497 sizeZero = SizeIs (_ILIT(0))  emptyBag (_ILIT(0))
 498 sizeOne  = SizeIs (_ILIT(1))  emptyBag (_ILIT(0))
 499 sizeN n  = SizeIs (iUnbox n) emptyBag (_ILIT(0))
 500 \end{code}
 501
 502
 503
 504
 505 %************************************************************************
 506 %*                                                                      *
 507 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
 508 %*                                                                      *
 509 %************************************************************************
 510
 511 We have very limited information about an unfolding expression: (1)~so
 512 many type arguments and so many value arguments expected---for our
 513 purposes here, we assume we've got those.  (2)~A ``size'' or ``cost,''
 514 a single integer.  (3)~An ``argument info'' vector.  For this, what we
 515 have at the moment is a Boolean per argument position that says, ``I
 516 will look with great favour on an explicit constructor in this
 517 position.'' (4)~The ``discount'' to subtract if the expression
 518 is being scrutinised.
 519
 520 Assuming we have enough type- and value arguments (if not, we give up
 521 immediately), then we see if the ``discounted size'' is below some
 522 (semi-arbitrary) threshold.  It works like this: for every argument
 523 position where we're looking for a constructor AND WE HAVE ONE in our
  524 hands, we get a (again, semi-arbitrary) discount [proportional to the
  525 number of constructors in the type being scrutinized].
 526
 527 If we're in the context of a scrutinee ( \tr{(case <expr > of A .. -> ...;.. )})
 528 and the expression in question will evaluate to a constructor, we use
 529 the computed discount size *for the result only* rather than
 530 computing the argument discounts. Since we know the result of
 531 the expression is going to be taken apart, discounting its size
 532 is more accurate (see @sizeExpr@ above for how this discount size
 533 is computed).
 534
 535 We use this one to avoid exporting inlinings that we ``couldn't possibly
 536 use'' on the other side.  Can be overridden w/ flaggery.
 537 Just the same as smallEnoughToInline, except that it has no actual arguments.
 538
 539 \begin{code}
 540 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
 541 couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold rhs of
 542                                                 UnfoldNever -> False
 543                                                 _           -> True
 544
 545 certainlyWillInline :: Unfolding -> Bool
 546   -- Sees if the unfolding is pretty certain to inline
 547 certainlyWillInline (CoreUnfolding _ _ _ is_cheap _ (UnfoldIfGoodArgs n_vals _ size _))
 548   = is_cheap && size - (n_vals+1) <= opt_UF_UseThreshold
 549 certainlyWillInline _
 550   = False
 551
 552 smallEnoughToInline :: Unfolding -> Bool
 553 smallEnoughToInline (CoreUnfolding _ _ _ _ _ (UnfoldIfGoodArgs _ _ size _))
 554   = size <= opt_UF_UseThreshold
 555 smallEnoughToInline _
 556   = False
 557 \end{code}
 558
 559 %************************************************************************
 560 %*                                                                      *
 561 \subsection{callSiteInline}
 562 %*                                                                      *
 563 %************************************************************************
 564
 565 This is the key function.  It decides whether to inline a variable at a call site
 566
 567 callSiteInline is used at call sites, so it is a bit more generous.
 568 It's a very important function that embodies lots of heuristics.
 569 A non-WHNF can be inlined if it doesn't occur inside a lambda,
 570 and occurs exactly once or
 571     occurs once in each branch of a case and is small
 572
 573 If the thing is in WHNF, there's no danger of duplicating work,
 574 so we can inline if it occurs once, or is small
 575
 576 NOTE: we don't want to inline top-level functions that always diverge.
  577 It just makes the code bigger.  It turns out that the convenient way to prevent
 578 them inlining is to give them a NOINLINE pragma, which we do in
 579 StrictAnal.addStrictnessInfoToTopId
 580
 581 \begin{code}
  582 callSiteInline :: DynFlags                -- Consulted for Opt_InlineIfEnoughArgs and Opt_D_dump_inlinings
  583                -> Bool                  -- True <=> the Id can be inlined
  584                -> Id                    -- The Id
  585                -> Bool                  -- True if there are no arguments at all (incl type args)
  586                -> [ArgSummary]          -- One for each value arg, summarising how interesting it is
  587                -> CallCtxt              -- The context (continuation) in which the call appears
  588                -> Maybe CoreExpr        -- Unfolding, if any
 589
 590
 591 instance Outputable ArgSummary where
 592   ppr TrivArg    = ptext (sLit "TrivArg")
 593   ppr NonTrivArg = ptext (sLit "NonTrivArg")
 594   ppr ValueArg   = ptext (sLit "ValueArg")
 595
  596 data CallCtxt = BoringCtxt        -- Presumably: nothing about the context favours inlining (cf. the INVARIANT below)
  597
  598               | ArgCtxt Bool    -- We're somewhere in the RHS of function with rules
  599                                 --      => be keener to inline
  600                         Int     -- We *are* the argument of a function with this arg discount
  601                                 --      => be keener to inline
  602                 -- INVARIANT: ArgCtxt False 0 ==> BoringCtxt
  603
  604               | ValAppCtxt      -- We're applied to at least one value arg
  605                                 -- This arises when we have ((f x |> co) y)
  606                                 -- Then the (f x) has argument 'x' but in a ValAppCtxt
  607
  608               | CaseCtxt        -- We're the scrutinee of a case
  609                                 -- that decomposes its scrutinee
 610
 611 instance Outputable CallCtxt where
 612   ppr BoringCtxt    = ptext (sLit "BoringCtxt")
 613   ppr (ArgCtxt _ _) = ptext (sLit "ArgCtxt")
 614   ppr CaseCtxt      = ptext (sLit "CaseCtxt")
 615   ppr ValAppCtxt    = ptext (sLit "ValAppCtxt")
 616
  617 callSiteInline dflags active_inline id lone_variable arg_infos cont_info
  618   = case idUnfolding id of {
  619         NoUnfolding -> Nothing ;
  620         OtherCon _  -> Nothing ;
  621
  622         CompulsoryUnfolding unf_template -> Just unf_template ;
  623                 -- CompulsoryUnfolding => there is no top-level binding
  624                 -- for these things, so we must inline it.
  625                 -- Only a couple of primop-like things have
  626                 -- compulsory unfoldings (see MkId.lhs).
  627                 -- We don't allow them to be inactive
  628
  629         CoreUnfolding unf_template is_top is_value is_cheap is_expable guidance ->
  630
  631     let
  632         result | yes_or_no = Just unf_template
  633                | otherwise = Nothing
  634
  635         n_val_args  = length arg_infos  -- number of value args supplied at this call site
  636
  637         yes_or_no = active_inline && is_cheap && consider_safe
  638                 -- We consider even the once-in-one-branch
  639                 -- occurrences, because they won't all have been
  640                 -- caught by preInlineUnconditionally.  In particular,
  641                 -- if the occurrence is once inside a lambda, and the
  642                 -- rhs is cheap but not a manifest lambda, then
  643                 -- pre-inline will not have inlined it for fear of
  644                 -- invalidating the occurrence info in the rhs.
  645
  646         consider_safe
  647                 -- consider_safe decides whether it's a good idea to
  648                 -- inline something, given that there's no
  649                 -- work-duplication issue (the caller checks that).
  650           = case guidance of
  651               UnfoldNever  -> False
  652               UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount
  653                   | uncond_inline -> True
  654                   | otherwise     -> some_benefit && small_enough && inline_enough_args
  655
  656                   where
  657                         -- Inline unconditionally if there is no size increase
  658                         -- Size of call is n_vals_wanted (+1 for the function)
  659                     uncond_inline
  660                        | n_vals_wanted == 0 = size == 0
  661                        | otherwise          = enough_args && (size <= n_vals_wanted + 1)
  662
  663                     enough_args = n_val_args >= n_vals_wanted
  664                     inline_enough_args =        -- only constrains when Opt_InlineIfEnoughArgs is on
  665                       not (dopt Opt_InlineIfEnoughArgs dflags) || enough_args
  666
  667
  668                     some_benefit = any nonTriv arg_infos || really_interesting_cont
  669                                 -- There must be something interesting
  670                                 -- about some argument, or the result
  671                                 -- context, to make it worth inlining
  672
  673                                 -- NB: (any nonTriv arg_infos) looks at the over-saturated
  674                                 -- args too which is wrong; but if over-saturated
  675                                 -- we'll probably inline anyway.
  676
  677                     really_interesting_cont
  678                         | n_val_args <  n_vals_wanted = False   -- Too few args
  679                         | n_val_args == n_vals_wanted = interesting_saturated_call
  680                         | otherwise                   = True    -- Extra args
  681                         -- really_interesting_cont tells if the result of the
  682                         -- call is in an interesting context.
  683
  684                     interesting_saturated_call
  685                         = case cont_info of
  686                             BoringCtxt -> not is_top && n_vals_wanted > 0       -- Note [Nested functions]
  687                             CaseCtxt   -> not lone_variable || not is_value     -- Note [Lone variables]
  688                             ArgCtxt {} -> n_vals_wanted > 0                     -- Note [Inlining in ArgCtxt]
  689                             ValAppCtxt -> True                                  -- Note [Cast then apply]
  690
  691                     small_enough = (size - discount) <= opt_UF_UseThreshold
  692                     discount = computeDiscount n_vals_wanted arg_discounts
  693                                                res_discount arg_infos cont_info
  694
  695     in
  696     if dopt Opt_D_dump_inlinings dflags then  -- trace the decision when dumping inlinings
  697         pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
  698                  (vcat [text "active:" <+> ppr active_inline,
  699                         text "arg infos" <+> ppr arg_infos,
  700                         text "interesting continuation" <+> ppr cont_info,
  701                         text "is value:" <+> ppr is_value,
  702                         text "is cheap:" <+> ppr is_cheap,
  703                         text "is expandable:" <+> ppr is_expable,
  704                         text "guidance" <+> ppr guidance,
  705                         text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
  706                   result
  707     else
  708     result
  709     }
 710 \end{code}
 711
 712 Note [Things to watch]
 713 ~~~~~~~~~~~~~~~~~~~~~~
 714 *   { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
 715     Assume x is exported, so not inlined unconditionally.
 716     Then we want x to inline unconditionally; no reason for it
 717     not to, and doing so avoids an indirection.
 718
 719 *   { x = I# 3; ....f x.... }
 720     Make sure that x does not inline unconditionally!
 721     Lest we get extra allocation.
 722
 723 Note [Nested functions]
 724 ~~~~~~~~~~~~~~~~~~~~~~~
 725 If a function has a nested defn we also record some-benefit, on the
 726 grounds that we are often able to eliminate the binding, and hence the
 727 allocation, for the function altogether; this is good for join points.
 728 But this only makes sense for *functions*; inlining a constructor
 729 doesn't help allocation unless the result is scrutinised.  UNLESS the
 730 constructor occurs just once, albeit possibly in multiple case
 731 branches.  Then inlining it doesn't increase allocation, but it does
 732 increase the chance that the constructor won't be allocated at all in
 733 the branches that don't use it.
 734
 735 Note [Cast then apply]
 736 ~~~~~~~~~~~~~~~~~~~~~~
 737 Consider
 738    myIndex = __inline_me ( (/\a. <blah>) |> co )
 739    co :: (forall a. a -> a) ~ (forall a. T a)
 740      ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
 741
 742 We need to inline myIndex to unravel this; but the actual call (myIndex a) has
 743 no value arguments.  The ValAppCtxt gives it enough incentive to inline.
 744
 745 Note [Inlining in ArgCtxt]
 746 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 747 The condition (n_vals_wanted > 0) here is very important, because otherwise
 748 we end up inlining top-level stuff into useless places; eg
 749    x = I# 3#
 750    f = \y.  g x
 751 This can make a very big difference: it adds 16% to nofib 'integer' allocs,
 752 and 20% to 'power'.
 753
 754 At one stage I replaced this condition by 'True' (leading to the above
 755 slow-down).  The motivation was test eyeball/inline1.hs; but that seems
 756 to work ok now.
 757
 758 Note [Lone variables]
 759 ~~~~~~~~~~~~~~~~~~~~~
 760 The "lone-variable" case is important.  I spent ages messing about
 761 with unsatisfactory variants, but this is nice.  The idea is that if a
 762 variable appears all alone
 763         as an arg of lazy fn, or rhs    Stop
 764         as scrutinee of a case          Select
 765         as arg of a strict fn           ArgOf
 766 AND
 767         it is bound to a value
 768 then we should not inline it (unless there is some other reason,
 769 e.g. it is the sole occurrence).  That is what is happening at
 770 the use of 'lone_variable' in 'interesting_saturated_call'.
 771
 772 Why?  At least in the case-scrutinee situation, turning
 773         let x = (a,b) in case x of y -> ...
 774 into
 775         let x = (a,b) in case (a,b) of y -> ...
 776 and thence to
 777         let x = (a,b) in let y = (a,b) in ...
 778 is bad if the binding for x will remain.
 779
 780 Another example: I discovered that strings
 781 were getting inlined straight back into applications of 'error'
 782 because the latter is strict.
 783         s = "foo"
 784         f = \x -> ...(error s)...
 785
 786 Fundamentally such contexts should not encourage inlining, because the
 787 context can ``see'' the unfolding of the variable (e.g. case or a
 788 RULE), so there's no gain -- provided the thing is bound to a value.
 789
 790 However, watch out:
 791
 792  * Consider this:
 793         foo = _inline_ (\n. [n])
 794         bar = _inline_ (foo 20)
 795         baz = \n. case bar of { (m:_) -> m + n }
 796    Here we really want to inline 'bar' so that we can inline 'foo'
 797    and the whole thing unravels as it should obviously do.  This is
 798    important: in the NDP project, 'bar' generates a closure data
 799    structure rather than a list.
 800
 801  * Even a type application or coercion isn't a lone variable.
 802    Consider
 803         case $fMonadST @ RealWorld of { :DMonad a b c -> c }
 804    We had better inline that sucker!  The case won't see through it.
 805
 806    For now, I'm treating a variable applied to types
 807    in a *lazy* context "lone". The motivating example was
 808         f = /\a. \x. BIG
 809         g = /\a. \y.  h (f a)
 810    There's no advantage in inlining f here, and perhaps
 811    a significant disadvantage.  Hence some_val_args in the Stop case
 812
 813 \begin{code}
 814 computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
 815 computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
 816   = call_discount + supplied_discount + scaled_discount
 817   where
 818         -- The call itself disappears when the body replaces it, so
 819         -- count 1 for the function ...
 820     call_discount = 1
 821
 822         -- ... and an (un-scaled) 1 for each argument actually supplied,
 823         -- up to the number of value arguments the unfolding wants.
 824     supplied_discount = length (take n_vals_wanted arg_infos)
 825
 826         -- The raw argument and result discounts are multiplied by
 827         -- opt_UF_KeenessFactor: sizes are about code *size*, whereas
 828         -- these discounts reflect extra *efficiency* to be gained
 829         -- (e.g. beta reductions, case reductions) by inlining.
 830     scaled_discount = round (opt_UF_KeenessFactor *
 831                              fromIntegral (arg_discount + result_discount))
 832
 833         -- Pair each declared per-argument discount with the summary
 834         -- of the corresponding actual argument.
 835     arg_discount = sum (zipWith discount_for arg_discounts arg_infos)
 836
 837     discount_for declared summary
 838       = case summary of
 839           TrivArg    -> 0
 840           NonTrivArg -> 1
 841           ValueArg   -> declared
 842
 843         -- res_discount can be very large when a function returns
 844         -- constructors, so the full amount is only granted under a case
 845         -- continuation; other interesting contexts get at most 4.  Yuk.
 846     result_discount = case cont_info of
 847         { BoringCtxt -> 0; CaseCtxt -> res_discount; _other -> 4 `min` res_discount }
 848 \end{code}
 849
 850 %************************************************************************
 851 %*                                                                      *
 852         Interesting arguments
 853 %*                                                                      *
 854 %************************************************************************
 855
 856 Note [Interesting arguments]
 857 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 858 An argument is interesting if it deserves a discount for unfoldings
 859 with a discount in that argument position.  The idea is to avoid
 860 unfolding a function that is applied only to variables that have no
 861 unfolding (i.e. they are probably lambda bound): f x y z There is
 862 little point in inlining f here.
 863
 864 Generally, *values* (like (C a b) and (\x.e)) deserve discounts.  But
 865 we must look through lets, eg (let x = e in C a b), because the let will
 866 float, exposing the value, if we inline.  That makes it different to
 867 exprIsHNF.
 868
 869 Before 2009 we said it was interesting if the argument had *any* structure
 870 at all; i.e. (hasSomeUnfolding v).  But that does too much inlining; see Trac #3016.
 871
 872 But we don't regard (f x y) as interesting, unless f is unsaturated.
 873 If it's saturated and f hasn't inlined, then it's probably not going
 874 to now!
 875
 876 \begin{code}
 877 data ArgSummary = TrivArg       -- Nothing interesting (the only summary nonTriv rejects)
 878                 | NonTrivArg    -- Arg has some structure, but is not known to be a value
 879                 | ValueArg      -- Arg is a value: literal, con-app, PAP or lambda
 880
 881 interestingArg :: CoreExpr -> ArgSummary
 882 -- See Note [Interesting arguments]
 883 interestingArg e = summarise 0 e
 884   where
 885     -- n_args counts the value args the expression is applied to
 886     summarise n_args expr = case expr of
 887       Lit {}             -> ValueArg
 888       Type _             -> TrivArg
 889       Case {}            -> NonTrivArg
 890       App fun (Type _)   -> summarise n_args fun         -- type args are not counted
 891       App fun _          -> summarise (n_args + 1) fun
 892       Note _ body        -> summarise n_args body
 893       Cast body _        -> summarise n_args body
 894       Let _ body         -> case summarise n_args body of  -- look through the let
 895                               ValueArg -> ValueArg
 896                               _        -> NonTrivArg
 897       Lam bndr body
 898         | isTyVar bndr   -> summarise n_args body
 899         | n_args > 0     -> summarise (n_args - 1) body   -- lambda absorbs one value arg
 900         | otherwise      -> ValueArg
 901       Var v
 902         | isDataConWorkId v                -> ValueArg
 903         | idArity v > n_args               -> ValueArg    -- Catches (eg) primops with arity but no unfolding
 904         | n_args > 0                       -> NonTrivArg  -- Saturated or unknown call
 905         | isEvaldUnfolding (idUnfolding v) -> ValueArg    -- n_args==0; look for a value
 906         | otherwise                        -> TrivArg     -- n_args==0, no useful unfolding
 907
 908 nonTriv :: ArgSummary -> Bool
 909 -- True for every summary except TrivArg
 910 nonTriv summary = case summary of { TrivArg -> False; _ -> True }
 911 \end{code}
 912
 913
 914 %************************************************************************
 915 %*                                                                      *
 916         The Very Simple Optimiser
 917 %*                                                                      *
 918 %************************************************************************
 919
 920
 921 \begin{code}
 922 simpleOptExpr :: Subst -> CoreExpr -> CoreExpr
 923 -- Occurrence-analyse the expression, then apply one very simple
 924 -- optimisation: substitute away a non-recursive let-binding when
 925 -- its RHS is trivial, or when its binder is dead or occurs just once
 926 -- (outside any lambda and in a single case branch)
 927
 928 simpleOptExpr subst expr
 929   = opt subst (occurAnalyseExpr expr)
 930   where
 931     opt env e = case e of
 932       Var v          -> lookupIdSubst env v
 933       Lit lit        -> Lit lit
 934       Type ty        -> Type (substTy env ty)
 935       App fun arg    -> App (opt env fun) (opt env arg)
 936       Note note body -> Note note (opt env body)
 937       Cast body co   -> Cast (opt env body) (substTy env co)
 938       Let bind body  -> opt_bind env bind body
 939       Lam bndr body  ->
 940         let (env', bndr') = substBndr env bndr
 941         in  Lam bndr' (opt env' body)
 942       Case scrut bndr ty alts ->
 943             -- Scrutinee and result type use the incoming env; only
 944             -- the alternatives see the substituted case binder
 945         let (env', bndr') = substBndr env bndr
 946         in  Case (opt env scrut) bndr' (substTy env ty)
 947                  (map (opt_alt env') alts)
 948
 949     ----------------------
 950     opt_alt env (con, bndrs, rhs)
 951       = let (env', bndrs') = substBndrs env bndrs
 952         in  (con, bndrs', opt env' rhs)
 953
 954     ----------------------
 955     opt_bind env bind body = case bind of
 956       NonRec bndr rhs -> opt_nonrec env bndr (opt env rhs) body
 957       Rec pairs       ->
 958         let (bndrs, rhss)  = unzip pairs
 959             (env', bndrs') = substRecBndrs env bndrs
 960             rhss'          = map (opt env') rhss
 961         in  Let (Rec (zip bndrs' rhss')) (opt env' body)
 962
 963     ----------------------
 964         -- NB: rhs' has already been optimised by the caller
 965     opt_nonrec env bndr rhs' body
 966       | Type ty' <- rhs', isTyVar bndr  -- let a::* = TYPE ty in <body>
 967       = opt (extendTvSubst env bndr ty') body
 968       | isId bndr, inlinable            -- let x = e in <body>
 969       = opt (extendIdSubst env bndr rhs') body
 970       | otherwise                       -- keep the binding
 971       = Let (NonRec bndr' rhs') (opt env' body)
 972       where
 973         inlinable     = exprIsTrivial rhs' || safe_to_inline (idOccInfo bndr)
 974         (env', bndr') = substBndr env bndr
 975
 976     ----------------------
 977         -- Unconditionally safe to inline
 978     safe_to_inline :: OccInfo -> Bool
 979     safe_to_inline occ = case occ of
 980       IAmDead                -> True
 981       OneOcc in_lam one_br _ -> not in_lam && one_br
 982       IAmALoopBreaker {}     -> False
 983       NoOccInfo              -> False
 984 \end{code}