compiler/coreSyn/CoreUnfold.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The AQUA Project, Glasgow University, 1994-1998
   4 %
   5
   6 Core-syntax unfoldings
   7
   8 Unfoldings (which can travel across module boundaries) are in Core
   9 syntax (namely @CoreExpr@s).
  10
  11 The type @Unfolding@ sits ``above'' simply-Core-expressions
  12 unfoldings, capturing ``higher-level'' things we know about a binding,
  13 usually things that the simplifier found out (e.g., ``it's a
  14 literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
  15 find, unsurprisingly, a Core expression.
  16
  17 \begin{code}
  18 module CoreUnfold (
  19         Unfolding, UnfoldingGuidance,   -- Abstract types
  20
  21         noUnfolding, mkImplicitUnfolding,
  22         mkUnfolding, mkCoreUnfolding,
  23         mkTopUnfolding, mkSimpleUnfolding,
  24         mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule,
  25         mkCompulsoryUnfolding, mkDFunUnfolding,
  26
  27         interestingArg, ArgSummary(..),
  28
  29         couldBeSmallEnoughToInline, inlineBoringOk,
  30         certainlyWillInline, smallEnoughToInline,
  31
  32         callSiteInline, CallCtxt(..),
  33
  34         exprIsConApp_maybe
  35
  36     ) where
  37
  38 #include "HsVersions.h"
  39
  40 import StaticFlags
  41 import DynFlags
  42 import CoreSyn
  43 import PprCore          ()      -- Instances
  44 import TcType           ( tcSplitDFunTy )
  45 import OccurAnal        ( occurAnalyseExpr )
  46 import CoreSubst hiding( substTy )
  47 import CoreFVs         ( exprFreeVars )
  48 import CoreArity       ( manifestArity, exprBotStrictness_maybe )
  49 import CoreUtils
  50 import Id
  51 import DataCon
  52 import TyCon
  53 import Literal
  54 import PrimOp
  55 import IdInfo
  56 import BasicTypes       ( Arity )
  57 import Type
  58 import Coercion
  59 import PrelNames
  60 import VarEnv           ( mkInScopeSet )
  61 import Bag
  62 import Util
  63 import Pair
  64 import FastTypes
  65 import FastString
  66 import Outputable
  67 import ForeignCall
  68
  69 import Data.Maybe
  70 \end{code}
  71
  72
  73 %************************************************************************
  74 %*                                                                      *
  75 \subsection{Making unfoldings}
  76 %*                                                                      *
  77 %************************************************************************
  78
  79 \begin{code}
  80 mkTopUnfolding :: Bool -> CoreExpr -> Unfolding
     -- An InlineRhs unfolding with the top-level flag set; the Bool is handed
     -- on to mkUnfolding as its is_bottoming argument
  81 mkTopUnfolding is_bottoming rhs
       = mkUnfolding InlineRhs True {- Top level -} is_bottoming rhs
  82
  83 mkImplicitUnfolding :: CoreExpr -> Unfolding
  84 -- Unfolding for an implicit Id: the expression is first run through
     -- simpleOptExpr (a tiny bit of optimising), then given to mkTopUnfolding
     -- as a non-bottoming right-hand side
  85 mkImplicitUnfolding = mkTopUnfolding False . simpleOptExpr
  86
  87 -- Note [Top-level flag on inline rules]
  88 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  89 -- Slight hack: note that mk_inline_rules conservatively sets the
  90 -- top-level flag to True.  It gets set more accurately by the simplifier
  91 -- Simplify.simplUnfolding.
  92
  93 mkSimpleUnfolding :: CoreExpr -> Unfolding
     -- An InlineRhs unfolding flagged as neither top-level nor bottoming
  94 mkSimpleUnfolding rhs = mkUnfolding InlineRhs not_top not_bottoming rhs
       where
         not_top       = False
         not_bottoming = False
  95
  96 mkDFunUnfolding :: Type -> [DFunArg CoreExpr] -> Unfolding
  97 -- The argument count is the number of type variables plus the n_theta
  98 -- component returned by tcSplitDFunTy; the data constructor is the
  99 -- class's data constructor
 100 mkDFunUnfolding dfun_ty ops
 101   = let (ty_vars, theta_count, clas, _) = tcSplitDFunTy dfun_ty
 102         n_args = length ty_vars + theta_count
         in DFunUnfolding n_args (classDataCon clas) ops
 103
 104 mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
     -- Unfolding whose source is (InlineWrapper id): the template is the
     -- simpleOptExpr'd expression, the top-level flag is set, and the guidance
     -- is UnfWhen unSaturatedOk boringCxtNotOk
 105 mkWwInlineRule id expr arity
 106   = mkCoreUnfolding src True opt_expr arity guidance
 107   where
 108     src      = InlineWrapper id
         opt_expr = simpleOptExpr expr
         guidance = UnfWhen unSaturatedOk boringCxtNotOk
 109
 110 mkCompulsoryUnfolding :: CoreExpr -> Unfolding
 111 -- Used for things that absolutely must be unfolded
 112 mkCompulsoryUnfolding expr
 113   = mkCoreUnfolding InlineCompulsory True opt_expr dummy_arity always
 114   where
         opt_expr    = simpleOptExpr expr
         dummy_arity = 0       -- Arity of a compulsory unfolding doesn't matter
         always      = UnfWhen unSaturatedOk boringCxtOk
 115
 116 mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding
 117 -- An InlineStable unfolding.  With no arity supplied, unsaturated calls
 118 -- are OK and the arity is the manifest arity of the optimised expression;
 119 -- with (Just ar), saturation is required and the arity is ar.
 120 mkInlineUnfolding mb_arity expr
 121   = mkCoreUnfolding InlineStable top_lvl opt_expr arity guidance
 122   where
 123     top_lvl  = True         -- Note [Top-level flag on inline rules]
 124     opt_expr = simpleOptExpr expr
 125     guidance = UnfWhen unsat_ok (inlineBoringOk opt_expr)
 126
 127     (unsat_ok, arity)
 128       = maybe (unSaturatedOk, manifestArity opt_expr) ((,) needSaturated) mb_arity
 129
 130 mkInlinableUnfolding :: CoreExpr -> Unfolding
 131 -- InlineStable, top-level unfolding of the simpleOptExpr'd expression; it
 132 -- is treated as bottoming exactly when exprBotStrictness_maybe gives a Just
 133 mkInlinableUnfolding expr
 134   = let opt_expr  = simpleOptExpr expr
 135         bottoming = isJust (exprBotStrictness_maybe opt_expr)
         in mkUnfolding InlineStable True bottoming opt_expr
 136 \end{code}
 137
 138 Internal functions
 139
 140 \begin{code}
 141 mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr
 142                 -> Arity -> UnfoldingGuidance -> Unfolding
 143 -- The caller supplies the arity and guidance.  The template stored is the
 144 -- occurrence-analysed expression, while the value/conlike/cheap/expandable
 145 -- flags are computed from the expression as given
 146 mkCoreUnfolding src top_lvl expr arity guidance
 147   = CoreUnfolding { uf_src        = src
 148                   , uf_tmpl       = occurAnalyseExpr expr
 149                   , uf_is_top     = top_lvl
 150                   , uf_arity      = arity
 151                   , uf_guidance   = guidance
 152                   , uf_is_value   = exprIsHNF        expr
 153                   , uf_is_conlike = exprIsConLike    expr
                       , uf_is_cheap   = exprIsCheap      expr
                       , uf_expandable = exprIsExpandable expr }
 154
 155 mkUnfolding :: UnfoldingSource -> Bool -> Bool -> CoreExpr -> Unfolding
 156 -- Works out the arity and the unfolding guidance itself, and stores the
 157 -- occurrence-analysed expression as the template
 158 mkUnfolding src top_lvl is_bottoming expr
 159   = if suppress then NoUnfolding else unfolding
 160   where
 161         -- See Note [Do not inline top-level bottoming functions]
 162     suppress = top_lvl && is_bottoming && not (exprIsTrivial expr)
 163
 164     cheap        = exprIsCheap expr
 165     (n_args, ug) = calcUnfoldingGuidance cheap opt_UF_CreationThreshold expr
 166         -- The size is deliberately computed on 'expr' itself, not on the
 167         -- occurrence-analysed version.  During simplification a large
 168         -- let-bound thing may have been substituted and so be dead, in which
 169         -- case 'expr' holds two copies of it while the occurrence-analysed
 170         -- expression doesn't.  But the size computation bales out after a
 171         -- while, whereas occurrence analysis does not.
 172         --
 173         -- So the guidance can occasionally be very pessimistic; it gets
 174         -- fixed up next round.  It should be rare, because large dead
 175         -- let-bound things are usually caught by preInlineUnconditionally
 176
 177     unfolding
 178       = CoreUnfolding { uf_src        = src
 179                       , uf_tmpl       = occurAnalyseExpr expr
 180                       , uf_is_top     = top_lvl
 181                       , uf_arity      = n_args
 182                       , uf_guidance   = ug
 183                       , uf_is_value   = exprIsHNF        expr
 184                       , uf_is_conlike = exprIsConLike    expr
                           , uf_is_cheap   = cheap
                           , uf_expandable = exprIsExpandable expr }
 185 \end{code}
 186
 187 %************************************************************************
 188 %*                                                                      *
 189 \subsection{The UnfoldingGuidance type}
 190 %*                                                                      *
 191 %************************************************************************
 192
 193 \begin{code}
 194 inlineBoringOk :: CoreExpr -> Bool
 195 -- See Note [INLINE for small functions]
 196 -- Returns boringCxtOk when the result of inlining the expression is
 197 -- no bigger than the expression itself,
 198 --     eg      (\x y -> f y x)
 199 -- This is only a quick approximation: it makes no attempt
 200 -- to handle  (\x y z -> x (y z))
 201 -- The case that really matters is (x `cast` c)
 202 inlineBoringOk e
 203   = walk 0 e
 204   where
 205     -- 'spare' counts the value lambdas seen so far that have not yet
         -- been paired with a trivial value argument
 206     walk :: Int -> CoreExpr -> Bool
 207     walk spare expr = case expr of
 208         Lam bndr body | isId bndr       -> walk (spare + 1) body
 209                       | otherwise       -> walk spare body
 210         App fun (Type {})               -> walk spare fun
 211         App fun arg | spare > 0
 212                     , exprIsTrivial arg -> walk (spare - 1) fun
 213         Note _ inner                    -> walk spare inner
 214         Cast inner _                    -> walk spare inner
             Var {}                          -> boringCxtOk
             _                               -> boringCxtNotOk
 215
 216 calcUnfoldingGuidance
 217         :: Bool         -- True <=> the rhs is cheap, or we want to treat it
 218                         --          as cheap (INLINE things)
 219         -> Int          -- Bomb out if size gets bigger than this
 220         -> CoreExpr     -- Expression to look at
 221         -> (Arity, UnfoldingGuidance)
 222 calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
 223   = case collectBinders expr of
 224       (bndrs, body) -> (n_val_bndrs, guidance)
 225         where
 226           val_bndrs   = filter isId bndrs
 227           n_val_bndrs = length val_bndrs
 228
 229           guidance
 230             = case sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body of
 231                 TooBig -> UnfNever
 232                 SizeIs size cased_bndrs scrut_discount ->
 233                   if uncondInline n_val_bndrs (iBox size) && expr_is_cheap
 234                     then UnfWhen unSaturatedOk boringCxtOk   -- Note [INLINE for small functions]
 235                     else UnfIfGoodArgs
 236                            { ug_args = [ discountFor cased_bndrs b | b <- val_bndrs ]
 237                            , ug_size = iBox size
 238                            , ug_res  = iBox scrut_discount }
 239
 240           -- Sum of the discounts recorded for 'bndr' in the bag of
 241           -- (binder, discount) pairs
 242           discountFor cbs bndr = foldlBag add 0 cbs
 243             where
 244               add acc (b', n) | bndr == b' = acc + n
                                   | otherwise  = acc
 245 \end{code}
 246
 247 Note [Computing the size of an expression]
 248 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 249 The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
 250 heuristics right has taken a long time.  Here's the basic strategy:
 251
 252     * Variables, literals: 0
 253       (Exception for string literals, see litSize.)
 254
 255     * Function applications (f e1 .. en): 1 + #value args
 256
 257     * Constructor applications: 1, regardless of #args
 258
 259     * Let(rec): 1 + size of components
 260
 261     * Note, cast: 0
 262
 263 Examples
 264
 265   Size  Term
 266   --------------
 267     0     42#
 268     0     x
 269     0     True
 270     2     f x
 271     1     Just x
 272     4     f (g x)
 273
 274 Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
 275 a function call to account for.  Notice also that constructor applications
 276 are very cheap, because exposing them to a caller is so valuable.
 277
 278 [25/5/11] All sizes are now multiplied by 10, except for primops.
 279 This makes primops look cheap, and seems to be almost universally
 280 beneficial.  Done partly as a result of #4978.
 281
 282 Note [Do not inline top-level bottoming functions]
 283 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 284 The FloatOut pass has gone to some trouble to float out calls to 'error'
 285 and similar friends.  See Note [Bottoming floats] in SetLevels.
 286 Do not re-inline them!  But we *do* still inline if they are very small
 287 (the uncondInline stuff).
 288
 289
 290 Note [INLINE for small functions]
 291 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 292 Consider        {-# INLINE f #-}
 293                 f x = Just x
 294                 g y = f y
 295 Then f's RHS is no larger than its LHS, so we should inline it into
 296 even the most boring context.  In general, if the function is
 297 sufficiently small that its body is as small as the call itself, then
 298 inline unconditionally, regardless of how boring the context is.
 299
 300 Things to note:
 301
 302  * We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
 303    than the thing it's replacing.  Notice that
 304       (f x) --> (g 3)             -- YES, unconditionally
 305       (f x) --> x : []            -- YES, *even though* there are two
 306                                   --      arguments to the cons
 307       x     --> g 3               -- NO
 308       x     --> Just v            -- NO
 309
 310   It's very important not to unconditionally replace a variable by
 311   a non-atomic term.
 312
 313 * We do this even if the thing isn't saturated, else we end up with the
 314   silly situation that
 315      f x y = x
 316      ...map (f 3)...
 317   doesn't inline.  Even in a boring context, inlining without being
 318   saturated will give a lambda instead of a PAP, and will be more
 319   efficient at runtime.
 320
 321 * However, when the function's arity > 0, we do insist that it
 322   has at least one value argument at the call site.  Otherwise we find this:
 323        f = /\a \x:a. x
 324        d = /\b. MkD (f b)
 325   If we inline f here we get
 326        d = /\b. MkD (\x:b. x)
 327   and then prepareRhs floats out the argument, abstracting the type
 328   variables, so we end up with the original again!
 329
 330
 331 \begin{code}
 332 uncondInline :: Arity -> Int -> Bool
 333 -- True if inlining causes no size increase, so we can inline unconditionally.
 334 -- The call being replaced has size 10 * (arity + 1): the arguments, plus
 335 -- one for the function.  With arity 0 the size must be exactly 0.
 336 -- See Note [INLINE for small functions]
 337 uncondInline arity size
 338   = if arity == 0 then size == 0 else size <= call_size
       where
         call_size = 10 * (arity + 1)
 339 \end{code}
 340
 341
 342 \begin{code}
 343 sizeExpr :: FastInt         -- Bomb out if it gets bigger than this
 344          -> [Id]            -- Arguments; we're interested in which of these
 345                             -- get case'd
 346          -> CoreExpr
 347          -> ExprSize
 348
 349 -- See Note [Computing the size of an expression]
     --
     -- The result is TooBig whenever a combined size (less its result
     -- discount) exceeds bOMB_OUT_SIZE; otherwise it is a SizeIs carrying the
     -- size, the bag of discounts recorded against top_args, and the result
     -- discount.
 350
 351 sizeExpr bOMB_OUT_SIZE top_args expr
 352   = size_up expr
 353   where
 354     size_up (Cast e _) = size_up e
 355     size_up (Note _ e) = size_up e
 356     size_up (Type _)   = sizeZero           -- Types cost nothing
 357     size_up (Coercion _) = sizeZero
 358     size_up (Lit lit)  = sizeN (litSize lit)
 359     size_up (Var f)    = size_up_call f []  -- Make sure we get constructor
 360                                             -- discounts even on nullary constructors
 361
 362     size_up (App fun (Type _)) = size_up fun
 363     size_up (App fun (Coercion _)) = size_up fun
 364     size_up (App fun arg)      = size_up arg  `addSizeNSD`
 365                                  size_up_app fun [arg]
 366
 367     size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 10)
 368                       | otherwise = size_up e
 369
 370     size_up (Let (NonRec binder rhs) body)
 371       = size_up rhs             `addSizeNSD`
 372         size_up body            `addSizeN`
 373         (if isUnLiftedType (idType binder) then 0 else 10)
 374                 -- For the allocation
 375                 -- If the binder has an unlifted type there is no allocation
 376
 377     size_up (Let (Rec pairs) body)
 378       = foldr (addSizeNSD . size_up . snd)
 379               (size_up body `addSizeN` (10 * length pairs))     -- (length pairs) for the allocation
 380               pairs
 381
 382     size_up (Case (Var v) _ _ alts)
 383         | v `elem` top_args             -- We are scrutinising an argument variable
 384         = alts_size (foldr1 addAltSize alt_sizes)
 385                     (foldr1 maxSize alt_sizes)
 386                 -- Good to inline if an arg is scrutinised, because
 387                 -- that may eliminate allocation in the caller
 388                 -- And it eliminates the case itself
                     --
                     -- NOTE(review): foldr1 is partial; this presumably relies on
                     -- a case expression always having at least one alternative
 389         where
 390           alt_sizes = map size_up_alt alts
 391
 392                 -- alts_size tries to compute a good discount for
 393                 -- the case when we are scrutinising an argument variable
 394           alts_size (SizeIs tot tot_disc tot_scrut)  -- Size of all alternatives
 395                     (SizeIs max _        _)          -- Size of biggest alternative
 396                 = SizeIs tot (unitBag (v, iBox (_ILIT(20) +# tot -# max)) `unionBags` tot_disc) tot_scrut
 397                         -- If the variable is known, we produce a discount that
 398                         -- will take us back to 'max', the size of the largest alternative
 399                         -- The extra 20 is a little discount for reduced allocation in the caller
 400                         --
 401                         -- Notice though, that we return tot_disc, the total discount from
 402                         -- all branches.  I think that's right.
 403
 404           alts_size tot_size _ = tot_size
 405
 406     size_up (Case e _ _ alts) = size_up e  `addSizeNSD`
 407                                 foldr (addAltSize . size_up_alt) case_size alts
 408       where
 409           case_size
 410            | is_inline_scrut e, not (lengthExceeds alts 1)  = sizeN (-10)
 411            | otherwise = sizeZero
 412                 -- Normally we don't charge for the case itself, but
 413                 -- we charge one per alternative (see size_up_alt,
 414                 -- below) to account for the cost of the info table
 415                 -- and comparisons.
 416                 --
 417                 -- However, in certain cases (see is_inline_scrut
 418                 -- below), no code is generated for the case unless
 419                 -- there are multiple alts.  In these cases we
 420                 -- subtract one, making the first alt free.
 421                 -- e.g. case x# +# y# of _ -> ...   should cost 1
 422                 --      case touch# x# of _ -> ...  should cost 0
 423                 -- (see #4978)
 424                 --
 425                 -- I would like to not have the "not (lengthExceeds alts 1)"
 426                 -- condition above, but without that some programs got worse
 427                 -- (spectral/hartel/event and spectral/para).  I don't fully
 428                 -- understand why. (SDM 24/5/11)
 429
 430                 -- unboxed variables, inline primops and unsafe foreign calls
 431                 -- are all "inline" things:
 432           is_inline_scrut (Var v) = isUnLiftedType (idType v)
 433           is_inline_scrut scrut
 434               | (Var f, _) <- collectArgs scrut
 435                 = case idDetails f of
 436                     FCallId fc  -> not (isSafeForeignCall fc)
 437                     PrimOpId op -> not (primOpOutOfLine op)
 438                     _other      -> False
 439               | otherwise
 440                 = False
 441
 442     ------------
 443     -- size_up_app is used when there's ONE OR MORE value args
 444     size_up_app (App fun arg) args
 445         | isTyCoArg arg            = size_up_app fun args
 446         | otherwise                = size_up arg  `addSizeNSD`
 447                                      size_up_app fun (arg:args)
 448     size_up_app (Var fun)     args = size_up_call fun args
 449     size_up_app other         args = size_up other `addSizeN` length args
 450
 451     ------------
 452     size_up_call :: Id -> [CoreExpr] -> ExprSize
 453     size_up_call fun val_args
 454        = case idDetails fun of
 455            FCallId _        -> sizeN (10 * (1 + length val_args))
 456            DataConWorkId dc -> conSize    dc (length val_args)
 457            PrimOpId op      -> primOpSize op (length val_args)
 458            ClassOpId _      -> classOpSize top_args val_args
 459            _                -> funSize top_args fun (length val_args)
 460
 461     ------------
 462     size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 10
 463         -- Don't charge for args, so that wrappers look cheap
 464         -- (See comments about wrappers with Case)
 465         --
 466         -- IMPORTANT: *do* charge for the alternative itself (the 10 above),
 467         -- else we find that giant case nests are treated as practically free
 468         -- A good example is Foreign.C.Error.errnoToIOError
 469
 470     ------------
 471         -- These addSize things have to be here because
 472         -- I don't want to give them bOMB_OUT_SIZE as an argument
 473     addSizeN TooBig          _  = TooBig
 474     addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
 475
 476         -- addAltSize is used to add the sizes of case alternatives
 477     addAltSize TooBig            _      = TooBig
 478     addAltSize _                 TooBig = TooBig
 479     addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
 480         = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
 481                                  (xs `unionBags` ys)
 482                                  (d1 +# d2)   -- Note [addAltSize result discounts]
 483
 484         -- This variant ignores the result discount from its LEFT argument
 485         -- It's used when the second argument isn't part of the result
 486     addSizeNSD TooBig            _      = TooBig
 487     addSizeNSD _                 TooBig = TooBig
 488     addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
 489         = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
 490                                  (xs `unionBags` ys)
 491                                  d2  -- Ignore d1
 492 \end{code}
 493
 494 \begin{code}
 495 -- | Nominal size of a literal; only string literals have a non-zero size.
 496 litSize :: Literal -> Int
 497 -- Used by CoreUnfold.sizeExpr
 498 litSize lit = case lit of
 499     MachStr str -> 10 + 10 * str_units
 500       where str_units = (lengthFS str + 3) `div` 4
 501         -- Never 0: otherwise @f "x"@ might look too small
 502         -- [Sept03: literal strings made a bit bigger to avoid fruitless
 503         --  duplication of little strings]
 504     _other -> 0
             -- Must match the size of nullary constructors
             -- Key point: if  x |-> 4, then x must inline unconditionally
             --            (eg via case binding)
 505
 506 classOpSize :: [Id] -> [CoreExpr] -> ExprSize
 507 -- See Note [Conlike is interesting]
 508 -- With no value arguments the size is zero; otherwise it is 20 plus 10
 509 -- for every argument after the first
 510 classOpSize top_args args = case args of
 511     []                -> sizeZero
 512     (dict_arg : rest) -> SizeIs (iUnbox op_size) (dictDiscount dict_arg) (_ILIT(0))
 513       where
 514         op_size = 20 + (10 * length rest)
 515   where
 516     -- If the class op is scrutinising a lambda bound dictionary (one of
 517     -- top_args) then give that argument a discount, to encourage the
 518     -- inlining of this function.  The amount is rather arbitrarily chosen.
 519     dictDiscount (Var dict)
 520       | dict `elem` top_args = unitBag (dict, opt_UF_DictDiscount)
         dictDiscount _           = emptyBag
 521
 522 funSize :: [Id] -> Id -> Int -> ExprSize
 523 -- Size of a call to a function that is neither a constructor nor a primop
 524 -- See Note [Function applications]
 525 funSize top_args fun n_val_args
 526   | fun `hasKey` buildIdKey   = buildSize
 527   | fun `hasKey` augmentIdKey = augmentSize
 528   | otherwise                 = SizeIs (iUnbox call_size) arg_disc (iUnbox res_disc)
 529   where
 530     applied = n_val_args > 0
 531
 532         -- 10 for the function itself plus 10 for each value argument;
 533         -- an occurrence with no value arguments costs nothing
 534     call_size = if applied then 10 * (1 + n_val_args) else 0
 535
 536         -- If the function is an argument (one of top_args) and is applied
 537         -- to some values, give it an arg-discount
 538     arg_disc = if applied && fun `elem` top_args
 539                  then unitBag (fun, opt_UF_FunAppDiscount)
 540                  else emptyBag
 541
 542         -- If the function is partially applied (fewer value args than its
 543         -- arity), show a result discount
 544     res_disc = if idArity fun > n_val_args then opt_UF_FunAppDiscount else 0
 546
 547
     -- Size of a data constructor applied to n_val_args value arguments.
     -- Three cases: a nullary constructor (size 0, result discount 10), an
     -- unboxed tuple (size 0, discount growing with the argument count), and
     -- any other constructor (size 10, with a larger result discount).
 548 conSize :: DataCon -> Int -> ExprSize
 549 conSize dc n_val_args
 550   | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(10))    -- Like variables
 551
 552 -- See Note [Unboxed tuple result discount]
 553   | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox (10 * (1 + n_val_args)))
 554
 555 -- See Note [Constructor size]
 556   | otherwise = SizeIs (_ILIT(10)) emptyBag (iUnbox (10 * (10 + n_val_args)))
 557      -- discount was (10 * (1 + n_val_args)), but it turns out that
 558      -- adding a bigger constant here is an unambiguous win.  We
 559      -- REALLY like unfolding constructors that get scrutinised.
 560      -- [SDM, 25/5/11]
 561 \end{code}
 562
 563 Note [Constructor size]
 564 ~~~~~~~~~~~~~~~~~~~~~~~
 565 Treat a constructor application as size 1, regardless of how many
 566 arguments it has; we are keen to expose them (and we charge separately
 567 for their args).  We can't treat them as size zero, else we find that
 568 (Just x) has size 0, which is the same as a lone variable; and hence
 569 'v' will always be replaced by (Just x), where v is bound to Just x.
 570
 571 However, unboxed tuples count as size zero. I found occasions where we had
 572         f x y z = case op# x y z of { s -> (# s, () #) }
 573 and f wasn't getting inlined.
 574
 575 Note [Unboxed tuple result discount]
 576 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 577 I tried giving unboxed tuples a *result discount* of zero (see the
 578 commented-out line).  Why?  When returned as a result they do not
 579 allocate, so maybe we don't want to charge so much for them.  If you
 580 have a non-zero discount here, we find that workers often get inlined
 581 back into wrappers, because it looks like
 582     f x = case $wf x of (# a,b #) -> (a,b)
 583 and we are keener because of the case.  However while this change
 584 shrank binary sizes by 0.5% it also made spectral/boyer allocate 5%
 585 more. All other changes were very small. So it's not a big deal but I
 586 didn't adopt the idea.
 587
 588 \begin{code}
 589 primOpSize :: PrimOp -> Int -> ExprSize
 590 -- An out-of-line primop costs its code size plus one per value argument;
 591 -- any other primop costs just its code size
 592 primOpSize op n_val_args
 593   | primOpOutOfLine op = sizeN (code_size + n_val_args)
 594   | otherwise          = sizeN code_size
 595   where
         code_size = primOpCodeSize op
 596
 597
 598 buildSize :: ExprSize
 599 buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40))
 600         -- We really want to inline applications of build
 601         -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
 602         -- Indeed, we should add a result_discount because build is
 603         -- very like a constructor.  We don't bother to check that the
 604         -- build is saturated (it usually is).  The call itself is given size 0;
 605         -- the result discount of 40 is rather arbitrary.
             -- NOTE(review): this comment used to cite a "-2" size (discounting
             -- the \c n) and a discount of "4"; the code has 0 and 40, so those
             -- figures were presumably left over from an earlier version.
 606
 607 augmentSize :: ExprSize
 608 augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(40))
 609         -- As for build: (augment t (\cn -> e) ys) should cost only the cost of
 610         -- e plus ys, so the call itself is given size 0, with a result discount of 40.
             -- NOTE(review): this comment used to say "The -2 accounts for the \cn";
             -- the size in the code is 0, so that figure looks out of date.
 611
 612 -- For an expression that returns a lambda: replace the result discount
 613 -- by opt_UF_FunAppDiscount, which pays off if the lambda is used (applied)
 614 lamScrutDiscount :: ExprSize -> ExprSize
 615 lamScrutDiscount sz = case sz of
         TooBig              -> TooBig
         SizeIs n arg_disc _ -> SizeIs n arg_disc (iUnbox opt_UF_FunAppDiscount)
 616 \end{code}
 617
 618 Note [addAltSize result discounts]
 619 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 620 When adding the size of alternatives, we *add* the result discounts
 621 too, rather than take the *maximum*.  For a multi-branch case, this
 622 gives a discount for each branch that returns a constructor, making us
 623 keener to inline.  I did try using 'max' instead, but it makes nofib
 624 'rewrite' and 'puzzle' allocate significantly more, and didn't make
 625 binary sizes shrink significantly either.
 626
 627 Note [Discounts and thresholds]
 628 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
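 629 Constants for discounts and thresholds are defined in main/StaticFlags,
 630 all of the form opt_UF_xxxx.  They are listed below.  (The figures in
     parentheses may pre-date the [25/5/11] change that multiplied sizes by
     10; consult StaticFlags for the current defaults.)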
 631
 632 opt_UF_CreationThreshold (45)
 633      At a definition site, if the unfolding is bigger than this, we
 634      may discard it altogether
 635
 636 opt_UF_UseThreshold (6)
 637      At a call site, if the unfolding, less discounts, is smaller than
 638      this, then it's small enough inline
 639
 640 opt_UF_KeennessFactor (1.5)
 641      Factor by which the discounts are multiplied before
 642      subtracting from size
 643
 644 opt_UF_DictDiscount (1)
 645      The discount for each occurrence of a dictionary argument
 646      as an argument of a class method.  Should be pretty small
 647      else big functions may get inlined
 648
 649 opt_UF_FunAppDiscount (6)
 650      Discount for a function argument that is applied.  Quite
 651      large, because if we inline we avoid the higher-order call.
 652
 653 opt_UF_DearOp (4)
 654      The size of a foreign call or not-dupable PrimOp
 655
 656
 657 Note [Function applications]
 658 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 659 In a function application (f a b)
 660
 661   - If 'f' is an argument to the function being analysed,
 662     and there's at least one value arg, record a FunAppDiscount for f
 663
 664   - If the application is a PAP (arity > 2 in this example)
 665     record a *result* discount (because inlining
 666     with "extra" args in the call may mean that we now
 667     get a saturated application)
 668
 669 Code for manipulating sizes
 670
 671 \begin{code}
     -- The result of a size computation: either the expression is TooBig, or
     -- SizeIs records its size together with the discounts that may apply.
 672 data ExprSize = TooBig
 673               | SizeIs FastInt          -- Size found
 674                        (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
 675                        FastInt          -- Size to subtract if result is scrutinised
 676                                         -- by a case expression
 677
 678 instance Outputable ExprSize where
 679   -- Prints the size and the result discount; argument discounts are omitted
 680   ppr sz = case sz of
         TooBig           -> ptext (sLit "TooBig")
         SizeIs n _ res_d -> brackets (int (iBox n) <+> int (iBox res_d))
 681
 682 -- Build a SizeIs, or TooBig if the size is over the limit.  The result
 683 -- discount is subtracted before deciding whether to bale out. eg. we
 684 -- want to inline a large constructor application into a selector:
 685 --      tup = (a_1, ..., a_99)
 686 --      x = case tup of ...
 687 mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
 688 mkSizeIs limit size arg_discs res_disc
 689   = if (size -# res_disc) ># limit then TooBig else SizeIs size arg_discs res_disc
 690
 691 maxSize :: ExprSize -> ExprSize -> ExprSize
 692 -- The argument with the larger size field (the second one on a tie);
 693 -- TooBig if either argument is TooBig
 694 maxSize TooBig _      = TooBig
 695 maxSize _      TooBig = TooBig
     maxSize a@(SizeIs size_a _ _) b@(SizeIs size_b _ _)
       = if size_a ># size_b then a else b
 696
 697 -- Size 0, with no argument discounts and no result discount
 698 sizeZero :: ExprSize
 699 sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0))
 700
 701 -- The given size, with no argument discounts and no result discount
     sizeN :: Int -> ExprSize
     sizeN size = SizeIs (iUnbox size) emptyBag (_ILIT(0))
 702 \end{code}
 703
 704
 705 %************************************************************************
 706 %*                                                                      *
 707 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
 708 %*                                                                      *
 709 %************************************************************************
 710
 711 We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
 712 we ``couldn't possibly use'' on the other side.  Can be overridden w/
 713 flaggery.  Just the same as smallEnoughToInline, except that it has no
 714 actual arguments.
 715
 716 \begin{code}
 717 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
 718 -- True unless sizeExpr, run against 'threshold' on the body that
 719 -- collectBinders returns for 'rhs' (with an empty second argument), says TooBig
 720 couldBeSmallEnoughToInline threshold rhs = not (is_too_big rhs_size)
 721   where
 722     rhs_size = sizeExpr (iUnbox threshold) [] (snd (collectBinders rhs))
 723     is_too_big sz = case sz of { TooBig -> True; _ -> False }
 724
 725 ----------------
 726 smallEnoughToInline :: Unfolding -> Bool
 727 -- True only for UnfIfGoodArgs guidance whose size is within opt_UF_UseThreshold
 728 smallEnoughToInline unf = case unf of
 729   CoreUnfolding { uf_guidance = UnfIfGoodArgs { ug_size = sz } } -> sz <= opt_UF_UseThreshold
 730   _                                                              -> False
 731
 732 ----------------
 733 certainlyWillInline :: Unfolding -> Bool
 734   -- Is the unfolding pretty certain to inline?  Only a CoreUnfolding
 735   -- can be; for UnfIfGoodArgs it must also be cheap, and its size less
 736   -- 10 * (arity + 1) must be within opt_UF_UseThreshold.
 737 certainlyWillInline unf
 738   | CoreUnfolding { uf_is_cheap = cheap, uf_arity = arity, uf_guidance = guide } <- unf
 739   = case guide of
 740       UnfWhen {}                     -> True
 741       UnfNever                       -> False
 742       UnfIfGoodArgs { ug_size = sz } -> cheap && sz - 10 * (arity + 1) <= opt_UF_UseThreshold
 743   | otherwise = False
 744 \end{code}
 745
 746 %************************************************************************
 747 %*                                                                      *
 748 \subsection{callSiteInline}
 749 %*                                                                      *
 750 %************************************************************************
 751
 752 This is the key function.  It decides whether to inline a variable at a call site
 753
 754 callSiteInline is used at call sites, so it is a bit more generous.
 755 It's a very important function that embodies lots of heuristics.
 756 A non-WHNF can be inlined if it doesn't occur inside a lambda,
 757 and occurs exactly once or
 758     occurs once in each branch of a case and is small
 759
 760 If the thing is in WHNF, there's no danger of duplicating work,
 761 so we can inline if it occurs once, or is small
 762
 763 NOTE: we don't want to inline top-level functions that always diverge.
 764 It just makes the code bigger.  It turns out that the convenient way to prevent
 765 them inlining is to give them a NOINLINE pragma, which we do in
 766 StrictAnal.addStrictnessInfoToTopId
 767
 768 \begin{code}
 769 callSiteInline :: DynFlags
 770                -> Id                    -- The Id
 771                -> Bool                  -- True <=> unfolding is active
 772                -> Bool                  -- True if there are no arguments at all (incl type args)
 773                -> [ArgSummary]          -- One summary for each value arg
 774                -> CallCtxt              -- The kind of context the call appears in
 775                -> Maybe CoreExpr        -- Just the unfolding template if we decide to inline
 776
 777 instance Outputable ArgSummary where
 778   ppr summary = case summary of   -- printed as the constructor name
 779     TrivArg    -> ptext (sLit "TrivArg")
 780     NonTrivArg -> ptext (sLit "NonTrivArg")
         ValueArg   -> ptext (sLit "ValueArg")
 781
 782 data CallCtxt = BoringCtxt      -- computeDiscount gives no result discount here
 783
 784               | ArgCtxt         -- We are somewhere in the argument of a function
 785                         Bool    -- True  <=> we're somewhere in the RHS of function with rules
 786                                 -- False <=> we *are* the argument of a function with non-zero
 787                                 --           arg discount
 788                                 --        OR
 789                                 --           we *are* the RHS of a let  Note [RHS of lets]
 790                                 -- In both cases, be a little keener to inline
 791
 792               | ValAppCtxt      -- We're applied to at least one value arg
 793                                 -- This arises when we have ((f x |> co) y)
 794                                 -- Then the (f x) has argument 'x' but in a ValAppCtxt
 795
 796               | CaseCtxt        -- We're the scrutinee of a case
 797                                 -- that decomposes its scrutinee
 798
 799 instance Outputable CallCtxt where
 800   ppr ctxt = case ctxt of   -- constructor name, followed by the flag for ArgCtxt
 801     BoringCtxt        -> ptext (sLit "BoringCtxt")
 802     ValAppCtxt        -> ptext (sLit "ValAppCtxt")
 803     CaseCtxt          -> ptext (sLit "CaseCtxt")
         ArgCtxt has_rules -> ptext (sLit "ArgCtxt") <+> ppr has_rules
 804
 805 callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info
 806   = case idUnfolding id of
 807       -- NB: idUnfolding checks for loop-breakers, returning NoUnfolding
 808       -- for them.  Things with an INLINE pragma may have an unfolding
 809       -- *and* be a loop breaker (maybe the knot is not yet untied).
 810       NoUnfolding      -> Nothing
 811       OtherCon {}      -> Nothing
 812       DFunUnfolding {} -> Nothing     -- Never unfold a DFun
 813       CoreUnfolding { uf_tmpl = tmpl, uf_is_top = top_lvl
 814                     , uf_is_cheap = cheap, uf_expandable = expandable
 815                     , uf_arity = arity, uf_guidance = guide }
 816         -> if active_unfolding   -- an inactive unfolding is never used
 817              then tryUnfolding dflags id lone_variable arg_infos cont_info
 818                                tmpl top_lvl cheap expandable arity guide
 819              else Nothing
 820
 821 tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt
 822              -> CoreExpr -> Bool -> Bool -> Bool -> Arity -> UnfoldingGuidance
 823              -> Maybe CoreExpr
     -- Decide whether to inline 'id' at one call site: the result is
     -- Just the unfolding template when the guidance, the argument
     -- summaries and the call context justify it, and Nothing otherwise.
     -- When both Opt_D_dump_inlinings and Opt_D_verbose_core2core are
     -- set, the inputs and the answer are also reported via pprTrace.
 824 tryUnfolding dflags id lone_variable
 825              arg_infos cont_info unf_template is_top
 826              is_cheap is_exp uf_arity guidance
 827                         -- uf_arity will typically be equal to (idArity id),
 828                         -- but may be less for InlineRules
 829  | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags
 830  = pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
 831                  (vcat [text "arg infos" <+> ppr arg_infos,
 832                         text "uf arity" <+> ppr uf_arity,
 833                         text "interesting continuation" <+> ppr cont_info,
 834                         text "some_benefit" <+> ppr some_benefit,
 835                         text "is exp:" <+> ppr is_exp,
 836                         text "is cheap:" <+> ppr is_cheap,
 837                         text "guidance" <+> ppr guidance,
 838                         extra_doc,
 839                         text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
 840                  result
 841   | otherwise  = result
 842
 843   where
 844     n_val_args = length arg_infos       -- one ArgSummary per value arg at the call
 845     saturated  = n_val_args >= uf_arity -- NB: also True when over-saturated
 846
 847     result | yes_or_no = Just unf_template
 848            | otherwise = Nothing
 849
 850     interesting_args = any nonTriv arg_infos
 851         -- NB: (any nonTriv arg_infos) looks at the
 852         -- over-saturated args too which is "wrong";
 853         -- but if over-saturated we inline anyway.
 854
 855            -- some_benefit is used when the RHS is small enough
 856            -- and the call has enough (or too many) value
 857            -- arguments (ie n_val_args >= arity). But there must
 858            -- be *something* interesting about some argument, or the
 859            -- result context, to make it worth inlining
 860     some_benefit
 861        | not saturated = interesting_args       -- Under-saturated
 862                                         -- Note [Unsaturated applications]
 863        | n_val_args > uf_arity = True   -- Over-saturated
 864        | otherwise = interesting_args   -- Saturated
 865                   || interesting_saturated_call
 866
 867     interesting_saturated_call
 868       = case cont_info of
 869           BoringCtxt -> not is_top && uf_arity > 0        -- Note [Nested functions]
 870           CaseCtxt   -> not (lone_variable && is_cheap)   -- Note [Lone variables]
 871           ArgCtxt {} -> uf_arity > 0                      -- Note [Inlining in ArgCtxt]
 872           ValAppCtxt -> True                              -- Note [Cast then apply]
 873
         -- yes_or_no is the decision itself; extra_doc is only used in the trace
 874     (yes_or_no, extra_doc)
 875       = case guidance of
 876           UnfNever -> (False, empty)
 877
 878           UnfWhen unsat_ok boring_ok
 879              -> (enough_args && (boring_ok || some_benefit), empty )
 880              where      -- See Note [INLINE for small functions]
 881                enough_args = saturated || (unsat_ok && n_val_args > 0)
                      -- an unsaturated call qualifies only if unsat_ok
                      -- and at least one value arg is supplied
 882
 883           UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
 884              -> ( is_cheap && some_benefit && small_enough
 885                 , (text "discounted size =" <+> int discounted_size) )
 886              where
 887                discounted_size = size - discount
 888                small_enough = discounted_size <= opt_UF_UseThreshold
 889                discount = computeDiscount uf_arity arg_discounts
 890                                           res_discount arg_infos cont_info
 891 \end{code}
 892
 893 Note [RHS of lets]
 894 ~~~~~~~~~~~~~~~~~~
 895 Be a tiny bit keener to inline in the RHS of a let, because that might
 896 lead to good things later
 897      f y = (y,y,y)
 898      g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
 899 We'd inline 'f' if the call was in a case context, and it kind-of-is,
 900 only we can't see it.  So we treat the RHS of a let as not-totally-boring.
 901
 902 Note [Unsaturated applications]
 903 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 904 When a call is not saturated, we *still* inline if one of the
 905 arguments has interesting structure.  That's sometimes very important.
 906 A good example is the Ord instance for Bool in Base:
 907
 908  Rec {
 909     $fOrdBool =GHC.Classes.D:Ord
 910                  @ Bool
 911                  ...
 912                  $cmin_ajX
 913
 914     $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
 915     $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
 916   }
 917
 918 But the defn of GHC.Classes.$dmmin is:
 919
 920   $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
 921     {- Arity: 3, HasNoCafRefs, Strictness: SLL,
 922        Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
 923                    case @ a GHC.Classes.<= @ a $dOrd x y of wild {
 924                      GHC.Types.False -> y GHC.Types.True -> x }) -}
 925
 926 We *really* want to inline $dmmin, even though it has arity 3, in
 927 order to unravel the recursion.
 928
 929
 930 Note [Things to watch]
 931 ~~~~~~~~~~~~~~~~~~~~~~
 932 *   { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
 933     Assume x is exported, so not inlined unconditionally.
 934     Then we want x to inline unconditionally; no reason for it
 935     not to, and doing so avoids an indirection.
 936
 937 *   { x = I# 3; ....f x.... }
 938     Make sure that x does not inline unconditionally!
 939     Lest we get extra allocation.
 940
 941 Note [Inlining an InlineRule]
 942 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 943 An InlineRule is used for
 944   (a) programmer INLINE pragmas
 945   (b) inlinings from worker/wrapper
 946
 947 For (a) the RHS may be large, and our contract is that we *only* inline
 948 when the function is applied to all the arguments on the LHS of the
 949 source-code defn.  (The uf_arity in the rule.)
 950
 951 However for worker/wrapper it may be worth inlining even if the
 952 arity is not satisfied (as we do in the CoreUnfolding case) so we don't
 953 require saturation.
 954
 955
 956 Note [Nested functions]
 957 ~~~~~~~~~~~~~~~~~~~~~~~
 958 If a function has a nested defn we also record some-benefit, on the
 959 grounds that we are often able to eliminate the binding, and hence the
 960 allocation, for the function altogether; this is good for join points.
 961 But this only makes sense for *functions*; inlining a constructor
 962 doesn't help allocation unless the result is scrutinised.  UNLESS the
 963 constructor occurs just once, albeit possibly in multiple case
 964 branches.  Then inlining it doesn't increase allocation, but it does
 965 increase the chance that the constructor won't be allocated at all in
 966 the branches that don't use it.
 967
 968 Note [Cast then apply]
 969 ~~~~~~~~~~~~~~~~~~~~~~
 970 Consider
 971    myIndex = __inline_me ( (/\a. <blah>) |> co )
 972    co :: (forall a. a -> a) ~ (forall a. T a)
 973      ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
 974
 975 We need to inline myIndex to unravel this; but the actual call (myIndex a) has
 976 no value arguments.  The ValAppCtxt gives it enough incentive to inline.
 977
 978 Note [Inlining in ArgCtxt]
 979 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 980 The condition (arity > 0) here is very important, because otherwise
 981 we end up inlining top-level stuff into useless places; eg
 982    x = I# 3#
 983    f = \y.  g x
 984 This can make a very big difference: it adds 16% to nofib 'integer' allocs,
 985 and 20% to 'power'.
 986
 987 At one stage I replaced this condition by 'True' (leading to the above
 988 slow-down).  The motivation was test eyeball/inline1.hs; but that seems
 989 to work ok now.
 990
 991 NOTE: arguably, we should inline in ArgCtxt only if the result of the
 992 call is at least CONLIKE.  At least for the cases where we use ArgCtxt
 993 for the RHS of a 'let', we only profit from the inlining if we get a
 994 CONLIKE thing (modulo lets).
 995
 996 Note [Lone variables]   See also Note [Interaction of exprIsCheap and lone variables]
 997 ~~~~~~~~~~~~~~~~~~~~~   which appears below
 998 The "lone-variable" case is important.  I spent ages messing about
 999 with unsatisfactory variants, but this is nice.  The idea is that if a
1000 variable appears all alone
1001
1002         as an arg of lazy fn, or rhs    BoringCtxt
1003         as scrutinee of a case          CaseCtxt
1004         as arg of a fn                  ArgCtxt
1005 AND
1006         it is bound to a cheap expression
1007
1008 then we should not inline it (unless there is some other reason,
1009 e.g. it is the sole occurrence).  That is what is happening at
1010 the use of 'lone_variable' in 'interesting_saturated_call'.
1011
1012 Why?  At least in the case-scrutinee situation, turning
1013         let x = (a,b) in case x of y -> ...
1014 into
1015         let x = (a,b) in case (a,b) of y -> ...
1016 and thence to
1017         let x = (a,b) in let y = (a,b) in ...
1018 is bad if the binding for x will remain.
1019
1020 Another example: I discovered that strings
1021 were getting inlined straight back into applications of 'error'
1022 because the latter is strict.
1023         s = "foo"
1024         f = \x -> ...(error s)...
1025
1026 Fundamentally such contexts should not encourage inlining because the
1027 context can ``see'' the unfolding of the variable (e.g. case or a
1028 RULE) so there's no gain, provided the thing is bound to a value.
1029
1030 However, watch out:
1031
1032  * Consider this:
1033         foo = _inline_ (\n. [n])
1034         bar = _inline_ (foo 20)
1035         baz = \n. case bar of { (m:_) -> m + n }
1036    Here we really want to inline 'bar' so that we can inline 'foo'
1037    and the whole thing unravels as it should obviously do.  This is
1038    important: in the NDP project, 'bar' generates a closure data
1039    structure rather than a list.
1040
1041    So the non-inlining of lone_variables should only apply if the
1042    unfolding is regarded as cheap; because that is when exprIsConApp_maybe
1043    looks through the unfolding.  Hence the "&& is_cheap" in the
1044    CaseCtxt branch of 'interesting_saturated_call'.
1045
1046  * Even a type application or coercion isn't a lone variable.
1047    Consider
1048         case $fMonadST @ RealWorld of { :DMonad a b c -> c }
1049    We had better inline that sucker!  The case won't see through it.
1050
1051    For now, I'm treating a variable applied to types
1052    in a *lazy* context "lone". The motivating example was
1053         f = /\a. \x. BIG
1054         g = /\a. \y.  h (f a)
1055    There's no advantage in inlining f here, and perhaps
1056    a significant disadvantage.  Hence some_val_args in the Stop case
1057
1058 Note [Interaction of exprIsCheap and lone variables]
1059 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1060 The lone-variable test says "don't inline if a case expression
1061 scrutinises a lone variable whose unfolding is cheap".  It's very
1062 important that, under these circumstances, exprIsConApp_maybe
1063 can spot a constructor application. So, for example, we don't
1064 consider
1065         let x = e in (x,x)
1066 to be cheap, and that's good because exprIsConApp_maybe doesn't
1067 think that expression is a constructor application.
1068
1069 I used to test is_value rather than is_cheap, which was utterly
1070 wrong, because the above expression responds True to exprIsHNF.
1071
1072 This kind of thing can occur if you have
1073
1074         {-# INLINE foo #-}
1075         foo = let x = e in (x,x)
1076
1077 which Roman did.
1078
1079 \begin{code}
1080 computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
1081 computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
1082   = call_discount + supplied_arg_discount + scaled_discount
1083   where
1084         -- The result replaces the call: count 10 for the function itself
1085     call_discount = 10
1086         -- ... and 10 per argument supplied, up to n_vals_wanted of them
1087     supplied_arg_discount = 10 * length (take n_vals_wanted arg_infos)
1088
1089         -- The raw discounts (for arguments and for the result) are
1090         -- multiplied by opt_UF_KeenessFactor: they reflect extra
1091         -- *efficiency* to be gained by inlining (e.g. beta reductions,
1092         -- case reductions), not merely *size*.
1093     scaled_discount
1094       = round (opt_UF_KeenessFactor * fromIntegral (raw_arg_discount + raw_res_discount))
1095
1096         -- A ValueArg earns the discount recorded for its position, a
1097         -- NonTrivArg a flat 10, and a TrivArg nothing
1098     raw_arg_discount = sum (zipWith discount_for arg_discounts arg_infos)
1099     discount_for recorded summary = case summary of
1100                                       TrivArg    -> 0
1101                                       NonTrivArg -> 10
1102                                       ValueArg   -> recorded
1103
1104         -- res_discount can be very large when a function returns
1105         -- constructors, but the full amount is only used when there is
1106         -- a case continuation.  In other non-boring contexts it is,
1107         -- rather arbitrarily, capped at 40: we want to avoid inlining
1108         -- large functions that return constructors into contexts that
1109         -- are simply "interesting".
1110     raw_res_discount = case cont_info of
1111                          BoringCtxt -> 0
1112                          CaseCtxt   -> res_discount
1113                          _          -> min 40 res_discount
1114 \end{code}
1115
1116 %************************************************************************
1117 %*                                                                      *
1118         Interesting arguments
1119 %*                                                                      *
1120 %************************************************************************
1121
1122 Note [Interesting arguments]
1123 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1124 An argument is interesting if it deserves a discount for unfoldings
1125 with a discount in that argument position.  The idea is to avoid
1126 unfolding a function that is applied only to variables that have no
1127 unfolding (i.e. they are probably lambda bound): f x y z There is
1128 little point in inlining f here.
1129
1130 Generally, *values* (like (C a b) and (\x.e)) deserve discounts.  But
1131 we must look through lets, eg (let x = e in C a b), because the let will
1132 float, exposing the value, if we inline.  That makes it different to
1133 exprIsHNF.
1134
1135 Before 2009 we said it was interesting if the argument had *any* structure
1136 at all; i.e. (hasSomeUnfolding v).  But that does too much inlining; see Trac #3016.
1137
1138 But we don't regard (f x y) as interesting, unless f is unsaturated.
1139 If it's saturated and f hasn't inlined, then it's probably not going
1140 to now!
1141
1142 Note [Conlike is interesting]
1143 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1144 Consider
1145         f d = ...((*) d x y)...
1146         ... f (df d')...
1147 where df is con-like. Then we'd really like to inline 'f' so that the
1148 rule for (*) (df d) can fire.  To do this
1149   a) we give a discount for being an argument of a class-op (eg (*) d)
1150   b) we say that a con-like argument (eg (df d)) is interesting
1151
1152 \begin{code}
1153 data ArgSummary = TrivArg       -- Nothing interesting (e.g. a type, a coercion,
                                     -- or a variable with no useful unfolding)
1154                 | NonTrivArg    -- Arg has structure (e.g. a saturated call, or a case)
1155                 | ValueArg      -- Arg is a con-app or PAP
1156                                 -- ..or con-like. Note [Conlike is interesting]
1157
1158 interestingArg :: CoreExpr -> ArgSummary
1159 -- Summarise how interesting an argument is; see Note [Interesting arguments]
1160 interestingArg e = go e 0
1161   where
1162     -- n is # value args to which the expression is applied
1163     go (Lit {}) _          = ValueArg
1164     go (Var v)  n
1165        | isConLikeId v     = ValueArg   -- Experimenting with 'conlike' rather than
1166                                         --    data constructors here
1167        | idArity v > n     = ValueArg   -- Catches (eg) primops with arity but no unfolding
1168        | n > 0             = NonTrivArg -- Saturated or unknown call
1169        | conlike_unfolding = ValueArg   -- n==0; look for an interesting unfolding
1170                                         -- See Note [Conlike is interesting]
1171        | otherwise         = TrivArg    -- n==0, no useful unfolding
1172        where
1173          conlike_unfolding = isConLikeUnfolding (idUnfolding v)
1174
1175     go (Type _)          _ = TrivArg
1176     go (Coercion _)      _ = TrivArg
1177     go (App fn (Type _)) n = go fn n       -- type and coercion args do not count towards n
1178     go (App fn (Coercion _)) n = go fn n
1179     go (App fn _)        n = go fn (n+1)   -- one more value arg
1180     go (Note _ a)        n = go a n        -- look through notes and casts
1181     go (Cast e _)        n = go e n
1182     go (Lam v e)         n
1183        | isTyVar v         = go e n        -- type lambda: n is unchanged
1184        | n>0               = go e (n-1)    -- value lambda uses up one of the n args
1185        | otherwise         = ValueArg      -- value lambda with no args left
         -- Look through a let: the result is ValueArg only if the body is
1186     go (Let _ e)         n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg }
1187     go (Case {})         _ = NonTrivArg
1188
1189 nonTriv :: ArgSummary -> Bool
1190 -- Everything except TrivArg counts as non-trivial
1191 nonTriv summary = case summary of { TrivArg -> False; _ -> True }
1192 \end{code}
1193
1194 %************************************************************************
1195 %*                                                                      *
1196          exprIsConApp_maybe
1197 %*                                                                      *
1198 %************************************************************************
1199
1200 Note [exprIsConApp_maybe]
1201 ~~~~~~~~~~~~~~~~~~~~~~~~~
1202 exprIsConApp_maybe is a very important function.  There are two principal
1203 uses:
1204   * case e of { .... }
1205   * cls_op e, where cls_op is a class operation
1206
1207 In both cases you want to know if e is of form (C e1..en) where C is
1208 a data constructor.
1209
1210 However e might not *look* like one: it may be a cast, or a variable whose unfolding must be looked through.
1211
1212 \begin{code}
1213 -- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is
1214 -- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@,
1215 -- where t1..tk are the *universally-quantified* type args of 'dc'
1216 exprIsConApp_maybe :: IdUnfoldingFun -> CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
1217
1218 exprIsConApp_maybe id_unf (Note note expr)
1219   | notSccNote note
1220   = exprIsConApp_maybe id_unf expr
1221         -- We ignore all notes except SCCs.  For example,
1222         --      case _scc_ "foo" (C a b) of
1223         --                      C a b -> e
1224         -- should not be optimised away, because we'll lose the
1225         -- entry count on 'foo'; see Trac #4414
1226
1227 exprIsConApp_maybe id_unf (Cast expr co)
1228   =     -- Here we do the KPush reduction rule as described in the FC paper
1229         -- The transformation applies iff we have
1230         --      (C e1 ... en) `cast` co
1231         -- where co :: (T t1 .. tn) ~ to_ty
1232         -- The left-hand one must be a T, because exprIsConApp returned True
1233         -- but the right-hand one might not be.  (Though it usually will.)
1234
1235     case exprIsConApp_maybe id_unf expr of {
1236         Nothing                          -> Nothing ;
1237         Just (dc, _dc_univ_args, dc_args) ->
1238
1239     let Pair _from_ty to_ty = coercionKind co
1240         dc_tc = dataConTyCon dc
1241     in
1242     case splitTyConApp_maybe to_ty of {
1243         Nothing -> Nothing ;
1244         Just (to_tc, to_tc_arg_tys)
1245                 | dc_tc /= to_tc -> Nothing
1246                 -- These two Nothing cases are possible; we might see
1247                 --      (C x y) `cast` (g :: T a ~ S [a]),
1248                 -- where S is a type function.  In fact, exprIsConApp
1249                 -- will probably not be called in such circumstances,
1250                 -- but there's nothing wrong with it
1251
1252                 | otherwise  ->
1253     let
1254         tc_arity       = tyConArity dc_tc
1255         dc_univ_tyvars = dataConUnivTyVars dc
1256         dc_ex_tyvars   = dataConExTyVars dc
1257         arg_tys        = dataConRepArgTys dc
1258
             -- dc_args = existential type args, then the value args
1259         (ex_args, val_args) = splitAtList dc_ex_tyvars dc_args
1260
1261         -- Make the "theta" from Fig 3 of the paper
1262         gammas = decomposeCo tc_arity co
1263         theta  = zipOpenCvSubst (dc_univ_tyvars ++ dc_ex_tyvars)
1264                                 (gammas         ++ map mkReflCo (stripTypeArgs ex_args))
1265
1266           -- Cast the value arguments (which include dictionaries)
1267         new_val_args = zipWith cast_arg arg_tys val_args
1268         cast_arg arg_ty arg = mkCoerce (liftCoSubst theta arg_ty) arg
1269     in
1270 #ifdef DEBUG
1271     let dump_doc = vcat [ppr dc,      ppr dc_univ_tyvars, ppr dc_ex_tyvars,
1272                          ppr arg_tys, ppr dc_args,        ppr _dc_univ_args,
1273                          ppr ex_args, ppr val_args]
1274     in
1275     ASSERT2( eqType _from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
1276     ASSERT2( all isTypeArg ex_args, dump_doc )
1277     ASSERT2( equalLength val_args arg_tys, dump_doc )
1278 #endif
1279
             -- The universal type args of the result come from the cast's target type
1280     Just (dc, to_tc_arg_tys, ex_args ++ new_val_args)
1281     }}
1282
1283 exprIsConApp_maybe id_unf expr
1284   = analyse expr []
1285   where
         -- analyse walks down the application spine, accumulating the
         -- arguments seen so far in its second parameter
1286     analyse (App fun arg) args = analyse fun (arg:args)
1287     analyse fun@(Lam {})  args = beta fun [] args
1288
1289     analyse (Var fun) args
1290         | Just con <- isDataConWorkId_maybe fun
1291         , count isValArg args == idArity fun
1292         , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args
1293         = Just (con, stripTypeArgs univ_ty_args, rest_args)
1294
1295         -- Look through dictionary functions; see Note [Unfolding DFuns]
1296         | DFunUnfolding dfun_nargs con ops <- unfolding
1297         , let sat = length args == dfun_nargs    -- See Note [DFun arity check]
1298           in if sat then True else
1299              pprTrace "Unsaturated dfun" (ppr fun <+> int dfun_nargs $$ ppr args) False
1300         , let (dfun_tvs, _n_theta, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun)
1301               subst    = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args))
1302               mk_arg (DFunConstArg e) = e
1303               mk_arg (DFunLamArg i)   = args !! i
1304               mk_arg (DFunPolyArg e)  = mkApps e args
1305         = Just (con, substTys subst dfun_res_tys, map mk_arg ops)
1306
1307         -- Look through unfoldings, but only cheap ones, because
1308         -- we are effectively duplicating the unfolding
1309         | Just rhs <- expandUnfolding_maybe unfolding
1310         = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
1311           analyse rhs args
1312         where
1313           unfolding = id_unf fun
1314
1315     analyse _ _ = Nothing
1316
1317     -----------
         -- beta pairs leading lambda binders with type/coercion arguments;
         -- once the lambdas run out it substitutes the collected pairs
         -- into what is left and resumes analyse on the remaining args
1318     beta (Lam v body) pairs (arg : args)
1319         | isTyCoArg arg
1320         = beta body ((v,arg):pairs) args
1321
1322     beta (Lam {}) _ _    -- Un-saturated, or not a type lambda
1323         = Nothing
1324
1325     beta fun pairs args
1326         = analyse (substExpr (text "subst-expr-is-con-app") subst fun) args
1327         where
1328           subst = mkOpenSubst (mkInScopeSet (exprFreeVars fun)) pairs
1329           -- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args]
1330
1331 stripTypeArgs :: [CoreExpr] -> [Type]
1332   -- The assertion really does want isTypeArg here, not isTyCoArg!
1333 stripTypeArgs args = ASSERT2( all isTypeArg args, ppr args )
1334                      concatMap (\arg -> case arg of { Type ty -> [ty]; _ -> [] }) args
1335 \end{code}
1336
1337 Note [Unfolding DFuns]
1338 ~~~~~~~~~~~~~~~~~~~~~~
1339 DFuns look like
1340
1341   df :: forall a b. (Eq a, Eq b) -> Eq (a,b)
1342   df a b d_a d_b = MkEqD (a,b) ($c1 a b d_a d_b)
1343                                ($c2 a b d_a d_b)
1344
1345 So to split it up we just need to apply the ops $c1, $c2 etc
1346 to the very same args as the dfun.  It takes a little more work
1347 to compute the type arguments to the dictionary constructor.
1348
1349 Note [DFun arity check]
1350 ~~~~~~~~~~~~~~~~~~~~~~~
1351 Here we check that the total number of supplied arguments (including
1352 type args) matches what the dfun is expecting.  This may be *less*
1353 than the ordinary arity of the dfun: see Note [DFun unfoldings] in CoreSyn