compiler/coreSyn/CoreUnfold.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The AQUA Project, Glasgow University, 1994-1998
   4 %
   5
   6 Core-syntax unfoldings
   7
   8 Unfoldings (which can travel across module boundaries) are in Core
   9 syntax (namely @CoreExpr@s).
  10
  11 The type @Unfolding@ sits ``above'' simply-Core-expressions
  12 unfoldings, capturing ``higher-level'' things we know about a binding,
  13 usually things that the simplifier found out (e.g., ``it's a
  14 literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
  15 find, unsurprisingly, a Core expression.
  16
  17 \begin{code}
  18 module CoreUnfold (
  19         Unfolding, UnfoldingGuidance,   -- Abstract types
  20
  21         noUnfolding, mkImplicitUnfolding,
  22         mkTopUnfolding, mkUnfolding, mkCoreUnfolding,
  23         mkInlineRule, mkWwInlineRule,
  24         mkCompulsoryUnfolding, mkDFunUnfolding,
  25
  26         interestingArg, ArgSummary(..),
  27
  28         couldBeSmallEnoughToInline,
  29         certainlyWillInline, smallEnoughToInline,
  30
  31         callSiteInline, CallCtxt(..),
  32
  33         exprIsConApp_maybe
  34
  35     ) where
  36
  37 #include "HsVersions.h"
  38
  39 import StaticFlags
  40 import DynFlags
  41 import CoreSyn
  42 import PprCore          ()      -- Instances
  43 import OccurAnal
  44 import CoreSubst hiding( substTy )
  45 import CoreFVs         ( exprFreeVars )
  46 import CoreArity       ( manifestArity )
  47 import CoreUtils
  48 import Id
  49 import DataCon
  50 import TyCon
  51 import Literal
  52 import PrimOp
  53 import IdInfo
  54 import BasicTypes       ( Arity )
  55 import TcType           ( tcSplitDFunTy )
  56 import Type
  57 import Coercion
  58 import PrelNames
  59 import VarEnv           ( mkInScopeSet )
  60 import Bag
  61 import Util
  62 import FastTypes
  63 import FastString
  64 import Outputable
  65
  66 \end{code}
  67
  68
  69 %************************************************************************
  70 %*                                                                      *
  71 \subsection{Making unfoldings}
  72 %*                                                                      *
  73 %************************************************************************
  74
  75 \begin{code}
  76 mkTopUnfolding :: Bool -> CoreExpr -> Unfolding
  77 mkTopUnfolding is_bottoming expr
  78   = mkUnfolding True {- Top level -} is_bottoming expr
  79
  80 mkImplicitUnfolding :: CoreExpr -> Unfolding
  81 -- For implicit Ids, do a tiny bit of optimising first
  82 mkImplicitUnfolding expr = mkTopUnfolding False (simpleOptExpr expr)
  83
  84 -- Note [Top-level flag on inline rules]
  85 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  86 -- Slight hack: note that mk_inline_rules conservatively sets the
  87 -- top-level flag to True.  It gets set more accurately by the simplifier
  88 -- Simplify.simplUnfolding.
  89
  90 mkUnfolding :: Bool -> Bool -> CoreExpr -> Unfolding
  91 mkUnfolding top_lvl is_bottoming expr
  92   = CoreUnfolding { uf_tmpl       = occurAnalyseExpr expr,
  93                     uf_src        = InlineRhs,
  94                     uf_arity      = arity,
  95                     uf_is_top     = top_lvl,
  96                     uf_is_value   = exprIsHNF        expr,
  97                     uf_is_conlike = exprIsConLike    expr,
  98                     uf_expandable = exprIsExpandable expr,
  99                     uf_is_cheap   = is_cheap,
 100                     uf_guidance   = guidance }
 101   where
 102     is_cheap = exprIsCheap expr
 103     (arity, guidance) = calcUnfoldingGuidance is_cheap (top_lvl && is_bottoming)
 104                                               opt_UF_CreationThreshold expr
 105         -- Sometimes during simplification, there's a large let-bound thing
 106         -- which has been substituted, and so is now dead; so 'expr' contains
 107         -- two copies of the thing while the occurrence-analysed expression doesn't
 108         -- Nevertheless, we *don't* occ-analyse before computing the size because the
 109         -- size computation bales out after a while, whereas occurrence analysis does not.
 110         --
 111         -- This can occasionally mean that the guidance is very pessimistic;
 112         -- it gets fixed up next round.  And it should be rare, because large
 113         -- let-bound things that are dead are usually caught by preInlineUnconditionally
 114
 115 mkCoreUnfolding :: Bool -> UnfoldingSource -> CoreExpr
 116                 -> Arity -> UnfoldingGuidance -> Unfolding
 117 -- Occurrence-analyses the expression before capturing it
 118 mkCoreUnfolding top_lvl src expr arity guidance
 119   = CoreUnfolding { uf_tmpl       = occurAnalyseExpr expr,
 120                     uf_src        = src,
 121                     uf_arity      = arity,
 122                     uf_is_top     = top_lvl,
 123                     uf_is_value   = exprIsHNF        expr,
 124                     uf_is_conlike = exprIsConLike    expr,
 125                     uf_is_cheap   = exprIsCheap      expr,
 126                     uf_expandable = exprIsExpandable expr,
 127                     uf_guidance   = guidance }
 128
 129 mkDFunUnfolding :: DataCon -> [Id] -> Unfolding
 130 mkDFunUnfolding con ops = DFunUnfolding con (map Var ops)
 131
 132 mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
 133 mkWwInlineRule id expr arity
 134   = mkCoreUnfolding True (InlineWrapper id)
 135                    (simpleOptExpr expr) arity
 136                    (UnfWhen unSaturatedOk boringCxtNotOk)
 137
 138 mkCompulsoryUnfolding :: CoreExpr -> Unfolding
 139 mkCompulsoryUnfolding expr         -- Used for things that absolutely must be unfolded
 140   = mkCoreUnfolding True InlineCompulsory
 141                     expr 0    -- Arity of unfolding doesn't matter
 142                     (UnfWhen unSaturatedOk boringCxtOk)
 143
 144 mkInlineRule :: CoreExpr -> Maybe Arity -> Unfolding
 145 mkInlineRule expr mb_arity
 146   = mkCoreUnfolding True InlineRule      -- Note [Top-level flag on inline rules]
 147                     expr' arity
 148                     (UnfWhen unsat_ok boring_ok)
 149   where
 150     expr' = simpleOptExpr expr
 151     (unsat_ok, arity) = case mb_arity of
 152                           Nothing -> (unSaturatedOk, manifestArity expr')
 153                           Just ar -> (needSaturated, ar)
 154
 155     boring_ok = case calcUnfoldingGuidance True    -- Treat as cheap
 156                                            False   -- But not bottoming
 157                                            (arity+1) expr' of
 158                   (_, UnfWhen _ boring_ok) -> boring_ok
 159                   _other                   -> boringCxtNotOk
 160      -- See Note [INLINE for small functions]
 161 \end{code}
 162
 163
 164 %************************************************************************
 165 %*                                                                      *
 166 \subsection{The UnfoldingGuidance type}
 167 %*                                                                      *
 168 %************************************************************************
 169
 170 \begin{code}
 171 calcUnfoldingGuidance
 172         :: Bool         -- True <=> the rhs is cheap, or we want to treat it
 173                         --          as cheap (INLINE things)
 174         -> Bool         -- True <=> this is a top-level unfolding for a
 175                         --          diverging function; don't inline this
 176         -> Int          -- Bomb out if size gets bigger than this
 177         -> CoreExpr     -- Expression to look at
 178         -> (Arity, UnfoldingGuidance)
 179 calcUnfoldingGuidance expr_is_cheap top_bot bOMB_OUT_SIZE expr
 180   = case collectBinders expr of { (bndrs, body) ->
 181     let
 182         val_bndrs   = filter isId bndrs
 183         n_val_bndrs = length val_bndrs
 184
 185         guidance
 186           = case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
 187               TooBig -> UnfNever
 188               SizeIs size cased_bndrs scrut_discount
 189                 | uncondInline n_val_bndrs (iBox size)
 190                 , expr_is_cheap
 191                 -> UnfWhen unSaturatedOk boringCxtOk   -- Note [INLINE for small functions]
 192                 | top_bot  -- See Note [Do not inline top-level bottoming functions]
 193                 -> UnfNever
 194
 195                 | otherwise
 196                 -> UnfIfGoodArgs { ug_args  = map (discount cased_bndrs) val_bndrs
 197                                  , ug_size  = iBox size
 198                                  , ug_res   = iBox scrut_discount }
 199
 200         discount cbs bndr
 201            = foldlBag (\acc (b',n) -> if bndr==b' then acc+n else acc)
 202                       0 cbs
 203     in
 204     (n_val_bndrs, guidance) }
 205 \end{code}
 206
 207 Note [Computing the size of an expression]
 208 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 209 The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
 210 heuristics right has taken a long time.  Here's the basic strategy:
 211
 212     * Variables, literals: 0
 213       (Exception for string literals, see litSize.)
 214
 215     * Function applications (f e1 .. en): 1 + #value args
 216
 217     * Constructor applications: 1, regardless of #args
 218
 219     * Let(rec): 1 + size of components
 220
 221     * Note, cast: 0
 222
 223 Examples
 224
 225   Size  Term
 226   --------------
 227     0     42#
 228     0     x
 229     0     True
 230     2     f x
 231     1     Just x
 232     4     f (g x)
 233
 234 Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
 235 a function call to account for.  Notice also that constructor applications
 236 are very cheap, because exposing them to a caller is so valuable.
 237
 238
 239 Note [Do not inline top-level bottoming functions]
 240 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 241 The FloatOut pass has gone to some trouble to float out calls to 'error'
 242 and similar friends.  See Note [Bottoming floats] in SetLevels.
 243 Do not re-inline them!  But we *do* still inline if they are very small
 244 (the uncondInline stuff).
 245
 246
 247 Note [INLINE for small functions]
 248 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 249 Consider        {-# INLINE f #-}
 250                 f x = Just x
 251                 g y = f y
 252 Then f's RHS is no larger than its LHS, so we should inline it into
 253 even the most boring context.  In general, f the function is
 254 sufficiently small that its body is as small as the call itself, the
 255 inline unconditionally, regardless of how boring the context is.
 256
 257 Things to note:
 258
 259  * We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
 260    than the thing it's replacing.  Notice that
 261       (f x) --> (g 3)             -- YES, unconditionally
 262       (f x) --> x : []            -- YES, *even though* there are two
 263                                   --      arguments to the cons
 264       x     --> g 3               -- NO
 265       x     --> Just v            -- NO
 266
 267   It's very important not to unconditionally replace a variable by
 268   a non-atomic term.
 269
 270 * We do this even if the thing isn't saturated, else we end up with the
 271   silly situation that
 272      f x y = x
 273      ...map (f 3)...
 274   doesn't inline.  Even in a boring context, inlining without being
 275   saturated will give a lambda instead of a PAP, and will be more
 276   efficient at runtime.
 277
 278 * However, when the function's arity > 0, we do insist that it
 279   has at least one value argument at the call site.  Otherwise we find this:
 280        f = /\a \x:a. x
 281        d = /\b. MkD (f b)
 282   If we inline f here we get
 283        d = /\b. MkD (\x:b. x)
 284   and then prepareRhs floats out the argument, abstracting the type
 285   variables, so we end up with the original again!
 286
 287
 288 \begin{code}
 289 uncondInline :: Arity -> Int -> Bool
 290 -- Inline unconditionally if there no size increase
 291 -- Size of call is arity (+1 for the function)
 292 -- See Note [INLINE for small functions]
 293 uncondInline arity size
 294   | arity == 0 = size == 0
 295   | otherwise  = size <= arity + 1
 296 \end{code}
 297
 298
 299 \begin{code}
 300 sizeExpr :: FastInt         -- Bomb out if it gets bigger than this
 301          -> [Id]            -- Arguments; we're interested in which of these
 302                             -- get case'd
 303          -> CoreExpr
 304          -> ExprSize
 305
 306 -- Note [Computing the size of an expression]
 307
 308 sizeExpr bOMB_OUT_SIZE top_args expr
 309   = size_up expr
 310   where
 311     size_up (Cast e _) = size_up e
 312     size_up (Note _ e) = size_up e
 313     size_up (Type _)   = sizeZero           -- Types cost nothing
 314     size_up (Lit lit)  = sizeN (litSize lit)
 315     size_up (Var f)    = size_up_call f []  -- Make sure we get constructor
 316                                             -- discounts even on nullary constructors
 317
 318     size_up (App fun (Type _)) = size_up fun
 319     size_up (App fun arg)      = size_up arg  `addSizeNSD`
 320                                  size_up_app fun [arg]
 321
 322     size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 1)
 323                       | otherwise = size_up e
 324
 325     size_up (Let (NonRec binder rhs) body)
 326       = size_up rhs             `addSizeNSD`
 327         size_up body            `addSizeN`
 328         (if isUnLiftedType (idType binder) then 0 else 1)
 329                 -- For the allocation
 330                 -- If the binder has an unlifted type there is no allocation
 331
 332     size_up (Let (Rec pairs) body)
 333       = foldr (addSizeNSD . size_up . snd)
 334               (size_up body `addSizeN` length pairs)    -- (length pairs) for the allocation
 335               pairs
 336
 337     size_up (Case (Var v) _ _ alts)
 338         | v `elem` top_args             -- We are scrutinising an argument variable
 339         = alts_size (foldr1 addAltSize alt_sizes)
 340                     (foldr1 maxSize alt_sizes)
 341                 -- Good to inline if an arg is scrutinised, because
 342                 -- that may eliminate allocation in the caller
 343                 -- And it eliminates the case itself
 344         where
 345           alt_sizes = map size_up_alt alts
 346
 347                 -- alts_size tries to compute a good discount for
 348                 -- the case when we are scrutinising an argument variable
 349           alts_size (SizeIs tot tot_disc tot_scrut)  -- Size of all alternatives
 350                     (SizeIs max _        _)          -- Size of biggest alternative
 351                 = SizeIs tot (unitBag (v, iBox (_ILIT(2) +# tot -# max)) `unionBags` tot_disc) tot_scrut
 352                         -- If the variable is known, we produce a discount that
 353                         -- will take us back to 'max', the size of the largest alternative
 354                         -- The 1+ is a little discount for reduced allocation in the caller
 355                         --
 356                         -- Notice though, that we return tot_disc, the total discount from
 357                         -- all branches.  I think that's right.
 358
 359           alts_size tot_size _ = tot_size
 360
 361     size_up (Case e _ _ alts) = size_up e  `addSizeNSD`
 362                                 foldr (addAltSize . size_up_alt) sizeZero alts
 363                 -- We don't charge for the case itself
 364                 -- It's a strict thing, and the price of the call
 365                 -- is paid by scrut.  Also consider
 366                 --      case f x of DEFAULT -> e
 367                 -- This is just ';'!  Don't charge for it.
 368                 --
 369                 -- Moreover, we charge one per alternative.
 370
 371     ------------
 372     -- size_up_app is used when there's ONE OR MORE value args
 373     size_up_app (App fun arg) args
 374         | isTypeArg arg            = size_up_app fun args
 375         | otherwise                = size_up arg  `addSizeNSD`
 376                                      size_up_app fun (arg:args)
 377     size_up_app (Var fun)     args = size_up_call fun args
 378     size_up_app other         args = size_up other `addSizeN` length args
 379
 380     ------------
 381     size_up_call :: Id -> [CoreExpr] -> ExprSize
 382     size_up_call fun val_args
 383        = case idDetails fun of
 384            FCallId _        -> sizeN opt_UF_DearOp
 385            DataConWorkId dc -> conSize    dc (length val_args)
 386            PrimOpId op      -> primOpSize op (length val_args)
 387            ClassOpId _      -> classOpSize top_args val_args
 388            _                -> funSize top_args fun (length val_args)
 389
 390     ------------
 391     size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 1
 392         -- Don't charge for args, so that wrappers look cheap
 393         -- (See comments about wrappers with Case)
 394         --
 395         -- IMPORATANT: *do* charge 1 for the alternative, else we
 396         -- find that giant case nests are treated as practically free
 397         -- A good example is Foreign.C.Error.errrnoToIOError
 398
 399     ------------
 400         -- These addSize things have to be here because
 401         -- I don't want to give them bOMB_OUT_SIZE as an argument
 402     addSizeN TooBig          _  = TooBig
 403     addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
 404
 405         -- addAltSize is used to add the sizes of case alternatives
 406     addAltSize TooBig            _      = TooBig
 407     addAltSize _                 TooBig = TooBig
 408     addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
 409         = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
 410                                  (xs `unionBags` ys)
 411                                  (d1 +# d2)   -- Note [addAltSize result discounts]
 412
 413         -- This variant ignores the result discount from its LEFT argument
 414         -- It's used when the second argument isn't part of the result
 415     addSizeNSD TooBig            _      = TooBig
 416     addSizeNSD _                 TooBig = TooBig
 417     addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
 418         = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
 419                                  (xs `unionBags` ys)
 420                                  d2  -- Ignore d1
 421 \end{code}
 422
 423 \begin{code}
 424 -- | Finds a nominal size of a string literal.
 425 litSize :: Literal -> Int
 426 -- Used by CoreUnfold.sizeExpr
 427 litSize (MachStr str) = 1 + ((lengthFS str + 3) `div` 4)
 428         -- If size could be 0 then @f "x"@ might be too small
 429         -- [Sept03: make literal strings a bit bigger to avoid fruitless
 430         --  duplication of little strings]
 431 litSize _other = 0    -- Must match size of nullary constructors
 432                       -- Key point: if  x |-> 4, then x must inline unconditionally
 433                       --            (eg via case binding)
 434
 435 classOpSize :: [Id] -> [CoreExpr] -> ExprSize
 436 -- See Note [Conlike is interesting]
 437 classOpSize _ []
 438   = sizeZero
 439 classOpSize top_args (arg1 : other_args)
 440   = SizeIs (iUnbox size) arg_discount (_ILIT(0))
 441   where
 442     size = 2 + length other_args
 443     -- If the class op is scrutinising a lambda bound dictionary then
 444     -- give it a discount, to encourage the inlining of this function
 445     -- The actual discount is rather arbitrarily chosen
 446     arg_discount = case arg1 of
 447                      Var dict | dict `elem` top_args
 448                               -> unitBag (dict, opt_UF_DictDiscount)
 449                      _other   -> emptyBag
 450
 451 funSize :: [Id] -> Id -> Int -> ExprSize
 452 -- Size for functions that are not constructors or primops
 453 -- Note [Function applications]
 454 funSize top_args fun n_val_args
 455   | fun `hasKey` buildIdKey   = buildSize
 456   | fun `hasKey` augmentIdKey = augmentSize
 457   | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount)
 458   where
 459     some_val_args = n_val_args > 0
 460
 461     arg_discount | some_val_args && fun `elem` top_args
 462                  = unitBag (fun, opt_UF_FunAppDiscount)
 463                  | otherwise = emptyBag
 464         -- If the function is an argument and is applied
 465         -- to some values, give it an arg-discount
 466
 467     res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount
 468                  | otherwise                = 0
 469         -- If the function is partially applied, show a result discount
 470
 471     size | some_val_args = 1 + n_val_args
 472          | otherwise     = 0
 473         -- The 1+ is for the function itself
 474         -- Add 1 for each non-trivial arg;
 475         -- the allocation cost, as in let(rec)
 476
 477
 478 conSize :: DataCon -> Int -> ExprSize
 479 conSize dc n_val_args
 480   | n_val_args == 0      = SizeIs (_ILIT(0)) emptyBag (_ILIT(1))        -- Like variables
 481   | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
 482   | otherwise            = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
 483         -- Treat a constructors application as size 1, regardless of how
 484         -- many arguments it has; we are keen to expose them
 485         -- (and we charge separately for their args).  We can't treat
 486         -- them as size zero, else we find that (Just x) has size 0,
 487         -- which is the same as a lone variable; and hence 'v' will
 488         -- always be replaced by (Just x), where v is bound to Just x.
 489         --
 490         -- However, unboxed tuples count as size zero
 491         -- I found occasions where we had
 492         --      f x y z = case op# x y z of { s -> (# s, () #) }
 493         -- and f wasn't getting inlined
 494
 495 primOpSize :: PrimOp -> Int -> ExprSize
 496 primOpSize op n_val_args
 497  | not (primOpIsDupable op) = sizeN opt_UF_DearOp
 498  | not (primOpOutOfLine op) = sizeN 1
 499         -- Be very keen to inline simple primops.
 500         -- We give a discount of 1 for each arg so that (op# x y z) costs 2.
 501         -- We can't make it cost 1, else we'll inline let v = (op# x y z)
 502         -- at every use of v, which is excessive.
 503         --
 504         -- A good example is:
 505         --      let x = +# p q in C {x}
 506         -- Even though x get's an occurrence of 'many', its RHS looks cheap,
 507         -- and there's a good chance it'll get inlined back into C's RHS. Urgh!
 508
 509  | otherwise = sizeN n_val_args
 510
 511
 512 buildSize :: ExprSize
 513 buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
 514         -- We really want to inline applications of build
 515         -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
 516         -- Indeed, we should add a result_discount becuause build is
 517         -- very like a constructor.  We don't bother to check that the
 518         -- build is saturated (it usually is).  The "-2" discounts for the \c n,
 519         -- The "4" is rather arbitrary.
 520
 521 augmentSize :: ExprSize
 522 augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
 523         -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
 524         -- e plus ys. The -2 accounts for the \cn
 525
 526 -- When we return a lambda, give a discount if it's used (applied)
 527 lamScrutDiscount :: ExprSize -> ExprSize
 528 lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount)
 529 lamScrutDiscount TooBig          = TooBig
 530 \end{code}
 531
 532 Note [addAltSize result discounts]
 533 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 534 When adding the size of alternatives, we *add* the result discounts
 535 too, rather than take the *maximum*.  For a multi-branch case, this
 536 gives a discount for each branch that returns a constructor, making us
 537 keener to inline.  I did try using 'max' instead, but it makes nofib
 538 'rewrite' and 'puzzle' allocate significantly more, and didn't make
 539 binary sizes shrink significantly either.
 540
 541 Note [Discounts and thresholds]
 542 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 543 Constants for discounts and thesholds are defined in main/StaticFlags,
 544 all of form opt_UF_xxxx.   They are:
 545
 546 opt_UF_CreationThreshold (45)
 547      At a definition site, if the unfolding is bigger than this, we
 548      may discard it altogether
 549
 550 opt_UF_UseThreshold (6)
 551      At a call site, if the unfolding, less discounts, is smaller than
 552      this, then it's small enough inline
 553
 554 opt_UF_KeennessFactor (1.5)
 555      Factor by which the discounts are multiplied before
 556      subtracting from size
 557
 558 opt_UF_DictDiscount (1)
 559      The discount for each occurrence of a dictionary argument
 560      as an argument of a class method.  Should be pretty small
 561      else big functions may get inlined
 562
 563 opt_UF_FunAppDiscount (6)
 564      Discount for a function argument that is applied.  Quite
 565      large, because if we inline we avoid the higher-order call.
 566
 567 opt_UF_DearOp (4)
 568      The size of a foreign call or not-dupable PrimOp
 569
 570
 571 Note [Function applications]
 572 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 573 In a function application (f a b)
 574
 575   - If 'f' is an argument to the function being analysed,
 576     and there's at least one value arg, record a FunAppDiscount for f
 577
 578   - If the application if a PAP (arity > 2 in this example)
 579     record a *result* discount (because inlining
 580     with "extra" args in the call may mean that we now
 581     get a saturated application)
 582
 583 Code for manipulating sizes
 584
 585 \begin{code}
 586 data ExprSize = TooBig
 587               | SizeIs FastInt          -- Size found
 588                        (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
 589                        FastInt          -- Size to subtract if result is scrutinised
 590                                         -- by a case expression
 591
 592 instance Outputable ExprSize where
 593   ppr TooBig         = ptext (sLit "TooBig")
 594   ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c))
 595
 596 -- subtract the discount before deciding whether to bale out. eg. we
 597 -- want to inline a large constructor application into a selector:
 598 --      tup = (a_1, ..., a_99)
 599 --      x = case tup of ...
 600 --
 601 mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
 602 mkSizeIs max n xs d | (n -# d) ># max = TooBig
 603                     | otherwise       = SizeIs n xs d
 604
 605 maxSize :: ExprSize -> ExprSize -> ExprSize
 606 maxSize TooBig         _                                  = TooBig
 607 maxSize _              TooBig                             = TooBig
 608 maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2  = s1
 609                                               | otherwise = s2
 610
 611 sizeZero :: ExprSize
 612 sizeN :: Int -> ExprSize
 613
 614 sizeZero = SizeIs (_ILIT(0))  emptyBag (_ILIT(0))
 615 sizeN n  = SizeIs (iUnbox n) emptyBag (_ILIT(0))
 616 \end{code}
 617
 618
 619 %************************************************************************
 620 %*                                                                      *
 621 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
 622 %*                                                                      *
 623 %************************************************************************
 624
 625 We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
 626 we ``couldn't possibly use'' on the other side.  Can be overridden w/
 627 flaggery.  Just the same as smallEnoughToInline, except that it has no
 628 actual arguments.
 629
 630 \begin{code}
 631 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
 632 couldBeSmallEnoughToInline threshold rhs
 633   = case sizeExpr (iUnbox threshold) [] body of
 634        TooBig -> False
 635        _      -> True
 636   where
 637     (_, body) = collectBinders rhs
 638
 639 ----------------
 640 smallEnoughToInline :: Unfolding -> Bool
 641 smallEnoughToInline (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}})
 642   = size <= opt_UF_UseThreshold
 643 smallEnoughToInline _
 644   = False
 645
 646 ----------------
 647 certainlyWillInline :: Unfolding -> Bool
 648   -- Sees if the unfolding is pretty certain to inline
 649 certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
 650   = case guidance of
 651       UnfNever      -> False
 652       UnfWhen {}    -> True
 653       UnfIfGoodArgs { ug_size = size}
 654                     -> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
 655
 656 certainlyWillInline _
 657   = False
 658 \end{code}
 659
 660 %************************************************************************
 661 %*                                                                      *
 662 \subsection{callSiteInline}
 663 %*                                                                      *
 664 %************************************************************************
 665
 666 This is the key function.  It decides whether to inline a variable at a call site
 667
 668 callSiteInline is used at call sites, so it is a bit more generous.
 669 It's a very important function that embodies lots of heuristics.
 670 A non-WHNF can be inlined if it doesn't occur inside a lambda,
 671 and occurs exactly once or
 672     occurs once in each branch of a case and is small
 673
 674 If the thing is in WHNF, there's no danger of duplicating work,
 675 so we can inline if it occurs once, or is small
 676
 677 NOTE: we don't want to inline top-level functions that always diverge.
 678 It just makes the code bigger.  Tt turns out that the convenient way to prevent
 679 them inlining is to give them a NOINLINE pragma, which we do in
 680 StrictAnal.addStrictnessInfoToTopId
 681
 682 \begin{code}
 683 callSiteInline :: DynFlags
 684                -> Id                    -- The Id
 685                -> Unfolding             -- Its unfolding (if active)
 686                -> Bool                  -- True if there are are no arguments at all (incl type args)
 687                -> [ArgSummary]          -- One for each value arg; True if it is interesting
 688                -> CallCtxt              -- True <=> continuation is interesting
 689                -> Maybe CoreExpr        -- Unfolding, if any
 690
 691
 692 instance Outputable ArgSummary where
 693   ppr TrivArg    = ptext (sLit "TrivArg")
 694   ppr NonTrivArg = ptext (sLit "NonTrivArg")
 695   ppr ValueArg   = ptext (sLit "ValueArg")
 696
 697 data CallCtxt = BoringCtxt
 698
 699               | ArgCtxt         -- We are somewhere in the argument of a function
 700                         Bool    -- True  <=> we're somewhere in the RHS of function with rules
 701                                 -- False <=> we *are* the argument of a function with non-zero
 702                                 --           arg discount
 703                                 --        OR
 704                                 --           we *are* the RHS of a let  Note [RHS of lets]
 705                                 -- In both cases, be a little keener to inline
 706
 707               | ValAppCtxt      -- We're applied to at least one value arg
 708                                 -- This arises when we have ((f x |> co) y)
 709                                 -- Then the (f x) has argument 'x' but in a ValAppCtxt
 710
 711               | CaseCtxt        -- We're the scrutinee of a case
 712                                 -- that decomposes its scrutinee
 713
 714 instance Outputable CallCtxt where
 715   ppr BoringCtxt      = ptext (sLit "BoringCtxt")
 716   ppr (ArgCtxt rules) = ptext (sLit "ArgCtxt") <+> ppr rules
 717   ppr CaseCtxt        = ptext (sLit "CaseCtxt")
 718   ppr ValAppCtxt      = ptext (sLit "ValAppCtxt")
 719
 720 callSiteInline dflags id unfolding lone_variable arg_infos cont_info
 721   = case unfolding of {
 722         NoUnfolding      -> Nothing ;
 723         OtherCon _       -> Nothing ;
 724         DFunUnfolding {} -> Nothing ;   -- Never unfold a DFun
 725         CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value,
 726                         uf_is_cheap = is_cheap, uf_arity = uf_arity, uf_guidance = guidance } ->
 727                         -- uf_arity will typically be equal to (idArity id),
 728                         -- but may be less for InlineRules
 729     let
 730         n_val_args = length arg_infos
 731         saturated  = n_val_args >= uf_arity
 732
 733         result | yes_or_no = Just unf_template
 734                | otherwise = Nothing
 735
 736         interesting_args = any nonTriv arg_infos
 737                 -- NB: (any nonTriv arg_infos) looks at the
 738                 -- over-saturated args too which is "wrong";
 739                 -- but if over-saturated we inline anyway.
 740
 741                -- some_benefit is used when the RHS is small enough
 742                -- and the call has enough (or too many) value
 743                -- arguments (ie n_val_args >= arity). But there must
 744                -- be *something* interesting about some argument, or the
 745                -- result context, to make it worth inlining
 746         some_benefit
 747            | not saturated = interesting_args   -- Under-saturated
 748                                                 -- Note [Unsaturated applications]
 749            | n_val_args > uf_arity = True       -- Over-saturated
 750            | otherwise = interesting_args       -- Saturated
 751                       || interesting_saturated_call
 752
 753         interesting_saturated_call
 754           = case cont_info of
 755               BoringCtxt -> not is_top && uf_arity > 0          -- Note [Nested functions]
 756               CaseCtxt   -> not (lone_variable && is_value)     -- Note [Lone variables]
 757               ArgCtxt {} -> uf_arity > 0                        -- Note [Inlining in ArgCtxt]
 758               ValAppCtxt -> True                                -- Note [Cast then apply]
 759
 760         (yes_or_no, extra_doc)
 761           = case guidance of
 762               UnfNever -> (False, empty)
 763
 764               UnfWhen unsat_ok boring_ok
 765                  -> (enough_args && (boring_ok || some_benefit), empty )
 766                  where      -- See Note [INLINE for small functions]
 767                    enough_args = saturated || (unsat_ok && n_val_args > 0)
 768
 769               UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
 770                  -> ( is_cheap && some_benefit && small_enough
 771                     , (text "discounted size =" <+> int discounted_size) )
 772                  where
 773                    discounted_size = size - discount
 774                    small_enough = discounted_size <= opt_UF_UseThreshold
 775                    discount = computeDiscount uf_arity arg_discounts
 776                                               res_discount arg_infos cont_info
 777
 778     in
 779     if (dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags) then
 780         pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
 781                  (vcat [text "arg infos" <+> ppr arg_infos,
 782                         text "uf arity" <+> ppr uf_arity,
 783                         text "interesting continuation" <+> ppr cont_info,
 784                         text "some_benefit" <+> ppr some_benefit,
 785                         text "is value:" <+> ppr is_value,
 786                         text "is cheap:" <+> ppr is_cheap,
 787                         text "guidance" <+> ppr guidance,
 788                         extra_doc,
 789                         text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
 790                   result
 791     else
 792     result
 793     }
 794 \end{code}
 795
 796 Note [RHS of lets]
 797 ~~~~~~~~~~~~~~~~~~
 798 Be a tiny bit keener to inline in the RHS of a let, because that might
 799 lead to good thing later
 800      f y = (y,y,y)
 801      g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
 802 We'd inline 'f' if the call was in a case context, and it kind-of-is,
 803 only we can't see it.  So we treat the RHS of a let as not-totally-boring.
 804
 805 Note [Unsaturated applications]
 806 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 807 When a call is not saturated, we *still* inline if one of the
 808 arguments has interesting structure.  That's sometimes very important.
 809 A good example is the Ord instance for Bool in Base:
 810
 811  Rec {
 812     $fOrdBool =GHC.Classes.D:Ord
 813                  @ Bool
 814                  ...
 815                  $cmin_ajX
 816
 817     $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
 818     $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
 819   }
 820
 821 But the defn of GHC.Classes.$dmmin is:
 822
 823   $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
 824     {- Arity: 3, HasNoCafRefs, Strictness: SLL,
 825        Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
 826                    case @ a GHC.Classes.<= @ a $dOrd x y of wild {
 827                      GHC.Bool.False -> y GHC.Bool.True -> x }) -}
 828
 829 We *really* want to inline $dmmin, even though it has arity 3, in
 830 order to unravel the recursion.
 831
 832
 833 Note [Things to watch]
 834 ~~~~~~~~~~~~~~~~~~~~~~
 835 *   { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
 836     Assume x is exported, so not inlined unconditionally.
 837     Then we want x to inline unconditionally; no reason for it
 838     not to, and doing so avoids an indirection.
 839
 840 *   { x = I# 3; ....f x.... }
 841     Make sure that x does not inline unconditionally!
 842     Lest we get extra allocation.
 843
 844 Note [Inlining an InlineRule]
 845 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 846 An InlineRules is used for
 847   (a) programmer INLINE pragmas
 848   (b) inlinings from worker/wrapper
 849
 850 For (a) the RHS may be large, and our contract is that we *only* inline
 851 when the function is applied to all the arguments on the LHS of the
 852 source-code defn.  (The uf_arity in the rule.)
 853
 854 However for worker/wrapper it may be worth inlining even if the
 855 arity is not satisfied (as we do in the CoreUnfolding case) so we don't
 856 require saturation.
 857
 858
 859 Note [Nested functions]
 860 ~~~~~~~~~~~~~~~~~~~~~~~
 861 If a function has a nested defn we also record some-benefit, on the
 862 grounds that we are often able to eliminate the binding, and hence the
 863 allocation, for the function altogether; this is good for join points.
 864 But this only makes sense for *functions*; inlining a constructor
 865 doesn't help allocation unless the result is scrutinised.  UNLESS the
 866 constructor occurs just once, albeit possibly in multiple case
 867 branches.  Then inlining it doesn't increase allocation, but it does
 868 increase the chance that the constructor won't be allocated at all in
 869 the branches that don't use it.
 870
 871 Note [Cast then apply]
 872 ~~~~~~~~~~~~~~~~~~~~~~
 873 Consider
 874    myIndex = __inline_me ( (/\a. <blah>) |> co )
 875    co :: (forall a. a -> a) ~ (forall a. T a)
 876      ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
 877
 878 We need to inline myIndex to unravel this; but the actual call (myIndex a) has
 879 no value arguments.  The ValAppCtxt gives it enough incentive to inline.
 880
 881 Note [Inlining in ArgCtxt]
 882 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 883 The condition (arity > 0) here is very important, because otherwise
 884 we end up inlining top-level stuff into useless places; eg
 885    x = I# 3#
 886    f = \y.  g x
 887 This can make a very big difference: it adds 16% to nofib 'integer' allocs,
 888 and 20% to 'power'.
 889
 890 At one stage I replaced this condition by 'True' (leading to the above
 891 slow-down).  The motivation was test eyeball/inline1.hs; but that seems
 892 to work ok now.
 893
 894 NOTE: arguably, we should inline in ArgCtxt only if the result of the
 895 call is at least CONLIKE.  At least for the cases where we use ArgCtxt
 896 for the RHS of a 'let', we only profit from the inlining if we get a
 897 CONLIKE thing (modulo lets).
 898
 899 Note [Lone variables]
 900 ~~~~~~~~~~~~~~~~~~~~~
 901 The "lone-variable" case is important.  I spent ages messing about
 902 with unsatisfactory varaints, but this is nice.  The idea is that if a
 903 variable appears all alone
 904
 905         as an arg of lazy fn, or rhs    BoringCtxt
 906         as scrutinee of a case          CaseCtxt
 907         as arg of a fn                  ArgCtxt
 908 AND
 909         it is bound to a value
 910
 911 then we should not inline it (unless there is some other reason,
 912 e.g. is is the sole occurrence).  That is what is happening at
 913 the use of 'lone_variable' in 'interesting_saturated_call'.
 914
 915 Why?  At least in the case-scrutinee situation, turning
 916         let x = (a,b) in case x of y -> ...
 917 into
 918         let x = (a,b) in case (a,b) of y -> ...
 919 and thence to
 920         let x = (a,b) in let y = (a,b) in ...
 921 is bad if the binding for x will remain.
 922
 923 Another example: I discovered that strings
 924 were getting inlined straight back into applications of 'error'
 925 because the latter is strict.
 926         s = "foo"
 927         f = \x -> ...(error s)...
 928
 929 Fundamentally such contexts should not encourage inlining because the
 930 context can ``see'' the unfolding of the variable (e.g. case or a
 931 RULE) so there's no gain.  If the thing is bound to a value.
 932
 933 However, watch out:
 934
 935  * Consider this:
 936         foo = _inline_ (\n. [n])
 937         bar = _inline_ (foo 20)
 938         baz = \n. case bar of { (m:_) -> m + n }
 939    Here we really want to inline 'bar' so that we can inline 'foo'
 940    and the whole thing unravels as it should obviously do.  This is
 941    important: in the NDP project, 'bar' generates a closure data
 942    structure rather than a list.
 943
 944    So the non-inlining of lone_variables should only apply if the
 945    unfolding is regarded as cheap; because that is when exprIsConApp_maybe
 946    looks through the unfolding.  Hence the "&& is_cheap" in the
 947    InlineRule branch.
 948
 949  * Even a type application or coercion isn't a lone variable.
 950    Consider
 951         case $fMonadST @ RealWorld of { :DMonad a b c -> c }
 952    We had better inline that sucker!  The case won't see through it.
 953
 954    For now, I'm treating treating a variable applied to types
 955    in a *lazy* context "lone". The motivating example was
 956         f = /\a. \x. BIG
 957         g = /\a. \y.  h (f a)
 958    There's no advantage in inlining f here, and perhaps
 959    a significant disadvantage.  Hence some_val_args in the Stop case
 960
 961 \begin{code}
 962 computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
 963 computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
 964         -- We multiple the raw discounts (args_discount and result_discount)
 965         -- ty opt_UnfoldingKeenessFactor because the former have to do with
 966         --  *size* whereas the discounts imply that there's some extra
 967         --  *efficiency* to be gained (e.g. beta reductions, case reductions)
 968         -- by inlining.
 969
 970   = 1           -- Discount of 1 because the result replaces the call
 971                 -- so we count 1 for the function itself
 972
 973     + length (take n_vals_wanted arg_infos)
 974                -- Discount of (un-scaled) 1 for each arg supplied,
 975                -- because the result replaces the call
 976
 977     + round (opt_UF_KeenessFactor *
 978              fromIntegral (arg_discount + res_discount'))
 979   where
 980     arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
 981
 982     mk_arg_discount _        TrivArg    = 0
 983     mk_arg_discount _        NonTrivArg = 1
 984     mk_arg_discount discount ValueArg   = discount
 985
 986     res_discount' = case cont_info of
 987                         BoringCtxt  -> 0
 988                         CaseCtxt    -> res_discount
 989                         _other      -> 4 `min` res_discount
 990                 -- res_discount can be very large when a function returns
 991                 -- constructors; but we only want to invoke that large discount
 992                 -- when there's a case continuation.
 993                 -- Otherwise we, rather arbitrarily, threshold it.  Yuk.
 994                 -- But we want to aovid inlining large functions that return
 995                 -- constructors into contexts that are simply "interesting"
 996 \end{code}
 997
 998 %************************************************************************
 999 %*                                                                      *
1000         Interesting arguments
1001 %*                                                                      *
1002 %************************************************************************
1003
1004 Note [Interesting arguments]
1005 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1006 An argument is interesting if it deserves a discount for unfoldings
1007 with a discount in that argument position.  The idea is to avoid
1008 unfolding a function that is applied only to variables that have no
1009 unfolding (i.e. they are probably lambda bound): f x y z There is
1010 little point in inlining f here.
1011
1012 Generally, *values* (like (C a b) and (\x.e)) deserve discounts.  But
1013 we must look through lets, eg (let x = e in C a b), because the let will
1014 float, exposing the value, if we inline.  That makes it different to
1015 exprIsHNF.
1016
1017 Before 2009 we said it was interesting if the argument had *any* structure
1018 at all; i.e. (hasSomeUnfolding v).  But does too much inlining; see Trac #3016.
1019
1020 But we don't regard (f x y) as interesting, unless f is unsaturated.
1021 If it's saturated and f hasn't inlined, then it's probably not going
1022 to now!
1023
1024 Note [Conlike is interesting]
1025 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1026 Consider
1027         f d = ...((*) d x y)...
1028         ... f (df d')...
1029 where df is con-like. Then we'd really like to inline 'f' so that the
1030 rule for (*) (df d) can fire.  To do this
1031   a) we give a discount for being an argument of a class-op (eg (*) d)
1032   b) we say that a con-like argument (eg (df d)) is interesting
1033
1034 \begin{code}
1035 data ArgSummary = TrivArg       -- Nothing interesting
1036                 | NonTrivArg    -- Arg has structure
1037                 | ValueArg      -- Arg is a con-app or PAP
1038                                 -- ..or con-like. Note [Conlike is interesting]
1039
1040 interestingArg :: CoreExpr -> ArgSummary
1041 -- See Note [Interesting arguments]
1042 interestingArg e = go e 0
1043   where
1044     -- n is # value args to which the expression is applied
1045     go (Lit {}) _          = ValueArg
1046     go (Var v)  n
1047        | isConLikeId v     = ValueArg   -- Experimenting with 'conlike' rather that
1048                                         --    data constructors here
1049        | idArity v > n     = ValueArg   -- Catches (eg) primops with arity but no unfolding
1050        | n > 0             = NonTrivArg -- Saturated or unknown call
1051        | conlike_unfolding = ValueArg   -- n==0; look for an interesting unfolding
1052                                         -- See Note [Conlike is interesting]
1053        | otherwise         = TrivArg    -- n==0, no useful unfolding
1054        where
1055          conlike_unfolding = isConLikeUnfolding (idUnfolding v)
1056
1057     go (Type _)          _ = TrivArg
1058     go (App fn (Type _)) n = go fn n
1059     go (App fn _)        n = go fn (n+1)
1060     go (Note _ a)        n = go a n
1061     go (Cast e _)        n = go e n
1062     go (Lam v e)         n
1063        | isTyVar v         = go e n
1064        | n>0               = go e (n-1)
1065        | otherwise         = ValueArg
1066     go (Let _ e)         n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg }
1067     go (Case {})         _ = NonTrivArg
1068
1069 nonTriv ::  ArgSummary -> Bool
1070 nonTriv TrivArg = False
1071 nonTriv _       = True
1072 \end{code}
1073
1074 %************************************************************************
1075 %*                                                                      *
1076          exprIsConApp_maybe
1077 %*                                                                      *
1078 %************************************************************************
1079
1080 Note [exprIsConApp_maybe]
1081 ~~~~~~~~~~~~~~~~~~~~~~~~~
1082 exprIsConApp_maybe is a very important function.  There are two principal
1083 uses:
1084   * case e of { .... }
1085   * cls_op e, where cls_op is a class operation
1086
1087 In both cases you want to know if e is of form (C e1..en) where C is
1088 a data constructor.
1089
1090 However e might not *look* as if
1091
1092 \begin{code}
1093 -- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is
1094 -- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@,
1095 -- where t1..tk are the *universally-qantified* type args of 'dc'
1096 exprIsConApp_maybe :: IdUnfoldingFun -> CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
1097
1098 exprIsConApp_maybe id_unf (Note _ expr)
1099   = exprIsConApp_maybe id_unf expr
1100         -- We ignore all notes.  For example,
1101         --      case _scc_ "foo" (C a b) of
1102         --                      C a b -> e
1103         -- should be optimised away, but it will be only if we look
1104         -- through the SCC note.
1105
1106 exprIsConApp_maybe id_unf (Cast expr co)
1107   =     -- Here we do the KPush reduction rule as described in the FC paper
1108         -- The transformation applies iff we have
1109         --      (C e1 ... en) `cast` co
1110         -- where co :: (T t1 .. tn) ~ to_ty
1111         -- The left-hand one must be a T, because exprIsConApp returned True
1112         -- but the right-hand one might not be.  (Though it usually will.)
1113
1114     case exprIsConApp_maybe id_unf expr of {
1115         Nothing                          -> Nothing ;
1116         Just (dc, _dc_univ_args, dc_args) ->
1117
1118     let (_from_ty, to_ty) = coercionKind co
1119         dc_tc = dataConTyCon dc
1120     in
1121     case splitTyConApp_maybe to_ty of {
1122         Nothing -> Nothing ;
1123         Just (to_tc, to_tc_arg_tys)
1124                 | dc_tc /= to_tc -> Nothing
1125                 -- These two Nothing cases are possible; we might see
1126                 --      (C x y) `cast` (g :: T a ~ S [a]),
1127                 -- where S is a type function.  In fact, exprIsConApp
1128                 -- will probably not be called in such circumstances,
1129                 -- but there't nothing wrong with it
1130
1131                 | otherwise  ->
1132     let
1133         tc_arity       = tyConArity dc_tc
1134         dc_univ_tyvars = dataConUnivTyVars dc
1135         dc_ex_tyvars   = dataConExTyVars dc
1136         arg_tys        = dataConRepArgTys dc
1137
1138         dc_eqs :: [(Type,Type)]   -- All equalities from the DataCon
1139         dc_eqs = [(mkTyVarTy tv, ty)   | (tv,ty) <- dataConEqSpec dc] ++
1140                  [getEqPredTys eq_pred | eq_pred <- dataConEqTheta dc]
1141
1142         (ex_args, rest1)    = splitAtList dc_ex_tyvars dc_args
1143         (co_args, val_args) = splitAtList dc_eqs rest1
1144
1145         -- Make the "theta" from Fig 3 of the paper
1146         gammas = decomposeCo tc_arity co
1147         theta  = zipOpenTvSubst (dc_univ_tyvars ++ dc_ex_tyvars)
1148                                 (gammas         ++ stripTypeArgs ex_args)
1149
1150           -- Cast the existential coercion arguments
1151         cast_co (ty1, ty2) (Type co)
1152           = Type $ mkSymCoercion (substTy theta ty1)
1153                    `mkTransCoercion` co
1154                    `mkTransCoercion` (substTy theta ty2)
1155         cast_co _ other_arg = pprPanic "cast_co" (ppr other_arg)
1156         new_co_args = zipWith cast_co dc_eqs co_args
1157
1158           -- Cast the value arguments (which include dictionaries)
1159         new_val_args = zipWith cast_arg arg_tys val_args
1160         cast_arg arg_ty arg = mkCoerce (substTy theta arg_ty) arg
1161     in
1162 #ifdef DEBUG
1163     let dump_doc = vcat [ppr dc,      ppr dc_univ_tyvars, ppr dc_ex_tyvars,
1164                          ppr arg_tys, ppr dc_args,        ppr _dc_univ_args,
1165                          ppr ex_args, ppr val_args]
1166     in
1167     ASSERT2( coreEqType _from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
1168     ASSERT2( all isTypeArg (ex_args ++ co_args), dump_doc )
1169     ASSERT2( equalLength val_args arg_tys, dump_doc )
1170 #endif
1171
1172     Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args)
1173     }}
1174
1175 exprIsConApp_maybe id_unf expr
1176   = analyse expr []
1177   where
1178     analyse (App fun arg) args = analyse fun (arg:args)
1179     analyse fun@(Lam {})  args = beta fun [] args
1180
1181     analyse (Var fun) args
1182         | Just con <- isDataConWorkId_maybe fun
1183         , is_saturated
1184         , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args
1185         = Just (con, stripTypeArgs univ_ty_args, rest_args)
1186
1187         -- Look through dictionary functions; see Note [Unfolding DFuns]
1188         | DFunUnfolding con ops <- unfolding
1189         , is_saturated
1190         , let (dfun_tvs, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun)
1191               subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args))
1192         = Just (con, substTys subst dfun_res_tys,
1193                      [mkApps op args | op <- ops])
1194
1195         -- Look through unfoldings, but only cheap ones, because
1196         -- we are effectively duplicating the unfolding
1197         | Just rhs <- expandUnfolding_maybe unfolding
1198         = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
1199           analyse rhs args
1200         where
1201           is_saturated = count isValArg args == idArity fun
1202           unfolding = id_unf fun
1203
1204     analyse _ _ = Nothing
1205
1206     -----------
1207     beta (Lam v body) pairs (arg : args)
1208         | isTypeArg arg
1209         = beta body ((v,arg):pairs) args
1210
1211     beta (Lam {}) _ _    -- Un-saturated, or not a type lambda
1212         = Nothing
1213
1214     beta fun pairs args
1215         = case analyse (substExpr (text "subst-expr-is-con-app") subst fun) args of
1216             Nothing  -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $
1217                         Nothing
1218             Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $
1219                         Just ans
1220         where
1221           subst = mkOpenSubst (mkInScopeSet (exprFreeVars fun)) pairs
1222           -- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args]
1223
1224
1225 stripTypeArgs :: [CoreExpr] -> [Type]
1226 stripTypeArgs args = ASSERT2( all isTypeArg args, ppr args )
1227                      [ty | Type ty <- args]
1228 \end{code}
1229
1230 Note [Unfolding DFuns]
1231 ~~~~~~~~~~~~~~~~~~~~~~
1232 DFuns look like
1233
1234   df :: forall a b. (Eq a, Eq b) -> Eq (a,b)
1235   df a b d_a d_b = MkEqD (a,b) ($c1 a b d_a d_b)
1236                                ($c2 a b d_a d_b)
1237
1238 So to split it up we just need to apply the ops $c1, $c2 etc
1239 to the very same args as the dfun.  It takes a little more work
1240 to compute the type arguments to the dictionary constructor.
1241