compiler/coreSyn/CoreUnfold.lhs

   1 calcU%
   2 % (c) The University of Glasgow 2006
   3 % (c) The AQUA Project, Glasgow University, 1994-1998
   4 %
   5
   6 Core-syntax unfoldings
   7
   8 Unfoldings (which can travel across module boundaries) are in Core
   9 syntax (namely @CoreExpr@s).
  10
  11 The type @Unfolding@ sits ``above'' simply-Core-expressions
  12 unfoldings, capturing ``higher-level'' things we know about a binding,
  13 usually things that the simplifier found out (e.g., ``it's a
  14 literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
  15 find, unsurprisingly, a Core expression.
  16
  17 \begin{code}
  18 module CoreUnfold (
  19         Unfolding, UnfoldingGuidance,   -- Abstract types
  20
  21         noUnfolding, mkImplicitUnfolding,
  22         mkTopUnfolding, mkUnfolding, mkCoreUnfolding,
  23         mkInlineRule, mkWwInlineRule,
  24         mkCompulsoryUnfolding, mkDFunUnfolding,
  25
  26         interestingArg, ArgSummary(..),
  27
  28         couldBeSmallEnoughToInline,
  29         certainlyWillInline, smallEnoughToInline,
  30
  31         callSiteInline, CallCtxt(..),
  32
  33         exprIsConApp_maybe
  34
  35     ) where
  36
  37 #include "HsVersions.h"
  38
  39 import StaticFlags
  40 import DynFlags
  41 import CoreSyn
  42 import PprCore          ()      -- Instances
  43 import OccurAnal
  44 import CoreSubst hiding( substTy )
  45 import CoreFVs         ( exprFreeVars )
  46 import CoreUtils
  47 import Id
  48 import DataCon
  49 import TyCon
  50 import Literal
  51 import PrimOp
  52 import IdInfo
  53 import BasicTypes       ( Arity )
  54 import TcType           ( tcSplitDFunTy )
  55 import Type
  56 import Coercion
  57 import PrelNames
  58 import VarEnv           ( mkInScopeSet )
  59 import Bag
  60 import Util
  61 import FastTypes
  62 import FastString
  63 import Outputable
  64
  65 \end{code}
  66
  67
  68 %************************************************************************
  69 %*                                                                      *
  70 \subsection{Making unfoldings}
  71 %*                                                                      *
  72 %************************************************************************
  73
  74 \begin{code}
  75 mkTopUnfolding :: CoreExpr -> Unfolding
  76 mkTopUnfolding expr = mkUnfolding True {- Top level -} expr
  77
  78 mkImplicitUnfolding :: CoreExpr -> Unfolding
  79 -- For implicit Ids, do a tiny bit of optimising first
  80 mkImplicitUnfolding expr = mkTopUnfolding (simpleOptExpr expr)
  81
  82 -- Note [Top-level flag on inline rules]
  83 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  84 -- Slight hack: note that mk_inline_rules conservatively sets the
  85 -- top-level flag to True.  It gets set more accurately by the simplifier
  86 -- Simplify.simplUnfolding.
  87
  88 mkUnfolding :: Bool -> CoreExpr -> Unfolding
  89 mkUnfolding top_lvl expr
  90   = CoreUnfolding { uf_tmpl       = occurAnalyseExpr expr,
  91                     uf_src        = InlineRhs,
  92                     uf_arity      = arity,
  93                     uf_is_top     = top_lvl,
  94                     uf_is_value   = exprIsHNF        expr,
  95                     uf_is_conlike = exprIsConLike    expr,
  96                     uf_expandable = exprIsExpandable expr,
  97                     uf_is_cheap   = is_cheap,
  98                     uf_guidance   = guidance }
  99   where
 100     is_cheap = exprIsCheap expr
 101     (arity, guidance) = calcUnfoldingGuidance is_cheap opt_UF_CreationThreshold expr
 102         -- Sometimes during simplification, there's a large let-bound thing
 103         -- which has been substituted, and so is now dead; so 'expr' contains
 104         -- two copies of the thing while the occurrence-analysed expression doesn't
 105         -- Nevertheless, we *don't* occ-analyse before computing the size because the
 106         -- size computation bales out after a while, whereas occurrence analysis does not.
 107         --
 108         -- This can occasionally mean that the guidance is very pessimistic;
 109         -- it gets fixed up next round.  And it should be rare, because large
 110         -- let-bound things that are dead are usually caught by preInlineUnconditionally
 111
 112 mkCoreUnfolding :: Bool -> UnfoldingSource -> CoreExpr
 113                 -> Arity -> UnfoldingGuidance -> Unfolding
 114 -- Occurrence-analyses the expression before capturing it
 115 mkCoreUnfolding top_lvl src expr arity guidance
 116   = CoreUnfolding { uf_tmpl       = occurAnalyseExpr expr,
 117                     uf_src        = src,
 118                     uf_arity      = arity,
 119                     uf_is_top     = top_lvl,
 120                     uf_is_value   = exprIsHNF        expr,
 121                     uf_is_conlike = exprIsConLike    expr,
 122                     uf_is_cheap   = exprIsCheap      expr,
 123                     uf_expandable = exprIsExpandable expr,
 124                     uf_guidance   = guidance }
 125
 126 mkDFunUnfolding :: DataCon -> [Id] -> Unfolding
 127 mkDFunUnfolding con ops = DFunUnfolding con (map Var ops)
 128
 129 mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
 130 mkWwInlineRule id expr arity
 131   = mkCoreUnfolding True (InlineWrapper id)
 132                    (simpleOptExpr expr) arity
 133                    (UnfWhen unSaturatedOk boringCxtNotOk)
 134
 135 mkCompulsoryUnfolding :: CoreExpr -> Unfolding
 136 mkCompulsoryUnfolding expr         -- Used for things that absolutely must be unfolded
 137   = mkCoreUnfolding True InlineCompulsory
 138                     expr 0    -- Arity of unfolding doesn't matter
 139                     (UnfWhen unSaturatedOk boringCxtOk)
 140
 141 mkInlineRule :: Bool -> CoreExpr -> Arity -> Unfolding
 142 mkInlineRule unsat_ok expr arity
 143   = mkCoreUnfolding True InlineRule      -- Note [Top-level flag on inline rules]
 144                     expr' arity
 145                     (UnfWhen unsat_ok boring_ok)
 146   where
 147     expr' = simpleOptExpr expr
 148     boring_ok = case calcUnfoldingGuidance True    -- Treat as cheap
 149                                            (arity+1) expr' of
 150                   (_, UnfWhen _ boring_ok) -> boring_ok
 151                   _other                   -> boringCxtNotOk
 152      -- See Note [INLINE for small functions]
 153 \end{code}
 154
 155
 156 %************************************************************************
 157 %*                                                                      *
 158 \subsection{The UnfoldingGuidance type}
 159 %*                                                                      *
 160 %************************************************************************
 161
 162 \begin{code}
 163 calcUnfoldingGuidance
 164         :: Bool         -- True <=> the rhs is cheap, or we want to treat it
 165                         --          as cheap (INLINE things)
 166         -> Int          -- Bomb out if size gets bigger than this
 167         -> CoreExpr     -- Expression to look at
 168         -> (Arity, UnfoldingGuidance)
 169 calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
 170   = case collectBinders expr of { (bndrs, body) ->
 171     let
 172         val_bndrs   = filter isId bndrs
 173         n_val_bndrs = length val_bndrs
 174
 175         guidance
 176           = case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
 177               TooBig -> UnfNever
 178               SizeIs size cased_bndrs scrut_discount
 179                 | uncondInline n_val_bndrs (iBox size) && expr_is_cheap
 180                 -> UnfWhen needSaturated boringCxtOk
 181
 182                 | otherwise
 183                 -> UnfIfGoodArgs { ug_args  = map (discount cased_bndrs) val_bndrs
 184                                  , ug_size  = iBox size
 185                                  , ug_res   = iBox scrut_discount }
 186
 187         discount cbs bndr
 188            = foldlBag (\acc (b',n) -> if bndr==b' then acc+n else acc)
 189                       0 cbs
 190     in
 191     (n_val_bndrs, guidance) }
 192 \end{code}
 193
 194 Note [Computing the size of an expression]
 195 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 196 The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
 197 heuristics right has taken a long time.  Here's the basic strategy:
 198
 199     * Variables, literals: 0
 200       (Exception for string literals, see litSize.)
 201
 202     * Function applications (f e1 .. en): 1 + #value args
 203
 204     * Constructor applications: 1, regardless of #args
 205
 206     * Let(rec): 1 + size of components
 207
 208     * Note, cast: 0
 209
 210 Examples
 211
 212   Size  Term
 213   --------------
 214     0     42#
 215     0     x
 216     0     True
 217     2     f x
 218     1     Just x
 219     4     f (g x)
 220
 221 Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
 222 a function call to account for.  Notice also that constructor applications
 223 are very cheap, because exposing them to a caller is so valuable.
 224
 225 Note [Unconditional inlining]
 226 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 227 We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
 228 than the thing it's replacing.  Notice that
 229       (f x) --> (g 3)             -- YES, unconditionally
 230       (f x) --> x : []            -- YES, *even though* there are two
 231                                   --      arguments to the cons
 232       x     --> g 3               -- NO
 233       x     --> Just v            -- NO
 234
 235 It's very important not to unconditionally replace a variable by
 236 a non-atomic term.
 237
 238 \begin{code}
 239 uncondInline :: Arity -> Int -> Bool
 240 -- Inline unconditionally if there no size increase
 241 -- Size of call is arity (+1 for the function)
 242 -- See Note [Unconditional inlining]
 243 uncondInline arity size
 244   | arity == 0 = size == 0
 245   | otherwise  = size <= arity + 1
 246 \end{code}
 247
 248
 249 \begin{code}
 250 sizeExpr :: FastInt         -- Bomb out if it gets bigger than this
 251          -> [Id]            -- Arguments; we're interested in which of these
 252                             -- get case'd
 253          -> CoreExpr
 254          -> ExprSize
 255
 256 -- Note [Computing the size of an expression]
 257
 258 sizeExpr bOMB_OUT_SIZE top_args expr
 259   = size_up expr
 260   where
 261     size_up (Cast e _) = size_up e
 262     size_up (Note _ e) = size_up e
 263     size_up (Type _)   = sizeZero           -- Types cost nothing
 264     size_up (Lit lit)  = sizeN (litSize lit)
 265     size_up (Var f)    = size_up_call f []  -- Make sure we get constructor
 266                                             -- discounts even on nullary constructors
 267
 268     size_up (App fun (Type _)) = size_up fun
 269     size_up (App fun arg)      = size_up_app fun [arg]
 270                                   `addSize` nukeScrutDiscount (size_up arg)
 271
 272     size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 1)
 273                       | otherwise = size_up e
 274
 275     size_up (Let (NonRec binder rhs) body)
 276       = nukeScrutDiscount (size_up rhs)         `addSize`
 277         size_up body                            `addSizeN`
 278         (if isUnLiftedType (idType binder) then 0 else 1)
 279                 -- For the allocation
 280                 -- If the binder has an unlifted type there is no allocation
 281
 282     size_up (Let (Rec pairs) body)
 283       = nukeScrutDiscount rhs_size              `addSize`
 284         size_up body                            `addSizeN`
 285         length pairs            -- For the allocation
 286       where
 287         rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
 288
 289     size_up (Case (Var v) _ _ alts)
 290         | v `elem` top_args             -- We are scrutinising an argument variable
 291         = alts_size (foldr1 addSize alt_sizes)  -- The 1 is for the case itself
 292                     (foldr1 maxSize alt_sizes)
 293                 -- Good to inline if an arg is scrutinised, because
 294                 -- that may eliminate allocation in the caller
 295                 -- And it eliminates the case itself
 296         where
 297           alt_sizes = map size_up_alt alts
 298
 299                 -- alts_size tries to compute a good discount for
 300                 -- the case when we are scrutinising an argument variable
 301           alts_size (SizeIs tot tot_disc _tot_scrut)           -- Size of all alternatives
 302                     (SizeIs max _max_disc  max_scrut)           -- Size of biggest alternative
 303                 = SizeIs tot (unitBag (v, iBox (_ILIT(2) +# tot -# max)) `unionBags` tot_disc) max_scrut
 304                         -- If the variable is known, we produce a discount that
 305                         -- will take us back to 'max', the size of the largest alternative
 306                         -- The 1+ is a little discount for reduced allocation in the caller
 307                         --
 308                         -- Notice though, that we return tot_disc, the total discount from
 309                         -- all branches.  I think that's right.
 310
 311           alts_size tot_size _ = tot_size
 312
 313     size_up (Case e _ _ alts) = foldr (addSize . size_up_alt)
 314                                       (nukeScrutDiscount (size_up e))
 315                                       alts
 316                 -- We don't charge for the case itself
 317                 -- It's a strict thing, and the price of the call
 318                 -- is paid by scrut.  Also consider
 319                 --      case f x of DEFAULT -> e
 320                 -- This is just ';'!  Don't charge for it.
 321                 --
 322                 -- Moreover, we charge one per alternative.
 323
 324     ------------
 325     -- size_up_app is used when there's ONE OR MORE value args
 326     size_up_app (App fun arg) args
 327         | isTypeArg arg            = size_up_app fun args
 328         | otherwise                = size_up_app fun (arg:args)
 329                                      `addSize` nukeScrutDiscount (size_up arg)
 330     size_up_app (Var fun)     args = size_up_call fun args
 331     size_up_app other         args = size_up other `addSizeN` length args
 332
 333     ------------
 334     size_up_call :: Id -> [CoreExpr] -> ExprSize
 335     size_up_call fun val_args
 336        = case idDetails fun of
 337            FCallId _        -> sizeN opt_UF_DearOp
 338            DataConWorkId dc -> conSize    dc (length val_args)
 339            PrimOpId op      -> primOpSize op (length val_args)
 340            ClassOpId _      -> classOpSize top_args val_args
 341            _                -> funSize top_args fun (length val_args)
 342
 343     ------------
 344     size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 1
 345         -- Don't charge for args, so that wrappers look cheap
 346         -- (See comments about wrappers with Case)
 347         --
 348         -- IMPORATANT: *do* charge 1 for the alternative, else we
 349         -- find that giant case nests are treated as practically free
 350         -- A good example is Foreign.C.Error.errrnoToIOError
 351
 352     ------------
 353         -- These addSize things have to be here because
 354         -- I don't want to give them bOMB_OUT_SIZE as an argument
 355     addSizeN TooBig          _  = TooBig
 356     addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
 357
 358     addSize TooBig            _                 = TooBig
 359     addSize _                 TooBig            = TooBig
 360     addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
 361         = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2)
 362 \end{code}
 363
 364 \begin{code}
 365 -- | Finds a nominal size of a string literal.
 366 litSize :: Literal -> Int
 367 -- Used by CoreUnfold.sizeExpr
 368 litSize (MachStr str) = 1 + ((lengthFS str + 3) `div` 4)
 369         -- If size could be 0 then @f "x"@ might be too small
 370         -- [Sept03: make literal strings a bit bigger to avoid fruitless
 371         --  duplication of little strings]
 372 litSize _other = 0    -- Must match size of nullary constructors
 373                       -- Key point: if  x |-> 4, then x must inline unconditionally
 374                       --            (eg via case binding)
 375
 376 classOpSize :: [Id] -> [CoreExpr] -> ExprSize
 377 -- See Note [Conlike is interesting]
 378 classOpSize _ []
 379   = sizeZero
 380 classOpSize top_args (arg1 : other_args)
 381   = SizeIs (iUnbox size) arg_discount (_ILIT(0))
 382   where
 383     size = 2 + length other_args
 384     -- If the class op is scrutinising a lambda bound dictionary then
 385     -- give it a discount, to encourage the inlining of this function
 386     -- The actual discount is rather arbitrarily chosen
 387     arg_discount = case arg1 of
 388                      Var dict | dict `elem` top_args
 389                               -> unitBag (dict, opt_UF_DictDiscount)
 390                      _other   -> emptyBag
 391
 392 funSize :: [Id] -> Id -> Int -> ExprSize
 393 -- Size for functions that are not constructors or primops
 394 -- Note [Function applications]
 395 funSize top_args fun n_val_args
 396   | fun `hasKey` buildIdKey   = buildSize
 397   | fun `hasKey` augmentIdKey = augmentSize
 398   | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount)
 399   where
 400     some_val_args = n_val_args > 0
 401
 402     arg_discount | some_val_args && fun `elem` top_args
 403                  = unitBag (fun, opt_UF_FunAppDiscount)
 404                  | otherwise = emptyBag
 405         -- If the function is an argument and is applied
 406         -- to some values, give it an arg-discount
 407
 408     res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount
 409                  | otherwise                = 0
 410         -- If the function is partially applied, show a result discount
 411
 412     size | some_val_args = 1 + n_val_args
 413          | otherwise     = 0
 414         -- The 1+ is for the function itself
 415         -- Add 1 for each non-trivial arg;
 416         -- the allocation cost, as in let(rec)
 417
 418
 419 conSize :: DataCon -> Int -> ExprSize
 420 conSize dc n_val_args
 421   | n_val_args == 0      = SizeIs (_ILIT(0)) emptyBag (_ILIT(1))        -- Like variables
 422   | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
 423   | otherwise            = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
 424         -- Treat a constructors application as size 1, regardless of how
 425         -- many arguments it has; we are keen to expose them
 426         -- (and we charge separately for their args).  We can't treat
 427         -- them as size zero, else we find that (Just x) has size 0,
 428         -- which is the same as a lone variable; and hence 'v' will
 429         -- always be replaced by (Just x), where v is bound to Just x.
 430         --
 431         -- However, unboxed tuples count as size zero
 432         -- I found occasions where we had
 433         --      f x y z = case op# x y z of { s -> (# s, () #) }
 434         -- and f wasn't getting inlined
 435
 436 primOpSize :: PrimOp -> Int -> ExprSize
 437 primOpSize op n_val_args
 438  | not (primOpIsDupable op) = sizeN opt_UF_DearOp
 439  | not (primOpOutOfLine op) = sizeN 1
 440         -- Be very keen to inline simple primops.
 441         -- We give a discount of 1 for each arg so that (op# x y z) costs 2.
 442         -- We can't make it cost 1, else we'll inline let v = (op# x y z)
 443         -- at every use of v, which is excessive.
 444         --
 445         -- A good example is:
 446         --      let x = +# p q in C {x}
 447         -- Even though x get's an occurrence of 'many', its RHS looks cheap,
 448         -- and there's a good chance it'll get inlined back into C's RHS. Urgh!
 449
 450  | otherwise = sizeN n_val_args
 451
 452
 453 buildSize :: ExprSize
 454 buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
 455         -- We really want to inline applications of build
 456         -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
 457         -- Indeed, we should add a result_discount becuause build is
 458         -- very like a constructor.  We don't bother to check that the
 459         -- build is saturated (it usually is).  The "-2" discounts for the \c n,
 460         -- The "4" is rather arbitrary.
 461
 462 augmentSize :: ExprSize
 463 augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
 464         -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
 465         -- e plus ys. The -2 accounts for the \cn
 466
 467 nukeScrutDiscount :: ExprSize -> ExprSize
 468 nukeScrutDiscount (SizeIs n vs _) = SizeIs n vs (_ILIT(0))
 469 nukeScrutDiscount TooBig          = TooBig
 470
 471 -- When we return a lambda, give a discount if it's used (applied)
 472 lamScrutDiscount :: ExprSize -> ExprSize
 473 lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount)
 474 lamScrutDiscount TooBig          = TooBig
 475 \end{code}
 476
 477 Note [Discounts and thresholds]
 478 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 479 Constants for discounts and thesholds are defined in main/StaticFlags,
 480 all of form opt_UF_xxxx.   They are:
 481
 482 opt_UF_CreationThreshold (45)
 483      At a definition site, if the unfolding is bigger than this, we
 484      may discard it altogether
 485
 486 opt_UF_UseThreshold (6)
 487      At a call site, if the unfolding, less discounts, is smaller than
 488      this, then it's small enough inline
 489
 490 opt_UF_KeennessFactor (1.5)
 491      Factor by which the discounts are multiplied before
 492      subtracting from size
 493
 494 opt_UF_DictDiscount (1)
 495      The discount for each occurrence of a dictionary argument
 496      as an argument of a class method.  Should be pretty small
 497      else big functions may get inlined
 498
 499 opt_UF_FunAppDiscount (6)
 500      Discount for a function argument that is applied.  Quite
 501      large, because if we inline we avoid the higher-order call.
 502
 503 opt_UF_DearOp (4)
 504      The size of a foreign call or not-dupable PrimOp
 505
 506
 507 Note [Function applications]
 508 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 509 In a function application (f a b)
 510
 511   - If 'f' is an argument to the function being analysed,
 512     and there's at least one value arg, record a FunAppDiscount for f
 513
 514   - If the application if a PAP (arity > 2 in this example)
 515     record a *result* discount (because inlining
 516     with "extra" args in the call may mean that we now
 517     get a saturated application)
 518
 519 Code for manipulating sizes
 520
 521 \begin{code}
 522 data ExprSize = TooBig
 523               | SizeIs FastInt          -- Size found
 524                        (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
 525                        FastInt          -- Size to subtract if result is scrutinised
 526                                         -- by a case expression
 527
 528 instance Outputable ExprSize where
 529   ppr TooBig         = ptext (sLit "TooBig")
 530   ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c))
 531
 532 -- subtract the discount before deciding whether to bale out. eg. we
 533 -- want to inline a large constructor application into a selector:
 534 --      tup = (a_1, ..., a_99)
 535 --      x = case tup of ...
 536 --
 537 mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
 538 mkSizeIs max n xs d | (n -# d) ># max = TooBig
 539                     | otherwise       = SizeIs n xs d
 540
 541 maxSize :: ExprSize -> ExprSize -> ExprSize
 542 maxSize TooBig         _                                  = TooBig
 543 maxSize _              TooBig                             = TooBig
 544 maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2  = s1
 545                                               | otherwise = s2
 546
 547 sizeZero :: ExprSize
 548 sizeN :: Int -> ExprSize
 549
 550 sizeZero = SizeIs (_ILIT(0))  emptyBag (_ILIT(0))
 551 sizeN n  = SizeIs (iUnbox n) emptyBag (_ILIT(0))
 552 \end{code}
 553
 554
 555 %************************************************************************
 556 %*                                                                      *
 557 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
 558 %*                                                                      *
 559 %************************************************************************
 560
 561 We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
 562 we ``couldn't possibly use'' on the other side.  Can be overridden w/
 563 flaggery.  Just the same as smallEnoughToInline, except that it has no
 564 actual arguments.
 565
 566 \begin{code}
 567 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
 568 couldBeSmallEnoughToInline threshold rhs
 569   = case calcUnfoldingGuidance False threshold rhs of
 570        (_, UnfNever) -> False
 571        _             -> True
 572
 573 ----------------
 574 smallEnoughToInline :: Unfolding -> Bool
 575 smallEnoughToInline (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}})
 576   = size <= opt_UF_UseThreshold
 577 smallEnoughToInline _
 578   = False
 579
 580 ----------------
 581 certainlyWillInline :: Unfolding -> Bool
 582   -- Sees if the unfolding is pretty certain to inline
 583 certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
 584   = case guidance of
 585       UnfNever      -> False
 586       UnfWhen {}    -> True
 587       UnfIfGoodArgs { ug_size = size}
 588                     -> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
 589
 590 certainlyWillInline _
 591   = False
 592 \end{code}
 593
 594 %************************************************************************
 595 %*                                                                      *
 596 \subsection{callSiteInline}
 597 %*                                                                      *
 598 %************************************************************************
 599
 600 This is the key function.  It decides whether to inline a variable at a call site
 601
 602 callSiteInline is used at call sites, so it is a bit more generous.
 603 It's a very important function that embodies lots of heuristics.
 604 A non-WHNF can be inlined if it doesn't occur inside a lambda,
 605 and occurs exactly once or
 606     occurs once in each branch of a case and is small
 607
 608 If the thing is in WHNF, there's no danger of duplicating work,
 609 so we can inline if it occurs once, or is small
 610
 611 NOTE: we don't want to inline top-level functions that always diverge.
 612 It just makes the code bigger.  Tt turns out that the convenient way to prevent
 613 them inlining is to give them a NOINLINE pragma, which we do in
 614 StrictAnal.addStrictnessInfoToTopId
 615
 616 \begin{code}
 617 callSiteInline :: DynFlags
 618                -> Id                    -- The Id
 619                -> Unfolding             -- Its unfolding (if active)
 620                -> Bool                  -- True if there are are no arguments at all (incl type args)
 621                -> [ArgSummary]          -- One for each value arg; True if it is interesting
 622                -> CallCtxt              -- True <=> continuation is interesting
 623                -> Maybe CoreExpr        -- Unfolding, if any
 624
 625
 626 instance Outputable ArgSummary where
 627   ppr TrivArg    = ptext (sLit "TrivArg")
 628   ppr NonTrivArg = ptext (sLit "NonTrivArg")
 629   ppr ValueArg   = ptext (sLit "ValueArg")
 630
 631 data CallCtxt = BoringCtxt
 632
 633               | ArgCtxt         -- We are somewhere in the argument of a function
 634                         Bool    -- True  <=> we're somewhere in the RHS of function with rules
 635                                 -- False <=> we *are* the argument of a function with non-zero
 636                                 --           arg discount
 637                                 --        OR
 638                                 --           we *are* the RHS of a let  Note [RHS of lets]
 639                                 -- In both cases, be a little keener to inline
 640
 641               | ValAppCtxt      -- We're applied to at least one value arg
 642                                 -- This arises when we have ((f x |> co) y)
 643                                 -- Then the (f x) has argument 'x' but in a ValAppCtxt
 644
 645               | CaseCtxt        -- We're the scrutinee of a case
 646                                 -- that decomposes its scrutinee
 647
 648 instance Outputable CallCtxt where
 649   ppr BoringCtxt      = ptext (sLit "BoringCtxt")
 650   ppr (ArgCtxt rules) = ptext (sLit "ArgCtxt") <+> ppr rules
 651   ppr CaseCtxt        = ptext (sLit "CaseCtxt")
 652   ppr ValAppCtxt      = ptext (sLit "ValAppCtxt")
 653
 654 callSiteInline dflags id unfolding lone_variable arg_infos cont_info
 655   = case unfolding of {
 656         NoUnfolding      -> Nothing ;
 657         OtherCon _       -> Nothing ;
 658         DFunUnfolding {} -> Nothing ;   -- Never unfold a DFun
 659         CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value,
 660                         uf_is_cheap = is_cheap, uf_arity = uf_arity, uf_guidance = guidance } ->
 661                         -- uf_arity will typically be equal to (idArity id),
 662                         -- but may be less for InlineRules
 663     let
 664         n_val_args = length arg_infos
 665         saturated  = n_val_args >= uf_arity
 666
 667         result | yes_or_no = Just unf_template
 668                | otherwise = Nothing
 669
 670         interesting_args = any nonTriv arg_infos
 671                 -- NB: (any nonTriv arg_infos) looks at the
 672                 -- over-saturated args too which is "wrong";
 673                 -- but if over-saturated we inline anyway.
 674
 675                -- some_benefit is used when the RHS is small enough
 676                -- and the call has enough (or too many) value
 677                -- arguments (ie n_val_args >= arity). But there must
 678                -- be *something* interesting about some argument, or the
 679                -- result context, to make it worth inlining
 680         some_benefit
 681            | not saturated = interesting_args   -- Under-saturated
 682                                                 -- Note [Unsaturated applications]
 683            | n_val_args > uf_arity = True       -- Over-saturated
 684            | otherwise = interesting_args       -- Saturated
 685                       || interesting_saturated_call
 686
 687         interesting_saturated_call
 688           = case cont_info of
 689               BoringCtxt -> not is_top && uf_arity > 0          -- Note [Nested functions]
 690               CaseCtxt   -> not (lone_variable && is_value)     -- Note [Lone variables]
 691               ArgCtxt {} -> uf_arity > 0                        -- Note [Inlining in ArgCtxt]
 692               ValAppCtxt -> True                                -- Note [Cast then apply]
 693
 694         (yes_or_no, extra_doc)
 695           = case guidance of
 696               UnfNever -> (False, empty)
 697
 698               UnfWhen unsat_ok boring_ok -> ( (unsat_ok  || saturated)
 699                                            && (boring_ok || some_benefit)
 700                                             , empty )
 701                    -- For the boring_ok part see Note [INLINE for small functions]
 702
 703               UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
 704                  -> ( is_cheap && some_benefit && small_enough
 705                     , (text "discounted size =" <+> int discounted_size) )
 706                  where
 707                    discounted_size = size - discount
 708                    small_enough = discounted_size <= opt_UF_UseThreshold
 709                    discount = computeDiscount uf_arity arg_discounts
 710                                               res_discount arg_infos cont_info
 711
 712     in
 713     if dopt Opt_D_dump_inlinings dflags then
 714         pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
 715                  (vcat [text "arg infos" <+> ppr arg_infos,
 716                         text "uf arity" <+> ppr uf_arity,
 717                         text "interesting continuation" <+> ppr cont_info,
 718                         text "some_benefit" <+> ppr some_benefit,
 719                         text "is value:" <+> ppr is_value,
 720                         text "is cheap:" <+> ppr is_cheap,
 721                         text "guidance" <+> ppr guidance,
 722                         extra_doc,
 723                         text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
 724                   result
 725     else
 726     result
 727     }
 728 \end{code}
 729
 730 Note [RHS of lets]
 731 ~~~~~~~~~~~~~~~~~~
 732 Be a tiny bit keener to inline in the RHS of a let, because that might
 733 lead to good thing later
 734      f y = (y,y,y)
 735      g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
 736 We'd inline 'f' if the call was in a case context, and it kind-of-is,
 737 only we can't see it.  So we treat the RHS of a let as not-totally-boring.
 738
 739 Note [Unsaturated applications]
 740 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 741 When a call is not saturated, we *still* inline if one of the
 742 arguments has interesting structure.  That's sometimes very important.
 743 A good example is the Ord instance for Bool in Base:
 744
 745  Rec {
 746     $fOrdBool =GHC.Classes.D:Ord
 747                  @ Bool
 748                  ...
 749                  $cmin_ajX
 750
 751     $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
 752     $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
 753   }
 754
 755 But the defn of GHC.Classes.$dmmin is:
 756
 757   $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
 758     {- Arity: 3, HasNoCafRefs, Strictness: SLL,
 759        Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
 760                    case @ a GHC.Classes.<= @ a $dOrd x y of wild {
 761                      GHC.Bool.False -> y GHC.Bool.True -> x }) -}
 762
 763 We *really* want to inline $dmmin, even though it has arity 3, in
 764 order to unravel the recursion.
 765
 766
 767 Note [INLINE for small functions]
 768 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 769 Consider        {-# INLINE f #-}
 770                 f x = Just x
 771                 g y = f y
 772 Then f's RHS is no larger than its LHS, so we should inline it
 773 into even the most boring context.  (We do so if there is no INLINE
 774 pragma!)
 775
 776
 777 Note [Things to watch]
 778 ~~~~~~~~~~~~~~~~~~~~~~
 779 *   { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
 780     Assume x is exported, so not inlined unconditionally.
 781     Then we want x to inline unconditionally; no reason for it
 782     not to, and doing so avoids an indirection.
 783
 784 *   { x = I# 3; ....f x.... }
 785     Make sure that x does not inline unconditionally!
 786     Lest we get extra allocation.
 787
 788 Note [Inlining an InlineRule]
 789 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 790 An InlineRules is used for
 791   (a) programmer INLINE pragmas
 792   (b) inlinings from worker/wrapper
 793
 794 For (a) the RHS may be large, and our contract is that we *only* inline
 795 when the function is applied to all the arguments on the LHS of the
 796 source-code defn.  (The uf_arity in the rule.)
 797
 798 However for worker/wrapper it may be worth inlining even if the
 799 arity is not satisfied (as we do in the CoreUnfolding case) so we don't
 800 require saturation.
 801
 802
 803 Note [Nested functions]
 804 ~~~~~~~~~~~~~~~~~~~~~~~
 805 If a function has a nested defn we also record some-benefit, on the
 806 grounds that we are often able to eliminate the binding, and hence the
 807 allocation, for the function altogether; this is good for join points.
 808 But this only makes sense for *functions*; inlining a constructor
 809 doesn't help allocation unless the result is scrutinised.  UNLESS the
 810 constructor occurs just once, albeit possibly in multiple case
 811 branches.  Then inlining it doesn't increase allocation, but it does
 812 increase the chance that the constructor won't be allocated at all in
 813 the branches that don't use it.
 814
 815 Note [Cast then apply]
 816 ~~~~~~~~~~~~~~~~~~~~~~
 817 Consider
 818    myIndex = __inline_me ( (/\a. <blah>) |> co )
 819    co :: (forall a. a -> a) ~ (forall a. T a)
 820      ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
 821
 822 We need to inline myIndex to unravel this; but the actual call (myIndex a) has
 823 no value arguments.  The ValAppCtxt gives it enough incentive to inline.
 824
 825 Note [Inlining in ArgCtxt]
 826 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 827 The condition (arity > 0) here is very important, because otherwise
 828 we end up inlining top-level stuff into useless places; eg
 829    x = I# 3#
 830    f = \y.  g x
 831 This can make a very big difference: it adds 16% to nofib 'integer' allocs,
 832 and 20% to 'power'.
 833
 834 At one stage I replaced this condition by 'True' (leading to the above
 835 slow-down).  The motivation was test eyeball/inline1.hs; but that seems
 836 to work ok now.
 837
 838 NOTE: arguably, we should inline in ArgCtxt only if the result of the
 839 call is at least CONLIKE.  At least for the cases where we use ArgCtxt
 840 for the RHS of a 'let', we only profit from the inlining if we get a
 841 CONLIKE thing (modulo lets).
 842
 843 Note [Lone variables]
 844 ~~~~~~~~~~~~~~~~~~~~~
 845 The "lone-variable" case is important.  I spent ages messing about
 846 with unsatisfactory varaints, but this is nice.  The idea is that if a
 847 variable appears all alone
 848
 849         as an arg of lazy fn, or rhs    BoringCtxt
 850         as scrutinee of a case          CaseCtxt
 851         as arg of a fn                  ArgCtxt
 852 AND
 853         it is bound to a value
 854
 855 then we should not inline it (unless there is some other reason,
 856 e.g. is is the sole occurrence).  That is what is happening at
 857 the use of 'lone_variable' in 'interesting_saturated_call'.
 858
 859 Why?  At least in the case-scrutinee situation, turning
 860         let x = (a,b) in case x of y -> ...
 861 into
 862         let x = (a,b) in case (a,b) of y -> ...
 863 and thence to
 864         let x = (a,b) in let y = (a,b) in ...
 865 is bad if the binding for x will remain.
 866
 867 Another example: I discovered that strings
 868 were getting inlined straight back into applications of 'error'
 869 because the latter is strict.
 870         s = "foo"
 871         f = \x -> ...(error s)...
 872
 873 Fundamentally such contexts should not encourage inlining because the
 874 context can ``see'' the unfolding of the variable (e.g. case or a
 875 RULE) so there's no gain.  If the thing is bound to a value.
 876
 877 However, watch out:
 878
 879  * Consider this:
 880         foo = _inline_ (\n. [n])
 881         bar = _inline_ (foo 20)
 882         baz = \n. case bar of { (m:_) -> m + n }
 883    Here we really want to inline 'bar' so that we can inline 'foo'
 884    and the whole thing unravels as it should obviously do.  This is
 885    important: in the NDP project, 'bar' generates a closure data
 886    structure rather than a list.
 887
 888    So the non-inlining of lone_variables should only apply if the
 889    unfolding is regarded as cheap; because that is when exprIsConApp_maybe
 890    looks through the unfolding.  Hence the "&& is_cheap" in the
 891    InlineRule branch.
 892
 893  * Even a type application or coercion isn't a lone variable.
 894    Consider
 895         case $fMonadST @ RealWorld of { :DMonad a b c -> c }
 896    We had better inline that sucker!  The case won't see through it.
 897
 898    For now, I'm treating treating a variable applied to types
 899    in a *lazy* context "lone". The motivating example was
 900         f = /\a. \x. BIG
 901         g = /\a. \y.  h (f a)
 902    There's no advantage in inlining f here, and perhaps
 903    a significant disadvantage.  Hence some_val_args in the Stop case
 904
 905 \begin{code}
 906 computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
 907 computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
 908         -- We multiple the raw discounts (args_discount and result_discount)
 909         -- ty opt_UnfoldingKeenessFactor because the former have to do with
 910         --  *size* whereas the discounts imply that there's some extra
 911         --  *efficiency* to be gained (e.g. beta reductions, case reductions)
 912         -- by inlining.
 913
 914   = 1           -- Discount of 1 because the result replaces the call
 915                 -- so we count 1 for the function itself
 916
 917     + length (take n_vals_wanted arg_infos)
 918                -- Discount of (un-scaled) 1 for each arg supplied,
 919                -- because the result replaces the call
 920
 921     + round (opt_UF_KeenessFactor *
 922              fromIntegral (arg_discount + res_discount'))
 923   where
 924     arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
 925
 926     mk_arg_discount _        TrivArg    = 0
 927     mk_arg_discount _        NonTrivArg = 1
 928     mk_arg_discount discount ValueArg   = discount
 929
 930     res_discount' = case cont_info of
 931                         BoringCtxt  -> 0
 932                         CaseCtxt    -> res_discount
 933                         _other      -> 4 `min` res_discount
 934                 -- res_discount can be very large when a function returns
 935                 -- constructors; but we only want to invoke that large discount
 936                 -- when there's a case continuation.
 937                 -- Otherwise we, rather arbitrarily, threshold it.  Yuk.
 938                 -- But we want to aovid inlining large functions that return
 939                 -- constructors into contexts that are simply "interesting"
 940 \end{code}
 941
 942 %************************************************************************
 943 %*                                                                      *
 944         Interesting arguments
 945 %*                                                                      *
 946 %************************************************************************
 947
 948 Note [Interesting arguments]
 949 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 950 An argument is interesting if it deserves a discount for unfoldings
 951 with a discount in that argument position.  The idea is to avoid
 952 unfolding a function that is applied only to variables that have no
 953 unfolding (i.e. they are probably lambda bound): f x y z There is
 954 little point in inlining f here.
 955
 956 Generally, *values* (like (C a b) and (\x.e)) deserve discounts.  But
 957 we must look through lets, eg (let x = e in C a b), because the let will
 958 float, exposing the value, if we inline.  That makes it different to
 959 exprIsHNF.
 960
 961 Before 2009 we said it was interesting if the argument had *any* structure
 962 at all; i.e. (hasSomeUnfolding v).  But does too much inlining; see Trac #3016.
 963
 964 But we don't regard (f x y) as interesting, unless f is unsaturated.
 965 If it's saturated and f hasn't inlined, then it's probably not going
 966 to now!
 967
 968 Note [Conlike is interesting]
 969 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 970 Consider
 971         f d = ...((*) d x y)...
 972         ... f (df d')...
 973 where df is con-like. Then we'd really like to inline 'f' so that the
 974 rule for (*) (df d) can fire.  To do this
 975   a) we give a discount for being an argument of a class-op (eg (*) d)
 976   b) we say that a con-like argument (eg (df d)) is interesting
 977
 978 \begin{code}
 979 data ArgSummary = TrivArg       -- Nothing interesting
 980                 | NonTrivArg    -- Arg has structure
 981                 | ValueArg      -- Arg is a con-app or PAP
 982                                 -- ..or con-like. Note [Conlike is interesting]
 983
 984 interestingArg :: CoreExpr -> ArgSummary
 985 -- See Note [Interesting arguments]
 986 interestingArg e = go e 0
 987   where
 988     -- n is # value args to which the expression is applied
 989     go (Lit {}) _          = ValueArg
 990     go (Var v)  n
 991        | isConLikeId v     = ValueArg   -- Experimenting with 'conlike' rather that
 992                                         --    data constructors here
 993        | idArity v > n     = ValueArg   -- Catches (eg) primops with arity but no unfolding
 994        | n > 0             = NonTrivArg -- Saturated or unknown call
 995        | conlike_unfolding = ValueArg   -- n==0; look for an interesting unfolding
 996                                         -- See Note [Conlike is interesting]
 997        | otherwise         = TrivArg    -- n==0, no useful unfolding
 998        where
 999          conlike_unfolding = isConLikeUnfolding (idUnfolding v)
1000
1001     go (Type _)          _ = TrivArg
1002     go (App fn (Type _)) n = go fn n
1003     go (App fn _)        n = go fn (n+1)
1004     go (Note _ a)        n = go a n
1005     go (Cast e _)        n = go e n
1006     go (Lam v e)         n
1007        | isTyVar v         = go e n
1008        | n>0               = go e (n-1)
1009        | otherwise         = ValueArg
1010     go (Let _ e)         n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg }
1011     go (Case {})         _ = NonTrivArg
1012
1013 nonTriv ::  ArgSummary -> Bool
1014 nonTriv TrivArg = False
1015 nonTriv _       = True
1016 \end{code}
1017
1018 %************************************************************************
1019 %*                                                                      *
1020          exprIsConApp_maybe
1021 %*                                                                      *
1022 %************************************************************************
1023
1024 Note [exprIsConApp_maybe]
1025 ~~~~~~~~~~~~~~~~~~~~~~~~~
1026 exprIsConApp_maybe is a very important function.  There are two principal
1027 uses:
1028   * case e of { .... }
1029   * cls_op e, where cls_op is a class operation
1030
1031 In both cases you want to know if e is of form (C e1..en) where C is
1032 a data constructor.
1033
1034 However e might not *look* as if
1035
1036 \begin{code}
1037 -- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is
1038 -- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@,
1039 -- where t1..tk are the *universally-qantified* type args of 'dc'
1040 exprIsConApp_maybe :: IdUnfoldingFun -> CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
1041
1042 exprIsConApp_maybe id_unf (Note _ expr)
1043   = exprIsConApp_maybe id_unf expr
1044         -- We ignore all notes.  For example,
1045         --      case _scc_ "foo" (C a b) of
1046         --                      C a b -> e
1047         -- should be optimised away, but it will be only if we look
1048         -- through the SCC note.
1049
1050 exprIsConApp_maybe id_unf (Cast expr co)
1051   =     -- Here we do the KPush reduction rule as described in the FC paper
1052         -- The transformation applies iff we have
1053         --      (C e1 ... en) `cast` co
1054         -- where co :: (T t1 .. tn) ~ to_ty
1055         -- The left-hand one must be a T, because exprIsConApp returned True
1056         -- but the right-hand one might not be.  (Though it usually will.)
1057
1058     case exprIsConApp_maybe id_unf expr of {
1059         Nothing                          -> Nothing ;
1060         Just (dc, _dc_univ_args, dc_args) ->
1061
1062     let (_from_ty, to_ty) = coercionKind co
1063         dc_tc = dataConTyCon dc
1064     in
1065     case splitTyConApp_maybe to_ty of {
1066         Nothing -> Nothing ;
1067         Just (to_tc, to_tc_arg_tys)
1068                 | dc_tc /= to_tc -> Nothing
1069                 -- These two Nothing cases are possible; we might see
1070                 --      (C x y) `cast` (g :: T a ~ S [a]),
1071                 -- where S is a type function.  In fact, exprIsConApp
1072                 -- will probably not be called in such circumstances,
1073                 -- but there't nothing wrong with it
1074
1075                 | otherwise  ->
1076     let
1077         tc_arity       = tyConArity dc_tc
1078         dc_univ_tyvars = dataConUnivTyVars dc
1079         dc_ex_tyvars   = dataConExTyVars dc
1080         arg_tys        = dataConRepArgTys dc
1081
1082         dc_eqs :: [(Type,Type)]   -- All equalities from the DataCon
1083         dc_eqs = [(mkTyVarTy tv, ty)   | (tv,ty) <- dataConEqSpec dc] ++
1084                  [getEqPredTys eq_pred | eq_pred <- dataConEqTheta dc]
1085
1086         (ex_args, rest1)    = splitAtList dc_ex_tyvars dc_args
1087         (co_args, val_args) = splitAtList dc_eqs rest1
1088
1089         -- Make the "theta" from Fig 3 of the paper
1090         gammas = decomposeCo tc_arity co
1091         theta  = zipOpenTvSubst (dc_univ_tyvars ++ dc_ex_tyvars)
1092                                 (gammas         ++ stripTypeArgs ex_args)
1093
1094           -- Cast the existential coercion arguments
1095         cast_co (ty1, ty2) (Type co)
1096           = Type $ mkSymCoercion (substTy theta ty1)
1097                    `mkTransCoercion` co
1098                    `mkTransCoercion` (substTy theta ty2)
1099         cast_co _ other_arg = pprPanic "cast_co" (ppr other_arg)
1100         new_co_args = zipWith cast_co dc_eqs co_args
1101
1102           -- Cast the value arguments (which include dictionaries)
1103         new_val_args = zipWith cast_arg arg_tys val_args
1104         cast_arg arg_ty arg = mkCoerce (substTy theta arg_ty) arg
1105     in
1106 #ifdef DEBUG
1107     let dump_doc = vcat [ppr dc,      ppr dc_univ_tyvars, ppr dc_ex_tyvars,
1108                          ppr arg_tys, ppr dc_args,        ppr _dc_univ_args,
1109                          ppr ex_args, ppr val_args]
1110     in
1111     ASSERT2( coreEqType _from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
1112     ASSERT2( all isTypeArg (ex_args ++ co_args), dump_doc )
1113     ASSERT2( equalLength val_args arg_tys, dump_doc )
1114 #endif
1115
1116     Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args)
1117     }}
1118
1119 exprIsConApp_maybe id_unf expr
1120   = analyse expr []
1121   where
1122     analyse (App fun arg) args = analyse fun (arg:args)
1123     analyse fun@(Lam {})  args = beta fun [] args
1124
1125     analyse (Var fun) args
1126         | Just con <- isDataConWorkId_maybe fun
1127         , is_saturated
1128         , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args
1129         = Just (con, stripTypeArgs univ_ty_args, rest_args)
1130
1131         -- Look through dictionary functions; see Note [Unfolding DFuns]
1132         | DFunUnfolding con ops <- unfolding
1133         , is_saturated
1134         , let (dfun_tvs, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun)
1135               subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args))
1136         = Just (con, substTys subst dfun_res_tys,
1137                      [mkApps op args | op <- ops])
1138
1139         -- Look through unfoldings, but only cheap ones, because
1140         -- we are effectively duplicating the unfolding
1141         | CoreUnfolding { uf_expandable = expand_me, uf_tmpl = rhs } <- unfolding
1142         , expand_me = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
1143                       analyse rhs args
1144         where
1145           is_saturated = count isValArg args == idArity fun
1146           unfolding = id_unf fun    -- Does not look through loop breakers
1147                     -- ToDo: we *may* look through variables that are NOINLINE
1148                     --       in this phase, and that is really not right
1149
1150     analyse _ _ = Nothing
1151
1152     -----------
1153     in_scope = mkInScopeSet (exprFreeVars expr)
1154
1155     -----------
1156     beta (Lam v body) pairs (arg : args)
1157         | isTypeArg arg
1158         = beta body ((v,arg):pairs) args
1159
1160     beta (Lam {}) _ _    -- Un-saturated, or not a type lambda
1161         = Nothing
1162
1163     beta fun pairs args
1164         = case analyse (substExpr subst fun) args of
1165             Nothing  -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $
1166                         Nothing
1167             Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $
1168                         Just ans
1169         where
1170           subst = mkOpenSubst in_scope pairs
1171           -- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args]
1172
1173
1174 stripTypeArgs :: [CoreExpr] -> [Type]
1175 stripTypeArgs args = ASSERT2( all isTypeArg args, ppr args )
1176                      [ty | Type ty <- args]
1177 \end{code}
1178
1179 Note [Unfolding DFuns]
1180 ~~~~~~~~~~~~~~~~~~~~~~
1181 DFuns look like
1182
1183   df :: forall a b. (Eq a, Eq b) -> Eq (a,b)
1184   df a b d_a d_b = MkEqD (a,b) ($c1 a b d_a d_b)
1185                                ($c2 a b d_a d_b)
1186
1187 So to split it up we just need to apply the ops $c1, $c2 etc
1188 to the very same args as the dfun.  It takes a little more work
1189 to compute the type arguments to the dictionary constructor.
1190