compiler/coreSyn/CoreUnfold.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The AQUA Project, Glasgow University, 1994-1998
   4 %
   5
   6 Core-syntax unfoldings
   7
   8 Unfoldings (which can travel across module boundaries) are in Core
   9 syntax (namely @CoreExpr@s).
  10
  11 The type @Unfolding@ sits ``above'' simply-Core-expressions
  12 unfoldings, capturing ``higher-level'' things we know about a binding,
  13 usually things that the simplifier found out (e.g., ``it's a
  14 literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
  15 find, unsurprisingly, a Core expression.
  16
  17 \begin{code}
  18 module CoreUnfold (
  19         Unfolding, UnfoldingGuidance,   -- Abstract types
  20
  21         noUnfolding, mkImplicitUnfolding,
  22         mkTopUnfolding, mkUnfolding, mkCoreUnfolding,
  23         mkInlineRule, mkWwInlineRule,
  24         mkCompulsoryUnfolding, mkDFunUnfolding,
  25
  26         interestingArg, ArgSummary(..),
  27
  28         couldBeSmallEnoughToInline,
  29         certainlyWillInline, smallEnoughToInline,
  30
  31         callSiteInline, CallCtxt(..),
  32
  33         exprIsConApp_maybe
  34
  35     ) where
  36
  37 #include "HsVersions.h"
  38
  39 import StaticFlags
  40 import DynFlags
  41 import CoreSyn
  42 import PprCore          ()      -- Instances
  43 import OccurAnal
  44 import CoreSubst hiding( substTy )
  45 import CoreFVs         ( exprFreeVars )
  46 import CoreUtils
  47 import Id
  48 import DataCon
  49 import TyCon
  50 import Literal
  51 import PrimOp
  52 import IdInfo
  53 import BasicTypes       ( Arity )
  54 import TcType           ( tcSplitDFunTy )
  55 import Type
  56 import Coercion
  57 import PrelNames
  58 import VarEnv           ( mkInScopeSet )
  59 import Bag
  60 import Util
  61 import FastTypes
  62 import FastString
  63 import Outputable
  64
  65 \end{code}
  66
  67
  68 %************************************************************************
  69 %*                                                                      *
  70 \subsection{Making unfoldings}
  71 %*                                                                      *
  72 %************************************************************************
  73
  74 \begin{code}
  75 mkTopUnfolding :: Bool -> CoreExpr -> Unfolding
  76 mkTopUnfolding is_bottoming expr
  77   = mkUnfolding True {- Top level -} is_bottoming expr
  78
  79 mkImplicitUnfolding :: CoreExpr -> Unfolding
  80 -- For implicit Ids, do a tiny bit of optimising first
  81 mkImplicitUnfolding expr = mkTopUnfolding False (simpleOptExpr expr)
  82
  83 -- Note [Top-level flag on inline rules]
  84 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  85 -- Slight hack: note that mk_inline_rules conservatively sets the
  86 -- top-level flag to True.  It gets set more accurately by the simplifier
  87 -- Simplify.simplUnfolding.
  88
  89 mkUnfolding :: Bool -> Bool -> CoreExpr -> Unfolding
  90 mkUnfolding top_lvl is_bottoming expr
  91   = CoreUnfolding { uf_tmpl       = occurAnalyseExpr expr,
  92                     uf_src        = InlineRhs,
  93                     uf_arity      = arity,
  94                     uf_is_top     = top_lvl,
  95                     uf_is_value   = exprIsHNF        expr,
  96                     uf_is_conlike = exprIsConLike    expr,
  97                     uf_expandable = exprIsExpandable expr,
  98                     uf_is_cheap   = is_cheap,
  99                     uf_guidance   = guidance }
 100   where
 101     is_cheap = exprIsCheap expr
 102     (arity, guidance) = calcUnfoldingGuidance is_cheap (top_lvl && is_bottoming)
 103                                               opt_UF_CreationThreshold expr
 104         -- Sometimes during simplification, there's a large let-bound thing
 105         -- which has been substituted, and so is now dead; so 'expr' contains
 106         -- two copies of the thing while the occurrence-analysed expression doesn't
 107         -- Nevertheless, we *don't* occ-analyse before computing the size because the
 108         -- size computation bales out after a while, whereas occurrence analysis does not.
 109         --
 110         -- This can occasionally mean that the guidance is very pessimistic;
 111         -- it gets fixed up next round.  And it should be rare, because large
 112         -- let-bound things that are dead are usually caught by preInlineUnconditionally
 113
 114 mkCoreUnfolding :: Bool -> UnfoldingSource -> CoreExpr
 115                 -> Arity -> UnfoldingGuidance -> Unfolding
 116 -- Occurrence-analyses the expression before capturing it
 117 mkCoreUnfolding top_lvl src expr arity guidance
 118   = CoreUnfolding { uf_tmpl       = occurAnalyseExpr expr,
 119                     uf_src        = src,
 120                     uf_arity      = arity,
 121                     uf_is_top     = top_lvl,
 122                     uf_is_value   = exprIsHNF        expr,
 123                     uf_is_conlike = exprIsConLike    expr,
 124                     uf_is_cheap   = exprIsCheap      expr,
 125                     uf_expandable = exprIsExpandable expr,
 126                     uf_guidance   = guidance }
 127
 128 mkDFunUnfolding :: DataCon -> [Id] -> Unfolding
 129 mkDFunUnfolding con ops = DFunUnfolding con (map Var ops)
 130
 131 mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
 132 mkWwInlineRule id expr arity
 133   = mkCoreUnfolding True (InlineWrapper id)
 134                    (simpleOptExpr expr) arity
 135                    (UnfWhen unSaturatedOk boringCxtNotOk)
 136
 137 mkCompulsoryUnfolding :: CoreExpr -> Unfolding
 138 mkCompulsoryUnfolding expr         -- Used for things that absolutely must be unfolded
 139   = mkCoreUnfolding True InlineCompulsory
 140                     expr 0    -- Arity of unfolding doesn't matter
 141                     (UnfWhen unSaturatedOk boringCxtOk)
 142
 143 mkInlineRule :: Bool -> CoreExpr -> Arity -> Unfolding
 144 mkInlineRule unsat_ok expr arity
 145   = mkCoreUnfolding True InlineRule      -- Note [Top-level flag on inline rules]
 146                     expr' arity
 147                     (UnfWhen unsat_ok boring_ok)
 148   where
 149     expr' = simpleOptExpr expr
 150     boring_ok = case calcUnfoldingGuidance True    -- Treat as cheap
 151                                            False   -- But not bottoming
 152                                            (arity+1) expr' of
 153                   (_, UnfWhen _ boring_ok) -> boring_ok
 154                   _other                   -> boringCxtNotOk
 155      -- See Note [INLINE for small functions]
 156 \end{code}
 157
 158
 159 %************************************************************************
 160 %*                                                                      *
 161 \subsection{The UnfoldingGuidance type}
 162 %*                                                                      *
 163 %************************************************************************
 164
 165 \begin{code}
 166 calcUnfoldingGuidance
 167         :: Bool         -- True <=> the rhs is cheap, or we want to treat it
 168                         --          as cheap (INLINE things)
 169         -> Bool         -- True <=> this is a top-level unfolding for a
 170                         --          diverging function; don't inline this
 171         -> Int          -- Bomb out if size gets bigger than this
 172         -> CoreExpr     -- Expression to look at
 173         -> (Arity, UnfoldingGuidance)
 174 calcUnfoldingGuidance expr_is_cheap top_bot bOMB_OUT_SIZE expr
 175   = case collectBinders expr of { (bndrs, body) ->
 176     let
 177         val_bndrs   = filter isId bndrs
 178         n_val_bndrs = length val_bndrs
 179
 180         guidance
 181           = case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
 182               TooBig -> UnfNever
 183               SizeIs size cased_bndrs scrut_discount
 184                 | uncondInline n_val_bndrs (iBox size) && expr_is_cheap
 185                 -> UnfWhen needSaturated boringCxtOk
 186
 187                 | top_bot  -- See Note [Do not inline top-level bottoming functions]
 188                 -> UnfNever
 189
 190                 | otherwise
 191                 -> UnfIfGoodArgs { ug_args  = map (discount cased_bndrs) val_bndrs
 192                                  , ug_size  = iBox size
 193                                  , ug_res   = iBox scrut_discount }
 194
 195         discount cbs bndr
 196            = foldlBag (\acc (b',n) -> if bndr==b' then acc+n else acc)
 197                       0 cbs
 198     in
 199     (n_val_bndrs, guidance) }
 200 \end{code}
 201
 202 Note [Computing the size of an expression]
 203 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 204 The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
 205 heuristics right has taken a long time.  Here's the basic strategy:
 206
 207     * Variables, literals: 0
 208       (Exception for string literals, see litSize.)
 209
 210     * Function applications (f e1 .. en): 1 + #value args
 211
 212     * Constructor applications: 1, regardless of #args
 213
 214     * Let(rec): 1 + size of components
 215
 216     * Note, cast: 0
 217
 218 Examples
 219
 220   Size  Term
 221   --------------
 222     0     42#
 223     0     x
 224     0     True
 225     2     f x
 226     1     Just x
 227     4     f (g x)
 228
 229 Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
 230 a function call to account for.  Notice also that constructor applications
 231 are very cheap, because exposing them to a caller is so valuable.
 232
 233
 234 Note [Do not inline top-level bottoming functions]
 235 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 236 The FloatOut pass has gone to some trouble to float out calls to 'error'
 237 and similar friends.  See Note [Bottoming floats] in SetLevels.
 238 Do not re-inline them!  But we *do* still inline if they are very small
 239 (the uncondInline stuff).
 240
 241
 242 Note [Unconditional inlining]
 243 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 244 We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
 245 than the thing it's replacing.  Notice that
 246       (f x) --> (g 3)             -- YES, unconditionally
 247       (f x) --> x : []            -- YES, *even though* there are two
 248                                   --      arguments to the cons
 249       x     --> g 3               -- NO
 250       x     --> Just v            -- NO
 251
 252 It's very important not to unconditionally replace a variable by
 253 a non-atomic term.
 254
 255 \begin{code}
 256 uncondInline :: Arity -> Int -> Bool
 257 -- Inline unconditionally if there no size increase
 258 -- Size of call is arity (+1 for the function)
 259 -- See Note [Unconditional inlining]
 260 uncondInline arity size
 261   | arity == 0 = size == 0
 262   | otherwise  = size <= arity + 1
 263 \end{code}
 264
 265
 266 \begin{code}
 267 sizeExpr :: FastInt         -- Bomb out if it gets bigger than this
 268          -> [Id]            -- Arguments; we're interested in which of these
 269                             -- get case'd
 270          -> CoreExpr
 271          -> ExprSize
 272
 273 -- Note [Computing the size of an expression]
 274
 275 sizeExpr bOMB_OUT_SIZE top_args expr
 276   = size_up expr
 277   where
 278     size_up (Cast e _) = size_up e
 279     size_up (Note _ e) = size_up e
 280     size_up (Type _)   = sizeZero           -- Types cost nothing
 281     size_up (Lit lit)  = sizeN (litSize lit)
 282     size_up (Var f)    = size_up_call f []  -- Make sure we get constructor
 283                                             -- discounts even on nullary constructors
 284
 285     size_up (App fun (Type _)) = size_up fun
 286     size_up (App fun arg)      = size_up arg  `addSizeNSD`
 287                                  size_up_app fun [arg]
 288
 289     size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 1)
 290                       | otherwise = size_up e
 291
 292     size_up (Let (NonRec binder rhs) body)
 293       = size_up rhs             `addSizeNSD`
 294         size_up body            `addSizeN`
 295         (if isUnLiftedType (idType binder) then 0 else 1)
 296                 -- For the allocation
 297                 -- If the binder has an unlifted type there is no allocation
 298
 299     size_up (Let (Rec pairs) body)
 300       = foldr (addSizeNSD . size_up . snd)
 301               (size_up body `addSizeN` length pairs)    -- (length pairs) for the allocation
 302               pairs
 303
 304     size_up (Case (Var v) _ _ alts)
 305         | v `elem` top_args             -- We are scrutinising an argument variable
 306         = alts_size (foldr1 addAltSize alt_sizes)
 307                     (foldr1 maxSize alt_sizes)
 308                 -- Good to inline if an arg is scrutinised, because
 309                 -- that may eliminate allocation in the caller
 310                 -- And it eliminates the case itself
 311         where
 312           alt_sizes = map size_up_alt alts
 313
 314                 -- alts_size tries to compute a good discount for
 315                 -- the case when we are scrutinising an argument variable
 316           alts_size (SizeIs tot tot_disc tot_scrut)  -- Size of all alternatives
 317                     (SizeIs max _        _)          -- Size of biggest alternative
 318                 = SizeIs tot (unitBag (v, iBox (_ILIT(2) +# tot -# max)) `unionBags` tot_disc) tot_scrut
 319                         -- If the variable is known, we produce a discount that
 320                         -- will take us back to 'max', the size of the largest alternative
 321                         -- The 1+ is a little discount for reduced allocation in the caller
 322                         --
 323                         -- Notice though, that we return tot_disc, the total discount from
 324                         -- all branches.  I think that's right.
 325
 326           alts_size tot_size _ = tot_size
 327
 328     size_up (Case e _ _ alts) = size_up e  `addSizeNSD`
 329                                 foldr (addAltSize . size_up_alt) sizeZero alts
 330                 -- We don't charge for the case itself
 331                 -- It's a strict thing, and the price of the call
 332                 -- is paid by scrut.  Also consider
 333                 --      case f x of DEFAULT -> e
 334                 -- This is just ';'!  Don't charge for it.
 335                 --
 336                 -- Moreover, we charge one per alternative.
 337
 338     ------------
 339     -- size_up_app is used when there's ONE OR MORE value args
 340     size_up_app (App fun arg) args
 341         | isTypeArg arg            = size_up_app fun args
 342         | otherwise                = size_up arg  `addSizeNSD`
 343                                      size_up_app fun (arg:args)
 344     size_up_app (Var fun)     args = size_up_call fun args
 345     size_up_app other         args = size_up other `addSizeN` length args
 346
 347     ------------
 348     size_up_call :: Id -> [CoreExpr] -> ExprSize
 349     size_up_call fun val_args
 350        = case idDetails fun of
 351            FCallId _        -> sizeN opt_UF_DearOp
 352            DataConWorkId dc -> conSize    dc (length val_args)
 353            PrimOpId op      -> primOpSize op (length val_args)
 354            ClassOpId _      -> classOpSize top_args val_args
 355            _                -> funSize top_args fun (length val_args)
 356
 357     ------------
 358     size_up_alt (_con, _bndrs, rhs) = size_up rhs `addSizeN` 1
 359         -- Don't charge for args, so that wrappers look cheap
 360         -- (See comments about wrappers with Case)
 361         --
 362         -- IMPORATANT: *do* charge 1 for the alternative, else we
 363         -- find that giant case nests are treated as practically free
 364         -- A good example is Foreign.C.Error.errrnoToIOError
 365
 366     ------------
 367         -- These addSize things have to be here because
 368         -- I don't want to give them bOMB_OUT_SIZE as an argument
 369     addSizeN TooBig          _  = TooBig
 370     addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
 371
 372         -- addAltSize is used to add the sizes of case alternatives
 373     addAltSize TooBig            _      = TooBig
 374     addAltSize _                 TooBig = TooBig
 375     addAltSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
 376         = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
 377                                  (xs `unionBags` ys)
 378                                  (d1 +# d2)   -- Note [addAltSize result discounts]
 379
 380         -- This variant ignores the result discount from its LEFT argument
 381         -- It's used when the second argument isn't part of the result
 382     addSizeNSD TooBig            _      = TooBig
 383     addSizeNSD _                 TooBig = TooBig
 384     addSizeNSD (SizeIs n1 xs _) (SizeIs n2 ys d2)
 385         = mkSizeIs bOMB_OUT_SIZE (n1 +# n2)
 386                                  (xs `unionBags` ys)
 387                                  d2  -- Ignore d1
 388 \end{code}
 389
 390 \begin{code}
 391 -- | Finds a nominal size of a string literal.
 392 litSize :: Literal -> Int
 393 -- Used by CoreUnfold.sizeExpr
 394 litSize (MachStr str) = 1 + ((lengthFS str + 3) `div` 4)
 395         -- If size could be 0 then @f "x"@ might be too small
 396         -- [Sept03: make literal strings a bit bigger to avoid fruitless
 397         --  duplication of little strings]
 398 litSize _other = 0    -- Must match size of nullary constructors
 399                       -- Key point: if  x |-> 4, then x must inline unconditionally
 400                       --            (eg via case binding)
 401
 402 classOpSize :: [Id] -> [CoreExpr] -> ExprSize
 403 -- See Note [Conlike is interesting]
 404 classOpSize _ []
 405   = sizeZero
 406 classOpSize top_args (arg1 : other_args)
 407   = SizeIs (iUnbox size) arg_discount (_ILIT(0))
 408   where
 409     size = 2 + length other_args
 410     -- If the class op is scrutinising a lambda bound dictionary then
 411     -- give it a discount, to encourage the inlining of this function
 412     -- The actual discount is rather arbitrarily chosen
 413     arg_discount = case arg1 of
 414                      Var dict | dict `elem` top_args
 415                               -> unitBag (dict, opt_UF_DictDiscount)
 416                      _other   -> emptyBag
 417
 418 funSize :: [Id] -> Id -> Int -> ExprSize
 419 -- Size for functions that are not constructors or primops
 420 -- Note [Function applications]
 421 funSize top_args fun n_val_args
 422   | fun `hasKey` buildIdKey   = buildSize
 423   | fun `hasKey` augmentIdKey = augmentSize
 424   | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount)
 425   where
 426     some_val_args = n_val_args > 0
 427
 428     arg_discount | some_val_args && fun `elem` top_args
 429                  = unitBag (fun, opt_UF_FunAppDiscount)
 430                  | otherwise = emptyBag
 431         -- If the function is an argument and is applied
 432         -- to some values, give it an arg-discount
 433
 434     res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount
 435                  | otherwise                = 0
 436         -- If the function is partially applied, show a result discount
 437
 438     size | some_val_args = 1 + n_val_args
 439          | otherwise     = 0
 440         -- The 1+ is for the function itself
 441         -- Add 1 for each non-trivial arg;
 442         -- the allocation cost, as in let(rec)
 443
 444
 445 conSize :: DataCon -> Int -> ExprSize
 446 conSize dc n_val_args
 447   | n_val_args == 0      = SizeIs (_ILIT(0)) emptyBag (_ILIT(1))        -- Like variables
 448   | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
 449   | otherwise            = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
 450         -- Treat a constructors application as size 1, regardless of how
 451         -- many arguments it has; we are keen to expose them
 452         -- (and we charge separately for their args).  We can't treat
 453         -- them as size zero, else we find that (Just x) has size 0,
 454         -- which is the same as a lone variable; and hence 'v' will
 455         -- always be replaced by (Just x), where v is bound to Just x.
 456         --
 457         -- However, unboxed tuples count as size zero
 458         -- I found occasions where we had
 459         --      f x y z = case op# x y z of { s -> (# s, () #) }
 460         -- and f wasn't getting inlined
 461
 462 primOpSize :: PrimOp -> Int -> ExprSize
 463 primOpSize op n_val_args
 464  | not (primOpIsDupable op) = sizeN opt_UF_DearOp
 465  | not (primOpOutOfLine op) = sizeN 1
 466         -- Be very keen to inline simple primops.
 467         -- We give a discount of 1 for each arg so that (op# x y z) costs 2.
 468         -- We can't make it cost 1, else we'll inline let v = (op# x y z)
 469         -- at every use of v, which is excessive.
 470         --
 471         -- A good example is:
 472         --      let x = +# p q in C {x}
 473         -- Even though x get's an occurrence of 'many', its RHS looks cheap,
 474         -- and there's a good chance it'll get inlined back into C's RHS. Urgh!
 475
 476  | otherwise = sizeN n_val_args
 477
 478
 479 buildSize :: ExprSize
 480 buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
 481         -- We really want to inline applications of build
 482         -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
 483         -- Indeed, we should add a result_discount becuause build is
 484         -- very like a constructor.  We don't bother to check that the
 485         -- build is saturated (it usually is).  The "-2" discounts for the \c n,
 486         -- The "4" is rather arbitrary.
 487
 488 augmentSize :: ExprSize
 489 augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
 490         -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
 491         -- e plus ys. The -2 accounts for the \cn
 492
 493 -- When we return a lambda, give a discount if it's used (applied)
 494 lamScrutDiscount :: ExprSize -> ExprSize
 495 lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount)
 496 lamScrutDiscount TooBig          = TooBig
 497 \end{code}
 498
 499 Note [addAltSize result discounts]
 500 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 501 When adding the size of alternatives, we *add* the result discounts
 502 too, rather than take the *maximum*.  For a multi-branch case, this
 503 gives a discount for each branch that returns a constructor, making us
 504 keener to inline.  I did try using 'max' instead, but it makes nofib
 505 'rewrite' and 'puzzle' allocate significantly more, and didn't make
 506 binary sizes shrink significantly either.
 507
 508 Note [Discounts and thresholds]
 509 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 510 Constants for discounts and thesholds are defined in main/StaticFlags,
 511 all of form opt_UF_xxxx.   They are:
 512
 513 opt_UF_CreationThreshold (45)
 514      At a definition site, if the unfolding is bigger than this, we
 515      may discard it altogether
 516
 517 opt_UF_UseThreshold (6)
 518      At a call site, if the unfolding, less discounts, is smaller than
 519      this, then it's small enough inline
 520
 521 opt_UF_KeennessFactor (1.5)
 522      Factor by which the discounts are multiplied before
 523      subtracting from size
 524
 525 opt_UF_DictDiscount (1)
 526      The discount for each occurrence of a dictionary argument
 527      as an argument of a class method.  Should be pretty small
 528      else big functions may get inlined
 529
 530 opt_UF_FunAppDiscount (6)
 531      Discount for a function argument that is applied.  Quite
 532      large, because if we inline we avoid the higher-order call.
 533
 534 opt_UF_DearOp (4)
 535      The size of a foreign call or not-dupable PrimOp
 536
 537
 538 Note [Function applications]
 539 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 540 In a function application (f a b)
 541
 542   - If 'f' is an argument to the function being analysed,
 543     and there's at least one value arg, record a FunAppDiscount for f
 544
 545   - If the application if a PAP (arity > 2 in this example)
 546     record a *result* discount (because inlining
 547     with "extra" args in the call may mean that we now
 548     get a saturated application)
 549
 550 Code for manipulating sizes
 551
 552 \begin{code}
 553 data ExprSize = TooBig
 554               | SizeIs FastInt          -- Size found
 555                        (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
 556                        FastInt          -- Size to subtract if result is scrutinised
 557                                         -- by a case expression
 558
 559 instance Outputable ExprSize where
 560   ppr TooBig         = ptext (sLit "TooBig")
 561   ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c))
 562
 563 -- subtract the discount before deciding whether to bale out. eg. we
 564 -- want to inline a large constructor application into a selector:
 565 --      tup = (a_1, ..., a_99)
 566 --      x = case tup of ...
 567 --
 568 mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
 569 mkSizeIs max n xs d | (n -# d) ># max = TooBig
 570                     | otherwise       = SizeIs n xs d
 571
 572 maxSize :: ExprSize -> ExprSize -> ExprSize
 573 maxSize TooBig         _                                  = TooBig
 574 maxSize _              TooBig                             = TooBig
 575 maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2  = s1
 576                                               | otherwise = s2
 577
 578 sizeZero :: ExprSize
 579 sizeN :: Int -> ExprSize
 580
 581 sizeZero = SizeIs (_ILIT(0))  emptyBag (_ILIT(0))
 582 sizeN n  = SizeIs (iUnbox n) emptyBag (_ILIT(0))
 583 \end{code}
 584
 585
 586 %************************************************************************
 587 %*                                                                      *
 588 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
 589 %*                                                                      *
 590 %************************************************************************
 591
 592 We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
 593 we ``couldn't possibly use'' on the other side.  Can be overridden w/
 594 flaggery.  Just the same as smallEnoughToInline, except that it has no
 595 actual arguments.
 596
 597 \begin{code}
 598 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
 599 couldBeSmallEnoughToInline threshold rhs
 600   = case calcUnfoldingGuidance False False threshold rhs of
 601        (_, UnfNever) -> False
 602        _             -> True
 603
 604 ----------------
 605 smallEnoughToInline :: Unfolding -> Bool
 606 smallEnoughToInline (CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_size = size}})
 607   = size <= opt_UF_UseThreshold
 608 smallEnoughToInline _
 609   = False
 610
 611 ----------------
 612 certainlyWillInline :: Unfolding -> Bool
 613   -- Sees if the unfolding is pretty certain to inline
 614 certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
 615   = case guidance of
 616       UnfNever      -> False
 617       UnfWhen {}    -> True
 618       UnfIfGoodArgs { ug_size = size}
 619                     -> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
 620
 621 certainlyWillInline _
 622   = False
 623 \end{code}
 624
 625 %************************************************************************
 626 %*                                                                      *
 627 \subsection{callSiteInline}
 628 %*                                                                      *
 629 %************************************************************************
 630
 631 This is the key function.  It decides whether to inline a variable at a call site
 632
 633 callSiteInline is used at call sites, so it is a bit more generous.
 634 It's a very important function that embodies lots of heuristics.
 635 A non-WHNF can be inlined if it doesn't occur inside a lambda,
 636 and occurs exactly once or
 637     occurs once in each branch of a case and is small
 638
 639 If the thing is in WHNF, there's no danger of duplicating work,
 640 so we can inline if it occurs once, or is small
 641
 642 NOTE: we don't want to inline top-level functions that always diverge.
 643 It just makes the code bigger.  Tt turns out that the convenient way to prevent
 644 them inlining is to give them a NOINLINE pragma, which we do in
 645 StrictAnal.addStrictnessInfoToTopId
 646
 647 \begin{code}
 648 callSiteInline :: DynFlags
 649                -> Id                    -- The Id
 650                -> Unfolding             -- Its unfolding (if active)
 651                -> Bool                  -- True if there are are no arguments at all (incl type args)
 652                -> [ArgSummary]          -- One for each value arg; True if it is interesting
 653                -> CallCtxt              -- True <=> continuation is interesting
 654                -> Maybe CoreExpr        -- Unfolding, if any
 655
 656
 657 instance Outputable ArgSummary where
 658   ppr TrivArg    = ptext (sLit "TrivArg")
 659   ppr NonTrivArg = ptext (sLit "NonTrivArg")
 660   ppr ValueArg   = ptext (sLit "ValueArg")
 661
 662 data CallCtxt = BoringCtxt
 663
 664               | ArgCtxt         -- We are somewhere in the argument of a function
 665                         Bool    -- True  <=> we're somewhere in the RHS of function with rules
 666                                 -- False <=> we *are* the argument of a function with non-zero
 667                                 --           arg discount
 668                                 --        OR
 669                                 --           we *are* the RHS of a let  Note [RHS of lets]
 670                                 -- In both cases, be a little keener to inline
 671
 672               | ValAppCtxt      -- We're applied to at least one value arg
 673                                 -- This arises when we have ((f x |> co) y)
 674                                 -- Then the (f x) has argument 'x' but in a ValAppCtxt
 675
 676               | CaseCtxt        -- We're the scrutinee of a case
 677                                 -- that decomposes its scrutinee
 678
 679 instance Outputable CallCtxt where
 680   ppr BoringCtxt      = ptext (sLit "BoringCtxt")
 681   ppr (ArgCtxt rules) = ptext (sLit "ArgCtxt") <+> ppr rules
 682   ppr CaseCtxt        = ptext (sLit "CaseCtxt")
 683   ppr ValAppCtxt      = ptext (sLit "ValAppCtxt")
 684
 685 callSiteInline dflags id unfolding lone_variable arg_infos cont_info
 686   = case unfolding of {
 687         NoUnfolding      -> Nothing ;
 688         OtherCon _       -> Nothing ;
 689         DFunUnfolding {} -> Nothing ;   -- Never unfold a DFun
 690         CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value,
 691                         uf_is_cheap = is_cheap, uf_arity = uf_arity, uf_guidance = guidance } ->
 692                         -- uf_arity will typically be equal to (idArity id),
 693                         -- but may be less for InlineRules
 694     let
 695         n_val_args = length arg_infos
 696         saturated  = n_val_args >= uf_arity
 697
 698         result | yes_or_no = Just unf_template
 699                | otherwise = Nothing
 700
 701         interesting_args = any nonTriv arg_infos
 702                 -- NB: (any nonTriv arg_infos) looks at the
 703                 -- over-saturated args too which is "wrong";
 704                 -- but if over-saturated we inline anyway.
 705
 706                -- some_benefit is used when the RHS is small enough
 707                -- and the call has enough (or too many) value
 708                -- arguments (ie n_val_args >= arity). But there must
 709                -- be *something* interesting about some argument, or the
 710                -- result context, to make it worth inlining
 711         some_benefit
 712            | not saturated = interesting_args   -- Under-saturated
 713                                                 -- Note [Unsaturated applications]
 714            | n_val_args > uf_arity = True       -- Over-saturated
 715            | otherwise = interesting_args       -- Saturated
 716                       || interesting_saturated_call
 717
 718         interesting_saturated_call
 719           = case cont_info of
 720               BoringCtxt -> not is_top && uf_arity > 0          -- Note [Nested functions]
 721               CaseCtxt   -> not (lone_variable && is_value)     -- Note [Lone variables]
 722               ArgCtxt {} -> uf_arity > 0                        -- Note [Inlining in ArgCtxt]
 723               ValAppCtxt -> True                                -- Note [Cast then apply]
 724
 725         (yes_or_no, extra_doc)
 726           = case guidance of
 727               UnfNever -> (False, empty)
 728
 729               UnfWhen unsat_ok boring_ok -> ( (unsat_ok  || saturated)
 730                                            && (boring_ok || some_benefit)
 731                                             , empty )
 732                    -- For the boring_ok part see Note [INLINE for small functions]
 733
 734               UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
 735                  -> ( is_cheap && some_benefit && small_enough
 736                     , (text "discounted size =" <+> int discounted_size) )
 737                  where
 738                    discounted_size = size - discount
 739                    small_enough = discounted_size <= opt_UF_UseThreshold
 740                    discount = computeDiscount uf_arity arg_discounts
 741                                               res_discount arg_infos cont_info
 742
 743     in
 744     if (dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags) then
 745         pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
 746                  (vcat [text "arg infos" <+> ppr arg_infos,
 747                         text "uf arity" <+> ppr uf_arity,
 748                         text "interesting continuation" <+> ppr cont_info,
 749                         text "some_benefit" <+> ppr some_benefit,
 750                         text "is value:" <+> ppr is_value,
 751                         text "is cheap:" <+> ppr is_cheap,
 752                         text "guidance" <+> ppr guidance,
 753                         extra_doc,
 754                         text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
 755                   result
 756     else
 757     result
 758     }
 759 \end{code}
 760
 761 Note [RHS of lets]
 762 ~~~~~~~~~~~~~~~~~~
 763 Be a tiny bit keener to inline in the RHS of a let, because that might
 764 lead to good thing later
 765      f y = (y,y,y)
 766      g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
 767 We'd inline 'f' if the call was in a case context, and it kind-of-is,
 768 only we can't see it.  So we treat the RHS of a let as not-totally-boring.
 769
 770 Note [Unsaturated applications]
 771 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 772 When a call is not saturated, we *still* inline if one of the
 773 arguments has interesting structure.  That's sometimes very important.
 774 A good example is the Ord instance for Bool in Base:
 775
 776  Rec {
 777     $fOrdBool =GHC.Classes.D:Ord
 778                  @ Bool
 779                  ...
 780                  $cmin_ajX
 781
 782     $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
 783     $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
 784   }
 785
 786 But the defn of GHC.Classes.$dmmin is:
 787
 788   $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
 789     {- Arity: 3, HasNoCafRefs, Strictness: SLL,
 790        Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
 791                    case @ a GHC.Classes.<= @ a $dOrd x y of wild {
 792                      GHC.Bool.False -> y GHC.Bool.True -> x }) -}
 793
 794 We *really* want to inline $dmmin, even though it has arity 3, in
 795 order to unravel the recursion.
 796
 797
 798 Note [INLINE for small functions]
 799 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 800 Consider        {-# INLINE f #-}
 801                 f x = Just x
 802                 g y = f y
 803 Then f's RHS is no larger than its LHS, so we should inline it
 804 into even the most boring context.  (We do so if there is no INLINE
 805 pragma!)
 806
 807
 808 Note [Things to watch]
 809 ~~~~~~~~~~~~~~~~~~~~~~
 810 *   { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
 811     Assume x is exported, so not inlined unconditionally.
 812     Then we want x to inline unconditionally; no reason for it
 813     not to, and doing so avoids an indirection.
 814
 815 *   { x = I# 3; ....f x.... }
 816     Make sure that x does not inline unconditionally!
 817     Lest we get extra allocation.
 818
 819 Note [Inlining an InlineRule]
 820 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 821 An InlineRules is used for
 822   (a) programmer INLINE pragmas
 823   (b) inlinings from worker/wrapper
 824
 825 For (a) the RHS may be large, and our contract is that we *only* inline
 826 when the function is applied to all the arguments on the LHS of the
 827 source-code defn.  (The uf_arity in the rule.)
 828
 829 However for worker/wrapper it may be worth inlining even if the
 830 arity is not satisfied (as we do in the CoreUnfolding case) so we don't
 831 require saturation.
 832
 833
 834 Note [Nested functions]
 835 ~~~~~~~~~~~~~~~~~~~~~~~
 836 If a function has a nested defn we also record some-benefit, on the
 837 grounds that we are often able to eliminate the binding, and hence the
 838 allocation, for the function altogether; this is good for join points.
 839 But this only makes sense for *functions*; inlining a constructor
 840 doesn't help allocation unless the result is scrutinised.  UNLESS the
 841 constructor occurs just once, albeit possibly in multiple case
 842 branches.  Then inlining it doesn't increase allocation, but it does
 843 increase the chance that the constructor won't be allocated at all in
 844 the branches that don't use it.
 845
 846 Note [Cast then apply]
 847 ~~~~~~~~~~~~~~~~~~~~~~
 848 Consider
 849    myIndex = __inline_me ( (/\a. <blah>) |> co )
 850    co :: (forall a. a -> a) ~ (forall a. T a)
 851      ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
 852
 853 We need to inline myIndex to unravel this; but the actual call (myIndex a) has
 854 no value arguments.  The ValAppCtxt gives it enough incentive to inline.
 855
 856 Note [Inlining in ArgCtxt]
 857 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 858 The condition (arity > 0) here is very important, because otherwise
 859 we end up inlining top-level stuff into useless places; eg
 860    x = I# 3#
 861    f = \y.  g x
 862 This can make a very big difference: it adds 16% to nofib 'integer' allocs,
 863 and 20% to 'power'.
 864
 865 At one stage I replaced this condition by 'True' (leading to the above
 866 slow-down).  The motivation was test eyeball/inline1.hs; but that seems
 867 to work ok now.
 868
 869 NOTE: arguably, we should inline in ArgCtxt only if the result of the
 870 call is at least CONLIKE.  At least for the cases where we use ArgCtxt
 871 for the RHS of a 'let', we only profit from the inlining if we get a
 872 CONLIKE thing (modulo lets).
 873
 874 Note [Lone variables]
 875 ~~~~~~~~~~~~~~~~~~~~~
 876 The "lone-variable" case is important.  I spent ages messing about
 877 with unsatisfactory varaints, but this is nice.  The idea is that if a
 878 variable appears all alone
 879
 880         as an arg of lazy fn, or rhs    BoringCtxt
 881         as scrutinee of a case          CaseCtxt
 882         as arg of a fn                  ArgCtxt
 883 AND
 884         it is bound to a value
 885
 886 then we should not inline it (unless there is some other reason,
 887 e.g. is is the sole occurrence).  That is what is happening at
 888 the use of 'lone_variable' in 'interesting_saturated_call'.
 889
 890 Why?  At least in the case-scrutinee situation, turning
 891         let x = (a,b) in case x of y -> ...
 892 into
 893         let x = (a,b) in case (a,b) of y -> ...
 894 and thence to
 895         let x = (a,b) in let y = (a,b) in ...
 896 is bad if the binding for x will remain.
 897
 898 Another example: I discovered that strings
 899 were getting inlined straight back into applications of 'error'
 900 because the latter is strict.
 901         s = "foo"
 902         f = \x -> ...(error s)...
 903
 904 Fundamentally such contexts should not encourage inlining because the
 905 context can ``see'' the unfolding of the variable (e.g. case or a
 906 RULE) so there's no gain.  If the thing is bound to a value.
 907
 908 However, watch out:
 909
 910  * Consider this:
 911         foo = _inline_ (\n. [n])
 912         bar = _inline_ (foo 20)
 913         baz = \n. case bar of { (m:_) -> m + n }
 914    Here we really want to inline 'bar' so that we can inline 'foo'
 915    and the whole thing unravels as it should obviously do.  This is
 916    important: in the NDP project, 'bar' generates a closure data
 917    structure rather than a list.
 918
 919    So the non-inlining of lone_variables should only apply if the
 920    unfolding is regarded as cheap; because that is when exprIsConApp_maybe
 921    looks through the unfolding.  Hence the "&& is_cheap" in the
 922    InlineRule branch.
 923
 924  * Even a type application or coercion isn't a lone variable.
 925    Consider
 926         case $fMonadST @ RealWorld of { :DMonad a b c -> c }
 927    We had better inline that sucker!  The case won't see through it.
 928
 929    For now, I'm treating treating a variable applied to types
 930    in a *lazy* context "lone". The motivating example was
 931         f = /\a. \x. BIG
 932         g = /\a. \y.  h (f a)
 933    There's no advantage in inlining f here, and perhaps
 934    a significant disadvantage.  Hence some_val_args in the Stop case
 935
 936 \begin{code}
 937 computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
 938 computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
 939         -- We multiple the raw discounts (args_discount and result_discount)
 940         -- ty opt_UnfoldingKeenessFactor because the former have to do with
 941         --  *size* whereas the discounts imply that there's some extra
 942         --  *efficiency* to be gained (e.g. beta reductions, case reductions)
 943         -- by inlining.
 944
 945   = 1           -- Discount of 1 because the result replaces the call
 946                 -- so we count 1 for the function itself
 947
 948     + length (take n_vals_wanted arg_infos)
 949                -- Discount of (un-scaled) 1 for each arg supplied,
 950                -- because the result replaces the call
 951
 952     + round (opt_UF_KeenessFactor *
 953              fromIntegral (arg_discount + res_discount'))
 954   where
 955     arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
 956
 957     mk_arg_discount _        TrivArg    = 0
 958     mk_arg_discount _        NonTrivArg = 1
 959     mk_arg_discount discount ValueArg   = discount
 960
 961     res_discount' = case cont_info of
 962                         BoringCtxt  -> 0
 963                         CaseCtxt    -> res_discount
 964                         _other      -> 4 `min` res_discount
 965                 -- res_discount can be very large when a function returns
 966                 -- constructors; but we only want to invoke that large discount
 967                 -- when there's a case continuation.
 968                 -- Otherwise we, rather arbitrarily, threshold it.  Yuk.
 969                 -- But we want to aovid inlining large functions that return
 970                 -- constructors into contexts that are simply "interesting"
 971 \end{code}
 972
 973 %************************************************************************
 974 %*                                                                      *
 975         Interesting arguments
 976 %*                                                                      *
 977 %************************************************************************
 978
 979 Note [Interesting arguments]
 980 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 981 An argument is interesting if it deserves a discount for unfoldings
 982 with a discount in that argument position.  The idea is to avoid
 983 unfolding a function that is applied only to variables that have no
 984 unfolding (i.e. they are probably lambda bound): f x y z There is
 985 little point in inlining f here.
 986
 987 Generally, *values* (like (C a b) and (\x.e)) deserve discounts.  But
 988 we must look through lets, eg (let x = e in C a b), because the let will
 989 float, exposing the value, if we inline.  That makes it different to
 990 exprIsHNF.
 991
 992 Before 2009 we said it was interesting if the argument had *any* structure
 993 at all; i.e. (hasSomeUnfolding v).  But does too much inlining; see Trac #3016.
 994
 995 But we don't regard (f x y) as interesting, unless f is unsaturated.
 996 If it's saturated and f hasn't inlined, then it's probably not going
 997 to now!
 998
 999 Note [Conlike is interesting]
1000 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1001 Consider
1002         f d = ...((*) d x y)...
1003         ... f (df d')...
1004 where df is con-like. Then we'd really like to inline 'f' so that the
1005 rule for (*) (df d) can fire.  To do this
1006   a) we give a discount for being an argument of a class-op (eg (*) d)
1007   b) we say that a con-like argument (eg (df d)) is interesting
1008
1009 \begin{code}
1010 data ArgSummary = TrivArg       -- Nothing interesting
1011                 | NonTrivArg    -- Arg has structure
1012                 | ValueArg      -- Arg is a con-app or PAP
1013                                 -- ..or con-like. Note [Conlike is interesting]
1014
1015 interestingArg :: CoreExpr -> ArgSummary
1016 -- See Note [Interesting arguments]
1017 interestingArg e = go e 0
1018   where
1019     -- n is # value args to which the expression is applied
1020     go (Lit {}) _          = ValueArg
1021     go (Var v)  n
1022        | isConLikeId v     = ValueArg   -- Experimenting with 'conlike' rather that
1023                                         --    data constructors here
1024        | idArity v > n     = ValueArg   -- Catches (eg) primops with arity but no unfolding
1025        | n > 0             = NonTrivArg -- Saturated or unknown call
1026        | conlike_unfolding = ValueArg   -- n==0; look for an interesting unfolding
1027                                         -- See Note [Conlike is interesting]
1028        | otherwise         = TrivArg    -- n==0, no useful unfolding
1029        where
1030          conlike_unfolding = isConLikeUnfolding (idUnfolding v)
1031
1032     go (Type _)          _ = TrivArg
1033     go (App fn (Type _)) n = go fn n
1034     go (App fn _)        n = go fn (n+1)
1035     go (Note _ a)        n = go a n
1036     go (Cast e _)        n = go e n
1037     go (Lam v e)         n
1038        | isTyVar v         = go e n
1039        | n>0               = go e (n-1)
1040        | otherwise         = ValueArg
1041     go (Let _ e)         n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg }
1042     go (Case {})         _ = NonTrivArg
1043
1044 nonTriv ::  ArgSummary -> Bool
1045 nonTriv TrivArg = False
1046 nonTriv _       = True
1047 \end{code}
1048
1049 %************************************************************************
1050 %*                                                                      *
1051          exprIsConApp_maybe
1052 %*                                                                      *
1053 %************************************************************************
1054
1055 Note [exprIsConApp_maybe]
1056 ~~~~~~~~~~~~~~~~~~~~~~~~~
1057 exprIsConApp_maybe is a very important function.  There are two principal
1058 uses:
1059   * case e of { .... }
1060   * cls_op e, where cls_op is a class operation
1061
1062 In both cases you want to know if e is of form (C e1..en) where C is
1063 a data constructor.
1064
1065 However e might not *look* as if
1066
1067 \begin{code}
1068 -- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is
1069 -- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@,
1070 -- where t1..tk are the *universally-qantified* type args of 'dc'
1071 exprIsConApp_maybe :: IdUnfoldingFun -> CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
1072
1073 exprIsConApp_maybe id_unf (Note _ expr)
1074   = exprIsConApp_maybe id_unf expr
1075         -- We ignore all notes.  For example,
1076         --      case _scc_ "foo" (C a b) of
1077         --                      C a b -> e
1078         -- should be optimised away, but it will be only if we look
1079         -- through the SCC note.
1080
1081 exprIsConApp_maybe id_unf (Cast expr co)
1082   =     -- Here we do the KPush reduction rule as described in the FC paper
1083         -- The transformation applies iff we have
1084         --      (C e1 ... en) `cast` co
1085         -- where co :: (T t1 .. tn) ~ to_ty
1086         -- The left-hand one must be a T, because exprIsConApp returned True
1087         -- but the right-hand one might not be.  (Though it usually will.)
1088
1089     case exprIsConApp_maybe id_unf expr of {
1090         Nothing                          -> Nothing ;
1091         Just (dc, _dc_univ_args, dc_args) ->
1092
1093     let (_from_ty, to_ty) = coercionKind co
1094         dc_tc = dataConTyCon dc
1095     in
1096     case splitTyConApp_maybe to_ty of {
1097         Nothing -> Nothing ;
1098         Just (to_tc, to_tc_arg_tys)
1099                 | dc_tc /= to_tc -> Nothing
1100                 -- These two Nothing cases are possible; we might see
1101                 --      (C x y) `cast` (g :: T a ~ S [a]),
1102                 -- where S is a type function.  In fact, exprIsConApp
1103                 -- will probably not be called in such circumstances,
1104                 -- but there't nothing wrong with it
1105
1106                 | otherwise  ->
1107     let
1108         tc_arity       = tyConArity dc_tc
1109         dc_univ_tyvars = dataConUnivTyVars dc
1110         dc_ex_tyvars   = dataConExTyVars dc
1111         arg_tys        = dataConRepArgTys dc
1112
1113         dc_eqs :: [(Type,Type)]   -- All equalities from the DataCon
1114         dc_eqs = [(mkTyVarTy tv, ty)   | (tv,ty) <- dataConEqSpec dc] ++
1115                  [getEqPredTys eq_pred | eq_pred <- dataConEqTheta dc]
1116
1117         (ex_args, rest1)    = splitAtList dc_ex_tyvars dc_args
1118         (co_args, val_args) = splitAtList dc_eqs rest1
1119
1120         -- Make the "theta" from Fig 3 of the paper
1121         gammas = decomposeCo tc_arity co
1122         theta  = zipOpenTvSubst (dc_univ_tyvars ++ dc_ex_tyvars)
1123                                 (gammas         ++ stripTypeArgs ex_args)
1124
1125           -- Cast the existential coercion arguments
1126         cast_co (ty1, ty2) (Type co)
1127           = Type $ mkSymCoercion (substTy theta ty1)
1128                    `mkTransCoercion` co
1129                    `mkTransCoercion` (substTy theta ty2)
1130         cast_co _ other_arg = pprPanic "cast_co" (ppr other_arg)
1131         new_co_args = zipWith cast_co dc_eqs co_args
1132
1133           -- Cast the value arguments (which include dictionaries)
1134         new_val_args = zipWith cast_arg arg_tys val_args
1135         cast_arg arg_ty arg = mkCoerce (substTy theta arg_ty) arg
1136     in
1137 #ifdef DEBUG
1138     let dump_doc = vcat [ppr dc,      ppr dc_univ_tyvars, ppr dc_ex_tyvars,
1139                          ppr arg_tys, ppr dc_args,        ppr _dc_univ_args,
1140                          ppr ex_args, ppr val_args]
1141     in
1142     ASSERT2( coreEqType _from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
1143     ASSERT2( all isTypeArg (ex_args ++ co_args), dump_doc )
1144     ASSERT2( equalLength val_args arg_tys, dump_doc )
1145 #endif
1146
1147     Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args)
1148     }}
1149
1150 exprIsConApp_maybe id_unf expr
1151   = analyse expr []
1152   where
1153     analyse (App fun arg) args = analyse fun (arg:args)
1154     analyse fun@(Lam {})  args = beta fun [] args
1155
1156     analyse (Var fun) args
1157         | Just con <- isDataConWorkId_maybe fun
1158         , is_saturated
1159         , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args
1160         = Just (con, stripTypeArgs univ_ty_args, rest_args)
1161
1162         -- Look through dictionary functions; see Note [Unfolding DFuns]
1163         | DFunUnfolding con ops <- unfolding
1164         , is_saturated
1165         , let (dfun_tvs, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun)
1166               subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args))
1167         = Just (con, substTys subst dfun_res_tys,
1168                      [mkApps op args | op <- ops])
1169
1170         -- Look through unfoldings, but only cheap ones, because
1171         -- we are effectively duplicating the unfolding
1172         | Just rhs <- expandUnfolding_maybe unfolding
1173         = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
1174           analyse rhs args
1175         where
1176           is_saturated = count isValArg args == idArity fun
1177           unfolding = id_unf fun
1178
1179     analyse _ _ = Nothing
1180
1181     -----------
1182     beta (Lam v body) pairs (arg : args)
1183         | isTypeArg arg
1184         = beta body ((v,arg):pairs) args
1185
1186     beta (Lam {}) _ _    -- Un-saturated, or not a type lambda
1187         = Nothing
1188
1189     beta fun pairs args
1190         = case analyse (substExpr subst fun) args of
1191             Nothing  -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $
1192                         Nothing
1193             Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $
1194                         Just ans
1195         where
1196           subst = mkOpenSubst (mkInScopeSet (exprFreeVars fun)) pairs
1197           -- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args]
1198
1199
1200 stripTypeArgs :: [CoreExpr] -> [Type]
1201 stripTypeArgs args = ASSERT2( all isTypeArg args, ppr args )
1202                      [ty | Type ty <- args]
1203 \end{code}
1204
1205 Note [Unfolding DFuns]
1206 ~~~~~~~~~~~~~~~~~~~~~~
1207 DFuns look like
1208
1209   df :: forall a b. (Eq a, Eq b) -> Eq (a,b)
1210   df a b d_a d_b = MkEqD (a,b) ($c1 a b d_a d_b)
1211                                ($c2 a b d_a d_b)
1212
1213 So to split it up we just need to apply the ops $c1, $c2 etc
1214 to the very same args as the dfun.  It takes a little more work
1215 to compute the type arguments to the dictionary constructor.
1216