compiler/coreSyn/CoreUnfold.lhs

   1 %
   2 % (c) The University of Glasgow 2006
   3 % (c) The AQUA Project, Glasgow University, 1994-1998
   4 %
   5
   6 Core-syntax unfoldings
   7
   8 Unfoldings (which can travel across module boundaries) are in Core
   9 syntax (namely @CoreExpr@s).
  10
  11 The type @Unfolding@ sits ``above'' simply-Core-expressions
  12 unfoldings, capturing ``higher-level'' things we know about a binding,
  13 usually things that the simplifier found out (e.g., ``it's a
  14 literal'').  In the corner of a @CoreUnfolding@ unfolding, you will
  15 find, unsurprisingly, a Core expression.
  16
  17 \begin{code}
  18 module CoreUnfold (
  19         Unfolding, UnfoldingGuidance,   -- Abstract types
  20
  21         noUnfolding, mkImplicitUnfolding,
  22         mkTopUnfolding, mkUnfolding, mkCoreUnfolding,
  23         mkInlineRule, mkWwInlineRule,
  24         mkCompulsoryUnfolding, mkDFunUnfolding,
  25
  26         interestingArg, ArgSummary(..),
  27
  28         couldBeSmallEnoughToInline,
  29         certainlyWillInline, smallEnoughToInline,
  30
  31         callSiteInline, CallCtxt(..),
  32
  33         exprIsConApp_maybe
  34
  35     ) where
  36
  37 #include "HsVersions.h"
  38
  39 import StaticFlags
  40 import DynFlags
  41 import CoreSyn
  42 import PprCore          ()      -- Instances
  43 import OccurAnal
  44 import CoreSubst hiding( substTy )
  45 import CoreFVs         ( exprFreeVars )
  46 import CoreUtils
  47 import Id
  48 import DataCon
  49 import TyCon
  50 import Literal
  51 import PrimOp
  52 import IdInfo
  53 import BasicTypes       ( Arity )
  54 import TcType           ( tcSplitDFunTy )
  55 import Type
  56 import Coercion
  57 import PrelNames
  58 import VarEnv           ( mkInScopeSet )
  59 import Bag
  60 import Util
  61 import FastTypes
  62 import FastString
  63 import Outputable
  64
  65 \end{code}
  66
  67
  68 %************************************************************************
  69 %*                                                                      *
  70 \subsection{Making unfoldings}
  71 %*                                                                      *
  72 %************************************************************************
  73
  74 \begin{code}
  75 mkTopUnfolding :: CoreExpr -> Unfolding
  76 mkTopUnfolding expr = mkUnfolding True {- Top level -} expr
  77
  78 mkImplicitUnfolding :: CoreExpr -> Unfolding
  79 -- For implicit Ids, do a tiny bit of optimising first
  80 mkImplicitUnfolding expr = mkTopUnfolding (simpleOptExpr expr)
  81
  82 -- Note [Top-level flag on inline rules]
  83 -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  84 -- Slight hack: note that mkInlineRule conservatively sets the
  85 -- top-level flag to True.  It gets set more accurately by the simplifier
  86 -- Simplify.simplUnfolding.
  87
  88 mkUnfolding :: Bool -> CoreExpr -> Unfolding
  89 mkUnfolding top_lvl expr
  90   = mkCoreUnfolding top_lvl expr arity guidance
  91   where
  92     (arity, guidance) = calcUnfoldingGuidance opt_UF_CreationThreshold expr
  93         -- Sometimes during simplification, there's a large let-bound thing
  94         -- which has been substituted, and so is now dead; so 'expr' contains
  95         -- two copies of the thing while the occurrence-analysed expression doesn't
  96         -- Nevertheless, we don't occ-analyse before computing the size because the
  97         -- size computation bales out after a while, whereas occurrence analysis does not.
  98         --
  99         -- This can occasionally mean that the guidance is very pessimistic;
 100         -- it gets fixed up next round
 101
 102 mkCoreUnfolding :: Bool -> CoreExpr -> Arity -> UnfoldingGuidance -> Unfolding
 103 -- Occurrence-analyses the expression before capturing it
 104 mkCoreUnfolding top_lvl expr arity guidance
 105   = CoreUnfolding { uf_tmpl       = occurAnalyseExpr expr,
 106                     uf_arity      = arity,
 107                     uf_is_top     = top_lvl,
 108                     uf_is_value   = exprIsHNF        expr,
 109                     uf_is_conlike = exprIsConLike    expr,
 110                     uf_is_cheap   = exprIsCheap      expr,
 111                     uf_expandable = exprIsExpandable expr,
 112                     uf_guidance   = guidance }
 113
 114 mkDFunUnfolding :: DataCon -> [Id] -> Unfolding
 115 mkDFunUnfolding con ops = DFunUnfolding con (map Var ops)
 116
 117 mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding
 118 mkWwInlineRule id expr arity
 119   = mkCoreUnfolding True (simpleOptExpr expr) arity
 120          (InlineRule { ir_sat = InlUnSat, ir_info = InlWrapper id })
 121
 122 mkCompulsoryUnfolding :: CoreExpr -> Unfolding
 123 mkCompulsoryUnfolding expr         -- Used for things that absolutely must be unfolded
 124   = mkCoreUnfolding True expr 0    -- Arity of unfolding doesn't matter
 125                     (InlineRule { ir_info = InlAlways, ir_sat = InlUnSat })
 126
 127 mkInlineRule :: InlSatFlag -> CoreExpr -> Arity -> Unfolding
 128 mkInlineRule sat expr arity
 129   = mkCoreUnfolding True         -- Note [Top-level flag on inline rules]
 130                     expr' arity
 131                     (InlineRule { ir_sat = sat, ir_info = info })
 132   where
 133     expr' = simpleOptExpr expr
 134     info = if small then InlSmall else InlVanilla
 135     small = case calcUnfoldingGuidance (arity+1) expr' of
 136               (arity_e, UnfoldIfGoodArgs { ug_size = size_e })
 137                    -> uncondInline arity_e size_e
 138               _other {- actually UnfoldNever -} -> False
 139 \end{code}
 140
 141
 142 %************************************************************************
 143 %*                                                                      *
 144 \subsection{The UnfoldingGuidance type}
 145 %*                                                                      *
 146 %************************************************************************
 147
 148 \begin{code}
 149 calcUnfoldingGuidance
 150         :: Int                  -- bomb out if size gets bigger than this
 151         -> CoreExpr             -- expression to look at
 152         -> (Arity, UnfoldingGuidance)
 153 calcUnfoldingGuidance bOMB_OUT_SIZE expr
 154   = case collectBinders expr of { (binders, body) ->
 155     let
 156         val_binders = filter isId binders
 157         n_val_binders = length val_binders
 158     in
 159     case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of
 160       TooBig -> (n_val_binders, UnfoldNever)
 161       SizeIs size cased_args scrut_discount
 162         -> (n_val_binders, UnfoldIfGoodArgs { ug_args  = map discount_for val_binders
 163                                             , ug_size  = iBox size
 164                                             , ug_res   = iBox scrut_discount })
 165         where
 166             discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc)
 167                                       0 cased_args
 168     }
 169 \end{code}
 170
 171 Note [Computing the size of an expression]
 172 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 173 The basic idea of sizeExpr is obvious enough: count nodes.  But getting the
 174 heuristics right has taken a long time.  Here's the basic strategy:
 175
 176     * Variables, literals: 0
 177       (Exception for string literals, see litSize.)
 178
 179     * Function applications (f e1 .. en): 1 + #value args
 180
 181     * Constructor applications: 1, regardless of #args
 182
 183     * Let(rec): 1 + size of components
 184
 185     * Note, cast: 0
 186
 187 Examples
 188
 189   Size  Term
 190   --------------
 191     0     42#
 192     0     x
 193     2     f x
 194     1     Just x
 195     4     f (g x)
 196
 197 Notice that 'x' counts 0, while (f x) counts 2.  That's deliberate: there's
 198 a function call to account for.  Notice also that constructor applications
 199 are very cheap, because exposing them to a caller is so valuable.
 200
 201 Note [Unconditional inlining]
 202 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  203 We inline *unconditionally* if the inlined thing is no bigger (using sizeExpr)
  204 than the thing it's replacing.  Notice that
 205       (f x) --> (g 3)             -- YES, unconditionally
 206       (f x) --> x : []            -- YES, *even though* there are two
 207                                   --      arguments to the cons
 208       x     --> g 3               -- NO
 209       x     --> Just v            -- NO
 210
 211 It's very important not to unconditionally replace a variable by
 212 a non-atomic term.
 213
 214 \begin{code}
 215 uncondInline :: Arity -> Int -> Bool
 216 -- Inline unconditionally if there no size increase
 217 -- Size of call is arity (+1 for the function)
 218 -- See Note [Unconditional inlining]
 219 uncondInline arity size
 220   | arity == 0 = size == 0
 221   | otherwise  = size <= arity + 1
 222 \end{code}
 223
 224
 225 \begin{code}
 226 sizeExpr :: FastInt         -- Bomb out if it gets bigger than this
 227          -> [Id]            -- Arguments; we're interested in which of these
 228                             -- get case'd
 229          -> CoreExpr
 230          -> ExprSize
 231
 232 -- Note [Computing the size of an expression]
 233
 234 sizeExpr bOMB_OUT_SIZE top_args expr
 235   = size_up expr
 236   where
 237     size_up (Cast e _) = size_up e
 238     size_up (Note _ e) = size_up e
 239     size_up (Type _)   = sizeZero           -- Types cost nothing
 240     size_up (Lit lit)  = sizeN (litSize lit)
 241     size_up (Var f)    = size_up_call f []  -- Make sure we get constructor
 242                                             -- discounts even on nullary constructors
 243
 244     size_up (App fun (Type _)) = size_up fun
 245     size_up (App fun arg)      = size_up_app fun [arg]
 246                                   `addSize` nukeScrutDiscount (size_up arg)
 247
 248     size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 1)
 249                       | otherwise = size_up e
 250
 251     size_up (Let (NonRec binder rhs) body)
 252       = nukeScrutDiscount (size_up rhs)         `addSize`
 253         size_up body                            `addSizeN`
 254         (if isUnLiftedType (idType binder) then 0 else 1)
 255                 -- For the allocation
 256                 -- If the binder has an unlifted type there is no allocation
 257
 258     size_up (Let (Rec pairs) body)
 259       = nukeScrutDiscount rhs_size              `addSize`
 260         size_up body                            `addSizeN`
 261         length pairs            -- For the allocation
 262       where
 263         rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
 264
 265     size_up (Case (Var v) _ _ alts)
 266         | v `elem` top_args             -- We are scrutinising an argument variable
 267         = alts_size (foldr addSize sizeOne alt_sizes)   -- The 1 is for the case itself
 268                     (foldr1 maxSize alt_sizes)
 269                 -- Good to inline if an arg is scrutinised, because
 270                 -- that may eliminate allocation in the caller
 271                 -- And it eliminates the case itself
 272         where
 273           alt_sizes = map size_up_alt alts
 274
 275                 -- alts_size tries to compute a good discount for
 276                 -- the case when we are scrutinising an argument variable
 277           alts_size (SizeIs tot tot_disc _tot_scrut)           -- Size of all alternatives
 278                     (SizeIs max _max_disc  max_scrut)           -- Size of biggest alternative
 279                 = SizeIs tot (unitBag (v, iBox (_ILIT(1) +# tot -# max)) `unionBags` tot_disc) max_scrut
 280                         -- If the variable is known, we produce a discount that
 281                         -- will take us back to 'max', the size of the largest alternative
 282                         -- The 1+ is a little discount for reduced allocation in the caller
 283                         --
 284                         -- Notice though, that we return tot_disc, the total discount from
 285                         -- all branches.  I think that's right.
 286
 287           alts_size tot_size _ = tot_size
 288
 289     size_up (Case e _ _ alts) = foldr (addSize . size_up_alt)
 290                                       (nukeScrutDiscount (size_up e))
 291                                       alts
 292                                 `addSizeN` 1    -- Add 1 for the case itself
 293                 -- We don't charge for the case itself
 294                 -- It's a strict thing, and the price of the call
 295                 -- is paid by scrut.  Also consider
 296                 --      case f x of DEFAULT -> e
 297                 -- This is just ';'!  Don't charge for it.
 298
 299     ------------
 300     -- size_up_app is used when there's ONE OR MORE value args
 301     size_up_app (App fun arg) args
 302         | isTypeArg arg            = size_up_app fun args
 303         | otherwise                = size_up_app fun (arg:args)
 304                                      `addSize` nukeScrutDiscount (size_up arg)
 305     size_up_app (Var fun)     args = size_up_call fun args
 306     size_up_app other         args = size_up other `addSizeN` length args
 307
 308     ------------
 309     size_up_call :: Id -> [CoreExpr] -> ExprSize
 310     size_up_call fun val_args
 311        = case idDetails fun of
 312            FCallId _        -> sizeN opt_UF_DearOp
 313            DataConWorkId dc -> conSize    dc (length val_args)
 314            PrimOpId op      -> primOpSize op (length val_args)
 315            ClassOpId _      -> classOpSize top_args val_args
 316            _                -> funSize top_args fun (length val_args)
 317
 318     ------------
 319     size_up_alt (_con, _bndrs, rhs) = size_up rhs
 320         -- Don't charge for args, so that wrappers look cheap
 321         -- (See comments about wrappers with Case)
 322
 323     ------------
 324         -- These addSize things have to be here because
 325         -- I don't want to give them bOMB_OUT_SIZE as an argument
 326     addSizeN TooBig          _  = TooBig
 327     addSizeN (SizeIs n xs d) m  = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
 328
 329     addSize TooBig            _                 = TooBig
 330     addSize _                 TooBig            = TooBig
 331     addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
 332         = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2)
 333 \end{code}
 334
 335 \begin{code}
 336 -- | Finds a nominal size of a string literal.
 337 litSize :: Literal -> Int
 338 -- Used by CoreUnfold.sizeExpr
 339 litSize (MachStr str) = 1 + ((lengthFS str + 3) `div` 4)
 340         -- If size could be 0 then @f "x"@ might be too small
 341         -- [Sept03: make literal strings a bit bigger to avoid fruitless
 342         --  duplication of little strings]
 343 litSize _other = 0    -- Must match size of nullary constructors
 344                       -- Key point: if  x |-> 4, then x must inline unconditionally
 345                       --            (eg via case binding)
 346
 347 classOpSize :: [Id] -> [CoreExpr] -> ExprSize
 348 -- See Note [Conlike is interesting]
 349 classOpSize _ []
 350   = sizeZero
 351 classOpSize top_args (arg1 : other_args)
 352   = SizeIs (iUnbox size) arg_discount (_ILIT(0))
 353   where
 354     size = 2 + length other_args
 355     -- If the class op is scrutinising a lambda bound dictionary then
 356     -- give it a discount, to encourage the inlining of this function
 357     -- The actual discount is rather arbitrarily chosen
 358     arg_discount = case arg1 of
 359                      Var dict | dict `elem` top_args
 360                               -> unitBag (dict, opt_UF_DictDiscount)
 361                      _other   -> emptyBag
 362
 363 funSize :: [Id] -> Id -> Int -> ExprSize
 364 -- Size for functions that are not constructors or primops
 365 -- Note [Function applications]
 366 funSize top_args fun n_val_args
 367   | fun `hasKey` buildIdKey   = buildSize
 368   | fun `hasKey` augmentIdKey = augmentSize
 369   | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount)
 370   where
 371     some_val_args = n_val_args > 0
 372
 373     arg_discount | some_val_args && fun `elem` top_args
 374                  = unitBag (fun, opt_UF_FunAppDiscount)
 375                  | otherwise = emptyBag
 376         -- If the function is an argument and is applied
 377         -- to some values, give it an arg-discount
 378
 379     res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount
 380                  | otherwise                = 0
 381         -- If the function is partially applied, show a result discount
 382
 383     size | some_val_args = 1 + n_val_args
 384          | otherwise     = 0
 385         -- The 1+ is for the function itself
 386         -- Add 1 for each non-trivial arg;
 387         -- the allocation cost, as in let(rec)
 388
 389
 390 conSize :: DataCon -> Int -> ExprSize
 391 conSize dc n_val_args
 392   | n_val_args == 0      = SizeIs (_ILIT(0)) emptyBag (_ILIT(1))
 393   | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1))
 394   | otherwise            = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1))
 395         -- Treat a constructors application as size 1, regardless of how
 396         -- many arguments it has; we are keen to expose them
 397         -- (and we charge separately for their args).  We can't treat
 398         -- them as size zero, else we find that (Just x) has size 0,
 399         -- which is the same as a lone variable; and hence 'v' will
 400         -- always be replaced by (Just x), where v is bound to Just x.
 401         --
 402         -- However, unboxed tuples count as size zero
 403         -- I found occasions where we had
 404         --      f x y z = case op# x y z of { s -> (# s, () #) }
 405         -- and f wasn't getting inlined
 406
 407 primOpSize :: PrimOp -> Int -> ExprSize
 408 primOpSize op n_val_args
 409  | not (primOpIsDupable op) = sizeN opt_UF_DearOp
 410  | not (primOpOutOfLine op) = sizeN 1
 411         -- Be very keen to inline simple primops.
 412         -- We give a discount of 1 for each arg so that (op# x y z) costs 2.
 413         -- We can't make it cost 1, else we'll inline let v = (op# x y z)
 414         -- at every use of v, which is excessive.
 415         --
 416         -- A good example is:
 417         --      let x = +# p q in C {x}
 418         -- Even though x get's an occurrence of 'many', its RHS looks cheap,
 419         -- and there's a good chance it'll get inlined back into C's RHS. Urgh!
 420
 421  | otherwise = sizeN n_val_args
 422
 423
 424 buildSize :: ExprSize
 425 buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
 426         -- We really want to inline applications of build
 427         -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
 428         -- Indeed, we should add a result_discount becuause build is
 429         -- very like a constructor.  We don't bother to check that the
 430         -- build is saturated (it usually is).  The "-2" discounts for the \c n,
 431         -- The "4" is rather arbitrary.
 432
 433 augmentSize :: ExprSize
 434 augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4))
 435         -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
 436         -- e plus ys. The -2 accounts for the \cn
 437
 438 nukeScrutDiscount :: ExprSize -> ExprSize
 439 nukeScrutDiscount (SizeIs n vs _) = SizeIs n vs (_ILIT(0))
 440 nukeScrutDiscount TooBig          = TooBig
 441
 442 -- When we return a lambda, give a discount if it's used (applied)
 443 lamScrutDiscount :: ExprSize -> ExprSize
 444 lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount)
 445 lamScrutDiscount TooBig          = TooBig
 446 \end{code}
 447
 448 Note [Discounts and thresholds]
 449 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 450 Constants for discounts and thresholds are defined in main/StaticFlags,
 451 all of form opt_UF_xxxx.   They are:
 452
 453 opt_UF_CreationThreshold (45)
 454      At a definition site, if the unfolding is bigger than this, we
 455      may discard it altogether
 456
 457 opt_UF_UseThreshold (6)
 458      At a call site, if the unfolding, less discounts, is smaller than
 459      this, then it's small enough to inline
 460
 461 opt_UF_KeennessFactor (1.5)
 462      Factor by which the discounts are multiplied before
 463      subtracting from size
 464
 465 opt_UF_DictDiscount (1)
 466      The discount for each occurrence of a dictionary argument
 467      as an argument of a class method.  Should be pretty small
 468      else big functions may get inlined
 469
 470 opt_UF_FunAppDiscount (6)
 471      Discount for a function argument that is applied.  Quite
 472      large, because if we inline we avoid the higher-order call.
 473
 474 opt_UF_DearOp (4)
 475      The size of a foreign call or not-dupable PrimOp
 476
 477
 478 Note [Function applications]
 479 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 480 In a function application (f a b)
 481
 482   - If 'f' is an argument to the function being analysed,
 483     and there's at least one value arg, record a FunAppDiscount for f
 484
 485   - If the application is a PAP (arity > 2 in this example)
 486     record a *result* discount (because inlining
 487     with "extra" args in the call may mean that we now
 488     get a saturated application)
 489
 490 Code for manipulating sizes
 491
 492 \begin{code}
 493 data ExprSize = TooBig
 494               | SizeIs FastInt          -- Size found
 495                        (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
 496                        FastInt          -- Size to subtract if result is scrutinised
 497                                         -- by a case expression
 498
 499 instance Outputable ExprSize where
 500   ppr TooBig         = ptext (sLit "TooBig")
 501   ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c))
 502
 503 -- subtract the discount before deciding whether to bale out. eg. we
 504 -- want to inline a large constructor application into a selector:
 505 --      tup = (a_1, ..., a_99)
 506 --      x = case tup of ...
 507 --
 508 mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize
 509 mkSizeIs max n xs d | (n -# d) ># max = TooBig
 510                     | otherwise       = SizeIs n xs d
 511
 512 maxSize :: ExprSize -> ExprSize -> ExprSize
 513 maxSize TooBig         _                                  = TooBig
 514 maxSize _              TooBig                             = TooBig
 515 maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2  = s1
 516                                               | otherwise = s2
 517
 518 sizeZero, sizeOne :: ExprSize
 519 sizeN :: Int -> ExprSize
 520
 521 sizeZero = SizeIs (_ILIT(0))  emptyBag (_ILIT(0))
 522 sizeOne  = SizeIs (_ILIT(1))  emptyBag (_ILIT(0))
 523 sizeN n  = SizeIs (iUnbox n) emptyBag (_ILIT(0))
 524 \end{code}
 525
 526
 527
 528
 529 %************************************************************************
 530 %*                                                                      *
 531 \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding}
 532 %*                                                                      *
 533 %************************************************************************
 534
 535 We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that
 536 we ``couldn't possibly use'' on the other side.  Can be overridden w/
 537 flaggery.  Just the same as smallEnoughToInline, except that it has no
 538 actual arguments.
 539
 540 \begin{code}
 541 couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
 542 couldBeSmallEnoughToInline threshold rhs
 543   = case calcUnfoldingGuidance threshold rhs of
 544        (_, UnfoldNever) -> False
 545        _                -> True
 546
 547 ----------------
 548 smallEnoughToInline :: Unfolding -> Bool
 549 smallEnoughToInline (CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_size = size}})
 550   = size <= opt_UF_UseThreshold
 551 smallEnoughToInline _
 552   = False
 553
 554 ----------------
 555 certainlyWillInline :: Unfolding -> Bool
 556   -- Sees if the unfolding is pretty certain to inline
 557 certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
 558   = case guidance of
 559       UnfoldNever     -> False
 560       InlineRule {}   -> True
 561       UnfoldIfGoodArgs { ug_size = size}
 562                     -> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
 563
 564 certainlyWillInline _
 565   = False
 566 \end{code}
 567
 568 %************************************************************************
 569 %*                                                                      *
 570 \subsection{callSiteInline}
 571 %*                                                                      *
 572 %************************************************************************
 573
 574 This is the key function.  It decides whether to inline a variable at a call site
 575
 576 callSiteInline is used at call sites, so it is a bit more generous.
 577 It's a very important function that embodies lots of heuristics.
 578 A non-WHNF can be inlined if it doesn't occur inside a lambda,
 579 and occurs exactly once or
 580     occurs once in each branch of a case and is small
 581
 582 If the thing is in WHNF, there's no danger of duplicating work,
 583 so we can inline if it occurs once, or is small
 584
 585 NOTE: we don't want to inline top-level functions that always diverge.
 586 It just makes the code bigger.  It turns out that the convenient way to prevent
 587 them inlining is to give them a NOINLINE pragma, which we do in
 588 StrictAnal.addStrictnessInfoToTopId
 589
 590 \begin{code}
 591 callSiteInline :: DynFlags
 592                -> Bool                  -- True <=> the Id can be inlined
 593                -> Id                    -- The Id
 594                -> Bool                  -- True if there are are no arguments at all (incl type args)
 595                -> [ArgSummary]          -- One for each value arg; True if it is interesting
 596                -> CallCtxt              -- True <=> continuation is interesting
 597                -> Maybe CoreExpr        -- Unfolding, if any
 598
 599
 600 instance Outputable ArgSummary where
 601   ppr TrivArg    = ptext (sLit "TrivArg")
 602   ppr NonTrivArg = ptext (sLit "NonTrivArg")
 603   ppr ValueArg   = ptext (sLit "ValueArg")
 604
 605 data CallCtxt = BoringCtxt
 606
 607               | ArgCtxt         -- We are somewhere in the argument of a function
 608                         Bool    -- True  <=> we're somewhere in the RHS of function with rules
 609                                 -- False <=> we *are* the argument of a function with non-zero
 610                                 --           arg discount
 611                                 --        OR
 612                                 --           we *are* the RHS of a let  Note [RHS of lets]
 613                                 -- In both cases, be a little keener to inline
 614
 615               | ValAppCtxt      -- We're applied to at least one value arg
 616                                 -- This arises when we have ((f x |> co) y)
 617                                 -- Then the (f x) has argument 'x' but in a ValAppCtxt
 618
 619               | CaseCtxt        -- We're the scrutinee of a case
 620                                 -- that decomposes its scrutinee
 621
 622 instance Outputable CallCtxt where
 623   ppr BoringCtxt      = ptext (sLit "BoringCtxt")
 624   ppr (ArgCtxt rules) = ptext (sLit "ArgCtxt") <+> ppr rules
 625   ppr CaseCtxt        = ptext (sLit "CaseCtxt")
 626   ppr ValAppCtxt      = ptext (sLit "ValAppCtxt")
 627
 628 callSiteInline dflags active_inline id lone_variable arg_infos cont_info
 629   = let
 630         n_val_args  = length arg_infos
 631     in
 632     case idUnfolding id of {
 633         NoUnfolding      -> Nothing ;
 634         OtherCon _       -> Nothing ;
 635         DFunUnfolding {} -> Nothing ;   -- Never unfold a DFun
 636         CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value,
 637                         uf_is_cheap = is_cheap, uf_arity = uf_arity, uf_guidance = guidance } ->
 638                         -- uf_arity will typically be equal to (idArity id),
 639                         -- but may be less for InlineRules
 640     let
 641         result | yes_or_no = Just unf_template
 642                | otherwise = Nothing
 643
 644         interesting_args = any nonTriv arg_infos
 645                 -- NB: (any nonTriv arg_infos) looks at the
 646                 -- over-saturated args too which is "wrong";
 647                 -- but if over-saturated we inline anyway.
 648
 649                -- some_benefit is used when the RHS is small enough
 650                -- and the call has enough (or too many) value
 651                -- arguments (ie n_val_args >= arity). But there must
 652                -- be *something* interesting about some argument, or the
 653                -- result context, to make it worth inlining
 654         some_benefit =  interesting_args
 655                      || n_val_args > uf_arity       -- Over-saturated
 656                      || interesting_saturated_call  -- Exactly saturated
 657
 658         interesting_saturated_call
 659           = case cont_info of
 660               BoringCtxt -> not is_top && uf_arity > 0          -- Note [Nested functions]
 661               CaseCtxt   -> not (lone_variable && is_value)     -- Note [Lone variables]
 662               ArgCtxt {} -> uf_arity > 0                        -- Note [Inlining in ArgCtxt]
 663               ValAppCtxt -> True                                -- Note [Cast then apply]
 664
 665         yes_or_no
 666           = case guidance of
 667               UnfoldNever  -> False
 668
 669               InlineRule { ir_info = inl_info, ir_sat = sat }
 670                  | InlAlways <- inl_info -> True         -- No top-level binding, so inline!
 671                                                          -- Ignore is_active because we want to
 672                                                          -- inline even if SimplGently is on.
 673                  | not active_inline     -> False
 674                  | n_val_args < uf_arity -> yes_unsat    -- Not enough value args
 675                  | InlSmall <- inl_info  -> True         -- Note [INLINE for small functions]
 676                  | otherwise             -> some_benefit -- Saturated or over-saturated
 677                  where
 678                    -- See Note [Inlining an InlineRule]
 679                    yes_unsat = case sat of
 680                                  InlSat   -> False
 681                                  InlUnSat -> interesting_args
 682
 683               UnfoldIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
 684                  | not active_inline          -> False
 685                  | not is_cheap               -> False
 686                  | n_val_args < uf_arity      -> interesting_args && small_enough
 687                                                         -- Note [Unsaturated applications]
 688                  | uncondInline uf_arity size -> True
 689                  | otherwise                  -> some_benefit && small_enough
 690
 691                  where
 692                    small_enough = (size - discount) <= opt_UF_UseThreshold
 693                    discount = computeDiscount uf_arity arg_discounts
 694                                               res_discount arg_infos cont_info
 695
 696     in
 697     if dopt Opt_D_dump_inlinings dflags then
 698         pprTrace ("Considering inlining: " ++ showSDoc (ppr id))
 699                  (vcat [text "active:" <+> ppr active_inline,
 700                         text "arg infos" <+> ppr arg_infos,
 701                         text "interesting continuation" <+> ppr cont_info,
 702                         text "is value:" <+> ppr is_value,
 703                         text "is cheap:" <+> ppr is_cheap,
 704                         text "guidance" <+> ppr guidance,
 705                         text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
 706                   result
 707     else
 708     result
 709     }
 710 \end{code}
 711
 712 Note [RHS of lets]
 713 ~~~~~~~~~~~~~~~~~~
 714 Be a tiny bit keener to inline in the RHS of a let, because that might
 715 lead to good things later
 716      f y = (y,y,y)
 717      g y = let x = f y in ...(case x of (a,b,c) -> ...) ...
 718 We'd inline 'f' if the call was in a case context, and it kind-of-is,
 719 only we can't see it.  So we treat the RHS of a let as not-totally-boring.
 720
 721 Note [Unsaturated applications]
 722 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 723 When a call is not saturated, we *still* inline if one of the
 724 arguments has interesting structure.  That's sometimes very important.
 725 A good example is the Ord instance for Bool in Base:
 726
 727  Rec {
 728     $fOrdBool =GHC.Classes.D:Ord
 729                  @ Bool
 730                  ...
 731                  $cmin_ajX
 732
 733     $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool
 734     $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool
 735   }
 736
 737 But the defn of GHC.Classes.$dmmin is:
 738
 739   $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a
 740     {- Arity: 3, HasNoCafRefs, Strictness: SLL,
 741        Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a ->
 742                    case @ a GHC.Classes.<= @ a $dOrd x y of wild {
 743                      GHC.Bool.False -> y GHC.Bool.True -> x }) -}
 744
 745 We *really* want to inline $dmmin, even though it has arity 3, in
 746 order to unravel the recursion.
 747
 748
 749 Note [INLINE for small functions]
 750 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 751 Consider        {-# INLINE f #-}
 752                 f x = Just x
 753                 g y = f y
 754 Then f's RHS is no larger than its LHS, so we should inline it
 755 into even the most boring context.  (We do so if there is no INLINE
 756 pragma!)  That's the reason for the 'InlSmall' case of 'ir_info' on an InlineRule.
 757
 758
 759 Note [Things to watch]
 760 ~~~~~~~~~~~~~~~~~~~~~~
 761 *   { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... }
 762     Assume x is exported, so not inlined unconditionally.
 763     Then we want x to inline unconditionally; no reason for it
 764     not to, and doing so avoids an indirection.
 765
 766 *   { x = I# 3; ....f x.... }
 767     Make sure that x does not inline unconditionally!
 768     Lest we get extra allocation.
 769
 770 Note [Inlining an InlineRule]
 771 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 772 An InlineRule is used for
 773   (a) programmer INLINE pragmas
 774   (b) inlinings from worker/wrapper
 775
 776 For (a) the RHS may be large, and our contract is that we *only* inline
 777 when the function is applied to all the arguments on the LHS of the
 778 source-code defn.  (The uf_arity in the rule.)
 779
 780 However for worker/wrapper it may be worth inlining even if the
 781 arity is not satisfied (as we do in the CoreUnfolding case) so we don't
 782 require saturation.
 783
 784
 785 Note [Nested functions]
 786 ~~~~~~~~~~~~~~~~~~~~~~~
 787 If a function has a nested defn we also record some-benefit, on the
 788 grounds that we are often able to eliminate the binding, and hence the
 789 allocation, for the function altogether; this is good for join points.
 790 But this only makes sense for *functions*; inlining a constructor
 791 doesn't help allocation unless the result is scrutinised.  UNLESS the
 792 constructor occurs just once, albeit possibly in multiple case
 793 branches.  Then inlining it doesn't increase allocation, but it does
 794 increase the chance that the constructor won't be allocated at all in
 795 the branches that don't use it.
 796
 797 Note [Cast then apply]
 798 ~~~~~~~~~~~~~~~~~~~~~~
 799 Consider
 800    myIndex = __inline_me ( (/\a. <blah>) |> co )
 801    co :: (forall a. a -> a) ~ (forall a. T a)
 802      ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ...
 803
 804 We need to inline myIndex to unravel this; but the actual call (myIndex a) has
 805 no value arguments.  The ValAppCtxt gives it enough incentive to inline.
 806
 807 Note [Inlining in ArgCtxt]
 808 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 809 The condition (uf_arity > 0) here is very important, because otherwise
 810 we end up inlining top-level stuff into useless places; eg
 811    x = I# 3#
 812    f = \y.  g x
 813 This can make a very big difference: it adds 16% to nofib 'integer' allocs,
 814 and 20% to 'power'.
 815
 816 At one stage I replaced this condition by 'True' (leading to the above
 817 slow-down).  The motivation was test eyeball/inline1.hs; but that seems
 818 to work ok now.
 819
 820 NOTE: arguably, we should inline in ArgCtxt only if the result of the
 821 call is at least CONLIKE.  At least for the cases where we use ArgCtxt
 822 for the RHS of a 'let', we only profit from the inlining if we get a
 823 CONLIKE thing (modulo lets).
 824
 825 Note [Lone variables]
 826 ~~~~~~~~~~~~~~~~~~~~~
 827 The "lone-variable" case is important.  I spent ages messing about
 828 with unsatisfactory variants, but this is nice.  The idea is that if a
 829 variable appears all alone
 830
 831         as an arg of lazy fn, or rhs    BoringCtxt
 832         as scrutinee of a case          CaseCtxt
 833         as arg of a fn                  ArgCtxt
 834 AND
 835         it is bound to a value
 836
 837 then we should not inline it (unless there is some other reason,
 838 e.g. it is the sole occurrence).  That is what is happening at
 839 the use of 'lone_variable' in 'interesting_saturated_call'.
 840
 841 Why?  At least in the case-scrutinee situation, turning
 842         let x = (a,b) in case x of y -> ...
 843 into
 844         let x = (a,b) in case (a,b) of y -> ...
 845 and thence to
 846         let x = (a,b) in let y = (a,b) in ...
 847 is bad if the binding for x will remain.
 848
 849 Another example: I discovered that strings
 850 were getting inlined straight back into applications of 'error'
 851 because the latter is strict.
 852         s = "foo"
 853         f = \x -> ...(error s)...
 854
 855 Fundamentally such contexts should not encourage inlining because the
 856 context can ``see'' the unfolding of the variable (e.g. case or a
 857 RULE) so there's no gain, at least when the thing is bound to a value.
 858
 859 However, watch out:
 860
 861  * Consider this:
 862         foo = _inline_ (\n. [n])
 863         bar = _inline_ (foo 20)
 864         baz = \n. case bar of { (m:_) -> m + n }
 865    Here we really want to inline 'bar' so that we can inline 'foo'
 866    and the whole thing unravels as it should obviously do.  This is
 867    important: in the NDP project, 'bar' generates a closure data
 868    structure rather than a list.
 869
 870    So the non-inlining of lone_variables should only apply if the
 871    unfolding is regarded as cheap; because that is when exprIsConApp_maybe
 872    looks through the unfolding.  Hence the "&& is_cheap" in the
 873    InlineRule branch.
 874
 875  * Even a type application or coercion isn't a lone variable.
 876    Consider
 877         case $fMonadST @ RealWorld of { :DMonad a b c -> c }
 878    We had better inline that sucker!  The case won't see through it.
 879
 880    For now, I'm treating a variable applied to types
 881    in a *lazy* context as "lone". The motivating example was
 882         f = /\a. \x. BIG
 883         g = /\a. \y.  h (f a)
 884    There's no advantage in inlining f here, and perhaps
 885    a significant disadvantage.  Hence some_val_args in the Stop case.
 886
 887 \begin{code}
 888 computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int
 889 computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info
 890         -- The raw discounts (the per-argument ones and the result one) are
 891         -- multiplied by opt_UF_KeenessFactor, because sizes have to do with
 892         --  *size* whereas the discounts imply that there's some extra
 893         --  *efficiency* to be gained (e.g. beta reductions, case reductions)
 894         -- by inlining.
 895
 896   = 1               -- The result replaces the call, so we count 1
 897                     -- for the function itself
 898     + supplied_args + scaled_discount
 899   where
 900     supplied_args = length (take n_vals_wanted arg_infos)
 901                -- An (un-scaled) 1 for each wanted arg that is supplied,
 902                -- because the result replaces the call
 903
 904     scaled_discount = round (opt_UF_KeenessFactor * fromIntegral raw_discount)
 905     raw_discount    = sum (zipWith arg_bonus arg_discounts arg_infos) + ctxt_bonus
 906
 907     arg_bonus declared summary  -- 'declared' is the matching entry of arg_discounts
 908       = case summary of
 909           TrivArg    -> 0
 910           NonTrivArg -> 1
 911           ValueArg   -> declared
 912
 913     ctxt_bonus = case cont_info of
 914                    BoringCtxt -> 0
 915                    CaseCtxt   -> res_discount
 916                    _other     -> min 4 res_discount
 917                 -- res_discount can be very large when a function returns
 918                 -- constructors; but we only want to grant that large discount
 919                 -- when there's a case continuation.  Otherwise we, rather
 920                 -- arbitrarily, cap it, to avoid inlining large functions that
 921                 -- return constructors into contexts that are simply "interesting"
 922 \end{code}
 923
 924 %************************************************************************
 925 %*                                                                      *
 926         Interesting arguments
 927 %*                                                                      *
 928 %************************************************************************
 929
 930 Note [Interesting arguments]
 931 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 932 An argument is interesting if it deserves a discount for unfoldings
 933 with a discount in that argument position.  The idea is to avoid
 934 unfolding a function that is applied only to variables that have no
 935 unfolding (i.e. they are probably lambda bound): f x y z There is
 936 little point in inlining f here.
 937
 938 Generally, *values* (like (C a b) and (\x.e)) deserve discounts.  But
 939 we must look through lets, eg (let x = e in C a b), because the let will
 940 float, exposing the value, if we inline.  That makes it different to
 941 exprIsHNF.
 942
 943 Before 2009 we said it was interesting if the argument had *any* structure
 944 at all; i.e. (hasSomeUnfolding v).  But that does too much inlining; see Trac #3016.
 945
 946 But we don't regard (f x y) as interesting, unless f is unsaturated.
 947 If it's saturated and f hasn't inlined, then it's probably not going
 948 to now!
 949
 950 Note [Conlike is interesting]
 951 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 952 Consider
 953         f d = ...((*) d x y)...
 954         ... f (df d')...
 955 where df is con-like. Then we'd really like to inline 'f' so that the
 956 rule for (*) (df d) can fire.  To do this
 957   a) we give a discount for being an argument of a class-op (eg (*) d)
 958   b) we say that a con-like argument (eg (df d)) is interesting
 959
 960 \begin{code}
 961 data ArgSummary
 962   = TrivArg       -- Nothing interesting
 963   | NonTrivArg    -- Arg has structure
 964   | ValueArg      -- Arg is a con-app or PAP, or con-like; see Note [Conlike is interesting]
 965
 966 interestingArg :: CoreExpr -> ArgSummary
 967 -- See Note [Interesting arguments]
 968 interestingArg arg = summarise arg 0
 969   where
 970     -- n_vals is the number of value args to which 'expr' is applied
 971     summarise expr n_vals
 972       = case expr of
 973           Lit {} -> ValueArg
 974           Var v
 975             | isConLikeId v      -> ValueArg   -- 'conlike' rather than just data constructors
 976             | idArity v > n_vals -> ValueArg   -- Catches (eg) primops with arity but no unfolding
 977             | n_vals > 0         -> NonTrivArg -- Saturated or unknown call
 978             | isConLikeUnfolding (idUnfolding v)
 979                                  -> ValueArg   -- n_vals==0; see Note [Conlike is interesting]
 980             | otherwise          -> TrivArg    -- n_vals==0, no useful unfolding
 981
 982           Type _          -> TrivArg
 983           App fn (Type _) -> summarise fn n_vals        -- Type args are not counted
 984           App fn _        -> summarise fn (n_vals+1)
 985           Note _ body     -> summarise body n_vals
 986           Cast body _     -> summarise body n_vals
 987           Lam bndr body
 988             | isTyVar bndr -> summarise body n_vals
 989             | n_vals > 0   -> summarise body (n_vals-1)
 990             | otherwise    -> ValueArg
 991           Let _ body -> case summarise body n_vals of { ValueArg -> ValueArg; _ -> NonTrivArg }
 992                         -- The verdict comes from the let body; the bindings are not inspected
 993           Case {} -> NonTrivArg
 994
 995 nonTriv :: ArgSummary -> Bool      -- True for every summary except TrivArg
 996 nonTriv summary
 997   = case summary of { TrivArg -> False; _other -> True }
 998 \end{code}
 999
1000 %************************************************************************
1001 %*                                                                      *
1002          exprIsConApp_maybe
1003 %*                                                                      *
1004 %************************************************************************
1005
1006 Note [exprIsConApp_maybe]
1007 ~~~~~~~~~~~~~~~~~~~~~~~~~
1008 exprIsConApp_maybe is a very important function.  There are two principal
1009 uses:
1010   * case e of { .... }
1011   * cls_op e, where cls_op is a class operation
1012
1013 In both cases you want to know if e is of form (C e1..en) where C is
1014 a data constructor.
1015
1016 However e might not *look* as if it is one; it may be hidden behind notes, casts or cheap unfoldings.
1017
1018 \begin{code}
1019 -- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is
1020 -- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@,
1021 -- where t1..tk are the *universally-quantified* type args of 'dc'
1022 exprIsConApp_maybe :: CoreExpr -> Maybe (DataCon, [Type], [CoreExpr])
1023
1024 exprIsConApp_maybe (Note _ expr)
1025   = exprIsConApp_maybe expr
1026         -- We ignore all notes.  For example,
1027         --      case _scc_ "foo" (C a b) of
1028         --                      C a b -> e
1029         -- should be optimised away, but it will be only if we look
1030         -- through the SCC note.
1031
1032 exprIsConApp_maybe (Cast expr co)
1033   =     -- Here we do the KPush reduction rule as described in the FC paper
1034         -- The transformation applies iff we have
1035         --      (C e1 ... en) `cast` co
1036         -- where co :: (T t1 .. tn) ~ to_ty
1037         -- The left-hand one must be a T, because exprIsConApp_maybe returned
1038         -- Just; but the right-hand one might not be.  (Though it usually will.)
1039
1040     case exprIsConApp_maybe expr of {
1041         Nothing                          -> Nothing ;
1042         Just (dc, _dc_univ_args, dc_args) ->
1043
1044     let (_from_ty, to_ty) = coercionKind co
1045         dc_tc = dataConTyCon dc
1046     in
1047     case splitTyConApp_maybe to_ty of {
1048         Nothing -> Nothing ;
1049         Just (to_tc, to_tc_arg_tys)
1050                 | dc_tc /= to_tc -> Nothing
1051                 -- These two Nothing cases are possible; we might see
1052                 --      (C x y) `cast` (g :: T a ~ S [a]),
1053                 -- where S is a type function.  In fact, exprIsConApp_maybe
1054                 -- will probably not be called in such circumstances,
1055                 -- but there's nothing wrong with it
1056
1057                 | otherwise  ->
1058     let
1059         tc_arity       = tyConArity dc_tc
1060         dc_univ_tyvars = dataConUnivTyVars dc
1061         dc_ex_tyvars   = dataConExTyVars dc
1062         arg_tys        = dataConRepArgTys dc
1063
1064         dc_eqs :: [(Type,Type)]   -- All equalities from the DataCon
1065         dc_eqs = [(mkTyVarTy tv, ty)   | (tv,ty) <- dataConEqSpec dc] ++
1066                  [getEqPredTys eq_pred | eq_pred <- dataConEqTheta dc]
1067
1068         (ex_args, rest1)    = splitAtList dc_ex_tyvars dc_args
1069         (co_args, val_args) = splitAtList dc_eqs rest1
1070
1071         -- Make the "theta" from Fig 3 of the paper
1072         gammas = decomposeCo tc_arity co
1073         theta  = zipOpenTvSubst (dc_univ_tyvars ++ dc_ex_tyvars)
1074                                 (gammas         ++ stripTypeArgs ex_args)
1075
1076           -- Cast the existential coercion arguments
1077         cast_co (ty1, ty2) (Type co)
1078           = Type $ mkSymCoercion (substTy theta ty1)
1079                    `mkTransCoercion` co
1080                    `mkTransCoercion` (substTy theta ty2)
1081         cast_co _ other_arg = pprPanic "cast_co" (ppr other_arg)
1082         new_co_args = zipWith cast_co dc_eqs co_args
1083
1084           -- Cast the value arguments (which include dictionaries)
1085         new_val_args = zipWith cast_arg arg_tys val_args
1086         cast_arg arg_ty arg = mkCoerce (substTy theta arg_ty) arg
1087     in
1088 #ifdef DEBUG
1089     let dump_doc = vcat [ppr dc,      ppr dc_univ_tyvars, ppr dc_ex_tyvars,
1090                          ppr arg_tys, ppr dc_args,        ppr _dc_univ_args,
1091                          ppr ex_args, ppr val_args]
1092     in
1093     ASSERT2( coreEqType _from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc )
1094     ASSERT2( all isTypeArg (ex_args ++ co_args), dump_doc )
1095     ASSERT2( equalLength val_args arg_tys, dump_doc )
1096 #endif
1097
1098     Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args)
1099     }}
1100
1101 exprIsConApp_maybe expr
1102   = analyse expr []
1103   where
1104     analyse (App fun arg) args = analyse fun (arg:args)   -- Collect args off the application spine
1105     analyse fun@(Lam {})  args = beta fun [] args         -- Try to beta-reduce leading type lambdas
1106
1107     analyse (Var fun) args
1108         | Just con <- isDataConWorkId_maybe fun
1109         , is_saturated
1110         , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args
1111         = Just (con, stripTypeArgs univ_ty_args, rest_args)
1112
1113         -- Look through dictionary functions; see Note [Unfolding DFuns]
1114         | DFunUnfolding con ops <- unfolding
1115         , is_saturated
1116         , let (dfun_tvs, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun)
1117               subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args))
1118         = Just (con, substTys subst dfun_res_tys,
1119                      [mkApps op args | op <- ops])
1120
1121         -- Look through unfoldings, but only cheap ones, because
1122         -- we are effectively duplicating the unfolding
1123         | CoreUnfolding { uf_expandable = expand_me, uf_tmpl = rhs } <- unfolding
1124         , expand_me = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $
1125                       analyse rhs args
1126         where
1127           is_saturated = count isValArg args == idArity fun   -- Value-arg count equals the Id's arity
1128           unfolding = idUnfolding fun
1129
1130     analyse _ _ = Nothing
1131
1132     -----------
1133     in_scope = mkInScopeSet (exprFreeVars expr)   -- In-scope set for the substitution built in 'beta'
1134
1135     -----------
1136     beta (Lam v body) pairs (arg : args)
1137         | isTypeArg arg
1138         = beta body ((v,arg):pairs) args   -- Remember the binder/type-arg pair for the substitution
1139
1140     beta (Lam {}) _ _    -- Un-saturated, or not a type lambda
1141         = Nothing
1142
1143     beta fun pairs args
1144         = case analyse (substExpr subst fun) args of
1145             Nothing  -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $
1146                         Nothing
1147             Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $
1148                         Just ans
1149         where
1150           subst = mkOpenSubst in_scope pairs
1151           -- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args]
1152
1153
1154 stripTypeArgs :: [CoreExpr] -> [Type]   -- Unwraps the (Type ty) args; asserts every arg is a type arg
1155 stripTypeArgs args = ASSERT2( all isTypeArg args, ppr args ) foldr unwrap [] args
1156   where { unwrap (Type ty) tys = ty : tys; unwrap _ tys = tys }
1157 \end{code}
1158
1159 Note [Unfolding DFuns]
1160 ~~~~~~~~~~~~~~~~~~~~~~
1161 DFuns look like
1162
1163   df :: forall a b. (Eq a, Eq b) -> Eq (a,b)
1164   df a b d_a d_b = MkEqD (a,b) ($c1 a b d_a d_b)
1165                                ($c2 a b d_a d_b)
1166
1167 So to split it up we just need to apply the ops $c1, $c2 etc
1168 to the very same args as the dfun.  It takes a little more work
1169 to compute the type arguments to the dictionary constructor.
1170