compiler/simplCore/SimplUtils.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1993-1998
   3 %
   4 \section[SimplUtils]{The simplifier utilities}
   5
   6 \begin{code}
   7 module SimplUtils (
   8         -- Rebuilding
   9         mkLam, mkCase, prepareAlts, bindCaseBndr,
  10
  11         -- Inlining,
  12         preInlineUnconditionally, postInlineUnconditionally,
  13         activeInline, activeRule,
  14
  15         -- The continuation type
  16         SimplCont(..), DupFlag(..), ArgInfo(..),
  17         contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs,
  18         countValArgs, countArgs,
  19         mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
  20         interestingCallContext,
  21
  22         interestingArg, mkArgInfo,
  23
  24         abstractFloats
  25     ) where
  26
  27 #include "HsVersions.h"
  28
  29 import SimplEnv
  30 import DynFlags
  31 import StaticFlags
  32 import CoreSyn
  33 import qualified CoreSubst
  34 import PprCore
  35 import CoreFVs
  36 import CoreUtils
  37 import CoreArity        ( etaExpand, exprEtaExpandArity )
  38 import CoreUnfold
  39 import Name
  40 import Id
  41 import Var      ( isCoVar )
  42 import NewDemand
  43 import SimplMonad
  44 import Type     hiding( substTy )
  45 import Coercion ( coercionKind )
  46 import TyCon
  47 import Unify    ( dataConCannotMatch )
  48 import VarSet
  49 import BasicTypes
  50 import Util
  51 import MonadUtils
  52 import Outputable
  53 import FastString
  54
  55 import Data.List
  56 \end{code}
  57
  58
  59 %************************************************************************
  60 %*                                                                      *
  61                 The SimplCont type
  62 %*                                                                      *
  63 %************************************************************************
  64
  65 A SimplCont allows the simplifier to traverse the expression in a
  66 zipper-like fashion.  The SimplCont represents the rest of the expression,
  67 "above" the point of interest.
  68
  69 You can also think of a SimplCont as an "evaluation context", using
  70 that term in the way it is used for operational semantics. This is the
  71 way I usually think of it, For example you'll often see a syntax for
  72 evaluation context looking like
  73         C ::= []  |  C e   |  case C of alts  |  C `cast` co
  74 That's the kind of thing we are doing here, and I use that syntax in
  75 the comments.
  76
  77
  78 Key points:
  79   * A SimplCont describes a *strict* context (just like
  80     evaluation contexts do).  E.g. Just [] is not a SimplCont
  81
  82   * A SimplCont describes a context that *does not* bind
  83     any variables.  E.g. \x. [] is not a SimplCont
  84
  85 \begin{code}
  86 data SimplCont
  87   = Stop                -- An empty context, or hole, []
  88         CallCtxt        -- True <=> There is something interesting about
  89                         --          the context, and hence the inliner
  90                         --          should be a bit keener (see interestingCallContext)
  91                         -- Specifically:
  92                         --     This is an argument of a function that has RULES
  93                         --     Inlining the call might allow the rule to fire
  94
  95   | CoerceIt            -- C `cast` co
  96         OutCoercion             -- The coercion simplified
  97         SimplCont
  98
  99   | ApplyTo             -- C arg
 100         DupFlag
 101         InExpr SimplEnv         -- The argument and its static env
 102         SimplCont
 103
 104   | Select              -- case C of alts
 105         DupFlag
 106         InId [InAlt] SimplEnv   -- The case binder, alts, and subst-env
 107         SimplCont
 108
 109   -- The two strict forms have no DupFlag, because we never duplicate them
 110   | StrictBind          -- (\x* \xs. e) C
 111         InId [InBndr]           -- let x* = [] in e
 112         InExpr SimplEnv         --      is a special case
 113         SimplCont
 114
 115   | StrictArg           -- e C
 116         OutExpr                 -- e; *always* of form (Var v `App1` e1 .. `App` en)
 117         CallCtxt                -- Whether *this* argument position is interesting
 118         ArgInfo                 -- Whether the function at the head of e has rules, etc
 119         SimplCont               --     plus strictness flags for *further* args
 120
 121 data ArgInfo
 122   = ArgInfo {
 123         ai_rules :: Bool,       -- Function has rules (recursively)
 124                                 --      => be keener to inline in all args
 125         ai_strs :: [Bool],      -- Strictness of arguments
 126                                 --   Usually infinite, but if it is finite it guarantees
 127                                 --   that the function diverges after being given
 128                                 --   that number of args
 129         ai_discs :: [Int]       -- Discounts for arguments; non-zero => be keener to inline
 130                                 --   Always infinite
 131     }
 132
 133 instance Outputable SimplCont where
 134   ppr (Stop interesting)             = ptext (sLit "Stop") <> brackets (ppr interesting)
 135   ppr (ApplyTo dup arg _ cont)       = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg)
 136                                           {-  $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
 137   ppr (StrictBind b _ _ _ cont)      = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
 138   ppr (StrictArg f _ _ cont)         = (ptext (sLit "StrictArg") <+> ppr f) $$ ppr cont
 139   ppr (Select dup bndr alts _ cont)  = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
 140                                        (nest 4 (ppr alts)) $$ ppr cont
 141   ppr (CoerceIt co cont)             = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
 142
 143 data DupFlag = OkToDup | NoDup
 144
 145 instance Outputable DupFlag where
 146   ppr OkToDup = ptext (sLit "ok")
 147   ppr NoDup   = ptext (sLit "nodup")
 148
 149
 150
 151 -------------------
 152 mkBoringStop :: SimplCont
 153 mkBoringStop = Stop BoringCtxt
 154
 155 mkRhsStop :: SimplCont  -- See Note [RHS of lets] in CoreUnfold
 156 mkRhsStop = Stop (ArgCtxt False)
 157
 158 mkLazyArgStop :: CallCtxt -> SimplCont
 159 mkLazyArgStop cci = Stop cci
 160
 161 -------------------
 162 contIsRhsOrArg :: SimplCont -> Bool
 163 contIsRhsOrArg (Stop {})       = True
 164 contIsRhsOrArg (StrictBind {}) = True
 165 contIsRhsOrArg (StrictArg {})  = True
 166 contIsRhsOrArg _               = False
 167
 168 -------------------
 169 contIsDupable :: SimplCont -> Bool
 170 contIsDupable (Stop {})                  = True
 171 contIsDupable (ApplyTo  OkToDup _ _ _)   = True
 172 contIsDupable (Select   OkToDup _ _ _ _) = True
 173 contIsDupable (CoerceIt _ cont)          = contIsDupable cont
 174 contIsDupable _                          = False
 175
 176 -------------------
 177 contIsTrivial :: SimplCont -> Bool
 178 contIsTrivial (Stop {})                   = True
 179 contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
 180 contIsTrivial (CoerceIt _ cont)           = contIsTrivial cont
 181 contIsTrivial _                           = False
 182
 183 -------------------
 184 contResultType :: SimplEnv -> OutType -> SimplCont -> OutType
 185 contResultType env ty cont
 186   = go cont ty
 187   where
 188     subst_ty se ty = substTy (se `setInScope` env) ty
 189
 190     go (Stop {})                      ty = ty
 191     go (CoerceIt co cont)             _  = go cont (snd (coercionKind co))
 192     go (StrictBind _ bs body se cont) _  = go cont (subst_ty se (exprType (mkLams bs body)))
 193     go (StrictArg fn _ _ cont)        _  = go cont (funResultTy (exprType fn))
 194     go (Select _ _ alts se cont)      _  = go cont (subst_ty se (coreAltsType alts))
 195     go (ApplyTo _ arg se cont)        ty = go cont (apply_to_arg ty arg se)
 196
 197     apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
 198     apply_to_arg ty _             _  = funResultTy ty
 199
 200 -------------------
 201 countValArgs :: SimplCont -> Int
 202 countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
 203 countValArgs (ApplyTo _ _        _ cont) = 1 + countValArgs cont
 204 countValArgs _                           = 0
 205
 206 countArgs :: SimplCont -> Int
 207 countArgs (ApplyTo _ _ _ cont) = 1 + countArgs cont
 208 countArgs _                    = 0
 209
 210 contArgs :: SimplCont -> ([OutExpr], SimplCont)
 211 -- Uses substitution to turn each arg into an OutExpr
 212 contArgs cont = go [] cont
 213   where
 214     go args (ApplyTo _ arg se cont) = go (substExpr se arg : args) cont
 215     go args cont                    = (reverse args, cont)
 216
 217 dropArgs :: Int -> SimplCont -> SimplCont
 218 dropArgs 0 cont = cont
 219 dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
 220 dropArgs n other                = pprPanic "dropArgs" (ppr n <+> ppr other)
 221 \end{code}
 222
 223
 224 Note [Interesting call context]
 225 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 226 We want to avoid inlining an expression where there can't possibly be
 227 any gain, such as in an argument position.  Hence, if the continuation
 228 is interesting (eg. a case scrutinee, application etc.) then we
 229 inline, otherwise we don't.
 230
 231 Previously some_benefit used to return True only if the variable was
 232 applied to some value arguments.  This didn't work:
 233
 234         let x = _coerce_ (T Int) Int (I# 3) in
 235         case _coerce_ Int (T Int) x of
 236                 I# y -> ....
 237
 238 we want to inline x, but can't see that it's a constructor in a case
 239 scrutinee position, and some_benefit is False.
 240
 241 Another example:
 242
 243 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
 244
 245 ....  case dMonadST _@_ x0 of (a,b,c) -> ....
 246
 247 we'd really like to inline dMonadST here, but we *don't* want to
 248 inline if the case expression is just
 249
 250         case x of y { DEFAULT -> ... }
 251
 252 since we can just eliminate this case instead (x is in WHNF).  Similar
 253 applies when x is bound to a lambda expression.  Hence
 254 contIsInteresting looks for case expressions with just a single
 255 default case.
 256
 257
 258 \begin{code}
 259 interestingCallContext :: SimplCont -> CallCtxt
 260 -- See Note [Interesting call context]
 261 interestingCallContext cont
 262   = interesting cont
 263   where
 264     interesting (Select _ bndr _ _ _)
 265         | isDeadBinder bndr = CaseCtxt
 266         | otherwise         = ArgCtxt False     -- If the binder is used, this
 267                                                 -- is like a strict let
 268                                                 -- See Note [RHS of lets] in CoreUnfold
 269
 270     interesting (ApplyTo _ arg _ cont)
 271         | isTypeArg arg = interesting cont
 272         | otherwise     = ValAppCtxt    -- Can happen if we have (f Int |> co) y
 273                                         -- If f has an INLINE prag we need to give it some
 274                                         -- motivation to inline. See Note [Cast then apply]
 275                                         -- in CoreUnfold
 276
 277     interesting (StrictArg _ cci _ _)   = cci
 278     interesting (StrictBind {})         = BoringCtxt
 279     interesting (Stop cci)              = cci
 280     interesting (CoerceIt _ cont)       = interesting cont
 281         -- If this call is the arg of a strict function, the context
 282         -- is a bit interesting.  If we inline here, we may get useful
 283         -- evaluation information to avoid repeated evals: e.g.
 284         --      x + (y * z)
 285         -- Here the contIsInteresting makes the '*' keener to inline,
 286         -- which in turn exposes a constructor which makes the '+' inline.
 287         -- Assuming that +,* aren't small enough to inline regardless.
 288         --
 289         -- It's also very important to inline in a strict context for things
 290         -- like
 291         --              foldr k z (f x)
 292         -- Here, the context of (f x) is strict, and if f's unfolding is
 293         -- a build it's *great* to inline it here.  So we must ensure that
 294         -- the context for (f x) is not totally uninteresting.
 295
 296
 297 -------------------
 298 mkArgInfo :: Id
 299           -> [CoreRule] -- Rules for function
 300           -> Int        -- Number of value args
 301           -> SimplCont  -- Context of the call
 302           -> ArgInfo
 303
 304 mkArgInfo fun rules n_val_args call_cont
 305   | n_val_args < idArity fun            -- Note [Unsaturated functions]
 306   = ArgInfo { ai_rules = False
 307             , ai_strs = vanilla_stricts
 308             , ai_discs = vanilla_discounts }
 309   | otherwise
 310   = ArgInfo { ai_rules = interestingArgContext rules call_cont
 311             , ai_strs  = add_type_str (idType fun) arg_stricts
 312             , ai_discs = arg_discounts }
 313   where
 314     vanilla_discounts, arg_discounts :: [Int]
 315     vanilla_discounts = repeat 0
 316     arg_discounts = case idUnfolding fun of
 317                         CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_args = discounts}}
 318                               -> discounts ++ vanilla_discounts
 319                         _     -> vanilla_discounts
 320
 321     vanilla_stricts, arg_stricts :: [Bool]
 322     vanilla_stricts  = repeat False
 323
 324     arg_stricts
 325       = case splitStrictSig (idNewStrictness fun) of
 326           (demands, result_info)
 327                 | not (demands `lengthExceeds` n_val_args)
 328                 ->      -- Enough args, use the strictness given.
 329                         -- For bottoming functions we used to pretend that the arg
 330                         -- is lazy, so that we don't treat the arg as an
 331                         -- interesting context.  This avoids substituting
 332                         -- top-level bindings for (say) strings into
 333                         -- calls to error.  But now we are more careful about
 334                         -- inlining lone variables, so its ok (see SimplUtils.analyseCont)
 335                    if isBotRes result_info then
 336                         map isStrictDmd demands         -- Finite => result is bottom
 337                    else
 338                         map isStrictDmd demands ++ vanilla_stricts
 339                | otherwise
 340                -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
 341                                 <+> ppr n_val_args <+> ppr demands )
 342                    vanilla_stricts      -- Not enough args, or no strictness
 343
 344     add_type_str :: Type -> [Bool] -> [Bool]
 345     -- If the function arg types are strict, record that in the 'strictness bits'
 346     -- No need to instantiate because unboxed types (which dominate the strict
 347     -- types) can't instantiate type variables.
 348     -- add_type_str is done repeatedly (for each call); might be better
 349     -- once-for-all in the function
 350     -- But beware primops/datacons with no strictness
 351     add_type_str _ [] = []
 352     add_type_str fun_ty strs            -- Look through foralls
 353         | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty       -- Includes coercions
 354         = add_type_str fun_ty' strs
 355     add_type_str fun_ty (str:strs)      -- Add strict-type info
 356         | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
 357         = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
 358     add_type_str _ strs
 359         = strs
 360
 361 {- Note [Unsaturated functions]
 362   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 363 Consider (test eyeball/inline4)
 364         x = a:as
 365         y = f x
 366 where f has arity 2.  Then we do not want to inline 'x', because
 367 it'll just be floated out again.  Even if f has lots of discounts
 368 on its first argument -- it must be saturated for these to kick in
 369 -}
 370
 371 interestingArgContext :: [CoreRule] -> SimplCont -> Bool
 372 -- If the argument has form (f x y), where x,y are boring,
 373 -- and f is marked INLINE, then we don't want to inline f.
 374 -- But if the context of the argument is
 375 --      g (f x y)
 376 -- where g has rules, then we *do* want to inline f, in case it
 377 -- exposes a rule that might fire.  Similarly, if the context is
 378 --      h (g (f x x))
 379 -- where h has rules, then we do want to inline f; hence the
 380 -- call_cont argument to interestingArgContext
 381 --
 382 -- The ai-rules flag makes this happen; if it's
 383 -- set, the inliner gets just enough keener to inline f
 384 -- regardless of how boring f's arguments are, if it's marked INLINE
 385 --
 386 -- The alternative would be to *always* inline an INLINE function,
 387 -- regardless of how boring its context is; but that seems overkill
 388 -- For example, it'd mean that wrapper functions were always inlined
 389 interestingArgContext rules call_cont
 390   = notNull rules || enclosing_fn_has_rules
 391   where
 392     enclosing_fn_has_rules = go call_cont
 393
 394     go (Select {})           = False
 395     go (ApplyTo {})          = False
 396     go (StrictArg _ cci _ _) = interesting cci
 397     go (StrictBind {})       = False    -- ??
 398     go (CoerceIt _ c)        = go c
 399     go (Stop cci)            = interesting cci
 400
 401     interesting (ArgCtxt rules) = rules
 402     interesting _               = False
 403 \end{code}
 404
 405
 406
 407 %************************************************************************
 408 %*                                                                      *
 409 \subsection{Decisions about inlining}
 410 %*                                                                      *
 411 %************************************************************************
 412
 413 Inlining is controlled partly by the SimplifierMode switch.  This has two
 414 settings:
 415
 416         SimplGently     (a) Simplifying before specialiser/full laziness
 417                         (b) Simplifiying inside InlineRules
 418                         (c) Simplifying the LHS of a rule
 419                         (d) Simplifying a GHCi expression or Template
 420                                 Haskell splice
 421
 422         SimplPhase n _   Used at all other times
 423
 424 The key thing about SimplGently is that it does no call-site inlining.
 425 Before full laziness we must be careful not to inline wrappers,
 426 because doing so inhibits floating
 427     e.g. ...(case f x of ...)...
 428     ==> ...(case (case x of I# x# -> fw x#) of ...)...
 429     ==> ...(case x of I# x# -> case fw x# of ...)...
 430 and now the redex (f x) isn't floatable any more.
 431
 432 The no-inlining thing is also important for Template Haskell.  You might be
 433 compiling in one-shot mode with -O2; but when TH compiles a splice before
 434 running it, we don't want to use -O2.  Indeed, we don't want to inline
 435 anything, because the byte-code interpreter might get confused about
 436 unboxed tuples and suchlike.
 437
 438 Note [Simplifying gently inside InlineRules]
 439 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 440 We don't do much simplification inside InlineRules (which come from
 441 INLINE pragmas).  It really is important to switch off inlinings
 442 inside such expressions.  Consider the following example
 443
 444         let f = \pq -> BIG
 445         in
 446         let g = \y -> f y y
 447             {-# INLINE g #-}
 448         in ...g...g...g...g...g...
 449
 450 Now, if that's the ONLY occurrence of f, it will be inlined inside g,
 451 and thence copied multiple times when g is inlined.
 452
 453 This function may be inlinined in other modules, so we don't want to
 454 remove (by inlining) calls to functions that have specialisations, or
 455 that may have transformation rules in an importing scope.
 456
 457 E.g.    {-# INLINE f #-}
 458         f x = ...g...
 459
 460 and suppose that g is strict *and* has specialisations.  If we inline
 461 g's wrapper, we deny f the chance of getting the specialised version
 462 of g when f is inlined at some call site (perhaps in some other
 463 module).
 464
 465 It's also important not to inline a worker back into a wrapper.
 466 A wrapper looks like
 467         wraper = inline_me (\x -> ...worker... )
 468 Normally, the inline_me prevents the worker getting inlined into
 469 the wrapper (initially, the worker's only call site!).  But,
 470 if the wrapper is sure to be called, the strictness analyser will
 471 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
 472 continuation.  That's why the keep_inline predicate returns True for
 473 ArgOf continuations.  It shouldn't do any harm not to dissolve the
 474 inline-me note under these circumstances.
 475
 476 Although we do very little simplification inside an InlineRule,
 477 the RHS is simplified as normal.  For example:
 478
 479         all xs = foldr (&&) True xs
 480         any p = all . map p  {-# INLINE any #-}
 481
 482 The RHS of 'any' will get optimised and deforested; but the InlineRule
 483 will still mention the original RHS.
 484
 485
 486 preInlineUnconditionally
 487 ~~~~~~~~~~~~~~~~~~~~~~~~
 488 @preInlineUnconditionally@ examines a bndr to see if it is used just
 489 once in a completely safe way, so that it is safe to discard the
 490 binding inline its RHS at the (unique) usage site, REGARDLESS of how
 491 big the RHS might be.  If this is the case we don't simplify the RHS
 492 first, but just inline it un-simplified.
 493
 494 This is much better than first simplifying a perhaps-huge RHS and then
 495 inlining and re-simplifying it.  Indeed, it can be at least quadratically
 496 better.  Consider
 497
 498         x1 = e1
 499         x2 = e2[x1]
 500         x3 = e3[x2]
 501         ...etc...
 502         xN = eN[xN-1]
 503
 504 We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc.
 505 This can happen with cascades of functions too:
 506
 507         f1 = \x1.e1
 508         f2 = \xs.e2[f1]
 509         f3 = \xs.e3[f3]
 510         ...etc...
 511
 512 THE MAIN INVARIANT is this:
 513
 514         ----  preInlineUnconditionally invariant -----
 515    IF preInlineUnconditionally chooses to inline x = <rhs>
 516    THEN doing the inlining should not change the occurrence
 517         info for the free vars of <rhs>
 518         ----------------------------------------------
 519
 520 For example, it's tempting to look at trivial binding like
 521         x = y
 522 and inline it unconditionally.  But suppose x is used many times,
 523 but this is the unique occurrence of y.  Then inlining x would change
 524 y's occurrence info, which breaks the invariant.  It matters: y
 525 might have a BIG rhs, which will now be dup'd at every occurrenc of x.
 526
 527
 528 Even RHSs labelled InlineMe aren't caught here, because there might be
 529 no benefit from inlining at the call site.
 530
 531 [Sept 01] Don't unconditionally inline a top-level thing, because that
 532 can simply make a static thing into something built dynamically.  E.g.
 533         x = (a,b)
 534         main = \s -> h x
 535
 536 [Remember that we treat \s as a one-shot lambda.]  No point in
 537 inlining x unless there is something interesting about the call site.
 538
 539 But watch out: if you aren't careful, some useful foldr/build fusion
 540 can be lost (most notably in spectral/hartel/parstof) because the
 541 foldr didn't see the build.  Doing the dynamic allocation isn't a big
 542 deal, in fact, but losing the fusion can be.  But the right thing here
 543 seems to be to do a callSiteInline based on the fact that there is
 544 something interesting about the call site (it's strict).  Hmm.  That
 545 seems a bit fragile.
 546
 547 Conclusion: inline top level things gaily until Phase 0 (the last
 548 phase), at which point don't.
 549
 550 \begin{code}
 551 preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
 552 preInlineUnconditionally env top_lvl bndr rhs
 553   | not active             = False
 554   | opt_SimplNoPreInlining = False
 555   | otherwise = case idOccInfo bndr of
 556                   IAmDead                    -> True    -- Happens in ((\x.1) v)
 557                   OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
 558                   _                          -> False
 559   where
 560     phase = getMode env
 561     active = case phase of
 562                    SimplGently    -> isEarlyActive act
 563                    SimplPhase n _ -> isActive n act
 564     act = idInlineActivation bndr
 565
 566     try_once in_lam int_cxt     -- There's one textual occurrence
 567         | not in_lam = isNotTopLevel top_lvl || early_phase
 568         | otherwise  = int_cxt && canInlineInLam rhs
 569
 570 -- Be very careful before inlining inside a lambda, becuase (a) we must not
 571 -- invalidate occurrence information, and (b) we want to avoid pushing a
 572 -- single allocation (here) into multiple allocations (inside lambda).
 573 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
 574 --      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
 575 --      where
 576 --              is_cheap = exprIsCheap rhs
 577 --              ok = is_cheap && int_cxt
 578
 579         --      int_cxt         The context isn't totally boring
 580         -- E.g. let f = \ab.BIG in \y. map f xs
 581         --      Don't want to substitute for f, because then we allocate
 582         --      its closure every time the \y is called
 583         -- But: let f = \ab.BIG in \y. map (f y) xs
 584         --      Now we do want to substitute for f, even though it's not
 585         --      saturated, because we're going to allocate a closure for
 586         --      (f y) every time round the loop anyhow.
 587
 588         -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
 589         -- so substituting rhs inside a lambda doesn't change the occ info.
 590         -- Sadly, not quite the same as exprIsHNF.
 591     canInlineInLam (Lit _)              = True
 592     canInlineInLam (Lam b e)            = isRuntimeVar b || canInlineInLam e
 593     canInlineInLam (Note _ e)           = canInlineInLam e
 594     canInlineInLam _                    = False
 595
 596     early_phase = case phase of
 597                         SimplPhase 0 _ -> False
 598                         _              -> True
 599 -- If we don't have this early_phase test, consider
 600 --      x = length [1,2,3]
 601 -- The full laziness pass carefully floats all the cons cells to
 602 -- top level, and preInlineUnconditionally floats them all back in.
 603 -- Result is (a) static allocation replaced by dynamic allocation
 604 --           (b) many simplifier iterations because this tickles
 605 --               a related problem; only one inlining per pass
 606 --
 607 -- On the other hand, I have seen cases where top-level fusion is
 608 -- lost if we don't inline top level thing (e.g. string constants)
 609 -- Hence the test for phase zero (which is the phase for all the final
 610 -- simplifications).  Until phase zero we take no special notice of
 611 -- top level things, but then we become more leery about inlining
 612 -- them.
 613
 614 \end{code}
 615
 616 postInlineUnconditionally
 617 ~~~~~~~~~~~~~~~~~~~~~~~~~
 618 @postInlineUnconditionally@ decides whether to unconditionally inline
 619 a thing based on the form of its RHS; in particular if it has a
 620 trivial RHS.  If so, we can inline and discard the binding altogether.
 621
 622 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
 623 only have *forward* references Hence, it's safe to discard the binding
 624
 625 NOTE: This isn't our last opportunity to inline.  We're at the binding
 626 site right now, and we'll get another opportunity when we get to the
 627 ocurrence(s)
 628
 629 Note that we do this unconditional inlining only for trival RHSs.
 630 Don't inline even WHNFs inside lambdas; doing so may simply increase
 631 allocation when the function is called. This isn't the last chance; see
 632 NOTE above.
 633
 634 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
 635 Because we don't even want to inline them into the RHS of constructor
 636 arguments. See NOTE above
 637
 638 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
 639 it's best to inline it anyway.  We often get a=E; b=a from desugaring,
 640 with both a and b marked NOINLINE.  But that seems incompatible with
 641 our new view that inlining is like a RULE, so I'm sticking to the 'active'
 642 story for now.
 643
 644 \begin{code}
 645 postInlineUnconditionally
 646     :: SimplEnv -> TopLevelFlag
 647     -> OutId            -- The binder (an InId would be fine too)
 648     -> OccInfo          -- From the InId
 649     -> OutExpr
 650     -> Unfolding
 651     -> Bool
 652 postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 653   | not active             = False
 654   | isLoopBreaker occ_info = False      -- If it's a loop-breaker of any kind, don't inline
 655                                         -- because it might be referred to "earlier"
 656   | isExportedId bndr      = False
 657   | isInlineRule unfolding = False      -- Note [InlineRule and postInlineUnconditionally]
 658   | exprIsTrivial rhs      = True
 659   | otherwise
 660   = case occ_info of
 661         -- The point of examining occ_info here is that for *non-values*
 662         -- that occur outside a lambda, the call-site inliner won't have
 663         -- a chance (becuase it doesn't know that the thing
 664         -- only occurs once).   The pre-inliner won't have gotten
 665         -- it either, if the thing occurs in more than one branch
 666         -- So the main target is things like
 667         --      let x = f y in
 668         --      case v of
 669         --         True  -> case x of ...
 670         --         False -> case x of ...
 671         -- I'm not sure how important this is in practice
 672       OneOcc in_lam _one_br int_cxt     -- OneOcc => no code-duplication issue
 673         ->     smallEnoughToInline unfolding    -- Small enough to dup
 674                         -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
 675                         --
 676                         -- NB: Do NOT inline arbitrarily big things, even if one_br is True
 677                         -- Reason: doing so risks exponential behaviour.  We simplify a big
 678                         --         expression, inline it, and simplify it again.  But if the
 679                         --         very same thing happens in the big expression, we get
 680                         --         exponential cost!
 681                         -- PRINCIPLE: when we've already simplified an expression once,
 682                         -- make sure that we only inline it if it's reasonably small.
 683
 684            &&  ((isNotTopLevel top_lvl && not in_lam) ||
 685                         -- But outside a lambda, we want to be reasonably aggressive
 686                         -- about inlining into multiple branches of case
 687                         -- e.g. let x = <non-value>
 688                         --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
 689                         -- Inlining can be a big win if C3 is the hot-spot, even if
 690                         -- the uses in C1, C2 are not 'interesting'
 691                         -- An example that gets worse if you add int_cxt here is 'clausify'
 692
 693                 (isCheapUnfolding unfolding && int_cxt))
 694                         -- isCheap => acceptable work duplication; in_lam may be true
 695                         -- int_cxt to prevent us inlining inside a lambda without some
 696                         -- good reason.  See the notes on int_cxt in preInlineUnconditionally
 697
 698       IAmDead -> True   -- This happens; for example, the case_bndr during case of
 699                         -- known constructor:  case (a,b) of x { (p,q) -> ... }
 700                         -- Here x isn't mentioned in the RHS, so we don't want to
 701                         -- create the (dead) let-binding  let x = (a,b) in ...
 702
 703       _ -> False
 704
 705 -- Here's an example that we don't handle well:
 706 --      let f = if b then Left (\x.BIG) else Right (\y.BIG)
 707 --      in \y. ....case f of {...} ....
 708 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
 709 -- But
 710 --  - We can't preInlineUnconditionally because that woud invalidate
 711 --    the occ info for b.
 712 --  - We can't postInlineUnconditionally because the RHS is big, and
 713 --    that risks exponential behaviour
 714 --  - We can't call-site inline, because the rhs is big
 715 -- Alas!
 716
 717   where
 718     active = case getMode env of
 719                    SimplGently    -> isAlwaysActive act
 720                    SimplPhase n _ -> isActive n act
 721     act = idInlineActivation bndr
 722
 723 activeInline :: SimplEnv -> OutId -> Bool
 724 activeInline env id
 725   = case getMode env of
 726       SimplGently -> False
 727         -- No inlining at all when doing gentle stuff,
 728         -- except for local things that occur once (pre/postInlineUnconditionally)
 729         -- The reason is that too little clean-up happens if you
 730         -- don't inline use-once things.   Also a bit of inlining is *good* for
 731         -- full laziness; it can expose constant sub-expressions.
 732         -- Example in spectral/mandel/Mandel.hs, where the mandelset
 733         -- function gets a useful let-float if you inline windowToViewport
 734
 735         -- NB: we used to have a second exception, for data con wrappers.
 736         -- On the grounds that we use gentle mode for rule LHSs, and
 737         -- they match better when data con wrappers are inlined.
 738         -- But that only really applies to the trivial wrappers (like (:)),
 739         -- and they are now constructed as Compulsory unfoldings (in MkId)
 740         -- so they'll happen anyway.
 741
 742       SimplPhase n _ -> isActive n act
 743   where
 744     act = idInlineActivation id
 745
 746 activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
 747 -- Nothing => No rules at all
 748 activeRule dflags env
 749   | not (dopt Opt_EnableRewriteRules dflags)
 750   = Nothing     -- Rewriting is off
 751   | otherwise
 752   = case getMode env of
 753         SimplGently    -> Just isAlwaysActive
 754                         -- Used to be Nothing (no rules in gentle mode)
 755                         -- Main motivation for changing is that I wanted
 756                         --      lift String ===> ...
 757                         -- to work in Template Haskell when simplifying
 758                         -- splices, so we get simpler code for literal strings
 759         SimplPhase n _ -> Just (isActive n)
 760 \end{code}
 761
 762 Note [InlineRule and postInlineUnconditionally]
 763 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 764 Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
 765 we lose the unfolding.  Example
 766
 767      -- f has InlineRule with rhs (e |> co)
 768      --   where 'e' is big
 769      f = e |> co
 770
 771 Then there's a danger we'll optimise to
 772
 773      f' = e
 774      f = f' |> co
 775
 776 and now postInlineUnconditionally, losing the InlineRule on f.  Now f'
 777 won't inline because 'e' is too big.
 778
 779
 780 %************************************************************************
 781 %*                                                                      *
 782         Rebuilding a lambda
 783 %*                                                                      *
 784 %************************************************************************
 785
 786 \begin{code}
 787 mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
 788 -- mkLam tries three things
 789 --      a) eta reduction, if that gives a trivial expression
 790 --      b) eta expansion [only if there are some value lambdas]
 791
 792 mkLam _b [] body
 793   = return body
 794 mkLam env bndrs body
 795   = do  { dflags <- getDOptsSmpl
 796         ; mkLam' dflags bndrs body }
 797   where
 798     mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
 799     mkLam' dflags bndrs (Cast body co)
 800       | not (any bad bndrs)
 801         -- Note [Casts and lambdas]
 802       = do { lam <- mkLam' dflags bndrs body
 803            ; return (mkCoerce (mkPiTypes bndrs co) lam) }
 804       where
 805         co_vars  = tyVarsOfType co
 806         bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
 807
 808     mkLam' dflags bndrs body
 809       | dopt Opt_DoEtaReduction dflags,
 810         Just etad_lam <- tryEtaReduce bndrs body
 811       = do { tick (EtaReduction (head bndrs))
 812            ; return etad_lam }
 813
 814       | dopt Opt_DoLambdaEtaExpansion dflags,
 815         not (inGentleMode env),       -- In gentle mode don't eta-expansion
 816         any isRuntimeVar bndrs        -- because it can clutter up the code
 817                                       -- with casts etc that may not be removed
 818       = do { let body' = tryEtaExpansion dflags body
 819            ; return (mkLams bndrs body') }
 820
 821       | otherwise
 822       = return (mkLams bndrs body)
 823 \end{code}
 824
 825 Note [Casts and lambdas]
 826 ~~~~~~~~~~~~~~~~~~~~~~~~
 827 Consider
 828         (\x. (\y. e) `cast` g1) `cast` g2
 829 There is a danger here that the two lambdas look separated, and the
 830 full laziness pass might float an expression to between the two.
 831
 832 So this equation in mkLam' floats the g1 out, thus:
 833         (\x. e `cast` g1)  -->  (\x.e) `cast` (tx -> g1)
 834 where x:tx.
 835
 836 In general, this floats casts outside lambdas, where (I hope) they
 837 might meet and cancel with some other cast:
 838         \x. e `cast` co   ===>   (\x. e) `cast` (tx -> co)
 839         /\a. e `cast` co  ===>   (/\a. e) `cast` (/\a. co)
 840         /\g. e `cast` co  ===>   (/\g. e) `cast` (/\g. co)
 841                           (if not (g `in` co))
 842
 843 Notice that it works regardless of 'e'.  Originally it worked only
 844 if 'e' was itself a lambda, but in some cases that resulted in
 845 fruitless iteration in the simplifier.  A good example was when
 846 compiling Text.ParserCombinators.ReadPrec, where we had a definition
 847 like    (\x. Get `cast` g)
 848 where Get is a constructor with nonzero arity.  Then mkLam eta-expanded
 849 the Get, and the next iteration eta-reduced it, and then eta-expanded
 850 it again.
 851
 852 Note also the side condition for the case of coercion binders.
 853 It does not make sense to transform
 854         /\g. e `cast` g  ==>  (/\g.e) `cast` (/\g.g)
 855 because the latter is not well-kinded.
 856
 857 --      c) floating lets out through big lambdas
 858 --              [only if all tyvar lambdas, and only if this lambda
 859 --               is the RHS of a let]
 860
 861 {-      Sept 01: I'm experimenting with getting the
 862         full laziness pass to float out past big lambdsa
 863  | all isTyVar bndrs,   -- Only for big lambdas
 864    contIsRhs cont       -- Only try the rhs type-lambda floating
 865                         -- if this is indeed a right-hand side; otherwise
 866                         -- we end up floating the thing out, only for float-in
 867                         -- to float it right back in again!
 868  = do (floats, body') <- tryRhsTyLam env bndrs body
 869       return (floats, mkLams bndrs body')
 870 -}
 871
 872
 873 %************************************************************************
 874 %*                                                                      *
 875                 Eta reduction
 876 %*                                                                      *
 877 %************************************************************************
 878
 879 Note [Eta reduction conditions]
 880 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 881 We try for eta reduction here, but *only* if we get all the way to an
 882 trivial expression.  We don't want to remove extra lambdas unless we
 883 are going to avoid allocating this thing altogether.
 884
 885 There are some particularly delicate points here:
 886
 887 * Eta reduction is not valid in general:
 888         \x. bot  /=  bot
 889   This matters, partly for old-fashioned correctness reasons but,
 890   worse, getting it wrong can yield a seg fault. Consider
 891         f = \x.f x
 892         h y = case (case y of { True -> f `seq` True; False -> False }) of
 893                 True -> ...; False -> ...
 894
 895   If we (unsoundly) eta-reduce f to get f=f, the strictness analyser
 896   says f=bottom, and replaces the (f `seq` True) with just
 897   (f `cast` unsafe-co).  BUT, as thing stand, 'f' got arity 1, and it
 898   *keeps* arity 1 (perhaps also wrongly).  So CorePrep eta-expands
 899   the definition again, so that it does not termninate after all.
 900   Result: seg-fault because the boolean case actually gets a function value.
 901   See Trac #1947.
 902
 903   So it's important to to the right thing.
 904
 905 * Note [Arity care]: we need to be careful if we just look at f's
 906   arity. Currently (Dec07), f's arity is visible in its own RHS (see
 907   Note [Arity robustness] in SimplEnv) so we must *not* trust the
 908   arity when checking that 'f' is a value.  Otherwise we will
 909   eta-reduce
 910       f = \x. f x
 911   to
 912       f = f
 913   Which might change a terminiating program (think (f `seq` e)) to a
 914   non-terminating one.  So we check for being a loop breaker first.
 915
 916   However for GlobalIds we can look at the arity; and for primops we
 917   must, since they have no unfolding.
 918
 919 * Regardless of whether 'f' is a value, we always want to
 920   reduce (/\a -> f a) to f
 921   This came up in a RULE: foldr (build (/\a -> g a))
 922   did not match           foldr (build (/\b -> ...something complex...))
 923   The type checker can insert these eta-expanded versions,
 924   with both type and dictionary lambdas; hence the slightly
 925   ad-hoc isDictId
 926
 927 * Never *reduce* arity. For example
 928       f = \xy. g x y
 929   Then if h has arity 1 we don't want to eta-reduce because then
 930   f's arity would decrease, and that is bad
 931
 932 These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
 933 Alas.
 934
 935 \begin{code}
 936 tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr
 937 tryEtaReduce bndrs body
 938   = go (reverse bndrs) body
 939   where
 940     incoming_arity = count isId bndrs
 941
 942     go (b : bs) (App fun arg) | ok_arg b arg = go bs fun        -- Loop round
 943     go []       fun           | ok_fun fun   = Just fun         -- Success!
 944     go _        _                            = Nothing          -- Failure!
 945
 946         -- Note [Eta reduction conditions]
 947     ok_fun (App fun (Type ty))
 948         | not (any (`elemVarSet` tyVarsOfType ty) bndrs)
 949         =  ok_fun fun
 950     ok_fun (Var fun_id)
 951         =  not (fun_id `elem` bndrs)
 952         && (ok_fun_id fun_id || all ok_lam bndrs)
 953     ok_fun _fun = False
 954
 955     ok_fun_id fun = fun_arity fun >= incoming_arity
 956
 957     fun_arity fun             -- See Note [Arity care]
 958        | isLocalId fun && isLoopBreaker (idOccInfo fun) = 0
 959        | otherwise = idArity fun
 960
 961     ok_lam v = isTyVar v || isDictId v
 962
 963     ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
 964 \end{code}
 965
 966
 967 %************************************************************************
 968 %*                                                                      *
 969                 Eta expansion
 970 %*                                                                      *
 971 %************************************************************************
 972
 973
 974 We go for:
 975    f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
 976                                  (n >= 0)
 977
 978 where (in both cases)
 979
 980         * The xi can include type variables
 981
 982         * The yi are all value variables
 983
 984         * N is a NORMAL FORM (i.e. no redexes anywhere)
 985           wanting a suitable number of extra args.
 986
 987 The biggest reason for doing this is for cases like
 988
 989         f = \x -> case x of
 990                     True  -> \y -> e1
 991                     False -> \y -> e2
 992
 993 Here we want to get the lambdas together.  A good exmaple is the nofib
 994 program fibheaps, which gets 25% more allocation if you don't do this
 995 eta-expansion.
 996
 997 We may have to sandwich some coerces between the lambdas
 998 to make the types work.   exprEtaExpandArity looks through coerces
 999 when computing arity; and etaExpand adds the coerces as necessary when
1000 actually computing the expansion.
1001
1002 \begin{code}
1003 tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr
1004 -- There is at least one runtime binder in the binders
1005 tryEtaExpansion dflags body
1006   = etaExpand fun_arity body
1007   where
1008     fun_arity = exprEtaExpandArity dflags body
1009 \end{code}
1010
1011
1012 %************************************************************************
1013 %*                                                                      *
1014 \subsection{Floating lets out of big lambdas}
1015 %*                                                                      *
1016 %************************************************************************
1017
1018 Note [Floating and type abstraction]
1019 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1020 Consider this:
1021         x = /\a. C e1 e2
1022 We'd like to float this to
1023         y1 = /\a. e1
1024         y2 = /\a. e2
1025         x  = /\a. C (y1 a) (y2 a)
1026 for the usual reasons: we want to inline x rather vigorously.
1027
1028 You may think that this kind of thing is rare.  But in some programs it is
1029 common.  For example, if you do closure conversion you might get:
1030
1031         data a :-> b = forall e. (e -> a -> b) :$ e
1032
1033         f_cc :: forall a. a :-> a
1034         f_cc = /\a. (\e. id a) :$ ()
1035
1036 Now we really want to inline that f_cc thing so that the
1037 construction of the closure goes away.
1038
1039 So I have elaborated simplLazyBind to understand right-hand sides that look
1040 like
1041         /\ a1..an. body
1042
1043 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1044 but there is quite a bit of plumbing in simplLazyBind as well.
1045
1046 The same transformation is good when there are lets in the body:
1047
1048         /\abc -> let(rec) x = e in b
1049    ==>
1050         let(rec) x' = /\abc -> let x = x' a b c in e
1051         in
1052         /\abc -> let x = x' a b c in b
1053
1054 This is good because it can turn things like:
1055
1056         let f = /\a -> letrec g = ... g ... in g
1057 into
1058         letrec g' = /\a -> ... g' a ...
1059         in
1060         let f = /\ a -> g' a
1061
1062 which is better.  In effect, it means that big lambdas don't impede
1063 let-floating.
1064
1065 This optimisation is CRUCIAL in eliminating the junk introduced by
1066 desugaring mutually recursive definitions.  Don't eliminate it lightly!
1067
1068 [May 1999]  If we do this transformation *regardless* then we can
1069 end up with some pretty silly stuff.  For example,
1070
1071         let
1072             st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1073         in ..
1074 becomes
1075         let y1 = /\s -> r1
1076             y2 = /\s -> r2
1077             st = /\s -> ...[y1 s/x1, y2 s/x2]
1078         in ..
1079
1080 Unless the "..." is a WHNF there is really no point in doing this.
1081 Indeed it can make things worse.  Suppose x1 is used strictly,
1082 and is of the form
1083
1084         x1* = case f y of { (a,b) -> e }
1085
1086 If we abstract this wrt the tyvar we then can't do the case inline
1087 as we would normally do.
1088
1089 That's why the whole transformation is part of the same process that
1090 floats let-bindings and constructor arguments out of RHSs.  In particular,
1091 it is guarded by the doFloatFromRhs call in simplLazyBind.
1092
1093
1094 \begin{code}
1095 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1096 abstractFloats main_tvs body_env body
1097   = ASSERT( notNull body_floats )
1098     do  { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1099         ; return (float_binds, CoreSubst.substExpr subst body) }
1100   where
1101     main_tv_set = mkVarSet main_tvs
1102     body_floats = getFloats body_env
1103     empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1104
1105     abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1106     abstract subst (NonRec id rhs)
1107       = do { (poly_id, poly_app) <- mk_poly tvs_here id
1108            ; let poly_rhs = mkLams tvs_here rhs'
1109                  subst'   = CoreSubst.extendIdSubst subst id poly_app
1110            ; return (subst', (NonRec poly_id poly_rhs)) }
1111       where
1112         rhs' = CoreSubst.substExpr subst rhs
1113         tvs_here | any isCoVar main_tvs = main_tvs      -- Note [Abstract over coercions]
1114                  | otherwise
1115                  = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
1116
1117                 -- Abstract only over the type variables free in the rhs
1118                 -- wrt which the new binding is abstracted.  But the naive
1119                 -- approach of abstract wrt the tyvars free in the Id's type
1120                 -- fails. Consider:
1121                 --      /\ a b -> let t :: (a,b) = (e1, e2)
1122                 --                    x :: a     = fst t
1123                 --                in ...
1124                 -- Here, b isn't free in x's type, but we must nevertheless
1125                 -- abstract wrt b as well, because t's type mentions b.
1126                 -- Since t is floated too, we'd end up with the bogus:
1127                 --      poly_t = /\ a b -> (e1, e2)
1128                 --      poly_x = /\ a   -> fst (poly_t a *b*)
1129                 -- So for now we adopt the even more naive approach of
1130                 -- abstracting wrt *all* the tyvars.  We'll see if that
1131                 -- gives rise to problems.   SLPJ June 98
1132
1133     abstract subst (Rec prs)
1134        = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1135             ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1136                   poly_rhss = [mkLams tvs_here (CoreSubst.substExpr subst' rhs) | rhs <- rhss]
1137             ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1138        where
1139          (ids,rhss) = unzip prs
1140                 -- For a recursive group, it's a bit of a pain to work out the minimal
1141                 -- set of tyvars over which to abstract:
1142                 --      /\ a b c.  let x = ...a... in
1143                 --                 letrec { p = ...x...q...
1144                 --                          q = .....p...b... } in
1145                 --                 ...
1146                 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1147                 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1148                 -- Since it's a pain, we just use the whole set, which is always safe
1149                 --
1150                 -- If you ever want to be more selective, remember this bizarre case too:
1151                 --      x::a = x
1152                 -- Here, we must abstract 'x' over 'a'.
1153          tvs_here = main_tvs
1154
1155     mk_poly tvs_here var
1156       = do { uniq <- getUniqueM
1157            ; let  poly_name = setNameUnique (idName var) uniq           -- Keep same name
1158                   poly_ty   = mkForAllTys tvs_here (idType var) -- But new type of course
1159                   poly_id   = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.lhs
1160                               mkLocalId poly_name poly_ty
1161            ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1162                 -- In the olden days, it was crucial to copy the occInfo of the original var,
1163                 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1164                 -- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
1165                 -- at already simplified code, so it doesn't matter
1166                 --
1167                 -- It's even right to retain single-occurrence or dead-var info:
1168                 -- Suppose we started with  /\a -> let x = E in B
1169                 -- where x occurs once in B. Then we transform to:
1170                 --      let x' = /\a -> E in /\a -> let x* = x' a in B
1171                 -- where x* has an INLINE prag on it.  Now, once x* is inlined,
1172                 -- the occurrences of x' will be just the occurrences originally
1173                 -- pinned on x.
1174 \end{code}
1175
1176 Note [Abstract over coercions]
1177 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1178 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1179 type variable a.  Rather than sort this mess out, we simply bale out and abstract
1180 wrt all the type variables if any of them are coercion variables.
1181
1182
1183 Historical note: if you use let-bindings instead of a substitution, beware of this:
1184
1185                 -- Suppose we start with:
1186                 --
1187                 --      x = /\ a -> let g = G in E
1188                 --
1189                 -- Then we'll float to get
1190                 --
1191                 --      x = let poly_g = /\ a -> G
1192                 --          in /\ a -> let g = poly_g a in E
1193                 --
1194                 -- But now the occurrence analyser will see just one occurrence
1195                 -- of poly_g, not inside a lambda, so the simplifier will
1196                 -- PreInlineUnconditionally poly_g back into g!  Badk to square 1!
1197                 -- (I used to think that the "don't inline lone occurrences" stuff
1198                 --  would stop this happening, but since it's the *only* occurrence,
1199                 --  PreInlineUnconditionally kicks in first!)
1200                 --
1201                 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1202                 --           to appear many times.  (NB: mkInlineMe eliminates
1203                 --           such notes on trivial RHSs, so do it manually.)
1204
1205 %************************************************************************
1206 %*                                                                      *
1207                 prepareAlts
1208 %*                                                                      *
1209 %************************************************************************
1210
1211 prepareAlts tries these things:
1212
1213 1.  If several alternatives are identical, merge them into
1214     a single DEFAULT alternative.  I've occasionally seen this
1215     making a big difference:
1216
1217         case e of               =====>     case e of
1218           C _ -> f x                         D v -> ....v....
1219           D v -> ....v....                   DEFAULT -> f x
1220           DEFAULT -> f x
1221
1222    The point is that we merge common RHSs, at least for the DEFAULT case.
1223    [One could do something more elaborate but I've never seen it needed.]
1224    To avoid an expensive test, we just merge branches equal to the *first*
1225    alternative; this picks up the common cases
1226         a) all branches equal
1227         b) some branches equal to the DEFAULT (which occurs first)
1228
1229 2.  Case merging:
1230        case e of b {             ==>   case e of b {
1231          p1 -> rhs1                      p1 -> rhs1
1232          ...                             ...
1233          pm -> rhsm                      pm -> rhsm
1234          _  -> case b of b' {            pn -> let b'=b in rhsn
1235                      pn -> rhsn          ...
1236                      ...                 po -> let b'=b in rhso
1237                      po -> rhso          _  -> let b'=b in rhsd
1238                      _  -> rhsd
1239        }
1240
1241     which merges two cases in one case when -- the default alternative of
1242     the outer case scrutises the same variable as the outer case This
1243     transformation is called Case Merging.  It avoids that the same
1244     variable is scrutinised multiple times.
1245
1246
1247 The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs):
1248
1249         x | p `is` 1 -> e1
1250           | p `is` 2 -> e2
1251         ...etc...
1252
1253 where @is@ was something like
1254
1255         p `is` n = p /= (-1) && p == n
1256
1257 This gave rise to a horrible sequence of cases
1258
1259         case p of
1260           (-1) -> $j p
1261           1    -> e1
1262           DEFAULT -> $j p
1263
1264 and similarly in cascade for all the join points!
1265
1266 Note [Dead binders]
1267 ~~~~~~~~~~~~~~~~~~~~
1268 We do this *here*, looking at un-simplified alternatives, because we
1269 have to check that r doesn't mention the variables bound by the
1270 pattern in each alternative, so the binder-info is rather useful.
1271
1272 \begin{code}
1273 prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1274 prepareAlts env scrut case_bndr' alts
1275   = do  { dflags <- getDOptsSmpl
1276         ; alts <- combineIdenticalAlts case_bndr' alts
1277
1278         ; let (alts_wo_default, maybe_deflt) = findDefault alts
1279               alt_cons = [con | (con,_,_) <- alts_wo_default]
1280               imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
1281                 -- "imposs_deflt_cons" are handled
1282                 --   EITHER by the context,
1283                 --   OR by a non-DEFAULT branch in this case expression.
1284
1285         ; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app
1286                                          imposs_deflt_cons maybe_deflt
1287
1288         ; let trimmed_alts = filterOut impossible_alt alts_wo_default
1289               merged_alts = mergeAlts trimmed_alts default_alts
1290                 -- We need the mergeAlts in case the new default_alt
1291                 -- has turned into a constructor alternative.
1292                 -- The merge keeps the inner DEFAULT at the front, if there is one
1293                 -- and interleaves the alternatives in the right order
1294
1295         ; return (imposs_deflt_cons, merged_alts) }
1296   where
1297     mb_tc_app = splitTyConApp_maybe (idType case_bndr')
1298     Just (_, inst_tys) = mb_tc_app
1299
1300     imposs_cons = case scrut of
1301                     Var v -> otherCons (idUnfolding v)
1302                     _     -> []
1303
1304     impossible_alt :: CoreAlt -> Bool
1305     impossible_alt (con, _, _) | con `elem` imposs_cons = True
1306     impossible_alt (DataAlt con, _, _) = dataConCannotMatch inst_tys con
1307     impossible_alt _                   = False
1308
1309
1310 --------------------------------------------------
1311 --      1. Merge identical branches
1312 --------------------------------------------------
1313 combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
1314
1315 combineIdenticalAlts case_bndr ((_con1,bndrs1,rhs1) : con_alts)
1316   | all isDeadBinder bndrs1,                    -- Remember the default
1317     length filtered_alts < length con_alts      -- alternative comes first
1318         -- Also Note [Dead binders]
1319   = do  { tick (AltMerge case_bndr)
1320         ; return ((DEFAULT, [], rhs1) : filtered_alts) }
1321   where
1322     filtered_alts        = filter keep con_alts
1323     keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
1324
1325 combineIdenticalAlts _ alts = return alts
1326
1327 -------------------------------------------------------------------------
1328 --                      Prepare the default alternative
1329 -------------------------------------------------------------------------
1330 prepareDefault :: DynFlags
1331                -> SimplEnv
1332                -> OutId         -- Case binder; need just for its type. Note that as an
1333                                 --   OutId, it has maximum information; this is important.
1334                                 --   Test simpl013 is an example
1335                -> Maybe (TyCon, [Type]) -- Type of scrutinee, decomposed
1336                -> [AltCon]      -- These cons can't happen when matching the default
1337                -> Maybe InExpr  -- Rhs
1338                -> SimplM [InAlt]        -- Still unsimplified
1339                                         -- We use a list because it's what mergeAlts expects,
1340                                         -- And becuase case-merging can cause many to show up
1341
1342 ------- Merge nested cases ----------
1343 prepareDefault dflags env outer_bndr _bndr_ty imposs_cons (Just deflt_rhs)
1344   | dopt Opt_CaseMerge dflags
1345   , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
1346   , DoneId inner_scrut_var' <- substId env inner_scrut_var
1347         -- Remember, inner_scrut_var is an InId, but outer_bndr is an OutId
1348   , inner_scrut_var' == outer_bndr
1349         -- NB: the substId means that if the outer scrutinee was a
1350         --     variable, and inner scrutinee is the same variable,
1351         --     then inner_scrut_var' will be outer_bndr
1352         --     via the magic of simplCaseBinder
1353   = do  { tick (CaseMerge outer_bndr)
1354
1355         ; let munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
1356         ; return [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts,
1357                                                not (con `elem` imposs_cons) ]
1358                 -- NB: filter out any imposs_cons.  Example:
1359                 --      case x of
1360                 --        A -> e1
1361                 --        DEFAULT -> case x of
1362                 --                      A -> e2
1363                 --                      B -> e3
1364                 -- When we merge, we must ensure that e1 takes
1365                 -- precedence over e2 as the value for A!
1366         }
1367         -- Warning: don't call prepareAlts recursively!
1368         -- Firstly, there's no point, because inner alts have already had
1369         -- mkCase applied to them, so they won't have a case in their default
1370         -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1371         -- in munge_rhs may put a case into the DEFAULT branch!
1372
1373
1374 --------- Fill in known constructor -----------
1375 prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
1376   |     -- This branch handles the case where we are
1377         -- scrutinisng an algebraic data type
1378     isAlgTyCon tycon            -- It's a data type, tuple, or unboxed tuples.
1379   , not (isNewTyCon tycon)      -- We can have a newtype, if we are just doing an eval:
1380                                 --      case x of { DEFAULT -> e }
1381                                 -- and we don't want to fill in a default for them!
1382   , Just all_cons <- tyConDataCons_maybe tycon
1383   , not (null all_cons)         -- This is a tricky corner case.  If the data type has no constructors,
1384                                 -- which GHC allows, then the case expression will have at most a default
1385                                 -- alternative.  We don't want to eliminate that alternative, because the
1386                                 -- invariant is that there's always one alternative.  It's more convenient
1387                                 -- to leave
1388                                 --      case x of { DEFAULT -> e }
1389                                 -- as it is, rather than transform it to
1390                                 --      error "case cant match"
1391                                 -- which would be quite legitmate.  But it's a really obscure corner, and
1392                                 -- not worth wasting code on.
1393   , let imposs_data_cons = [con | DataAlt con <- imposs_cons]   -- We now know it's a data type
1394         impossible con  = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
1395   = case filterOut impossible all_cons of
1396         []    -> return []      -- Eliminate the default alternative
1397                                 -- altogether if it can't match
1398
1399         [con] ->        -- It matches exactly one constructor, so fill it in
1400                  do { tick (FillInCaseDefault case_bndr)
1401                     ; us <- getUniquesM
1402                     ; let (ex_tvs, co_tvs, arg_ids) =
1403                               dataConRepInstPat us con inst_tys
1404                     ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
1405
1406         _ -> return [(DEFAULT, [], deflt_rhs)]
1407
1408   | debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon)
1409         -- This can legitimately happen for type families, so don't report that
1410   = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
1411         $ return [(DEFAULT, [], deflt_rhs)]
1412
1413 --------- Catch-all cases -----------
1414 prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
1415   = return [(DEFAULT, [], deflt_rhs)]
1416
1417 prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons Nothing
1418   = return []   -- No default branch
1419 \end{code}
1420
1421
1422
1423 =================================================================================
1424
1425 mkCase tries these things
1426
1427 1.  Eliminate the case altogether if possible
1428
1429 2.  Case-identity:
1430
1431         case e of               ===> e
1432                 True  -> True;
1433                 False -> False
1434
1435     and similar friends.
1436
1437
1438 \begin{code}
1439 mkCase :: OutExpr -> OutId -> [OutAlt]  -- Increasing order
1440        -> SimplM OutExpr
1441
1442 --------------------------------------------------
1443 --      2. Identity case
1444 --------------------------------------------------
1445
1446 mkCase scrut case_bndr alts     -- Identity case
1447   | all identity_alt alts
1448   = do tick (CaseIdentity case_bndr)
1449        return (re_cast scrut)
1450   where
1451     identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
1452
1453     check_eq DEFAULT       _    (Var v)   = v == case_bndr
1454     check_eq (LitAlt lit') _    (Lit lit) = lit == lit'
1455     check_eq (DataAlt con) args rhs       = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
1456                                          || rhs `cheapEqExpr` Var case_bndr
1457     check_eq _ _ _ = False
1458
1459     arg_tys = map Type (tyConAppArgs (idType case_bndr))
1460
1461         -- We've seen this:
1462         --      case e of x { _ -> x `cast` c }
1463         -- And we definitely want to eliminate this case, to give
1464         --      e `cast` c
1465         -- So we throw away the cast from the RHS, and reconstruct
1466         -- it at the other end.  All the RHS casts must be the same
1467         -- if (all identity_alt alts) holds.
1468         --
1469         -- Don't worry about nested casts, because the simplifier combines them
1470     de_cast (Cast e _) = e
1471     de_cast e          = e
1472
1473     re_cast scrut = case head alts of
1474                         (_,_,Cast _ co) -> Cast scrut co
1475                         _               -> scrut
1476
1477
1478
1479 --------------------------------------------------
1480 --      Catch-all
1481 --------------------------------------------------
1482 mkCase scrut bndr alts = return (Case scrut bndr (coreAltsType alts) alts)
1483 \end{code}
1484
1485
1486 When adding auxiliary bindings for the case binder, it's worth checking if
1487 its dead, because it often is, and occasionally these mkCase transformations
1488 cascade rather nicely.
1489
1490 \begin{code}
1491 bindCaseBndr :: Id -> CoreExpr -> CoreExpr -> CoreExpr
1492 bindCaseBndr bndr rhs body
1493   | isDeadBinder bndr = body
1494   | otherwise         = bindNonRec bndr rhs body
1495 \end{code}