compiler/simplCore/SimplUtils.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1993-1998
   3 %
   4 \section[SimplUtils]{The simplifier utilities}
   5
   6 \begin{code}
   7 {-# OPTIONS -w #-}
   8 -- The above warning suppression flag is a temporary kludge.
   9 -- While working on this module you are encouraged to remove it and fix
  10 -- any warnings in the module. See
  11 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
  12 -- for details
  13
  14 module SimplUtils (
  15         -- Rebuilding
  16         mkLam, mkCase, prepareAlts, bindCaseBndr,
  17
  18         -- Inlining,
  19         preInlineUnconditionally, postInlineUnconditionally,
  20         activeInline, activeRule, inlineMode,
  21
  22         -- The continuation type
  23         SimplCont(..), DupFlag(..), ArgInfo(..),
  24         contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs,
  25         countValArgs, countArgs, splitInlineCont,
  26         mkBoringStop, mkLazyArgStop, mkRhsStop, contIsRhsOrArg,
  27         interestingCallContext, interestingArgContext,
  28
  29         interestingArg, mkArgInfo,
  30
  31         abstractFloats
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import SimplEnv
  37 import DynFlags
  38 import StaticFlags
  39 import CoreSyn
  40 import qualified CoreSubst
  41 import PprCore
  42 import CoreFVs
  43 import CoreUtils
  44 import Literal
  45 import CoreUnfold
  46 import MkId
  47 import Name
  48 import Id
  49 import Var      ( isCoVar )
  50 import NewDemand
  51 import SimplMonad
  52 import Type     hiding( substTy )
  53 import TyCon
  54 import DataCon
  55 import Unify    ( dataConCannotMatch )
  56 import VarSet
  57 import BasicTypes
  58 import Util
  59 import MonadUtils
  60 import Outputable
  61 import FastString
  62
  63 import List( nub )
  64 \end{code}
  65
  66
  67 %************************************************************************
  68 %*                                                                      *
  69                 The SimplCont type
  70 %*                                                                      *
  71 %************************************************************************
  72
  73 A SimplCont allows the simplifier to traverse the expression in a
  74 zipper-like fashion.  The SimplCont represents the rest of the expression,
  75 "above" the point of interest.
  76
  77 You can also think of a SimplCont as an "evaluation context", using
  78 that term in the way it is used for operational semantics. This is the
  79 way I usually think of it.  For example you'll often see a syntax for
  80 evaluation context looking like
  81         C ::= []  |  C e   |  case C of alts  |  C `cast` co
  82 That's the kind of thing we are doing here, and I use that syntax in
  83 the comments.
  84
  85
  86 Key points:
  87   * A SimplCont describes a *strict* context (just like
  88     evaluation contexts do).  E.g. Just [] is not a SimplCont
  89
  90   * A SimplCont describes a context that *does not* bind
  91     any variables.  E.g. \x. [] is not a SimplCont
  92
  93 \begin{code}
  94 data SimplCont
     -- The "rest of the expression" surrounding the point the simplifier is
     -- currently looking at.  Every constructor except Stop carries, as its
     -- last field, the SimplCont for the context further out.
  95   = Stop                -- An empty context, or hole, []
  96         OutType         -- Type of the result
  97         CallCtxt        -- Says how interesting the context is; an interesting
  98                         --          context makes the inliner
  99                         --          a bit keener (see interestingCallContext)
 100                         -- Specifically:
 101                         --     This is an argument of a function that has RULES
 102                         --     Inlining the call might allow the rule to fire
 103
 104   | CoerceIt            -- C `cast` co
 105         OutCoercion             -- The coercion simplified
 106         SimplCont
 107
 108   | ApplyTo             -- C arg
 109         DupFlag
 110         InExpr SimplEnv         -- The argument and its static env
 111         SimplCont
 112
 113   | Select              -- case C of alts
 114         DupFlag
 115         InId [InAlt] SimplEnv   -- The case binder, alts, and subst-env
 116         SimplCont
 117
 118   -- The two strict forms have no DupFlag, because we never duplicate them
 119   | StrictBind          -- (\x* \xs. e) C
 120         InId [InBndr]           -- let x* = [] in e
 121         InExpr SimplEnv         --      is a special case
 122         SimplCont
 123
 124   | StrictArg           -- e C
 125         OutExpr OutType         -- e and its type
 126         CallCtxt                -- Whether *this* argument position is interesting
 127         ArgInfo                 -- Whether the function at the head of e has rules, etc
 128         SimplCont               --     plus strictness flags for *further* args
 129
 130 data ArgInfo
     -- Per-call facts about the function at the head of an application.
     -- Values are built by mkArgInfo and stored in StrictArg frames.
 131   = ArgInfo {
 132         ai_rules :: Bool,       -- Function has rules (recursively)
 133                                 --      => be keener to inline in all args
 134         ai_strs :: [Bool],      -- Strictness of arguments
 135                                 --   Usually infinite, but if it is finite it guarantees
 136                                 --   that the function diverges after being given
 137                                 --   that number of args
 138         ai_discs :: [Int]       -- Discounts for arguments; non-zero => be keener to inline
 139                                 --   Always infinite (mkArgInfo pads with 'repeat 0')
 140     }
 141
 142 instance Outputable SimplCont where
 143   -- Each frame prints a one-line header; every frame except Stop then
 144   -- prints the continuation it wraps underneath, using ($$).
 145   ppr (Stop res_ty _)
 146     = ptext SLIT("Stop") <+> ppr res_ty
 147   ppr (ApplyTo dup_flag arg _ rest)     -- the argument's static env is not printed
 148     = (ptext SLIT("ApplyTo") <+> ppr dup_flag <+> pprParendExpr arg) $$ ppr rest
 149   ppr (StrictBind bndr _ _ _ rest)
 150     = (ptext SLIT("StrictBind") <+> ppr bndr) $$ ppr rest
       ppr (StrictArg fun _ _ _ rest)
         = (ptext SLIT("StrictArg") <+> ppr fun) $$ ppr rest
       ppr (Select dup_flag bndr alts _ rest)
         = (ptext SLIT("Select") <+> ppr dup_flag <+> ppr bndr)
             $$ (nest 4 (ppr alts))
             $$ ppr rest
       ppr (CoerceIt co rest)
         = (ptext SLIT("CoerceIt") <+> ppr co) $$ ppr rest
 151
 152 data DupFlag = OkToDup | NoDup
 153   -- Carried by ApplyTo and Select frames; contIsDupable accepts such a
 154   -- frame only when its flag is OkToDup.
 155
 156 instance Outputable DupFlag where
       ppr flag = case flag of
                    OkToDup -> ptext SLIT("ok")
                    NoDup   -> ptext SLIT("nodup")
 157
 158
 159
 160 -------------------
 161 mkBoringStop :: OutType -> SimplCont
 162 -- The empty continuation for a result of the given type, tagged BoringCtxt.
     mkBoringStop res_ty = Stop res_ty BoringCtxt
 163
 164 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
 165 -- The empty continuation for the given type, tagged with the caller's CallCtxt.
     mkLazyArgStop arg_ty ctxt = Stop arg_ty ctxt
 166
 167 mkRhsStop :: OutType -> SimplCont
 168 -- Builds a Stop of the given type tagged BoringCtxt.
     mkRhsStop rhs_ty = Stop rhs_ty BoringCtxt
 169
 170 -------------------
 171 contIsRhsOrArg :: SimplCont -> Bool
 172 -- True exactly for the Stop, StrictBind and StrictArg forms of continuation;
 173 -- every other form (CoerceIt, ApplyTo, Select) gives False.
 174 contIsRhsOrArg (Stop {})         = True
     contIsRhsOrArg (StrictBind {})   = True
     contIsRhsOrArg (StrictArg {})    = True
     contIsRhsOrArg _                 = False
 175
 176 -------------------
 177 contIsDupable :: SimplCont -> Bool
 178 -- A continuation is dupable when it is a Stop, or an ApplyTo/Select frame
 179 -- flagged OkToDup.  Coercions are looked through; anything else is not dupable.
 180 contIsDupable cont
 181   = case cont of
 182       Stop {}                  -> True
           ApplyTo OkToDup _ _ _    -> True
           Select OkToDup _ _ _ _   -> True
           CoerceIt _ inner         -> contIsDupable inner
           _                        -> False
 183
 184 -------------------
 185 contIsTrivial :: SimplCont -> Bool
 186 -- True when the continuation consists only of type applications and
 187 -- coercions, ending in a Stop.
 188 contIsTrivial cont
 189   = case cont of
           Stop {}                     -> True
           ApplyTo _ (Type _) _ rest   -> contIsTrivial rest
           CoerceIt _ rest             -> contIsTrivial rest
           _                           -> False
 190
 191 -------------------
 192 contResultType :: SimplCont -> OutType
 193 -- Walks out through every frame to the final Stop and returns the type
 194 -- recorded there.
 195 contResultType cont
 196   = case cont of
 197       Stop res_ty _            -> res_ty
 198       CoerceIt _ rest          -> contResultType rest
           ApplyTo _ _ _ rest       -> contResultType rest
           Select _ _ _ _ rest      -> contResultType rest
           StrictBind _ _ _ _ rest  -> contResultType rest
           StrictArg _ _ _ _ rest   -> contResultType rest
 199
 200 -------------------
 201 countValArgs :: SimplCont -> Int
 202 -- Number of non-type arguments in the leading run of ApplyTo frames.
 203 countValArgs (ApplyTo _ arg _ rest)
 204   | Type _ <- arg = countValArgs rest
       | otherwise     = 1 + countValArgs rest
     countValArgs _    = 0
 205
 206 countArgs :: SimplCont -> Int
 207 -- Number of arguments (type and value alike) in the leading run of ApplyTo frames.
 208 countArgs cont
       = case cont of
           ApplyTo _ _ _ rest -> 1 + countArgs rest
           _                  -> 0
 209
 210 contArgs :: SimplCont -> ([OutExpr], SimplCont)
 211 -- Peels off the leading ApplyTo frames.  Each argument is substituted with
 212 -- its own static env to make an OutExpr; the arguments come back in
 213 -- application order, paired with the first non-ApplyTo continuation.
 214 contArgs cont = collect cont []
 215   where
         collect (ApplyTo _ arg env rest) acc = collect rest (substExpr env arg : acc)
         collect rest                     acc = (reverse acc, rest)
 216
 217 dropArgs :: Int -> SimplCont -> SimplCont
 218 -- Discards the first n ApplyTo frames.  Panics if a non-ApplyTo frame is
 219 -- reached while there are still frames left to drop.
 220 dropArgs n cont
       | n == 0    = cont
       | otherwise = case cont of
                       ApplyTo _ _ _ rest -> dropArgs (n-1) rest
                       _                  -> pprPanic "dropArgs" (ppr n <+> ppr cont)
 221
 222 --------------------
 223 splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont)
 224 -- Returns Nothing if the continuation should dissolve an InlineMe Note
 225 -- Return Just (c1,c2) otherwise,
 226 --      where c1 is the continuation to put inside the InlineMe
 227 --      and   c2 outside
 228
 229 -- Example: (__inline_me__ (/\a. e)) ty
 230 --      Here we want to do the beta-redex without dissolving the InlineMe
 231 -- See test simpl017 (and Trac #1627) for a good example of why this is important
 232
     -- Type-application frames are kept on the inside part (c1).  If the rest
     -- of the continuation cannot be split, the pattern guard fails and the
     -- ApplyTo falls through to the final catch-all clause, giving Nothing.
 233 splitInlineCont (ApplyTo dup (Type ty) se c)
 234   | Just (c1, c2) <- splitInlineCont c          = Just (ApplyTo dup (Type ty) se c1, c2)
     -- For Stop, StrictBind and StrictArg the inside part is a boring Stop at
     -- the type of the hole, and the whole original continuation stays outside.
 235 splitInlineCont cont@(Stop ty _)                = Just (mkBoringStop ty, cont)
 236 splitInlineCont cont@(StrictBind bndr _ _ se _) = Just (mkBoringStop (substTy se (idType bndr)), cont)
 237 splitInlineCont cont@(StrictArg _ fun_ty _ _ _) = Just (mkBoringStop (funArgTy fun_ty), cont)
 238 splitInlineCont other                           = Nothing
 239         -- NB: the calculation of the type for mkBoringStop is an annoying
 240         --     duplication of the same calculation in mkDupableCont
 241 \end{code}
 242
 243
 244 \begin{code}
 245 interestingArg :: OutExpr -> Bool
 246         -- An argument is interesting if it has *some* structure
 247         -- We are here trying to avoid unfolding a function that
 248         -- is applied only to variables that have no unfolding
 249         -- (i.e. they are probably lambda bound): f x y z
 250         -- There is little point in inlining f here.
         --
         -- Clause order matters: the Var, Type, type-application and Note
         -- cases are tried first; every other expression form reaches the
         -- final catch-all clause and counts as interesting.
 251 interestingArg (Var v)           = hasSomeUnfolding (idUnfolding v)
 252                                         -- Was: isValueUnfolding (idUnfolding v')
 253                                         -- But that seems over-pessimistic
 254                                  || isDataConWorkId v
 255                                         -- This accounts for an argument like
 256                                         -- () or [], which is definitely interesting
 257 interestingArg (Type _)          = False
 258 interestingArg (App fn (Type _)) = interestingArg fn
 259 interestingArg (Note _ a)        = interestingArg a
 260
 261 -- Idea (from Sam B); I'm not sure if it's a good idea, so commented out for now
 262 -- interestingArg expr | isUnLiftedType (exprType expr)
 263 --        -- Unlifted args are only ever interesting if we know what they are
 264 --  =                  case expr of
 265 --                        Lit lit -> True
 266 --                        _       -> False
 267
 268 interestingArg other             = True
 269         -- Consider     let x = 3 in f x
 270         -- The substitution will contain (x -> ContEx 3), and we want
 271         -- to say that x is an interesting argument.
 272         -- But consider also (\x. f x y) y
 273         -- The substitution will contain (x -> ContEx y), and we want to say
 274         -- that x is not interesting (assuming y has no unfolding)
 275 \end{code}
 276
 277
 278 Comment about interestingCallContext
 279 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 280 We want to avoid inlining an expression where there can't possibly be
 281 any gain, such as in an argument position.  Hence, if the continuation
 282 is interesting (eg. a case scrutinee, application etc.) then we
 283 inline, otherwise we don't.
 284
 285 Previously some_benefit used to return True only if the variable was
 286 applied to some value arguments.  This didn't work:
 287
 288         let x = _coerce_ (T Int) Int (I# 3) in
 289         case _coerce_ Int (T Int) x of
 290                 I# y -> ....
 291
 292 we want to inline x, but can't see that it's a constructor in a case
 293 scrutinee position, and some_benefit is False.
 294
 295 Another example:
 296
 297 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
 298
 299 ....  case dMonadST _@_ x0 of (a,b,c) -> ....
 300
 301 we'd really like to inline dMonadST here, but we *don't* want to
 302 inline if the case expression is just
 303
 304         case x of y { DEFAULT -> ... }
 305
 306 since we can just eliminate this case instead (x is in WHNF).  Similar
 307 applies when x is bound to a lambda expression.  Hence
 308 contIsInteresting looks for case expressions with just a single
 309 default case.
 310
 311
 312 \begin{code}
 313 interestingCallContext :: SimplCont -> CallCtxt
     -- Classifies the context a call sits in, by the outermost frame:
     --   Select with a dead case binder  => CaseCtxt
     --   Select otherwise, or ApplyTo    => ArgCtxt False 2
     --   StrictArg, Stop                 => the CallCtxt stored in the frame
     --   StrictBind                      => BoringCtxt
     --   CoerceIt                        => look through to the wrapped continuation
 314 interestingCallContext cont
 315   = interesting cont
 316   where
 317     interestingCtxt = ArgCtxt False 2   -- Give *some* incentive!
 318
 319     interesting (Select _ bndr _ _ _)
 320         | isDeadBinder bndr       = CaseCtxt
 321         | otherwise               = interestingCtxt
 322
 323     interesting (ApplyTo {})      = interestingCtxt
 324                                 -- Can happen if we have (coerce t (f x)) y
 325                                 -- Perhaps interestingCtxt is a bit over-keen, but I've
 326                                 -- seen (coerce f) x, where f has an INLINE prag,
 327                                 -- So we have to give some motivation for inlining it
 328
 329     interesting (StrictArg _ _ cci _ _) = cci
 330     interesting (StrictBind {})         = BoringCtxt
 331     interesting (Stop ty cci)           = cci
 332     interesting (CoerceIt _ cont)       = interesting cont
         -- NOTE(review): the remarks below appear to concern the StrictArg
         -- case rather than the CoerceIt clause they follow.
 333         -- If this call is the arg of a strict function, the context
 334         -- is a bit interesting.  If we inline here, we may get useful
 335         -- evaluation information to avoid repeated evals: e.g.
 336         --      x + (y * z)
 337         -- Here the contIsInteresting makes the '*' keener to inline,
 338         -- which in turn exposes a constructor which makes the '+' inline.
 339         -- Assuming that +,* aren't small enough to inline regardless.
 340         --
 341         -- It's also very important to inline in a strict context for things
 342         -- like
 343         --              foldr k z (f x)
 344         -- Here, the context of (f x) is strict, and if f's unfolding is
 345         -- a build it's *great* to inline it here.  So we must ensure that
 346         -- the context for (f x) is not totally uninteresting.
 347
 348
 349 -------------------
 350 mkArgInfo :: Id
 351           -> Int        -- Number of value args
 352           -> SimplCont  -- Context of the call
 353           -> ArgInfo
     -- Builds the ArgInfo for a call to 'fun' with 'n_val_args' value args.
     --   * Fewer args than the function's arity: no rules, all args lazy,
     --     all discounts zero.
     --   * Otherwise: ai_rules comes from interestingArgContext, ai_strs from
     --     the function's strictness signature, and ai_discs from its
     --     unfolding, padded with zeros.
 354
 355 mkArgInfo fun n_val_args call_cont
 356   | n_val_args < idArity fun            -- Note [Unsaturated functions]
 357   = ArgInfo { ai_rules = False
 358             , ai_strs = vanilla_stricts
 359             , ai_discs = vanilla_discounts }
 360   | otherwise
 361   = ArgInfo { ai_rules = interestingArgContext fun call_cont
 362             , ai_strs  = arg_stricts
 363             , ai_discs = arg_discounts }
 364   where
 365     vanilla_discounts, arg_discounts :: [Int]
 366     vanilla_discounts = repeat 0
         -- Per-argument discounts from the unfolding, if it has any, padded
         -- with zeros so that the list is always infinite
 367     arg_discounts = case idUnfolding fun of
 368                         CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _)
 369                               -> discounts ++ vanilla_discounts
 370                         other -> vanilla_discounts
 371
 372     vanilla_stricts, arg_stricts :: [Bool]
 373     vanilla_stricts  = repeat False
 374
 375     arg_stricts
 376       = case splitStrictSig (idNewStrictness fun) of
 377           (demands, result_info)
 378                 | not (demands `lengthExceeds` n_val_args)
 379                 ->      -- Enough args, use the strictness given.
 380                         -- For bottoming functions we used to pretend that the arg
 381                         -- is lazy, so that we don't treat the arg as an
 382                         -- interesting context.  This avoids substituting
 383                         -- top-level bindings for (say) strings into
 384                         -- calls to error.  But now we are more careful about
 385                         -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
 386                    if isBotRes result_info then
 387                         map isStrictDmd demands         -- Finite => result is bottom
 388                    else
 389                         map isStrictDmd demands ++ vanilla_stricts
 390
 391                | otherwise
                    -- The signature has more demands than the call has value
                    -- args: warn and treat every argument as lazy
 392                -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
 393                                 <+> ppr n_val_args <+> ppr demands )
 394                    vanilla_stricts      -- Not enough args, or no strictness
 395
 396 {- Note [Unsaturated functions]
 397   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 398 Consider (test eyeball/inline4)
 399         x = a:as
 400         y = f x
 401 where f has arity 2.  Then we do not want to inline 'x', because
 402 it'll just be floated out again.  Even if f has lots of discounts
 403 on its first argument -- it must be saturated for these to kick in
 404 -}
 405
 406 interestingArgContext :: Id -> SimplCont -> Bool
 407 -- Decides whether the arguments of a call to 'fn' should be treated as
 408 -- sitting in a rule-interesting context.  That is so when
 409 --   * fn itself has rules, or
 410 --   * the call is, possibly under coercions, in a Stop or StrictArg
 411 --     context whose CallCtxt is an ArgCtxt with its rules flag set.
 412 --
 413 -- Motivation: if an argument has the form (f x y), where x,y are boring and
 414 -- f is marked INLINE, we would normally not inline f.  But in
 415 --      g (f x y)       or      h (g (f x x))
 416 -- where g (resp. h) has rules, inlining f may expose a rule that can fire.
 417 -- Hence the call_cont argument: it lets the flag propagate inwards.
 418 --
 419 -- The alternative would be to *always* inline an INLINE function,
 420 -- regardless of how boring its context is; but that seems overkill.
 421 -- For example, it'd mean that wrapper functions were always inlined.
 422 interestingArgContext fn call_cont
 423   | idHasRules fn = True
 424   | otherwise     = ctxt_has_rules call_cont
 425   where
 426     ctxt_has_rules cont
 427       = case cont of
 428           CoerceIt _ inner       -> ctxt_has_rules inner
 429           StrictArg _ _ cci _ _  -> rules_flag cci
 430           Stop _ cci             -> rules_flag cci
 431           Select {}              -> False
 432           ApplyTo {}             -> False
 433           StrictBind {}          -> False  -- ??
 434
 435     rules_flag (ArgCtxt has_rules _) = has_rules
         rules_flag _                     = False
 436 \end{code}
 437
 438
 439
 440 %************************************************************************
 441 %*                                                                      *
 442 \subsection{Decisions about inlining}
 443 %*                                                                      *
 444 %************************************************************************
 445
 446 Inlining is controlled partly by the SimplifierMode switch.  This has two
 447 settings:
 448
 449         SimplGently     (a) Simplifying before specialiser/full laziness
 450                         (b) Simplifying inside INLINE pragma
 451                         (c) Simplifying the LHS of a rule
 452                         (d) Simplifying a GHCi expression or Template
 453                                 Haskell splice
 454
 455         SimplPhase n _   Used at all other times
 456
 457 The key thing about SimplGently is that it does no call-site inlining.
 458 Before full laziness we must be careful not to inline wrappers,
 459 because doing so inhibits floating
 460     e.g. ...(case f x of ...)...
 461     ==> ...(case (case x of I# x# -> fw x#) of ...)...
 462     ==> ...(case x of I# x# -> case fw x# of ...)...
 463 and now the redex (f x) isn't floatable any more.
 464
 465 The no-inlining thing is also important for Template Haskell.  You might be
 466 compiling in one-shot mode with -O2; but when TH compiles a splice before
 467 running it, we don't want to use -O2.  Indeed, we don't want to inline
 468 anything, because the byte-code interpreter might get confused about
 469 unboxed tuples and suchlike.
 470
 471 INLINE pragmas
 472 ~~~~~~~~~~~~~~
 473 SimplGently is also used as the mode to simplify inside an InlineMe note.
 474
 475 \begin{code}
 476 inlineMode :: SimplifierMode
     -- The simplifier mode used inside an InlineMe note: always SimplGently.
 477 inlineMode = SimplGently
 478 \end{code}
 479
 480 It really is important to switch off inlinings inside such
 481 expressions.  Consider the following example
 482
 483         let f = \pq -> BIG
 484         in
 485         let g = \y -> f y y
 486             {-# INLINE g #-}
 487         in ...g...g...g...g...g...
 488
 489 Now, if that's the ONLY occurrence of f, it will be inlined inside g,
 490 and thence copied multiple times when g is inlined.
 491
 492
 493 This function may be inlined in other modules, so we
 494 don't want to remove (by inlining) calls to functions that have
 495 specialisations, or that may have transformation rules in an importing
 496 scope.
 497
 498 E.g.    {-# INLINE f #-}
 499                 f x = ...g...
 500
 501 and suppose that g is strict *and* has specialisations.  If we inline
 502 g's wrapper, we deny f the chance of getting the specialised version
 503 of g when f is inlined at some call site (perhaps in some other
 504 module).
 505
 506 It's also important not to inline a worker back into a wrapper.
 507 A wrapper looks like
 508         wrapper = inline_me (\x -> ...worker... )
 509 Normally, the inline_me prevents the worker getting inlined into
 510 the wrapper (initially, the worker's only call site!).  But,
 511 if the wrapper is sure to be called, the strictness analyser will
 512 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
 513 continuation.  That's why the keep_inline predicate returns True for
 514 ArgOf continuations.  It shouldn't do any harm not to dissolve the
 515 inline-me note under these circumstances.
 516
 517 Note that the result is that we do very little simplification
 518 inside an InlineMe.
 519
 520         all xs = foldr (&&) True xs
 521         any p = all . map p  {-# INLINE any #-}
 522
 523 Problem: any won't get deforested, and so if it's exported and the
 524 importer doesn't use the inlining, (eg passes it as an arg) then we
 525 won't get deforestation at all.  We haven't solved this problem yet!
 526
 527
 528 preInlineUnconditionally
 529 ~~~~~~~~~~~~~~~~~~~~~~~~
 530 @preInlineUnconditionally@ examines a bndr to see if it is used just
 531 once in a completely safe way, so that it is safe to discard the
 532 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
 533 big the RHS might be.  If this is the case we don't simplify the RHS
 534 first, but just inline it un-simplified.
 535
 536 This is much better than first simplifying a perhaps-huge RHS and then
 537 inlining and re-simplifying it.  Indeed, it can be at least quadratically
 538 better.  Consider
 539
 540         x1 = e1
 541         x2 = e2[x1]
 542         x3 = e3[x2]
 543         ...etc...
 544         xN = eN[xN-1]
 545
 546 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
 547 This can happen with cascades of functions too:
 548
 549         f1 = \x1.e1
 550         f2 = \xs.e2[f1]
 551         f3 = \xs.e3[f2]
 552         ...etc...
 553
 554 THE MAIN INVARIANT is this:
 555
 556         ----  preInlineUnconditionally invariant -----
 557    IF preInlineUnconditionally chooses to inline x = <rhs>
 558    THEN doing the inlining should not change the occurrence
 559         info for the free vars of <rhs>
 560         ----------------------------------------------
 561
 562 For example, it's tempting to look at trivial binding like
 563         x = y
 564 and inline it unconditionally.  But suppose x is used many times,
 565 but this is the unique occurrence of y.  Then inlining x would change
 566 y's occurrence info, which breaks the invariant.  It matters: y
 567 might have a BIG rhs, which will now be dup'd at every occurrence of x.
 568
 569
 570 Even RHSs labelled InlineMe aren't caught here, because there might be
 571 no benefit from inlining at the call site.
 572
 573 [Sept 01] Don't unconditionally inline a top-level thing, because that
 574 can simply make a static thing into something built dynamically.  E.g.
 575         x = (a,b)
 576         main = \s -> h x
 577
 578 [Remember that we treat \s as a one-shot lambda.]  No point in
 579 inlining x unless there is something interesting about the call site.
 580
 581 But watch out: if you aren't careful, some useful foldr/build fusion
 582 can be lost (most notably in spectral/hartel/parstof) because the
 583 foldr didn't see the build.  Doing the dynamic allocation isn't a big
 584 deal, in fact, but losing the fusion can be.  But the right thing here
 585 seems to be to do a callSiteInline based on the fact that there is
 586 something interesting about the call site (it's strict).  Hmm.  That
 587 seems a bit fragile.
 588
 589 Conclusion: inline top level things gaily until Phase 0 (the last
 590 phase), at which point don't.
 591
 592 \begin{code}
 593 preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
     -- Decides whether the binding 'bndr = rhs' may be discarded and the
     -- (not yet simplified) rhs substituted at the binder's use site.
     -- Result:
     --   False  if the binder's inline pragma is not active in the current
     --          mode, or if opt_SimplNoPreInlining is set
     --   True   if the binder is dead (IAmDead)
     --   the verdict of try_once, if the occurrence info is OneOcc with its
     --          second field True
     --   False  for any other occurrence info
 594 preInlineUnconditionally env top_lvl bndr rhs
 595   | not active             = False
 596   | opt_SimplNoPreInlining = False
 597   | otherwise = case idOccInfo bndr of
 598                   IAmDead                    -> True    -- Happens in ((\x.1) v)
 599                   OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
 600                   other                      -> False
 601   where
 602     phase = getMode env
         -- In SimplGently only always-active pragmas count; otherwise the
         -- pragma must be active in phase n
 603     active = case phase of
 604                    SimplGently    -> isAlwaysActive prag
 605                    SimplPhase n _ -> isActive n prag
 606     prag = idInlinePragma bndr
 607
 608     try_once in_lam int_cxt     -- There's one textual occurrence
 609         | not in_lam = isNotTopLevel top_lvl || early_phase
 610         | otherwise  = int_cxt && canInlineInLam rhs
 611
 612 -- Be very careful before inlining inside a lambda, because (a) we must not
 613 -- invalidate occurrence information, and (b) we want to avoid pushing a
 614 -- single allocation (here) into multiple allocations (inside lambda).
 615 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
 616 --      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
 617 --      where
 618 --              is_cheap = exprIsCheap rhs
 619 --              ok = is_cheap && int_cxt
 620
 621         --      int_cxt         The context isn't totally boring
 622         -- E.g. let f = \ab.BIG in \y. map f xs
 623         --      Don't want to substitute for f, because then we allocate
 624         --      its closure every time the \y is called
 625         -- But: let f = \ab.BIG in \y. map (f y) xs
 626         --      Now we do want to substitute for f, even though it's not
 627         --      saturated, because we're going to allocate a closure for
 628         --      (f y) every time round the loop anyhow.
 629
 630         -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
 631         -- so substituting rhs inside a lambda doesn't change the occ info.
 632         -- Sadly, not quite the same as exprIsHNF.
 633     canInlineInLam (Lit l)              = True
 634     canInlineInLam (Lam b e)            = isRuntimeVar b || canInlineInLam e
 635     canInlineInLam (Note _ e)           = canInlineInLam e
 636     canInlineInLam _                    = False
 637
         -- True in every mode except SimplPhase 0 (so SimplGently counts as early)
 638     early_phase = case phase of
 639                         SimplPhase 0 _ -> False
 640                         other          -> True
 641 -- If we don't have this early_phase test, consider
 642 --      x = length [1,2,3]
 643 -- The full laziness pass carefully floats all the cons cells to
 644 -- top level, and preInlineUnconditionally floats them all back in.
 645 -- Result is (a) static allocation replaced by dynamic allocation
 646 --           (b) many simplifier iterations because this tickles
 647 --               a related problem; only one inlining per pass
 648 --
 649 -- On the other hand, I have seen cases where top-level fusion is
 650 -- lost if we don't inline top level thing (e.g. string constants)
 651 -- Hence the test for phase zero (which is the phase for all the final
 652 -- simplifications).  Until phase zero we take no special notice of
 653 -- top level things, but then we become more leery about inlining
 654 -- them.
 655
 656 \end{code}
 657
 658 postInlineUnconditionally
 659 ~~~~~~~~~~~~~~~~~~~~~~~~~
 660 @postInlineUnconditionally@ decides whether to unconditionally inline
 661 a thing based on the form of its RHS; in particular if it has a
 662 trivial RHS.  If so, we can inline and discard the binding altogether.
 663
 664 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
 665 only have *forward* references Hence, it's safe to discard the binding
 666
 667 NOTE: This isn't our last opportunity to inline.  We're at the binding
 668 site right now, and we'll get another opportunity when we get to the
 669 occurrence(s)
 670
 671 Note that we do this unconditional inlining only for trivial RHSs.
 672 Don't inline even WHNFs inside lambdas; doing so may simply increase
 673 allocation when the function is called. This isn't the last chance; see
 674 NOTE above.
 675
 676 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
 677 Because we don't even want to inline them into the RHS of constructor
 678 arguments. See NOTE above
 679
 680 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
 681 it's best to inline it anyway.  We often get a=E; b=a from desugaring,
 682 with both a and b marked NOINLINE.  But that seems incompatible with
 683 our new view that inlining is like a RULE, so I'm sticking to the 'active'
 684 story for now.
 685
 686 \begin{code}
 687 postInlineUnconditionally
 688     :: SimplEnv -> TopLevelFlag
 689     -> InId             -- The binder (an OutId would be fine too)
 690     -> OccInfo          -- From the InId
 691     -> OutExpr          -- The right-hand side of the binding
 692     -> Unfolding        -- Consulted for size (smallEnoughToInline) and cheapness (isCheapUnfolding)
 693     -> Bool             -- True <=> inline the binder unconditionally
 694 postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 695   | not active             = False      -- Inline pragma is not active in the current simplifier mode
 696   | isLoopBreaker occ_info = False      -- If it's a loop-breaker of any kind, don't inline
 697                                         -- because it might be referred to "earlier"
 698   | isExportedId bndr      = False
 699   | exprIsTrivial rhs      = True       -- Trivial RHS: always inline
 700   | otherwise
 701   = case occ_info of
 702         -- The point of examining occ_info here is that for *non-values*
 703         -- that occur outside a lambda, the call-site inliner won't have
 704         -- a chance (because it doesn't know that the thing
 705         -- only occurs once).   The pre-inliner won't have gotten
 706         -- it either, if the thing occurs in more than one branch
 707         -- So the main target is things like
 708         --      let x = f y in
 709         --      case v of
 710         --         True  -> case x of ...
 711         --         False -> case x of ...
 712         -- I'm not sure how important this is in practice
 713       OneOcc in_lam one_br int_cxt      -- OneOcc => no code-duplication issue
 714         ->     smallEnoughToInline unfolding    -- Small enough to dup
 715                         -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
 716                         --
 717                         -- NB: Do NOT inline arbitrarily big things, even if one_br is True
 718                         -- Reason: doing so risks exponential behaviour.  We simplify a big
 719                         --         expression, inline it, and simplify it again.  But if the
 720                         --         very same thing happens in the big expression, we get
 721                         --         exponential cost!
 722                         -- PRINCIPLE: when we've already simplified an expression once,
 723                         -- make sure that we only inline it if it's reasonably small.
 724
 725            &&  ((isNotTopLevel top_lvl && not in_lam) ||
 726                         -- But outside a lambda, we want to be reasonably aggressive
 727                         -- about inlining into multiple branches of case
 728                         -- e.g. let x = <non-value>
 729                         --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
 730                         -- Inlining can be a big win if C3 is the hot-spot, even if
 731                         -- the uses in C1, C2 are not 'interesting'
 732                         -- An example that gets worse if you add int_cxt here is 'clausify'
 733
 734                 (isCheapUnfolding unfolding && int_cxt))
 735                         -- isCheap => acceptable work duplication; in_lam may be true
 736                         -- int_cxt to prevent us inlining inside a lambda without some
 737                         -- good reason.  See the notes on int_cxt in preInlineUnconditionally
 738
 739       IAmDead -> True   -- This happens; for example, the case_bndr during case of
 740                         -- known constructor:  case (a,b) of x { (p,q) -> ... }
 741                         -- Here x isn't mentioned in the RHS, so we don't want to
 742                         -- create the (dead) let-binding  let x = (a,b) in ...
 743
 744       other -> False
 745
 746 -- Here's an example that we don't handle well:
 747 --      let f = if b then Left (\x.BIG) else Right (\y.BIG)
 748 --      in \y. ....case f of {...} ....
 749 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
 750 -- But
 751 -- * We can't preInlineUnconditionally because that would invalidate
 752 --   the occ info for b.
 753 -- * We can't postInlineUnconditionally because the RHS is big, and
 754 --   that risks exponential behaviour
 755 -- * We can't call-site inline, because the rhs is big
 756 -- Alas!
 757
 758   where
 759     active = case getMode env of
 760                    SimplGently    -> isAlwaysActive prag     -- Gentle mode: only always-active pragmas count
 761                    SimplPhase n _ -> isActive n prag         -- Otherwise: is the pragma active in phase n?
 762     prag = idInlinePragma bndr
 763
 764 activeInline :: SimplEnv -> OutId -> Bool    -- Is the Id's inline pragma active in the current mode?
 765 activeInline env id
 766   = case getMode env of
 767       SimplGently -> False
 768         -- No inlining at all when doing gentle stuff,
 769         -- except for local things that occur once (pre/postInlineUnconditionally)
 770         -- The reason is that too little clean-up happens if you
 771         -- don't inline use-once things.   Also a bit of inlining is *good* for
 772         -- full laziness; it can expose constant sub-expressions.
 773         -- Example in spectral/mandel/Mandel.hs, where the mandelset
 774         -- function gets a useful let-float if you inline windowToViewport
 775
 776         -- NB: we used to have a second exception, for data con wrappers.
 777         -- On the grounds that we use gentle mode for rule LHSs, and
 778         -- they match better when data con wrappers are inlined.
 779         -- But that only really applies to the trivial wrappers (like (:)),
 780         -- and they are now constructed as Compulsory unfoldings (in MkId)
 781         -- so they'll happen anyway.
 782
 783       SimplPhase n _ -> isActive n prag      -- In phase n, defer to the Id's inline pragma
 784   where
 785     prag = idInlinePragma id
 786
 787 activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
 788 -- Nothing => No rules at all; Just p => p tests whether a given Activation is active now
 789 activeRule dflags env
 790   | not (dopt Opt_RewriteRules dflags)
 791   = Nothing     -- Rewriting is off
 792   | otherwise
 793   = case getMode env of
 794         SimplGently    -> Just isAlwaysActive
 795                         -- Used to be Nothing (no rules in gentle mode)
 796                         -- Main motivation for changing is that I wanted
 797                         --      lift String ===> ...
 798                         -- to work in Template Haskell when simplifying
 799                         -- splices, so we get simpler code for literal strings
 800         SimplPhase n _ -> Just (isActive n)     -- In phase n, test the Activation against n
 801 \end{code}
 802
 803
 804 %************************************************************************
 805 %*                                                                      *
 806         Rebuilding a lambda
 807 %*                                                                      *
 808 %************************************************************************
 809
 810 \begin{code}
 811 mkLam :: [OutBndr] -> OutExpr -> SimplM OutExpr
 812 -- mkLam tries two things (after floating a cast on the body outwards where possible)
 813 --      a) eta reduction, if that gives a trivial expression
 814 --      b) eta expansion [only if there are some value lambdas]
 815
 816 mkLam [] body
 817   = return body         -- No binders: nothing to build
 818 mkLam bndrs body
 819   = do  { dflags <- getDOptsSmpl
 820         ; mkLam' dflags bndrs body }
 821   where
 822     mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
 823     mkLam' dflags bndrs (Cast body co)
 824       | not (any bad bndrs)
 825         -- Note [Casts and lambdas]
 826       = do { lam <- mkLam' dflags bndrs body
 827            ; return (mkCoerce (mkPiTypes bndrs co) lam) }
 828       where
 829         co_vars  = tyVarsOfType co
 830         bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars   -- A coercion binder mentioned in co blocks the float
 831
 832     mkLam' dflags bndrs body
 833       | dopt Opt_DoEtaReduction dflags,
 834         Just etad_lam <- tryEtaReduce bndrs body
 835       = do { tick (EtaReduction (head bndrs))   -- bndrs is non-empty here: mkLam [] is handled above
 836            ; return etad_lam }
 837
 838       | dopt Opt_DoLambdaEtaExpansion dflags,
 839         any isRuntimeVar bndrs                  -- Only eta-expand if there is at least one runtime binder
 840       = do { body' <- tryEtaExpansion dflags body
 841            ; return (mkLams bndrs body') }
 842
 843       | otherwise
 844       = return (mkLams bndrs body)              -- Plain lambda, no eta tricks
 845 \end{code}
 846
 847 Note [Casts and lambdas]
 848 ~~~~~~~~~~~~~~~~~~~~~~~~
 849 Consider
 850         (\x. (\y. e) `cast` g1) `cast` g2
 851 There is a danger here that the two lambdas look separated, and the
 852 full laziness pass might float an expression to between the two.
 853
 854 So this equation in mkLam' floats the g1 out, thus:
 855         (\x. e `cast` g1)  -->  (\x.e) `cast` (tx -> g1)
 856 where x:tx.
 857
 858 In general, this floats casts outside lambdas, where (I hope) they
 859 might meet and cancel with some other cast:
 860         \x. e `cast` co   ===>   (\x. e) `cast` (tx -> co)
 861         /\a. e `cast` co  ===>   (/\a. e) `cast` (/\a. co)
 862         /\g. e `cast` co  ===>   (/\g. e) `cast` (/\g. co)
 863                           (if not (g `in` co))
 864
 865 Notice that it works regardless of 'e'.  Originally it worked only
 866 if 'e' was itself a lambda, but in some cases that resulted in
 867 fruitless iteration in the simplifier.  A good example was when
 868 compiling Text.ParserCombinators.ReadPrec, where we had a definition
 869 like    (\x. Get `cast` g)
 870 where Get is a constructor with nonzero arity.  Then mkLam eta-expanded
 871 the Get, and the next iteration eta-reduced it, and then eta-expanded
 872 it again.
 873
 874 Note also the side condition for the case of coercion binders.
 875 It does not make sense to transform
 876         /\g. e `cast` g  ==>  (/\g.e) `cast` (/\g.g)
 877 because the latter is not well-kinded.
 878
 879 --      c) floating lets out through big lambdas
 880 --              [only if all tyvar lambdas, and only if this lambda
 881 --               is the RHS of a let]
 882
 883 {-      Sept 01: I'm experimenting with getting the
 884         full laziness pass to float out past big lambdas
 885  | all isTyVar bndrs,   -- Only for big lambdas
 886    contIsRhs cont       -- Only try the rhs type-lambda floating
 887                         -- if this is indeed a right-hand side; otherwise
 888                         -- we end up floating the thing out, only for float-in
 889                         -- to float it right back in again!
 890  = do (floats, body') <- tryRhsTyLam env bndrs body
 891       return (floats, mkLams bndrs body')
 892 -}
 893
 894
 895 %************************************************************************
 896 %*                                                                      *
 897                 Eta reduction
 898 %*                                                                      *
 899 %************************************************************************
 900
 901 Note [Eta reduction conditions]
 902 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 903 We try for eta reduction here, but *only* if we get all the way to a
 904 trivial expression.  We don't want to remove extra lambdas unless we
 905 are going to avoid allocating this thing altogether.
 906
 907 There are some particularly delicate points here:
 908
 909 * Eta reduction is not valid in general:
 910         \x. bot  /=  bot
 911   This matters, partly for old-fashioned correctness reasons but,
 912   worse, getting it wrong can yield a seg fault. Consider
 913         f = \x.f x
 914         h y = case (case y of { True -> f `seq` True; False -> False }) of
 915                 True -> ...; False -> ...
 916
 917   If we (unsoundly) eta-reduce f to get f=f, the strictness analyser
 918   says f=bottom, and replaces the (f `seq` True) with just
 919   (f `cast` unsafe-co).  BUT, as things stand, 'f' got arity 1, and it
 920   *keeps* arity 1 (perhaps also wrongly).  So CorePrep eta-expands
 921   the definition again, so that it does not terminate after all.
 922   Result: seg-fault because the boolean case actually gets a function value.
 923   See Trac #1947.
 924
 925   So it's important to do the right thing.
 926
 927 * We need to be careful if we just look at f's arity. Currently (Dec07),
 928   f's arity is visible in its own RHS (see Note [Arity robustness] in
 929   SimplEnv) so we must *not* trust the arity when checking that 'f' is
 930   a value.  Instead, look at the unfolding.
 931
 932   However for GlobalIds we can look at the arity; and for primops we
 933   must, since they have no unfolding.
 934
 935 * Regardless of whether 'f' is a value, we always want to
 936   reduce (/\a -> f a) to f
 937   This came up in a RULE: foldr (build (/\a -> g a))
 938   did not match            foldr (build (/\b -> ...something complex...))
 939   The type checker can insert these eta-expanded versions,
 940   with both type and dictionary lambdas; hence the slightly
 941   ad-hoc isDictId
 942
 943 These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
 944 Alas.
 945
 946 \begin{code}
 947 tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr
 948 tryEtaReduce bndrs body
 949   = go (reverse bndrs) body     -- Match binders against arguments, last binder / outermost argument first
 950   where
 951     go (b : bs) (App fun arg) | ok_arg b arg = go bs fun        -- Loop round
 952     go []       fun           | ok_fun fun   = Just fun         -- Success!
 953     go _        _                            = Nothing          -- Failure!
 954
 955         -- Note [Eta reduction conditions]
 956     ok_fun (App fun (Type ty))
 957         | not (any (`elemVarSet` tyVarsOfType ty) bndrs)        -- Type argument must not mention any binder
 958         =  ok_fun fun
 959     ok_fun (Var fun_id)
 960         =  not (fun_id `elem` bndrs)                            -- The function must not itself be one of the binders
 961         && (ok_fun_id fun_id || all ok_lam bndrs)
 962     ok_fun _fun = False
 963
 964     ok_fun_id fun
 965         | isLocalId fun       = isEvaldUnfolding (idUnfolding fun)      -- Local: don't trust arity, look at the unfolding
 966         | isDataConWorkId fun = True
 967         | isGlobalId fun      = idArity fun > 0                         -- Global: arity can be consulted
 968
 969     ok_lam v = isTyVar v || isDictId v          -- Type and dictionary lambdas are accepted regardless of ok_fun_id
 970
 971     ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg    -- The argument must be exactly the binder
 972 \end{code}
 973
 974
 975 %************************************************************************
 976 %*                                                                      *
 977                 Eta expansion
 978 %*                                                                      *
 979 %************************************************************************
 980
 981
 982 We go for:
 983    f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
 984                                  (n >= 0)
 985
 986 where (in both cases)
 987
 988         * The xi can include type variables
 989
 990         * The yi are all value variables
 991
 992         * N is a NORMAL FORM (i.e. no redexes anywhere)
 993           wanting a suitable number of extra args.
 994
 995 The biggest reason for doing this is for cases like
 996
 997         f = \x -> case x of
 998                     True  -> \y -> e1
 999                     False -> \y -> e2
1000
1001 Here we want to get the lambdas together.  A good example is the nofib
1002 program fibheaps, which gets 25% more allocation if you don't do this
1003 eta-expansion.
1004
1005 We may have to sandwich some coerces between the lambdas
1006 to make the types work.   exprEtaExpandArity looks through coerces
1007 when computing arity; and etaExpand adds the coerces as necessary when
1008 actually computing the expansion.
1009
1010 \begin{code}
1011 tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr
1012 -- There is at least one runtime binder in the binders
1013 tryEtaExpansion dflags body = do
1014     us <- getUniquesM           -- Supply of fresh uniques, passed on to etaExpand
1015     return (etaExpand fun_arity us body (exprType body))
1016   where
1017     fun_arity = exprEtaExpandArity dflags body  -- Arity passed to etaExpand as the expansion target
1018 \end{code}
1019
1020
1021 %************************************************************************
1022 %*                                                                      *
1023 \subsection{Floating lets out of big lambdas}
1024 %*                                                                      *
1025 %************************************************************************
1026
1027 Note [Floating and type abstraction]
1028 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1029 Consider this:
1030         x = /\a. C e1 e2
1031 We'd like to float this to
1032         y1 = /\a. e1
1033         y2 = /\a. e2
1034         x = /\a. C (y1 a) (y2 a)
1035 for the usual reasons: we want to inline x rather vigorously.
1036
1037 You may think that this kind of thing is rare.  But in some programs it is
1038 common.  For example, if you do closure conversion you might get:
1039
1040         data a :-> b = forall e. (e -> a -> b) :$ e
1041
1042         f_cc :: forall a. a :-> a
1043         f_cc = /\a. (\e. id a) :$ ()
1044
1045 Now we really want to inline that f_cc thing so that the
1046 construction of the closure goes away.
1047
1048 So I have elaborated simplLazyBind to understand right-hand sides that look
1049 like
1050         /\ a1..an. body
1051
1052 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1053 but there is quite a bit of plumbing in simplLazyBind as well.
1054
1055 The same transformation is good when there are lets in the body:
1056
1057         /\abc -> let(rec) x = e in b
1058    ==>
1059         let(rec) x' = /\abc -> let x = x' a b c in e
1060         in
1061         /\abc -> let x = x' a b c in b
1062
1063 This is good because it can turn things like:
1064
1065         let f = /\a -> letrec g = ... g ... in g
1066 into
1067         letrec g' = /\a -> ... g' a ...
1068         in
1069         let f = /\ a -> g' a
1070
1071 which is better.  In effect, it means that big lambdas don't impede
1072 let-floating.
1073
1074 This optimisation is CRUCIAL in eliminating the junk introduced by
1075 desugaring mutually recursive definitions.  Don't eliminate it lightly!
1076
1077 [May 1999]  If we do this transformation *regardless* then we can
1078 end up with some pretty silly stuff.  For example,
1079
1080         let
1081             st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1082         in ..
1083 becomes
1084         let y1 = /\s -> r1
1085             y2 = /\s -> r2
1086             st = /\s -> ...[y1 s/x1, y2 s/x2]
1087         in ..
1088
1089 Unless the "..." is a WHNF there is really no point in doing this.
1090 Indeed it can make things worse.  Suppose x1 is used strictly,
1091 and is of the form
1092
1093         x1* = case f y of { (a,b) -> e }
1094
1095 If we abstract this wrt the tyvar we then can't do the case inline
1096 as we would normally do.
1097
1098 That's why the whole transformation is part of the same process that
1099 floats let-bindings and constructor arguments out of RHSs.  In particular,
1100 it is guarded by the doFloatFromRhs call in simplLazyBind.
1101
1102
1103 \begin{code}
1104 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1105 abstractFloats main_tvs body_env body   -- main_tvs: variables to abstract over; body_env carries the floats
1106   = ASSERT( notNull body_floats )
1107     do  { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
1108         ; return (float_binds, CoreSubst.substExpr subst body) }        -- Body now refers to the poly binders
1109   where
1110     main_tv_set = mkVarSet main_tvs
1111     body_floats = getFloats body_env
1112     empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1113
1114     abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1115     abstract subst (NonRec id rhs)
1116       = do { (poly_id, poly_app) <- mk_poly tvs_here id
1117            ; let poly_rhs = mkLams tvs_here rhs'
1118                  subst'   = CoreSubst.extendIdSubst subst id poly_app    -- Later occurrences of id become poly_app
1119            ; return (subst', (NonRec poly_id poly_rhs)) }
1120       where
1121         rhs' = CoreSubst.substExpr subst rhs    -- Apply the substitution for earlier floats first
1122         tvs_here | any isCoVar main_tvs = main_tvs      -- Note [Abstract over coercions]
1123                  | otherwise
1124                  = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
1125
1126                 -- Abstract only over the type variables free in the rhs
1127                 -- wrt which the new binding is abstracted.  But the naive
1128                 -- approach of abstract wrt the tyvars free in the Id's type
1129                 -- fails. Consider:
1130                 --      /\ a b -> let t :: (a,b) = (e1, e2)
1131                 --                    x :: a     = fst t
1132                 --                in ...
1133                 -- Here, b isn't free in x's type, but we must nevertheless
1134                 -- abstract wrt b as well, because t's type mentions b.
1135                 -- Since t is floated too, we'd end up with the bogus:
1136                 --      poly_t = /\ a b -> (e1, e2)
1137                 --      poly_x = /\ a   -> fst (poly_t a *b*)
1138                 -- So for now we adopt the even more naive approach of
1139                 -- abstracting wrt *all* the tyvars.  We'll see if that
1140                 -- gives rise to problems.   SLPJ June 98
1141
1142     abstract subst (Rec prs)
1143        = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1144             ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1145                   poly_rhss = [mkLams tvs_here (CoreSubst.substExpr subst' rhs) | rhs <- rhss]  -- subst' (not subst): the group is recursive
1146             ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1147        where
1148          (ids,rhss) = unzip prs
1149                 -- For a recursive group, it's a bit of a pain to work out the minimal
1150                 -- set of tyvars over which to abstract:
1151                 --      /\ a b c.  let x = ...a... in
1152                 --                 letrec { p = ...x...q...
1153                 --                          q = .....p...b... } in
1154                 --                 ...
1155                 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1156                 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1157                 -- Since it's a pain, we just use the whole set, which is always safe
1158                 --
1159                 -- If you ever want to be more selective, remember this bizarre case too:
1160                 --      x::a = x
1161                 -- Here, we must abstract 'x' over 'a'.
1162          tvs_here = main_tvs
1163
1164     mk_poly tvs_here var
1165       = do { uniq <- getUniqueM
1166            ; let  poly_name = setNameUnique (idName var) uniq           -- Keep same name
1167                   poly_ty   = mkForAllTys tvs_here (idType var) -- But new type of course
1168                   poly_id   = transferPolyIdInfo var $  -- Note [transferPolyIdInfo] in Id.lhs
1169                               mkLocalId poly_name poly_ty
1170            ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }   -- Second component: poly_id applied to tvs_here
1171                 -- In the olden days, it was crucial to copy the occInfo of the original var,
1172                 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1173                 -- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
1174                 -- at already simplified code, so it doesn't matter
1175                 --
1176                 -- It's even right to retain single-occurrence or dead-var info:
1177                 -- Suppose we started with  /\a -> let x = E in B
1178                 -- where x occurs once in B. Then we transform to:
1179                 --      let x' = /\a -> E in /\a -> let x* = x' a in B
1180                 -- where x* has an INLINE prag on it.  Now, once x* is inlined,
1181                 -- the occurrences of x' will be just the occurrences originally
1182                 -- pinned on x.
1183 \end{code}
1184
1185 Note [Abstract over coercions]
1186 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1187 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1188 type variable a.  Rather than sort this mess out, we simply bale out and abstract
1189 wrt all the type variables if any of them are coercion variables.
1190
1191
1192 Historical note: if you use let-bindings instead of a substitution, beware of this:
1193
1194                 -- Suppose we start with:
1195                 --
1196                 --      x = /\ a -> let g = G in E
1197                 --
1198                 -- Then we'll float to get
1199                 --
1200                 --      x = let poly_g = /\ a -> G
1201                 --          in /\ a -> let g = poly_g a in E
1202                 --
1203                 -- But now the occurrence analyser will see just one occurrence
1204                 -- of poly_g, not inside a lambda, so the simplifier will
1205                 -- PreInlineUnconditionally poly_g back into g!  Back to square 1!
1206                 -- (I used to think that the "don't inline lone occurrences" stuff
1207                 --  would stop this happening, but since it's the *only* occurrence,
1208                 --  PreInlineUnconditionally kicks in first!)
1209                 --
1210                 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1211                 --           to appear many times.  (NB: mkInlineMe eliminates
1212                 --           such notes on trivial RHSs, so do it manually.)
1213
1214 %************************************************************************
1215 %*                                                                      *
1216                 prepareAlts
1217 %*                                                                      *
1218 %************************************************************************
1219
1220 prepareAlts tries these things:
1221
1222 1.  If several alternatives are identical, merge them into
1223     a single DEFAULT alternative.  I've occasionally seen this
1224     making a big difference:
1225
1226         case e of               =====>     case e of
1227           C _ -> f x                         D v -> ....v....
1228           D v -> ....v....                   DEFAULT -> f x
1229           DEFAULT -> f x
1230
1231    The point is that we merge common RHSs, at least for the DEFAULT case.
1232    [One could do something more elaborate but I've never seen it needed.]
1233    To avoid an expensive test, we just merge branches equal to the *first*
1234    alternative; this picks up the common cases
1235         a) all branches equal
1236         b) some branches equal to the DEFAULT (which occurs first)
1237
1238 2.  Case merging:
1239        case e of b {             ==>   case e of b {
1240          p1 -> rhs1                      p1 -> rhs1
1241          ...                             ...
1242          pm -> rhsm                      pm -> rhsm
1243          _  -> case b of b' {            pn -> let b'=b in rhsn
1244                      pn -> rhsn          ...
1245                      ...                 po -> let b'=b in rhso
1246                      po -> rhso          _  -> let b'=b in rhsd
1247                      _  -> rhsd
1248        }
1249
1250     which merges two cases into one case when the default alternative of
1251     the outer case scrutinises the same variable as the outer case.  This
1252     transformation is called Case Merging.  It avoids scrutinising the
1253     same variable multiple times.
1254
1255
1256 The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs):
1257
1258         x | p `is` 1 -> e1
1259           | p `is` 2 -> e2
1260         ...etc...
1261
1262 where @is@ was something like
1263
1264         p `is` n = p /= (-1) && p == n
1265
1266 This gave rise to a horrible sequence of cases
1267
1268         case p of
1269           (-1) -> $j p
1270           1    -> e1
1271           DEFAULT -> $j p
1272
1273 and similarly in cascade for all the join points!
1274
1275 Note [Dead binders]
1276 ~~~~~~~~~~~~~~~~~~~~
1277 We do this *here*, looking at un-simplified alternatives, because we
1278 have to check that r doesn't mention the variables bound by the
1279 pattern in each alternative, so the binder-info is rather useful.
1280
1281 \begin{code}
1282 prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1283 prepareAlts env scrut case_bndr' alts
1284   = do  { dflags <- getDOptsSmpl
1285         ; alts <- combineIdenticalAlts case_bndr' alts          -- NB: shadows the 'alts' argument from here on
1286
1287         ; let (alts_wo_default, maybe_deflt) = findDefault alts
1288               alt_cons = [con | (con,_,_) <- alts_wo_default]
1289               imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
1290                 -- "imposs_deflt_cons" are handled
1291                 --   EITHER by the context,
1292                 --   OR by a non-DEFAULT branch in this case expression.
1293
1294         ; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app
1295                                          imposs_deflt_cons maybe_deflt
1296
1297         ; let trimmed_alts = filterOut impossible_alt alts_wo_default
1298               merged_alts = mergeAlts trimmed_alts default_alts
1299                 -- We need the mergeAlts in case the new default_alt
1300                 -- has turned into a constructor alternative.
1301                 -- The merge keeps the inner DEFAULT at the front, if there is one
1302                 -- and interleaves the alternatives in the right order
1303
1304         ; return (imposs_deflt_cons, merged_alts) }
1305   where
1306     mb_tc_app = splitTyConApp_maybe (idType case_bndr')
1307     Just (_, inst_tys) = mb_tc_app      -- Lazy, partial pattern: inst_tys is only demanded by the DataAlt clause of impossible_alt
1308
1309     imposs_cons = case scrut of
1310                     Var v -> otherCons (idUnfolding v)  -- Constructors recorded in v's unfolding as ones it cannot be
1311                     other -> []
1312
1313     impossible_alt :: CoreAlt -> Bool
1314     impossible_alt (con, _, _) | con `elem` imposs_cons = True
1315     impossible_alt (DataAlt con, _, _) = dataConCannotMatch inst_tys con
1316     impossible_alt alt                 = False
1317
1318
1319 --------------------------------------------------
1320 --      1. Merge identical branches
1321 --------------------------------------------------
1322 combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
1323
1324 combineIdenticalAlts case_bndr alts@((con1,bndrs1,rhs1) : con_alts)
1325   | all isDeadBinder bndrs1,                    -- Remember the default
1326     length filtered_alts < length con_alts      -- alternative comes first
1327         -- Also Note [Dead binders]
1328   = do  { tick (AltMerge case_bndr)
1329         ; return ((DEFAULT, [], rhs1) : filtered_alts) }        -- The first alternative becomes the single DEFAULT
1330   where
1331     filtered_alts        = filter keep con_alts
1332     keep (con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)      -- Drop alts with dead binders whose RHS equals rhs1
1333
1334 combineIdenticalAlts case_bndr alts = return alts       -- Nothing merged (or no alternatives): return unchanged
1335
1336 -------------------------------------------------------------------------
1337 --                      Prepare the default alternative
1338 -------------------------------------------------------------------------
1339 prepareDefault :: DynFlags
1340                -> SimplEnv
1341                -> OutId         -- Case binder; need just for its type. Note that as an
1342                                 --   OutId, it has maximum information; this is important.
1343                                 --   Test simpl013 is an example
1344                -> Maybe (TyCon, [Type]) -- Type of scrutinee, decomposed
1345                -> [AltCon]      -- These cons can't happen when matching the default
1346                -> Maybe InExpr  -- Rhs
1347                -> SimplM [InAlt]        -- Still unsimplified
1348                                         -- We use a list because it's what mergeAlts expects,
1349                                         -- And becuase case-merging can cause many to show up
1350
1351 ------- Merge nested cases ----------
1352 prepareDefault dflags env outer_bndr bndr_ty imposs_cons (Just deflt_rhs)
1353   | dopt Opt_CaseMerge dflags   -- Only attempted when the case-merge flag is on
1354   , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
1355   , DoneId inner_scrut_var' <- substId env inner_scrut_var
1356         -- Remember, inner_scrut_var is an InId, but outer_bndr is an OutId
1357   , inner_scrut_var' == outer_bndr
1358         -- NB: the substId means that if the outer scrutinee was a
1359         --     variable, and inner scrutinee is the same variable,
1360         --     then inner_scrut_var' will be outer_bndr
1361         --     via the magic of simplCaseBinder
1362   = do  { tick (CaseMerge outer_bndr)
1363
1364         ; let munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
1365         ; return [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts,
1366                                                not (con `elem` imposs_cons) ]
1367                 -- NB: filter out any imposs_cons.  Example:
1368                 --      case x of
1369                 --        A -> e1
1370                 --        DEFAULT -> case x of
1371                 --                      A -> e2
1372                 --                      B -> e3
1373                 -- When we merge, we must ensure that e1 takes
1374                 -- precedence over e2 as the value for A!
1375         }
1376         -- Warning: don't call prepareAlts recursively!
1377         -- Firstly, there's no point, because inner alts have already had
1378         -- mkCase applied to them, so they won't have a case in their default
1379         -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1380         -- in munge_rhs may put a case into the DEFAULT branch!
1381
1382
1383 --------- Fill in known constructor -----------
1384 prepareDefault dflags env case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
1385   |     -- This branch handles the case where we are
1386         -- scrutinising an algebraic data type
1387     isAlgTyCon tycon            -- It's a data type, tuple, or unboxed tuples.
1388   , not (isNewTyCon tycon)      -- We can have a newtype, if we are just doing an eval:
1389                                 --      case x of { DEFAULT -> e }
1390                                 -- and we don't want to fill in a default for them!
1391   , Just all_cons <- tyConDataCons_maybe tycon
1392   , not (null all_cons)         -- This is a tricky corner case.  If the data type has no constructors,
1393                                 -- which GHC allows, then the case expression will have at most a default
1394                                 -- alternative.  We don't want to eliminate that alternative, because the
1395                                 -- invariant is that there's always one alternative.  It's more convenient
1396                                 -- to leave
1397                                 --      case x of { DEFAULT -> e }
1398                                 -- as it is, rather than transform it to
1399                                 --      error "case cant match"
1400                                 -- which would be quite legitimate.  But it's a really obscure corner, and
1401                                 -- not worth wasting code on.
1402   , let imposs_data_cons = [con | DataAlt con <- imposs_cons]   -- We now know it's a data type
1403         impossible con  = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
1404   = case filterOut impossible all_cons of
1405         []    -> return []      -- Eliminate the default alternative
1406                                 -- altogether if it can't match
1407
1408         [con] ->        -- It matches exactly one constructor, so fill it in
1409                  do { tick (FillInCaseDefault case_bndr)
1410                     ; us <- getUniquesM        -- Uniques handed to dataConRepInstPat for the new binders
1411                     ; let (ex_tvs, co_tvs, arg_ids) =
1412                               dataConRepInstPat us con inst_tys
1413                     ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
1414
1415         two_or_more -> return [(DEFAULT, [], deflt_rhs)]        -- Several constructors remain: keep DEFAULT
1416
1417 --------- Catch-all cases -----------
1418 prepareDefault dflags env case_bndr bndr_ty imposs_cons (Just deflt_rhs)
1419   = return [(DEFAULT, [], deflt_rhs)]        -- Neither equation above applied: keep DEFAULT unchanged
1420
1421 prepareDefault dflags env case_bndr bndr_ty imposs_cons Nothing
1422   = return []   -- No default branch
1423 \end{code}
1424
1425
1426
1427 =================================================================================
1428
1429 mkCase tries these things
1430
1431 1.  Eliminate the case altogether if possible
1432
1433 2.  Case-identity:
1434
1435         case e of               ===> e
1436                 True  -> True;
1437                 False -> False
1438
1439     and similar friends.
1440
1441
1442 \begin{code}
1443 mkCase :: OutExpr -> OutId -> OutType
1444        -> [OutAlt]              -- Increasing order
1445        -> SimplM OutExpr
1446
1447 --------------------------------------------------
1448 --      1. Check for empty alternatives
1449 --------------------------------------------------
1450
1451 -- This isn't strictly an error.  It's possible that the simplifier might "see"
1452 -- that an inner case has no accessible alternatives before it "sees" that the
1453 -- entire branch of an outer case is inaccessible.  So we simply
1454 -- put an error case here instead
1455 mkCase scrut case_bndr ty []
1456   = pprTrace "mkCase: null alts" (ppr case_bndr <+> ppr scrut) $
1457     return (mkApps (Var rUNTIME_ERROR_ID)
1458                    [Type ty, Lit (mkStringLit "Impossible alternative")])
1459
1460
1461 --------------------------------------------------
1462 --      2. Identity case
1463 --------------------------------------------------
1464
1465 mkCase scrut case_bndr ty alts  -- Identity case
1466   | all identity_alt alts       -- Every alternative passes check_eq (casts ignored)
1467   = do tick (CaseIdentity case_bndr)
1468        return (re_cast scrut)
1469   where
1470     identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
1471
1472     check_eq DEFAULT       _    (Var v)   = v == case_bndr
1473     check_eq (LitAlt lit') _    (Lit lit) = lit == lit'
1474     check_eq (DataAlt con) args rhs       = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
1475                                          || rhs `cheapEqExpr` Var case_bndr
1476     check_eq con args rhs = False         -- Any other con/rhs combination is not an identity
1477
1478     arg_tys = map Type (tyConAppArgs (idType case_bndr))        -- Only demanded by the DataAlt clause
1479
1480         -- We've seen this:
1481         --      case e of x { _ -> x `cast` c }
1482         -- And we definitely want to eliminate this case, to give
1483         --      e `cast` c
1484         -- So we throw away the cast from the RHS, and reconstruct
1485         -- it at the other end.  All the RHS casts must be the same
1486         -- if (all identity_alt alts) holds.
1487         --
1488         -- Don't worry about nested casts, because the simplifier combines them
1489     de_cast (Cast e _) = e
1490     de_cast e          = e
1491
1492     re_cast scrut = case head alts of     -- head is safe: the [] equation above handles empty alts
1493                         (_,_,Cast _ co) -> Cast scrut co
1494                         other           -> scrut
1495
1496
1497
1498 --------------------------------------------------
1499 --      Catch-all
1500 --------------------------------------------------
1501 mkCase scrut bndr ty alts = return (Case scrut bndr ty alts)    -- Nothing applies: build the case unchanged
1502 \end{code}
1503
1504
1505 When adding auxiliary bindings for the case binder, it's worth checking if
1506 it's dead, because it often is, and occasionally these mkCase transformations
1507 cascade rather nicely.
1508
1509 \begin{code}
1510 bindCaseBndr case_var bound_expr inner      -- Bind case_var to bound_expr around inner, unless case_var is dead
1511   = if isDeadBinder case_var then inner      -- Dead binder: return the body untouched
1512     else bindNonRec case_var bound_expr inner -- Live binder: defer to bindNonRec
1513 \end{code}