compiler/simplCore/SimplUtils.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1993-1998
   3 %
   4 \section[SimplUtils]{The simplifier utilities}
   5
   6 \begin{code}
   7 {-# OPTIONS -w #-}
   8 -- The above warning suppression flag is a temporary kludge.
   9 -- While working on this module you are encouraged to remove it and fix
  10 -- any warnings in the module. See
  11 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
  12 -- for details
  13
  14 module SimplUtils (
  15         -- Rebuilding
  16         mkLam, mkCase, prepareAlts, bindCaseBndr,
  17
  18         -- Inlining,
  19         preInlineUnconditionally, postInlineUnconditionally,
  20         activeInline, activeRule, inlineMode,
  21
  22         -- The continuation type
  23         SimplCont(..), DupFlag(..), ArgInfo(..),
  24         contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs,
  25         countValArgs, countArgs, splitInlineCont,
  26         mkBoringStop, mkLazyArgStop, mkRhsStop, contIsRhsOrArg,
  27         interestingCallContext, interestingArgContext,
  28
  29         interestingArg, mkArgInfo,
  30
  31         abstractFloats
  32     ) where
  33
  34 #include "HsVersions.h"
  35
  36 import SimplEnv
  37 import DynFlags
  38 import StaticFlags
  39 import CoreSyn
  40 import qualified CoreSubst
  41 import PprCore
  42 import CoreFVs
  43 import CoreUtils
  44 import Literal
  45 import CoreUnfold
  46 import MkId
  47 import Name
  48 import Id
  49 import Var      ( isCoVar )
  50 import NewDemand
  51 import SimplMonad
  52 import Type     hiding( substTy )
  53 import TyCon
  54 import DataCon
  55 import Unify    ( dataConCannotMatch )
  56 import VarSet
  57 import BasicTypes
  58 import Util
  59 import MonadUtils
  60 import Outputable
  61 import FastString
  62
  63 import List( nub )
  64 \end{code}
  65
  66
  67 %************************************************************************
  68 %*                                                                      *
  69                 The SimplCont type
  70 %*                                                                      *
  71 %************************************************************************
  72
  73 A SimplCont allows the simplifier to traverse the expression in a
  74 zipper-like fashion.  The SimplCont represents the rest of the expression,
  75 "above" the point of interest.
  76
  77 You can also think of a SimplCont as an "evaluation context", using
  78 that term in the way it is used for operational semantics. This is the
  79 way I usually think of it.  For example, you'll often see a syntax for
  80 evaluation context looking like
  81         C ::= []  |  C e   |  case C of alts  |  C `cast` co
  82 That's the kind of thing we are doing here, and I use that syntax in
  83 the comments.
  84
  85
  86 Key points:
  87   * A SimplCont describes a *strict* context (just like
  88     evaluation contexts do).  E.g. Just [] is not a SimplCont
  89
  90   * A SimplCont describes a context that *does not* bind
  91     any variables.  E.g. \x. [] is not a SimplCont
  92
  93 \begin{code}
  94 data SimplCont      -- The strict, non-binding context surrounding the expression being simplified
  95   = Stop                -- An empty context, or hole, []
  96         OutType         -- Type of the result
  97         CallCtxt        -- Says what (if anything) is interesting about
  98                         --          the context, and hence whether the inliner
  99                         --          should be a bit keener (see interestingCallContext)
 100                         -- For example:
 101                         --     This is an argument of a function that has RULES
 102                         --     Inlining the call might allow the rule to fire
 103
 104   | CoerceIt            -- C `cast` co
 105         OutCoercion             -- The coercion simplified
 106         SimplCont               -- The rest of the context, C
 107
 108   | ApplyTo             -- C arg
 109         DupFlag                 -- May this frame be duplicated? (see contIsDupable)
 110         InExpr SimplEnv         -- The argument and its static env
 111         SimplCont               -- The rest of the context, C
 112
 113   | Select              -- case C of alts
 114         DupFlag                 -- May this frame be duplicated? (see contIsDupable)
 115         InId [InAlt] SimplEnv   -- The case binder, alts, and subst-env
 116         SimplCont               -- The rest of the context, C
 117
 118   -- The two strict forms have no DupFlag, because we never duplicate them
 119   | StrictBind          -- (\x* \xs. e) C
 120         InId [InBndr]           -- let x* = [] in e
 121         InExpr SimplEnv         --      is a special case
 122         SimplCont               -- The rest of the context, C
 123
 124   | StrictArg           -- e C
 125         OutExpr OutType         -- e and its type
 126         CallCtxt                -- Whether *this* argument position is interesting
 127         ArgInfo                 -- Whether the function at the head of e has rules, etc
 128         SimplCont               --     (the ArgInfo also holds strictness flags for *further* args)
 129
 130 data ArgInfo        -- Per-call facts about the function being applied; built by mkArgInfo
 131   = ArgInfo {
 132         ai_rules :: Bool,       -- The function, or an enclosing call context, has rules
 133                                 --      => be keener to inline in all args
 134         ai_strs :: [Bool],      -- Strictness of arguments (True = strict in that argument)
 135                                 --   Usually infinite, but if it is finite it guarantees
 136                                 --   that the function diverges after being given
 137                                 --   that number of args
 138         ai_discs :: [Int]       -- Discounts for arguments; non-zero => be keener to inline
 139                                 --   Always infinite (padded with zeros)
 140     }
 141
 142 instance Outputable SimplCont where     -- Debug printer: this frame first, the rest of the continuation below it
 143   ppr (Stop ty _)                    = ptext SLIT("Stop") <+> ppr ty
 144   ppr (ApplyTo dup arg se cont)      = ((ptext SLIT("ApplyTo") <+> ppr dup <+> pprParendExpr arg)
 145                                           {-  $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
 146   ppr (StrictBind b _ _ _ cont)      = (ptext SLIT("StrictBind") <+> ppr b) $$ ppr cont
 147   ppr (StrictArg f _ _ _ cont)       = (ptext SLIT("StrictArg") <+> ppr f) $$ ppr cont
 148   ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$
 149                                        (nest 4 (ppr alts)) $$ ppr cont
 150   ppr (CoerceIt co cont)             = (ptext SLIT("CoerceIt") <+> ppr co) $$ ppr cont
 151
 152 data DupFlag = OkToDup | NoDup     -- Carried by ApplyTo and Select frames; tested by contIsDupable
 153
 154 instance Outputable DupFlag where  -- Short tags used when printing a SimplCont
 155   ppr OkToDup = ptext SLIT("ok")
 156   ppr NoDup   = ptext SLIT("nodup")
 157
 158
 159
 160 ------------------- Smart constructors for the empty continuation (Stop)
 161 mkBoringStop :: OutType -> SimplCont
 162 mkBoringStop res_ty = Stop res_ty BoringCtxt    -- a hole in an uninteresting context
 163
 164 mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
 165 mkLazyArgStop = Stop                            -- the caller supplies the call context
 166
 167 mkRhsStop :: OutType -> SimplCont
 168 mkRhsStop = mkBoringStop                        -- builds the same boring hole as mkBoringStop
 169 ------------------- True for Stop, StrictBind and StrictArg; False for every other frame
 170 contIsRhsOrArg :: SimplCont -> Bool
 171 contIsRhsOrArg (Stop {})         = True
 172 contIsRhsOrArg (StrictBind {})   = True
 173 contIsRhsOrArg (StrictArg {})    = True
 174 contIsRhsOrArg _                 = False
 175 ------------------- May this continuation be duplicated?  (Casts are looked through.)
 176 contIsDupable :: SimplCont -> Bool
 177 contIsDupable k = case k of
 178     Stop {}                  -> True
 179     ApplyTo OkToDup _ _ _    -> True
 180     Select  OkToDup _ _ _ _  -> True
 181     CoerceIt _ inner         -> contIsDupable inner
 182     _                        -> False
 183 ------------------- Is there nothing but type applications and casts around the hole?
 184 contIsTrivial :: SimplCont -> Bool
 185 contIsTrivial k = case k of
 186     Stop {}                   -> True
 187     ApplyTo _ (Type _) _ rest -> contIsTrivial rest
 188     CoerceIt _ rest           -> contIsTrivial rest
 189     _                         -> False
 190 ------------------- The result type recorded in the Stop at the far end of the continuation
 191 contResultType :: SimplCont -> OutType
 192 contResultType k = case k of
 193     Stop res_ty _            -> res_ty
 194     StrictArg _ _ _ _ rest   -> contResultType rest
 195     StrictBind _ _ _ _ rest  -> contResultType rest
 196     ApplyTo _ _ _ rest       -> contResultType rest
 197     CoerceIt _ rest          -> contResultType rest
 198     Select _ _ _ _ rest      -> contResultType rest
 199 ------------------- Number of leading value arguments; type arguments are skipped, not counted
 200 countValArgs :: SimplCont -> Int
 201 countValArgs k = case k of
 202     ApplyTo _ (Type _) _ rest -> countValArgs rest
 203     ApplyTo _ _        _ rest -> countValArgs rest + 1
 204     _                         -> 0
 205 -- Number of leading arguments of any sort (type and value alike)
 206 countArgs :: SimplCont -> Int
 207 countArgs (ApplyTo _ _ _ rest) = countArgs rest + 1
 208 countArgs _                    = 0
 209
 210 contArgs :: SimplCont -> ([OutExpr], SimplCont)
 211 -- Peels off the leading ApplyTo frames, substituting each argument with its own
 212 contArgs = collect []      -- static env to get an OutExpr; also returns what is left
 213   where
 214     collect acc (ApplyTo _ arg env rest) = collect (substExpr env arg : acc) rest
 215     collect acc k                        = (reverse acc, k)
 216 -- Strip the first n ApplyTo frames; panics if the continuation has fewer than n
 217 dropArgs :: Int -> SimplCont -> SimplCont
 218 dropArgs 0 k = k
 219 dropArgs n k | ApplyTo _ _ _ rest <- k = dropArgs (n-1) rest
 220              | otherwise               = pprPanic "dropArgs" (ppr n <+> ppr k)
 221
 222 --------------------
 223 splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont)
 224 -- Returns Nothing if the continuation should dissolve an InlineMe Note
 225 -- Returns Just (c1,c2) otherwise,
 226 --      where c1 is the continuation to put inside the InlineMe
 227 --      and   c2 outside
 228
 229 -- Example: (__inline_me__ (/\a. e)) ty
 230 --      Here we want to do the beta-redex without dissolving the InlineMe
 231 -- See test simpl017 (and Trac #1627) for a good example of why this is important
 232 -- Leading type applications go inside (c1); a Stop or strict frame ends c1 and becomes c2
 233 splitInlineCont (ApplyTo dup (Type ty) se c)
 234   | Just (c1, c2) <- splitInlineCont c          = Just (ApplyTo dup (Type ty) se c1, c2)
 235 splitInlineCont cont@(Stop ty _)                = Just (mkBoringStop ty, cont)
 236 splitInlineCont cont@(StrictBind bndr _ _ se _) = Just (mkBoringStop (substTy se (idType bndr)), cont)
 237 splitInlineCont cont@(StrictArg _ fun_ty _ _ _) = Just (mkBoringStop (funArgTy fun_ty), cont)
 238 splitInlineCont other                           = Nothing      -- any other frame: dissolve
 239         -- NB: the calculation of the type for mkBoringStop is an annoying
 240         --     duplication of the same calculation in mkDupableCont
 241 \end{code}
 242
 243
 244 \begin{code}
 245 interestingArg :: OutExpr -> Bool
 246         -- An argument is interesting if it has *some* structure.
 247         -- We are here trying to avoid unfolding a function that
 248         -- is applied only to variables that have no unfolding
 249         -- (i.e. they are probably lambda bound): f x y z
 250         -- There is little point in inlining f here.
 251 interestingArg (Var v)           = hasSomeUnfolding (idUnfolding v)
 252                                         -- Was: isValueUnfolding (idUnfolding v')
 253                                         -- But that seems over-pessimistic
 254                                  || isDataConWorkId v
 255                                         -- This accounts for an argument like
 256                                         -- () or [], which is definitely interesting
 257 interestingArg (Type _)          = False                -- a bare type argument is never interesting
 258 interestingArg (App fn (Type _)) = interestingArg fn    -- look through type applications
 259 interestingArg (Note _ a)        = interestingArg a     -- ... and through Notes
 260
 261 -- Idea (from Sam B); I'm not sure if it's a good idea, so commented out for now
 262 -- interestingArg expr | isUnLiftedType (exprType expr)
 263 --        -- Unlifted args are only ever interesting if we know what they are
 264 --  =                  case expr of
 265 --                        Lit lit -> True
 266 --                        _       -> False
 267
 268 interestingArg other             = True                 -- every remaining form counts as having structure
 269         -- Consider     let x = 3 in f x
 270         -- The substitution will contain (x -> ContEx 3), and we want
 271         -- to say that x is an interesting argument.
 272         -- But consider also (\x. f x y) y
 273         -- The substitution will contain (x -> ContEx y), and we want to say
 274         -- that x is not interesting (assuming y has no unfolding)
 275 \end{code}
 276
 277
 278 Comment about interestingCallContext
 279 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 280 We want to avoid inlining an expression where there can't possibly be
 281 any gain, such as in an argument position.  Hence, if the continuation
 282 is interesting (eg. a case scrutinee, application etc.) then we
 283 inline, otherwise we don't.
 284
 285 Previously some_benefit used to return True only if the variable was
 286 applied to some value arguments.  This didn't work:
 287
 288         let x = _coerce_ (T Int) Int (I# 3) in
 289         case _coerce_ Int (T Int) x of
 290                 I# y -> ....
 291
 292 we want to inline x, but can't see that it's a constructor in a case
 293 scrutinee position, and some_benefit is False.
 294
 295 Another example:
 296
 297 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
 298
 299 ....  case dMonadST _@_ x0 of (a,b,c) -> ....
 300
 301 we'd really like to inline dMonadST here, but we *don't* want to
 302 inline if the case expression is just
 303
 304         case x of y { DEFAULT -> ... }
 305
 306 since we can just eliminate this case instead (x is in WHNF).  Similar
 307 applies when x is bound to a lambda expression.  Hence
 308 contIsInteresting looks for case expressions with just a single
 309 default case.
 310
 311
 312 \begin{code}
 313 interestingCallContext :: SimplCont -> CallCtxt
 314 interestingCallContext cont     -- Classify the context by the frame nearest the hole (casts are looked through)
 315   = interesting cont
 316   where
 317     interestingCtxt = ArgCtxt False 2   -- Give *some* incentive!
 318         -- A case scrutinee: CaseCtxt if the case binder is dead, else the mild ArgCtxt above
 319     interesting (Select _ bndr _ _ _)
 320         | isDeadBinder bndr       = CaseCtxt
 321         | otherwise               = interestingCtxt
 322
 323     interesting (ApplyTo {})      = interestingCtxt
 324                                 -- Can happen if we have (coerce t (f x)) y
 325                                 -- Perhaps interestingCtxt is a bit over-keen, but I've
 326                                 -- seen (coerce f) x, where f has an INLINE prag,
 327                                 -- So we have to give some motivation for inlining it
 328         -- StrictArg and Stop simply hand back the CallCtxt they carry
 329     interesting (StrictArg _ _ cci _ _) = cci
 330     interesting (StrictBind {})         = BoringCtxt
 331     interesting (Stop ty cci)           = cci
 332     interesting (CoerceIt _ cont)       = interesting cont
 333         -- If this call is the arg of a strict function, the context
 334         -- is a bit interesting.  If we inline here, we may get useful
 335         -- evaluation information to avoid repeated evals: e.g.
 336         --      x + (y * z)
 337         -- Here the contIsInteresting makes the '*' keener to inline,
 338         -- which in turn exposes a constructor which makes the '+' inline.
 339         -- Assuming that +,* aren't small enough to inline regardless.
 340         --
 341         -- It's also very important to inline in a strict context for things
 342         -- like
 343         --              foldr k z (f x)
 344         -- Here, the context of (f x) is strict, and if f's unfolding is
 345         -- a build it's *great* to inline it here.  So we must ensure that
 346         -- the context for (f x) is not totally uninteresting.
 347
 348
 349 ------------------- Build the ArgInfo (rules flag, strictness, discounts) for a call to a function
 350 mkArgInfo :: Id
 351           -> Int        -- Number of value args
 352           -> SimplCont  -- Context of the call
 353           -> ArgInfo
 354
 355 mkArgInfo fun n_val_args call_cont
 356   | n_val_args < idArity fun            -- Note [Unsaturated functions]
 357   = ArgInfo { ai_rules = False
 358             , ai_strs = vanilla_stricts
 359             , ai_discs = vanilla_discounts }
 360   | otherwise
 361   = ArgInfo { ai_rules = interestingArgContext fun call_cont
 362             , ai_strs  = arg_stricts
 363             , ai_discs = arg_discounts }
 364   where
 365     vanilla_discounts, arg_discounts :: [Int]
 366     vanilla_discounts = repeat 0
 367     arg_discounts = case idUnfolding fun of
 368                         CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _)
 369                               -> discounts ++ vanilla_discounts
 370                         other -> vanilla_discounts      -- any other unfolding or guidance: no discounts
 371
 372     vanilla_stricts, arg_stricts :: [Bool]
 373     vanilla_stricts  = repeat False
 374
 375     arg_stricts
 376       = case splitStrictSig (idNewStrictness fun) of
 377           (demands, result_info)
 378                 | not (demands `lengthExceeds` n_val_args)
 379                 ->      -- Enough args, use the strictness given.
 380                         -- For bottoming functions we used to pretend that the arg
 381                         -- is lazy, so that we don't treat the arg as an
 382                         -- interesting context.  This avoids substituting
 383                         -- top-level bindings for (say) strings into
 384                         -- calls to error.  But now we are more careful about
 385                         -- inlining lone variables, so it's ok (see SimplUtils.analyseCont)
 386                    if isBotRes result_info then
 387                         map isStrictDmd demands         -- Finite => result is bottom
 388                    else
 389                         map isStrictDmd demands ++ vanilla_stricts
 390
 391                | otherwise
 392                -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
 393                                 <+> ppr n_val_args <+> ppr demands )
 394                    vanilla_stricts      -- Not enough args, or no strictness
 395
 396 {- Note [Unsaturated functions]
 397   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 398 Consider (test eyeball/inline4)
 399         x = a:as
 400         y = f x
 401 where f has arity 2.  Then we do not want to inline 'x', because
 402 it'll just be floated out again.  Even if f has lots of discounts
 403 on its first argument -- it must be saturated for these to kick in
 404 -}
 405
 406 interestingArgContext :: Id -> SimplCont -> Bool
 407 -- If the argument has form (f x y), where x,y are boring,
 408 -- and f is marked INLINE, then we don't want to inline f.
 409 -- But if the context of the argument is
 410 --      g (f x y)
 411 -- where g has rules, then we *do* want to inline f, in case it
 412 -- exposes a rule that might fire.  Similarly, if the context is
 413 --      h (g (f x x))
 414 -- where h has rules, then we do want to inline f; hence the
 415 -- call_cont argument to interestingArgContext
 416 --
 417 -- The Bool returned here (mkArgInfo stores it in ai_rules) makes this happen; if it's
 418 -- set, the inliner gets just enough keener to inline f
 419 -- regardless of how boring f's arguments are, if it's marked INLINE
 420 --
 421 -- The alternative would be to *always* inline an INLINE function,
 422 -- regardless of how boring its context is; but that seems overkill
 423 -- For example, it'd mean that wrapper functions were always inlined
 424 interestingArgContext fn call_cont
 425   = idHasRules fn || go call_cont      -- fn itself has rules, or its call context says so
 426   where
 427     go (Select {})             = False
 428     go (ApplyTo {})            = False
 429     go (StrictArg _ _ cci _ _) = interesting cci
 430     go (StrictBind {})         = False  -- ??
 431     go (CoerceIt _ c)          = go c
 432     go (Stop _ cci)            = interesting cci
 433         -- Only an ArgCtxt carries a has-rules flag; any other CallCtxt counts as False
 434     interesting (ArgCtxt rules _) = rules
 435     interesting other             = False
 436 \end{code}
 437
 438
 439
 440 %************************************************************************
 441 %*                                                                      *
 442 \subsection{Decisions about inlining}
 443 %*                                                                      *
 444 %************************************************************************
 445
 446 Inlining is controlled partly by the SimplifierMode switch.  This has two
 447 settings:
 448
 449         SimplGently     (a) Simplifying before specialiser/full laziness
 450                         (b) Simplifying inside an INLINE pragma
 451                         (c) Simplifying the LHS of a rule
 452                         (d) Simplifying a GHCi expression or Template
 453                                 Haskell splice
 454
 455         SimplPhase n _   Used at all other times
 456
 457 The key thing about SimplGently is that it does no call-site inlining.
 458 Before full laziness we must be careful not to inline wrappers,
 459 because doing so inhibits floating
 460     e.g. ...(case f x of ...)...
 461     ==> ...(case (case x of I# x# -> fw x#) of ...)...
 462     ==> ...(case x of I# x# -> case fw x# of ...)...
 463 and now the redex (f x) isn't floatable any more.
 464
 465 The no-inlining thing is also important for Template Haskell.  You might be
 466 compiling in one-shot mode with -O2; but when TH compiles a splice before
 467 running it, we don't want to use -O2.  Indeed, we don't want to inline
 468 anything, because the byte-code interpreter might get confused about
 469 unboxed tuples and suchlike.
 470
 471 INLINE pragmas
 472 ~~~~~~~~~~~~~~
 473 SimplGently is also used as the mode to simplify inside an InlineMe note.
 474
 475 \begin{code}
 476 inlineMode :: SimplifierMode    -- The mode used when simplifying inside an InlineMe note
 477 inlineMode = SimplGently        -- SimplGently does no call-site inlining
 478 \end{code}
 479
 480 It really is important to switch off inlinings inside such
 481 expressions.  Consider the following example
 482
 483         let f = \pq -> BIG
 484         in
 485         let g = \y -> f y y
 486             {-# INLINE g #-}
 487         in ...g...g...g...g...g...
 488
 489 Now, if that's the ONLY occurrence of f, it will be inlined inside g,
 490 and thence copied multiple times when g is inlined.
 491
 492
 493 This function may be inlined in other modules, so we
 494 don't want to remove (by inlining) calls to functions that have
 495 specialisations, or that may have transformation rules in an importing
 496 scope.
 497
 498 E.g.    {-# INLINE f #-}
 499                 f x = ...g...
 500
 501 and suppose that g is strict *and* has specialisations.  If we inline
 502 g's wrapper, we deny f the chance of getting the specialised version
 503 of g when f is inlined at some call site (perhaps in some other
 504 module).
 505
 506 It's also important not to inline a worker back into a wrapper.
 507 A wrapper looks like
 508         wrapper = inline_me (\x -> ...worker... )
 509 Normally, the inline_me prevents the worker getting inlined into
 510 the wrapper (initially, the worker's only call site!).  But,
 511 if the wrapper is sure to be called, the strictness analyser will
 512 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
 513 continuation.  That's why the keep_inline predicate returns True for
 514 ArgOf continuations.  It shouldn't do any harm not to dissolve the
 515 inline-me note under these circumstances.
 516
 517 Note that the result is that we do very little simplification
 518 inside an InlineMe.
 519
 520         all xs = foldr (&&) True xs
 521         any p = all . map p  {-# INLINE any #-}
 522
 523 Problem: any won't get deforested, and so if it's exported and the
 524 importer doesn't use the inlining, (eg passes it as an arg) then we
 525 won't get deforestation at all.  We haven't solved this problem yet!
 526
 527
 528 preInlineUnconditionally
 529 ~~~~~~~~~~~~~~~~~~~~~~~~
 530 @preInlineUnconditionally@ examines a bndr to see if it is used just
 531 once in a completely safe way, so that it is safe to discard the
 532 binding and inline its RHS at the (unique) usage site, REGARDLESS of how
 533 big the RHS might be.  If this is the case we don't simplify the RHS
 534 first, but just inline it un-simplified.
 535
 536 This is much better than first simplifying a perhaps-huge RHS and then
 537 inlining and re-simplifying it.  Indeed, it can be at least quadratically
 538 better.  Consider
 539
 540         x1 = e1
 541         x2 = e2[x1]
 542         x3 = e3[x2]
 543         ...etc...
 544         xN = eN[xN-1]
 545
 546 We may end up simplifying e1 N times, e2 N-1 times, e3 N-2 times etc.
 547 This can happen with cascades of functions too:
 548
 549         f1 = \x1.e1
 550         f2 = \xs.e2[f1]
 551         f3 = \xs.e3[f2]
 552         ...etc...
 553
 554 THE MAIN INVARIANT is this:
 555
 556         ----  preInlineUnconditionally invariant -----
 557    IF preInlineUnconditionally chooses to inline x = <rhs>
 558    THEN doing the inlining should not change the occurrence
 559         info for the free vars of <rhs>
 560         ----------------------------------------------
 561
 562 For example, it's tempting to look at trivial binding like
 563         x = y
 564 and inline it unconditionally.  But suppose x is used many times,
 565 but this is the unique occurrence of y.  Then inlining x would change
 566 y's occurrence info, which breaks the invariant.  It matters: y
 567 might have a BIG rhs, which will now be dup'd at every occurrence of x.
 568
 569
 570 Even RHSs labelled InlineMe aren't caught here, because there might be
 571 no benefit from inlining at the call site.
 572
 573 [Sept 01] Don't unconditionally inline a top-level thing, because that
 574 can simply make a static thing into something built dynamically.  E.g.
 575         x = (a,b)
 576         main = \s -> h x
 577
 578 [Remember that we treat \s as a one-shot lambda.]  No point in
 579 inlining x unless there is something interesting about the call site.
 580
 581 But watch out: if you aren't careful, some useful foldr/build fusion
 582 can be lost (most notably in spectral/hartel/parstof) because the
 583 foldr didn't see the build.  Doing the dynamic allocation isn't a big
 584 deal, in fact, but losing the fusion can be.  But the right thing here
 585 seems to be to do a callSiteInline based on the fact that there is
 586 something interesting about the call site (it's strict).  Hmm.  That
 587 seems a bit fragile.
 588
 589 Conclusion: inline top level things gaily until Phase 0 (the last
 590 phase), at which point don't.
 591
 592 \begin{code}
 593 preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
 594 preInlineUnconditionally env top_lvl bndr rhs
 595   | not active             = False     -- the binder's inline pragma is not active in this mode/phase
 596   | opt_SimplNoPreInlining = False     -- pre-inlining is switched off by this flag
 597   | otherwise = case idOccInfo bndr of
 598                   IAmDead                    -> True    -- Happens in ((\x.1) v)
 599                   OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
 600                   other                      -> False   -- any other occurrence info: don't pre-inline
 601   where
 602     phase = getMode env
 603     active = case phase of
 604                    SimplGently    -> isAlwaysActive prag
 605                    SimplPhase n _ -> isActive n prag
 606     prag = idInlinePragma bndr
 607
 608     try_once in_lam int_cxt     -- There's one textual occurrence
 609         | not in_lam = isNotTopLevel top_lvl || early_phase
 610         | otherwise  = int_cxt && canInlineInLam rhs
 611
 612 -- Be very careful before inlining inside a lambda, because (a) we must not
 613 -- invalidate occurrence information, and (b) we want to avoid pushing a
 614 -- single allocation (here) into multiple allocations (inside lambda).
 615 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
 616 --      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
 617 --      where
 618 --              is_cheap = exprIsCheap rhs
 619 --              ok = is_cheap && int_cxt
 620
 621         --      int_cxt         The context isn't totally boring
 622         -- E.g. let f = \ab.BIG in \y. map f xs
 623         --      Don't want to substitute for f, because then we allocate
 624         --      its closure every time the \y is called
 625         -- But: let f = \ab.BIG in \y. map (f y) xs
 626         --      Now we do want to substitute for f, even though it's not
 627         --      saturated, because we're going to allocate a closure for
 628         --      (f y) every time round the loop anyhow.
 629
 630         -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
 631         -- so substituting rhs inside a lambda doesn't change the occ info.
 632         -- Sadly, not quite the same as exprIsHNF.
 633     canInlineInLam (Lit l)              = True
 634     canInlineInLam (Lam b e)            = isRuntimeVar b || canInlineInLam e
 635     canInlineInLam (Note _ e)           = canInlineInLam e
 636     canInlineInLam _                    = False
 637
 638     early_phase = case phase of
 639                         SimplPhase 0 _ -> False
 640                         other          -> True
 641 -- If we don't have this early_phase test, consider
 642 --      x = length [1,2,3]
 643 -- The full laziness pass carefully floats all the cons cells to
 644 -- top level, and preInlineUnconditionally floats them all back in.
 645 -- Result is (a) static allocation replaced by dynamic allocation
 646 --           (b) many simplifier iterations because this tickles
 647 --               a related problem; only one inlining per pass
 648 --
 649 -- On the other hand, I have seen cases where top-level fusion is
 650 -- lost if we don't inline top level thing (e.g. string constants)
 651 -- Hence the test for phase zero (which is the phase for all the final
 652 -- simplifications).  Until phase zero we take no special notice of
 653 -- top level things, but then we become more leery about inlining
 654 -- them.
 655
 656 \end{code}
 657
 658 postInlineUnconditionally
 659 ~~~~~~~~~~~~~~~~~~~~~~~~~
 660 @postInlineUnconditionally@ decides whether to unconditionally inline
 661 a thing based on the form of its RHS; in particular if it has a
 662 trivial RHS.  If so, we can inline and discard the binding altogether.
 663
 664 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
 665 only have *forward* references.  Hence, it's safe to discard the binding
 666
 667 NOTE: This isn't our last opportunity to inline.  We're at the binding
 668 site right now, and we'll get another opportunity when we get to the
 669 occurrence(s)
 670
 671 Note that we do this unconditional inlining only for trivial RHSs.
 672 Don't inline even WHNFs inside lambdas; doing so may simply increase
 673 allocation when the function is called. This isn't the last chance; see
 674 NOTE above.
 675
 676 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here.  Why?
 677 Because we don't even want to inline them into the RHS of constructor
 678 arguments. See NOTE above
 679
 680 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
 681 it's best to inline it anyway.  We often get a=E; b=a from desugaring,
 682 with both a and b marked NOINLINE.  But that seems incompatible with
 683 our new view that inlining is like a RULE, so I'm sticking to the 'active'
 684 story for now.
 685
 686 \begin{code}
 687 postInlineUnconditionally
 688     :: SimplEnv -> TopLevelFlag
 689     -> InId             -- The binder (an OutId would be fine too)
 690     -> OccInfo          -- From the InId
 691     -> OutExpr          -- The right-hand side of the binding
 692     -> Unfolding        -- Consulted for size and cheapness (see the OneOcc case)
 693     -> Bool             -- True <=> inline the binding unconditionally
 694 postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 695   | not active             = False      -- Inline pragma is not active in the current mode
 696   | isLoopBreaker occ_info = False      -- If it's a loop-breaker of any kind, don't inline
 697                                         -- because it might be referred to "earlier"
 698   | isExportedId bndr      = False      -- Exported binders are never inlined here
 699   | exprIsTrivial rhs      = True       -- Trivial RHS: always inline
 700   | otherwise
 701   = case occ_info of
 702         -- The point of examining occ_info here is that for *non-values*
 703         -- that occur outside a lambda, the call-site inliner won't have
 704         -- a chance (because it doesn't know that the thing
 705         -- only occurs once).   The pre-inliner won't have gotten
 706         -- it either, if the thing occurs in more than one branch.
 707         -- So the main target is things like
 708         --      let x = f y in
 709         --      case v of
 710         --         True  -> case x of ...
 711         --         False -> case x of ...
 712         -- I'm not sure how important this is in practice
 713       OneOcc in_lam one_br int_cxt      -- OneOcc => no code-duplication issue
 714         ->     smallEnoughToInline unfolding    -- Small enough to dup
 715                         -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
 716                         --
 717                         -- NB: Do NOT inline arbitrarily big things, even if one_br is True
 718                         -- Reason: doing so risks exponential behaviour.  We simplify a big
 719                         --         expression, inline it, and simplify it again.  But if the
 720                         --         very same thing happens in the big expression, we get
 721                         --         exponential cost!
 722                         -- PRINCIPLE: when we've already simplified an expression once,
 723                         -- make sure that we only inline it if it's reasonably small.
 724
 725            &&  ((isNotTopLevel top_lvl && not in_lam) ||
 726                         -- But outside a lambda, we want to be reasonably aggressive
 727                         -- about inlining into multiple branches of case
 728                         -- e.g. let x = <non-value>
 729                         --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
 730                         -- Inlining can be a big win if C3 is the hot-spot, even if
 731                         -- the uses in C1, C2 are not 'interesting'
 732                         -- An example that gets worse if you add int_cxt here is 'clausify'
 733
 734                 (isCheapUnfolding unfolding && int_cxt))
 735                         -- isCheap => acceptable work duplication; in_lam may be true
 736                         -- int_cxt to prevent us inlining inside a lambda without some
 737                         -- good reason.  See the notes on int_cxt in preInlineUnconditionally
 738
 739       IAmDead -> True   -- This happens; for example, the case_bndr during case of
 740                         -- known constructor:  case (a,b) of x { (p,q) -> ... }
 741                         -- Here x isn't mentioned in the RHS, so we don't want to
 742                         -- create the (dead) let-binding  let x = (a,b) in ...
 743
 744       other -> False    -- Any other occurrence info: keep the binding
 745
 746 -- Here's an example that we don't handle well:
 747 --      let f = if b then Left (\x.BIG) else Right (\y.BIG)
 748 --      in \y. ....case f of {...} ....
 749 -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
 750 -- But
 751 -- * We can't preInlineUnconditionally because that would invalidate
 752 --   the occ info for b.
 753 -- * We can't postInlineUnconditionally because the RHS is big, and
 754 --   that risks exponential behaviour
 755 -- * We can't call-site inline, because the rhs is big
 756 -- Alas!
 757
 758   where
 759     active = case getMode env of        -- Is bndr's inline pragma active right now?
 760                    SimplGently    -> isAlwaysActive prag        -- Gentle mode: only always-active pragmas count
 761                    SimplPhase n _ -> isActive n prag            -- Phase n: ask the pragma about phase n
 762     prag = idInlinePragma bndr
 763
 764 activeInline :: SimplEnv -> OutId -> Bool   -- May this Id be inlined in the current simplifier mode?
 765 activeInline env id
 766   = case getMode env of
 767       SimplGently -> False
 768         -- No inlining at all when doing gentle stuff,
 769         -- except for local things that occur once (pre/postInlineUnconditionally).
 770         -- The reason is that too little clean-up happens if you
 771         -- don't inline use-once things.   Also a bit of inlining is *good* for
 772         -- full laziness; it can expose constant sub-expressions.
 773         -- Example in spectral/mandel/Mandel.hs, where the mandelset
 774         -- function gets a useful let-float if you inline windowToViewport
 775
 776         -- NB: we used to have a second exception, for data con wrappers.
 777         -- On the grounds that we use gentle mode for rule LHSs, and
 778         -- they match better when data con wrappers are inlined.
 779         -- But that only really applies to the trivial wrappers (like (:)),
 780         -- and they are now constructed as Compulsory unfoldings (in MkId)
 781         -- so they'll happen anyway.
 782
 783       SimplPhase n _ -> isActive n prag         -- Phase n: decided by the Id's inline pragma
 784   where
 785     prag = idInlinePragma id
 786
 787 activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
 788 -- Nothing => No rules at all;  Just p => p says whether a given Activation is active now
 789 activeRule dflags env
 790   | not (dopt Opt_RewriteRules dflags)
 791   = Nothing     -- Rewriting is off
 792   | otherwise
 793   = case getMode env of
 794         SimplGently    -> Just isAlwaysActive
 795                         -- Used to be Nothing (no rules in gentle mode)
 796                         -- Main motivation for changing is that I wanted
 797                         --      lift String ===> ...
 798                         -- to work in Template Haskell when simplifying
 799                         -- splices, so we get simpler code for literal strings
 800         SimplPhase n _ -> Just (isActive n)     -- Phase n: test each Activation against phase n
 801 \end{code}
 802
 803
 804 %************************************************************************
 805 %*                                                                      *
 806         Rebuilding a lambda
 807 %*                                                                      *
 808 %************************************************************************
 809
 810 \begin{code}
 811 mkLam :: [OutBndr] -> OutExpr -> SimplM OutExpr
 812 -- mkLam floats a cast in the body outwards (Note [Casts and lambdas]), and then tries
 813 --      a) eta reduction, if that gives a trivial expression
 814 --      b) eta expansion [only if there are some value lambdas]
 815
 816 mkLam [] body
 817   = return body         -- No binders: nothing to build
 818 mkLam bndrs body
 819   = do  { dflags <- getDOptsSmpl
 820         ; mkLam' dflags bndrs body }
 821   where
 822     mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
 823     mkLam' dflags bndrs (Cast body co)
 824       | not (any bad bndrs)
 825         -- Note [Casts and lambdas]
 826       = do { lam <- mkLam' dflags bndrs body    -- Build the lambda around the un-cast body
 827            ; return (mkCoerce (mkPiTypes bndrs co) lam) }       -- ...and put the lifted cast outside
 828       where
 829         co_vars  = tyVarsOfType co
 830         bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars    -- A coercion binder that is free in co
 831
 832     mkLam' dflags bndrs body
 833       | dopt Opt_DoEtaReduction dflags,
 834         Just etad_lam <- tryEtaReduce bndrs body
 835       = do { tick (EtaReduction (head bndrs))   -- bndrs is non-empty: mkLam [] is handled above
 836            ; return etad_lam }
 837
 838       | dopt Opt_DoLambdaEtaExpansion dflags,
 839         any isRuntimeVar bndrs                  -- Only if there is at least one value lambda
 840       = do { body' <- tryEtaExpansion dflags body
 841            ; return (mkLams bndrs body') }
 842
 843       | otherwise
 844       = return (mkLams bndrs body)              -- Neither applies: plain lambda
 845 \end{code}
 846
 847 Note [Casts and lambdas]
 848 ~~~~~~~~~~~~~~~~~~~~~~~~
 849 Consider
 850         (\x. (\y. e) `cast` g1) `cast` g2
 851 There is a danger here that the two lambdas look separated, and the
 852 full laziness pass might float an expression to between the two.
 853
 854 So this equation in mkLam' floats the g1 out, thus:
 855         (\x. e `cast` g1)  -->  (\x.e) `cast` (tx -> g1)
 856 where x:tx.
 857
 858 In general, this floats casts outside lambdas, where (I hope) they
 859 might meet and cancel with some other cast:
 860         \x. e `cast` co   ===>   (\x. e) `cast` (tx -> co)
 861         /\a. e `cast` co  ===>   (/\a. e) `cast` (/\a. co)
 862         /\g. e `cast` co  ===>   (/\g. e) `cast` (/\g. co)
 863                           (if not (g `in` co))
 864
 865 Notice that it works regardless of 'e'.  Originally it worked only
 866 if 'e' was itself a lambda, but in some cases that resulted in
 867 fruitless iteration in the simplifier.  A good example was when
 868 compiling Text.ParserCombinators.ReadPrec, where we had a definition
 869 like    (\x. Get `cast` g)
 870 where Get is a constructor with nonzero arity.  Then mkLam eta-expanded
 871 the Get, and the next iteration eta-reduced it, and then eta-expanded
 872 it again.
 873
 874 Note also the side condition for the case of coercion binders.
 875 It does not make sense to transform
 876         /\g. e `cast` g  ==>  (/\g.e) `cast` (/\g.g)
 877 because the latter is not well-kinded.
 878
 879 --      c) floating lets out through big lambdas
 880 --              [only if all tyvar lambdas, and only if this lambda
 881 --               is the RHS of a let]
 882
 883 {-      Sept 01: I'm experimenting with getting the
 884         full laziness pass to float out past big lambdas
 885  | all isTyVar bndrs,   -- Only for big lambdas
 886    contIsRhs cont       -- Only try the rhs type-lambda floating
 887                         -- if this is indeed a right-hand side; otherwise
 888                         -- we end up floating the thing out, only for float-in
 889                         -- to float it right back in again!
 890  = do (floats, body') <- tryRhsTyLam env bndrs body
 891       return (floats, mkLams bndrs body')
 892 -}
 893
 894
 895 %************************************************************************
 896 %*                                                                      *
 897                 Eta reduction
 898 %*                                                                      *
 899 %************************************************************************
 900
 901 Note [Eta reduction conditions]
 902 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 903 We try for eta reduction here, but *only* if we get all the way to a
 904 trivial expression.  We don't want to remove extra lambdas unless we
 905 are going to avoid allocating this thing altogether.
 906
 907 There are some particularly delicate points here:
 908
 909 * Eta reduction is not valid in general:
 910         \x. bot  /=  bot
 911   This matters, partly for old-fashioned correctness reasons but,
 912   worse, getting it wrong can yield a seg fault. Consider
 913         f = \x.f x
 914         h y = case (case y of { True -> f `seq` True; False -> False }) of
 915                 True -> ...; False -> ...
 916
 917   If we (unsoundly) eta-reduce f to get f=f, the strictness analyser
 918   says f=bottom, and replaces the (f `seq` True) with just
 919   (f `cast` unsafe-co).  BUT, as things stand, 'f' got arity 1, and it
 920   *keeps* arity 1 (perhaps also wrongly).  So CorePrep eta-expands
 921   the definition again, so that it does not terminate after all.
 922   Result: seg-fault because the boolean case actually gets a function value.
 923   See Trac #1947.
 924
 925   So it's important to do the right thing.
 926
 927 * We need to be careful if we just look at f's arity. Currently (Dec07),
 928   f's arity is visible in its own RHS (see Note [Arity robustness] in
 929   SimplEnv) so we must *not* trust the arity when checking that 'f' is
 930   a value.  Instead, look at the unfolding.
 931
 932   However for GlobalIds we can look at the arity; and for primops we
 933   must, since they have no unfolding.
 934
 935 * Regardless of whether 'f' is a value, we always want to
 936   reduce (/\a -> f a) to f
 937   This came up in a RULE: foldr (build (/\a -> g a))
 938   did not match            foldr (build (/\b -> ...something complex...))
 939   The type checker can insert these eta-expanded versions,
 940   with both type and dictionary lambdas; hence the slightly
 941   ad-hoc isDictId
 942
 943 These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
 944 Alas.
 945
 946 \begin{code}
 947 tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr
 948 tryEtaReduce bndrs body
 949   = go (reverse bndrs) body     -- Match the last binder against the outermost argument first
 950   where
 951     go (b : bs) (App fun arg) | ok_arg b arg = go bs fun        -- Loop round
 952     go []       fun           | ok_fun fun   = Just fun         -- Success!
 953     go _        _                            = Nothing          -- Failure!
 954
 955         -- Note [Eta reduction conditions]
 956     ok_fun (App fun (Type ty))
 957         | not (any (`elemVarSet` tyVarsOfType ty) bndrs)        -- Type arg must not mention a binder
 958         =  ok_fun fun
 959     ok_fun (Var fun_id)
 960         =  not (fun_id `elem` bndrs)                    -- The function must not itself be a binder
 961         && (ok_fun_id fun_id || all ok_lam bndrs)
 962     ok_fun _fun = False
 963
 964     ok_fun_id fun
 965         | isLocalId fun       = isEvaldUnfolding (idUnfolding fun)      -- Local: look at the unfolding, not the arity
 966         | isDataConWorkId fun = True
 967         | isGlobalId fun      = idArity fun > 0         -- Global: look at the arity (primops have no unfolding)
 968
 969     ok_lam v = isTyVar v || isDictId v          -- Type and dictionary lambdas: see the last point of the Note
 970
 971     ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg    -- The argument must be just the binder
 972 \end{code}
 973
 974
 975 %************************************************************************
 976 %*                                                                      *
 977                 Eta expansion
 978 %*                                                                      *
 979 %************************************************************************
 980
 981
 982 We go for:
 983    f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
 984                                  (n >= 0)
 985
 986 where (in both cases)
 987
 988         * The xi can include type variables
 989
 990         * The yi are all value variables
 991
 992         * N is a NORMAL FORM (i.e. no redexes anywhere)
 993           wanting a suitable number of extra args.
 994
 995 The biggest reason for doing this is for cases like
 996
 997         f = \x -> case x of
 998                     True  -> \y -> e1
 999                     False -> \y -> e2
1000
1001 Here we want to get the lambdas together.  A good example is the nofib
1002 program fibheaps, which gets 25% more allocation if you don't do this
1003 eta-expansion.
1004
1005 We may have to sandwich some coerces between the lambdas
1006 to make the types work.   exprEtaExpandArity looks through coerces
1007 when computing arity; and etaExpand adds the coerces as necessary when
1008 actually computing the expansion.
1009
1010 \begin{code}
1011 tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr
1012 -- Precondition: there is at least one runtime binder in the enclosing lambda's binders
1013 tryEtaExpansion dflags body = do
1014     us <- getUniquesM           -- Uniques handed to etaExpand (presumably for the new binders)
1015     return (etaExpand fun_arity us body (exprType body))
1016   where
1017     fun_arity = exprEtaExpandArity dflags body  -- Arity as computed by exprEtaExpandArity
1018 \end{code}
1019
1020
1021 %************************************************************************
1022 %*                                                                      *
1023 \subsection{Floating lets out of big lambdas}
1024 %*                                                                      *
1025 %************************************************************************
1026
1027 Note [Floating and type abstraction]
1028 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1029 Consider this:
1030         x = /\a. C e1 e2
1031 We'd like to float this to
1032         y1 = /\a. e1
1033         y2 = /\a. e2
1034         x = /\a. C (y1 a) (y2 a)
1035 for the usual reasons: we want to inline x rather vigorously.
1036
1037 You may think that this kind of thing is rare.  But in some programs it is
1038 common.  For example, if you do closure conversion you might get:
1039
1040         data a :-> b = forall e. (e -> a -> b) :$ e
1041
1042         f_cc :: forall a. a :-> a
1043         f_cc = /\a. (\e. id a) :$ ()
1044
1045 Now we really want to inline that f_cc thing so that the
1046 construction of the closure goes away.
1047
1048 So I have elaborated simplLazyBind to understand right-hand sides that look
1049 like
1050         /\ a1..an. body
1051
1052 and treat them specially. The real work is done in SimplUtils.abstractFloats,
1053 but there is quite a bit of plumbing in simplLazyBind as well.
1054
1055 The same transformation is good when there are lets in the body:
1056
1057         /\abc -> let(rec) x = e in b
1058    ==>
1059         let(rec) x' = /\abc -> let x = x' a b c in e
1060         in
1061         /\abc -> let x = x' a b c in b
1062
1063 This is good because it can turn things like:
1064
1065         let f = /\a -> letrec g = ... g ... in g
1066 into
1067         letrec g' = /\a -> ... g' a ...
1068         in
1069         let f = /\ a -> g' a
1070
1071 which is better.  In effect, it means that big lambdas don't impede
1072 let-floating.
1073
1074 This optimisation is CRUCIAL in eliminating the junk introduced by
1075 desugaring mutually recursive definitions.  Don't eliminate it lightly!
1076
1077 [May 1999]  If we do this transformation *regardless* then we can
1078 end up with some pretty silly stuff.  For example,
1079
1080         let
1081             st = /\ s -> let { x1=r1 ; x2=r2 } in ...
1082         in ..
1083 becomes
1084         let y1 = /\s -> r1
1085             y2 = /\s -> r2
1086             st = /\s -> ...[y1 s/x1, y2 s/x2]
1087         in ..
1088
1089 Unless the "..." is a WHNF there is really no point in doing this.
1090 Indeed it can make things worse.  Suppose x1 is used strictly,
1091 and is of the form
1092
1093         x1* = case f y of { (a,b) -> e }
1094
1095 If we abstract this wrt the tyvar we then can't do the case inline
1096 as we would normally do.
1097
1098 That's why the whole transformation is part of the same process that
1099 floats let-bindings and constructor arguments out of RHSs.  In particular,
1100 it is guarded by the doFloatFromRhs call in simplLazyBind.
1101
1102
1103 \begin{code}
1104 abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
1105 abstractFloats main_tvs body_env body           -- See Note [Floating and type abstraction]
1106   = ASSERT( notNull body_floats )               -- Only call this when there is something to float
1107     do  { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats  -- Thread the substitution through the floats
1108         ; return (float_binds, CoreSubst.substExpr subst body) }       -- Body now mentions the poly_ids applied to tyvars
1109   where
1110     main_tv_set = mkVarSet main_tvs
1111     body_floats = getFloats body_env            -- The floats carried by body_env
1112     empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
1113
1114     abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
1115     abstract subst (NonRec id rhs)
1116       = do { (poly_id, poly_app) <- mk_poly tvs_here id
1117            ; let poly_rhs = mkLams tvs_here rhs'
1118                  subst'   = CoreSubst.extendIdSubst subst id poly_app  -- Later floats and the body see poly_app for id
1119            ; return (subst', (NonRec poly_id poly_rhs)) }
1120       where
1121         rhs' = CoreSubst.substExpr subst rhs    -- Apply the substitution built from earlier floats
1122         tvs_here | any isCoVar main_tvs = main_tvs      -- Note [Abstract over coercions]
1123                  | otherwise
1124                  = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')   -- NB: free tyvars of the *substituted* rhs
1125
1126                 -- Abstract only over the type variables free in the rhs
1127                 -- wrt which the new binding is abstracted.  But the naive
1128                 -- approach of abstract wrt the tyvars free in the Id's type
1129                 -- fails. Consider:
1130                 --      /\ a b -> let t :: (a,b) = (e1, e2)
1131                 --                    x :: a     = fst t
1132                 --                in ...
1133                 -- Here, b isn't free in x's type, but we must nevertheless
1134                 -- abstract wrt b as well, because t's type mentions b.
1135                 -- Since t is floated too, we'd end up with the bogus:
1136                 --      poly_t = /\ a b -> (e1, e2)
1137                 --      poly_x = /\ a   -> fst (poly_t a *b*)
1138                 -- So for now we adopt the even more naive approach of
1139                 -- abstracting wrt *all* the tyvars.  We'll see if that
1140                 -- gives rise to problems.   SLPJ June 98
1141
1142     abstract subst (Rec prs)
1143        = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
1144             ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
1145                   poly_rhss = [mkLams tvs_here (CoreSubst.substExpr subst' rhs) | rhs <- rhss]        -- subst' (not subst): the group refers to itself
1146             ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
1147        where
1148          (ids,rhss) = unzip prs
1149                 -- For a recursive group, it's a bit of a pain to work out the minimal
1150                 -- set of tyvars over which to abstract:
1151                 --      /\ a b c.  let x = ...a... in
1152                 --                 letrec { p = ...x...q...
1153                 --                          q = .....p...b... } in
1154                 --                 ...
1155                 -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
1156                 -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.
1157                 -- Since it's a pain, we just use the whole set, which is always safe
1158                 --
1159                 -- If you ever want to be more selective, remember this bizarre case too:
1160                 --      x::a = x
1161                 -- Here, we must abstract 'x' over 'a'.
1162          tvs_here = main_tvs
1163
1164     mk_poly tvs_here var       -- Returns the new polymorphic Id and its application to tvs_here
1165       = do { uniq <- getUniqueM
1166            ; let  poly_name = setNameUnique (idName var) uniq           -- Keep same name
1167                   poly_ty   = mkForAllTys tvs_here (idType var) -- But new type of course
1168                   poly_id   = mkLocalId poly_name poly_ty
1169            ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
1170                 -- In the olden days, it was crucial to copy the occInfo of the original var,
1171                 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1172                 -- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
1173                 -- at already simplified code, so it doesn't matter
1174                 --
1175                 -- It's even right to retain single-occurrence or dead-var info:
1176                 -- Suppose we started with  /\a -> let x = E in B
1177                 -- where x occurs once in B. Then we transform to:
1178                 --      let x' = /\a -> E in /\a -> let x* = x' a in B
1179                 -- where x* has an INLINE prag on it.  Now, once x* is inlined,
1180                 -- the occurrences of x' will be just the occurrences originally
1181                 -- pinned on x.
1182 \end{code}
1183
1184 Note [Abstract over coercions]
1185 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1186 If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
1187 type variable a.  Rather than sort this mess out, we simply bale out and abstract
1188 wrt all the type variables if any of them are coercion variables.
1189
1190
1191 Historical note: if you use let-bindings instead of a substitution, beware of this:
1192
1193                 -- Suppose we start with:
1194                 --
1195                 --      x = /\ a -> let g = G in E
1196                 --
1197                 -- Then we'll float to get
1198                 --
1199                 --      x = let poly_g = /\ a -> G
1200                 --          in /\ a -> let g = poly_g a in E
1201                 --
1202                 -- But now the occurrence analyser will see just one occurrence
1203                 -- of poly_g, not inside a lambda, so the simplifier will
1204                 -- PreInlineUnconditionally poly_g back into g!  Back to square 1!
1205                 -- (I used to think that the "don't inline lone occurrences" stuff
1206                 --  would stop this happening, but since it's the *only* occurrence,
1207                 --  PreInlineUnconditionally kicks in first!)
1208                 --
1209                 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1210                 --           to appear many times.  (NB: mkInlineMe eliminates
1211                 --           such notes on trivial RHSs, so do it manually.)
1212
1213 %************************************************************************
1214 %*                                                                      *
1215                 prepareAlts
1216 %*                                                                      *
1217 %************************************************************************
1218
1219 prepareAlts tries these things:
1220
1221 1.  If several alternatives are identical, merge them into
1222     a single DEFAULT alternative.  I've occasionally seen this
1223     making a big difference:
1224
1225         case e of               =====>     case e of
1226           C _ -> f x                         D v -> ....v....
1227           D v -> ....v....                   DEFAULT -> f x
1228           DEFAULT -> f x
1229
1230    The point is that we merge common RHSs, at least for the DEFAULT case.
1231    [One could do something more elaborate but I've never seen it needed.]
1232    To avoid an expensive test, we just merge branches equal to the *first*
1233    alternative; this picks up the common cases
1234         a) all branches equal
1235         b) some branches equal to the DEFAULT (which occurs first)
1236
1237 2.  Case merging:
1238        case e of b {             ==>   case e of b {
1239          p1 -> rhs1                      p1 -> rhs1
1240          ...                             ...
1241          pm -> rhsm                      pm -> rhsm
1242          _  -> case b of b' {            pn -> let b'=b in rhsn
1243                      pn -> rhsn          ...
1244                      ...                 po -> let b'=b in rhso
1245                      po -> rhso          _  -> let b'=b in rhsd
1246                      _  -> rhsd
1247        }
1248
1249     which merges two cases into one when the default alternative of
1250     the outer case scrutinises the same variable as the outer case.  This
1251     transformation is called Case Merging.  It avoids scrutinising the
1252     same variable multiple times.
1253
1254
1255 The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs):
1256
1257         x | p `is` 1 -> e1
1258           | p `is` 2 -> e2
1259         ...etc...
1260
1261 where @is@ was something like
1262
1263         p `is` n = p /= (-1) && p == n
1264
1265 This gave rise to a horrible sequence of cases
1266
1267         case p of
1268           (-1) -> $j p
1269           1    -> e1
1270           DEFAULT -> $j p
1271
1272 and similarly in cascade for all the join points!
1273
1274 Note [Dead binders]
1275 ~~~~~~~~~~~~~~~~~~~~
1276 We do this *here*, looking at un-simplified alternatives, because we
1277 have to check that r doesn't mention the variables bound by the
1278 pattern in each alternative, so the binder-info is rather useful.
1279
1280 \begin{code}
1281 prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
1282 prepareAlts env scrut case_bndr' alts
1283   = do  { dflags <- getDOptsSmpl
1284         ; alts <- combineIdenticalAlts case_bndr' alts         -- Transformation (1): merge identical branches
1285
1286         ; let (alts_wo_default, maybe_deflt) = findDefault alts
1287               alt_cons = [con | (con,_,_) <- alts_wo_default]
1288               imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
1289                 -- "imposs_deflt_cons" are handled
1290                 --   EITHER by the context,
1291                 --   OR by a non-DEFAULT branch in this case expression.
1292
1293         ; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app       -- May merge a nested case or fill in a known constructor
1294                                          imposs_deflt_cons maybe_deflt
1295
1296         ; let trimmed_alts = filterOut impossible_alt alts_wo_default  -- Drop alternatives that cannot match
1297               merged_alts = mergeAlts trimmed_alts default_alts
1298                 -- We need the mergeAlts in case the new default_alt
1299                 -- has turned into a constructor alternative.
1300                 -- The merge keeps the inner DEFAULT at the front, if there is one
1301                 -- and interleaves the alternatives in the right order
1302
1303         ; return (imposs_deflt_cons, merged_alts) }    -- (cons the DEFAULT cannot match, the prepared alternatives)
1304   where
1305     mb_tc_app = splitTyConApp_maybe (idType case_bndr')
1306     Just (_, inst_tys) = mb_tc_app      -- Lazy binding; inst_tys is used only for a DataAlt in impossible_alt
1307
1308     imposs_cons = case scrut of         -- Constructors ruled out by the context (from the scrutinee's unfolding)
1309                     Var v -> otherCons (idUnfolding v)
1310                     other -> []
1311
1312     impossible_alt :: CoreAlt -> Bool
1313     impossible_alt (con, _, _) | con `elem` imposs_cons = True         -- Already ruled out by the context
1314     impossible_alt (DataAlt con, _, _) = dataConCannotMatch inst_tys con       -- Ruled out by the scrutinee's type arguments
1315     impossible_alt alt                 = False
1316
1317
1318 --------------------------------------------------
1319 --      1. Merge identical branches
1320 --------------------------------------------------
1321 combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
1322         -- Merges into one DEFAULT those alternatives whose RHS equals that of the *first* alternative
1323 combineIdenticalAlts case_bndr alts@((con1,bndrs1,rhs1) : con_alts)
1324   | all isDeadBinder bndrs1,                    -- Remember the default
1325     length filtered_alts < length con_alts      -- alternative comes first
1326         -- Also Note [Dead binders]
1327   = do  { tick (AltMerge case_bndr)
1328         ; return ((DEFAULT, [], rhs1) : filtered_alts) }       -- The first alternative becomes the DEFAULT
1329   where
1330     filtered_alts        = filter keep con_alts
1331     keep (con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)      -- Keep unless dead binders and same RHS as the first
1332
1333 combineIdenticalAlts case_bndr alts = return alts      -- Nothing to merge: return the alternatives unchanged
1334
1335 -------------------------------------------------------------------------
1336 --                      Prepare the default alternative
1337 -------------------------------------------------------------------------
1338 prepareDefault :: DynFlags
1339                -> SimplEnv
1340                -> OutId         -- Case binder; need just for its type. Note that as an
1341                                 --   OutId, it has maximum information; this is important.
1342                                 --   Test simpl013 is an example
1343                -> Maybe (TyCon, [Type]) -- Type of scrutinee, decomposed
1344                -> [AltCon]      -- These cons can't happen when matching the default
1345                -> Maybe InExpr  -- Rhs
1346                -> SimplM [InAlt]        -- Still unsimplified
1347                                         -- We use a list because it's what mergeAlts expects,
1348                                         -- And becuase case-merging can cause many to show up
1349
------- Merge nested cases ----------
-- The default RHS is itself a case on the thing the outer case has already
-- scrutinised, so the inner alternatives can be lifted into the outer case.
prepareDefault dflags env outer_bndr _ imposs_cons (Just deflt_rhs)
  | dopt Opt_CaseMerge dflags
  , Case (Var inner_scrut) inner_bndr _ inner_alts <- deflt_rhs
  , DoneId inner_scrut_out <- substId env inner_scrut
        -- inner_scrut is an InId whereas outer_bndr is an OutId, so it has
        -- to go through the substitution before the two can be compared
  , inner_scrut_out == outer_bndr
        -- NB: because of the substId, if the outer scrutinee was a variable
        --     and the inner scrutinee is that same variable, then
        --     inner_scrut_out will be outer_bndr (courtesy of simplCaseBinder)
  = do  { tick (CaseMerge outer_bndr)

        ; let still_possible (con, _, _) = con `notElem` imposs_cons
              rebind (con, args, rhs)    = (con, args, bindCaseBndr inner_bndr (Var outer_bndr) rhs)
        ; return (map rebind (filter still_possible inner_alts))
                -- NB: alternatives for the imposs_cons are dropped.  Example:
                --      case x of
                --        A -> e1
                --        DEFAULT -> case x of
                --                      A -> e2
                --                      B -> e3
                -- After merging, e1 (not e2) must remain the value for A,
                -- so the inner A alternative has to go.
        }
        -- Warning: do not call prepareAlts recursively here!
        -- First, it would be pointless: the inner alts have already been
        -- through mkCase, so their default cannot contain a case.
        -- Second, it would loop forever, because the bindCaseBndr used in
        -- 'rebind' may itself put a case into the DEFAULT branch.


--------- Fill in known constructor -----------
-- The scrutinee has an algebraic data type: work out which constructors the
-- default can still match, and specialise (or drop) the default accordingly.
prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
  | isAlgTyCon tycon            -- A data type, tuple, or unboxed tuple.
  , not (isNewTyCon tycon)      -- A newtype can turn up when we are just doing an eval:
                                --      case x of { DEFAULT -> e }
                                -- and we do not want to fill in a default for those.
  , Just all_cons <- tyConDataCons_maybe tycon
  , not (null all_cons)         -- A tricky corner case.  GHC allows data types with no
                                -- constructors; a case on one has at most a default
                                -- alternative.  We keep that alternative, because the
                                -- invariant is that there is always one alternative.
                                -- It is more convenient to leave
                                --      case x of { DEFAULT -> e }
                                -- alone than to turn it into
                                --      error "case cant match"
                                -- which would be quite legitimate, but the corner is too
                                -- obscure to be worth spending code on.
  , let excluded_cons  = [dc | DataAlt dc <- imposs_cons]   -- We now know it's a data type
        cannot_match dc = dc `elem` excluded_cons || dataConCannotMatch inst_tys dc
  = case filter (not . cannot_match) all_cons of
        []   -> return []       -- The default cannot match anything:
                                -- drop it altogether

        [dc] ->         -- Exactly one constructor is left, so name it explicitly
                 do { tick (FillInCaseDefault case_bndr)
                    ; uniqs <- getUniquesM
                    ; let (ex_tvs, co_tvs, arg_ids) = dataConRepInstPat uniqs dc inst_tys
                    ; return [(DataAlt dc, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }

        _    -> return [(DEFAULT, [], deflt_rhs)]   -- Two or more remain: keep the default

--------- Catch-all cases -----------
-- Nothing clever applies: keep the default as it is, or produce no
-- alternative at all when there was no default branch to begin with.
prepareDefault _ _ _ _ _ mb_deflt
  = return $ case mb_deflt of
        Just deflt_rhs -> [(DEFAULT, [], deflt_rhs)]
        Nothing        -> []    -- No default branch
1422 \end{code}
1423
1424
1425
1426 =================================================================================
1427
1428 mkCase tries these things
1429
1430 1.  Eliminate the case altogether if possible
1431
1432 2.  Case-identity:
1433
1434         case e of               ===> e
1435                 True  -> True;
1436                 False -> False
1437
1438     and similar friends.
1439
1440
1441 \begin{code}
-- mkCase builds a case expression from an already-simplified scrutinee,
-- case binder, result type and alternatives, trying two clean-ups first:
--
--   1. No alternatives: return a call to rUNTIME_ERROR_ID at the result type.
--   2. Identity case:   every alternative merely rebuilds the scrutinee
--                       (possibly under a cast), so return the scrutinee
--                       itself (with that cast re-applied).
--
-- If neither applies the result is simply (Case scrut bndr ty alts).
mkCase :: OutExpr -> OutId -> OutType
       -> [OutAlt]              -- Increasing order
       -> SimplM OutExpr

--------------------------------------------------
--      1. Check for empty alternatives
--------------------------------------------------

-- This isn't strictly an error.  It's possible that the simplifier might "see"
-- that an inner case has no accessible alternatives before it "sees" that the
-- entire branch of an outer case is inaccessible.  So we simply
-- put an error case here instead
mkCase scrut case_bndr ty []
  = pprTrace "mkCase: null alts" (ppr case_bndr <+> ppr scrut) $
    return (mkApps (Var rUNTIME_ERROR_ID)
                   [Type ty, Lit (mkStringLit "Impossible alternative")])


--------------------------------------------------
--      2. Identity case
--------------------------------------------------

mkCase scrut case_bndr _ty alts  -- Identity case
  | all identity_alt alts
  = do tick (CaseIdentity case_bndr)
       return (re_cast scrut)
  where
    -- An alternative is an identity if, ignoring an outermost cast, its RHS
    -- is just the scrutinee again, AND that cast can safely be moved
    -- outside the case.
    identity_alt (con, args, rhs) = cast_is_closed args rhs
                                 && check_eq con args (de_cast rhs)

        -- re_cast re-applies the coercion around the scrutinee, i.e. outside
        -- the alternative.  So the coercion must not mention any variable
        -- bound by the alternative's pattern (for example an existential
        -- type variable or a coercion variable), otherwise that variable
        -- would escape its scope.
    cast_is_closed args (Cast _ co) = let co_vars = tyVarsOfType co
                                      in not (any (`elemVarSet` co_vars) args)
    cast_is_closed _    _           = True

    check_eq DEFAULT       _    (Var v)   = v == case_bndr
    check_eq (LitAlt lit') _    (Lit lit) = lit == lit'
    check_eq (DataAlt con) args rhs       = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
                                         || rhs `cheapEqExpr` Var case_bndr
    check_eq _             _    _         = False

    arg_tys = map Type (tyConAppArgs (idType case_bndr))

        -- We've seen this:
        --      case e of x { _ -> x `cast` c }
        -- And we definitely want to eliminate this case, to give
        --      e `cast` c
        -- So we throw away the cast from the RHS, and reconstruct
        -- it at the other end.  All the RHS casts must be the same
        -- if (all identity_alt alts) holds.
        --
        -- Don't worry about nested casts, because the simplifier combines them
    de_cast (Cast e _) = e
    de_cast e          = e

        -- 'head' is safe: the empty-alternatives equation above has
        -- already dealt with (alts == [])
    re_cast scrut = case head alts of
                        (_,_,Cast _ co) -> Cast scrut co
                        _               -> scrut



--------------------------------------------------
--      Catch-all
--------------------------------------------------
mkCase scrut bndr ty alts = return (Case scrut bndr ty alts)
1501 \end{code}
1502
1503
1504 When adding auxiliary bindings for the case binder, it's worth checking if
1505 it's dead, because it often is, and occasionally these mkCase transformations
1506 cascade rather nicely.
1507
1508 \begin{code}
-- Bind 'bndr' to 'rhs' around 'body' using bindNonRec, unless the binder is
-- dead, in which case no binding is needed and 'body' is returned unchanged.
bindCaseBndr :: Id -> CoreExpr -> CoreExpr -> CoreExpr
bindCaseBndr bndr rhs body =
  if isDeadBinder bndr
    then body
    else bindNonRec bndr rhs body
1512 \end{code}