compiler/simplCore/SimplUtils.lhs

   1 %
   2 % (c) The AQUA Project, Glasgow University, 1993-1998
   3 %
   4 \section[SimplUtils]{The simplifier utilities}
   5
   6 \begin{code}
   7 module SimplUtils (
   8         mkLam, mkCase,
   9
  10         -- Inlining,
  11         preInlineUnconditionally, postInlineUnconditionally, activeInline, activeRule,
  12         inlineMode,
  13
  14         -- The continuation type
  15         SimplCont(..), DupFlag(..), LetRhsFlag(..),
  16         contIsDupable, contResultType,
  17         countValArgs, countArgs, pushContArgs,
  18         mkBoringStop, mkLazyArgStop, mkRhsStop, contIsRhs, contIsRhsOrArg,
  19         getContArgs, interestingCallContext, interestingArgContext,
  20         interestingArg, isStrictType
  21
  22     ) where
  23
  24 #include "HsVersions.h"
  25
  26 import SimplEnv
  27 import DynFlags         ( SimplifierSwitch(..), SimplifierMode(..),
  28                           DynFlag(..), dopt )
  29 import StaticFlags      ( opt_UF_UpdateInPlace, opt_SimplNoPreInlining,
  30                           opt_RulesOff )
  31 import CoreSyn
  32 import CoreFVs          ( exprFreeVars )
  33 import CoreUtils        ( cheapEqExpr, exprType, exprIsTrivial,
  34                           etaExpand, exprEtaExpandArity, bindNonRec, mkCoerce2,
  35                           findDefault, exprOkForSpeculation, exprIsHNF, mergeAlts
  36                         )
  37 import Literal          ( mkStringLit )
  38 import CoreUnfold       ( smallEnoughToInline )
  39 import MkId             ( eRROR_ID )
  40 import Id               ( Id, idType, isDataConWorkId, idOccInfo, isDictId,
  41                           isDeadBinder, idNewDemandInfo, isExportedId,
  42                           idUnfolding, idNewStrictness, idInlinePragma, idHasRules
  43                         )
  44 import NewDemand        ( isStrictDmd, isBotRes, splitStrictSig )
  45 import SimplMonad
  46 import Type             ( Type, splitFunTys, dropForAlls, isStrictType,
  47                           splitTyConApp_maybe, tyConAppArgs
  48                         )
  49 import TyCon            ( tyConDataCons_maybe )
  50 import DataCon          ( dataConRepArity )
  51 import VarSet
  52 import BasicTypes       ( TopLevelFlag(..), isNotTopLevel, OccInfo(..), isLoopBreaker, isOneOcc,
  53                           Activation, isAlwaysActive, isActive )
  54 import Util             ( lengthExceeds )
  55 import Outputable
  56 \end{code}
  57
  58
  59 %************************************************************************
  60 %*                                                                      *
  61 \subsection{The continuation data type}
  62 %*                                                                      *
  63 %************************************************************************
  64
  65 \begin{code}
  66 data SimplCont          -- Strict contexts
  67   = Stop     OutType    -- Type of the result
  68              LetRhsFlag
  69              Bool       -- True <=> There is something interesting about
  70                         --          the context, and hence the inliner
  71                         --          should be a bit keener (see interestingCallContext)
  72                         -- Two cases:
  73                         -- (a) This is the RHS of a thunk whose type suggests
  74                         --     that update-in-place would be possible
  75                         -- (b) This is an argument of a function that has RULES
  76                         --     Inlining the call might allow the rule to fire
  77
  78   | CoerceIt OutType                    -- The To-type, simplified
  79              SimplCont
  80
  81   | InlinePlease                        -- This continuation makes a function very
  82              SimplCont                  -- keen to inline itelf
  83
  84   | ApplyTo  DupFlag
  85              InExpr SimplEnv            -- The argument, as yet unsimplified,
  86              SimplCont                  -- and its environment
  87
  88   | Select   DupFlag
  89              InId [InAlt] SimplEnv      -- The case binder, alts, and subst-env
  90              SimplCont
  91
  92   | ArgOf    LetRhsFlag         -- An arbitrary strict context: the argument
  93                                 --      of a strict function, or a primitive-arg fn
  94                                 --      or a PrimOp
  95                                 -- No DupFlag, because we never duplicate it
  96              OutType            -- arg_ty: type of the argument itself
  97              OutType            -- cont_ty: the type of the expression being sought by the context
  98                                 --      f (error "foo") ==> coerce t (error "foo")
  99                                 -- when f is strict
 100                                 -- We need to know the type t, to which to coerce.
 101
 102              (SimplEnv -> OutExpr -> SimplM FloatsWithExpr)     -- What to do with the result
 103                                 -- The result expression in the OutExprStuff has type cont_ty
 104
 105 data LetRhsFlag = AnArg         -- It's just an argument not a let RHS
 106                 | AnRhs         -- It's the RHS of a let (so please float lets out of big lambdas)
 107
 108 instance Outputable LetRhsFlag where
 109   ppr AnArg = ptext SLIT("arg")
 110   ppr AnRhs = ptext SLIT("rhs")
 111
 112 instance Outputable SimplCont where
 113   ppr (Stop ty is_rhs _)             = ptext SLIT("Stop") <> brackets (ppr is_rhs) <+> ppr ty
 114   ppr (ApplyTo dup arg se cont)      = (ptext SLIT("ApplyTo") <+> ppr dup <+> ppr arg) $$ ppr cont
 115   ppr (ArgOf _ _ _ _)                = ptext SLIT("ArgOf...")
 116   ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$
 117                                        (nest 4 (ppr alts)) $$ ppr cont
 118   ppr (CoerceIt ty cont)             = (ptext SLIT("CoerceIt") <+> ppr ty) $$ ppr cont
 119   ppr (InlinePlease cont)            = ptext SLIT("InlinePlease") $$ ppr cont
 120
 121 data DupFlag = OkToDup | NoDup
 122
 123 instance Outputable DupFlag where
 124   ppr OkToDup = ptext SLIT("ok")
 125   ppr NoDup   = ptext SLIT("nodup")
 126
 127
 128 -------------------
 129 mkBoringStop :: OutType -> SimplCont
 130 mkBoringStop ty = Stop ty AnArg False
 131
 132 mkLazyArgStop :: OutType -> Bool -> SimplCont
 133 mkLazyArgStop ty has_rules = Stop ty AnArg (canUpdateInPlace ty || has_rules)
 134
 135 mkRhsStop :: OutType -> SimplCont
 136 mkRhsStop ty = Stop ty AnRhs (canUpdateInPlace ty)
 137
 138 contIsRhs :: SimplCont -> Bool
 139 contIsRhs (Stop _ AnRhs _)    = True
 140 contIsRhs (ArgOf AnRhs _ _ _) = True
 141 contIsRhs other               = False
 142
 143 contIsRhsOrArg (Stop _ _ _)    = True
 144 contIsRhsOrArg (ArgOf _ _ _ _) = True
 145 contIsRhsOrArg other           = False
 146
 147 -------------------
 148 contIsDupable :: SimplCont -> Bool
 149 contIsDupable (Stop _ _ _)               = True
 150 contIsDupable (ApplyTo  OkToDup _ _ _)   = True
 151 contIsDupable (Select   OkToDup _ _ _ _) = True
 152 contIsDupable (CoerceIt _ cont)          = contIsDupable cont
 153 contIsDupable (InlinePlease cont)        = contIsDupable cont
 154 contIsDupable other                      = False
 155
 156 -------------------
 157 discardableCont :: SimplCont -> Bool
 158 discardableCont (Stop _ _ _)        = False
 159 discardableCont (CoerceIt _ cont)   = discardableCont cont
 160 discardableCont (InlinePlease cont) = discardableCont cont
 161 discardableCont other               = True
 162
 163 discardCont :: SimplCont        -- A continuation, expecting
 164             -> SimplCont        -- Replace the continuation with a suitable coerce
 165 discardCont cont = case cont of
 166                      Stop to_ty is_rhs _ -> cont
 167                      other               -> CoerceIt to_ty (mkBoringStop to_ty)
 168                  where
 169                    to_ty = contResultType cont
 170
 171 -------------------
 172 contResultType :: SimplCont -> OutType
 173 contResultType (Stop to_ty _ _)      = to_ty
 174 contResultType (ArgOf _ _ to_ty _)   = to_ty
 175 contResultType (ApplyTo _ _ _ cont)  = contResultType cont
 176 contResultType (CoerceIt _ cont)     = contResultType cont
 177 contResultType (InlinePlease cont)   = contResultType cont
 178 contResultType (Select _ _ _ _ cont) = contResultType cont
 179
 180 -------------------
 181 countValArgs :: SimplCont -> Int
 182 countValArgs (ApplyTo _ (Type ty) se cont) = countValArgs cont
 183 countValArgs (ApplyTo _ val_arg   se cont) = 1 + countValArgs cont
 184 countValArgs other                         = 0
 185
 186 countArgs :: SimplCont -> Int
 187 countArgs (ApplyTo _ arg se cont) = 1 + countArgs cont
 188 countArgs other                   = 0
 189
 190 -------------------
 191 pushContArgs :: SimplEnv -> [OutArg] -> SimplCont -> SimplCont
 192 -- Pushes args with the specified environment
 193 pushContArgs env []           cont = cont
 194 pushContArgs env (arg : args) cont = ApplyTo NoDup arg env (pushContArgs env args cont)
 195 \end{code}
 196
 197
 198 \begin{code}
 199 getContArgs :: SwitchChecker
 200             -> OutId -> SimplCont
 201             -> ([(InExpr, SimplEnv, Bool)],     -- Arguments; the Bool is true for strict args
 202                 SimplCont,                      -- Remaining continuation
 203                 Bool)                           -- Whether we came across an InlineCall
 204 -- getContArgs id k = (args, k', inl)
 205 --      args are the leading ApplyTo items in k
 206 --      (i.e. outermost comes first)
 207 --      augmented with demand info from the functionn
 208 getContArgs chkr fun orig_cont
 209   = let
 210                 -- Ignore strictness info if the no-case-of-case
 211                 -- flag is on.  Strictness changes evaluation order
 212                 -- and that can change full laziness
 213         stricts | switchIsOn chkr NoCaseOfCase = vanilla_stricts
 214                 | otherwise                    = computed_stricts
 215     in
 216     go [] stricts False orig_cont
 217   where
 218     ----------------------------
 219
 220         -- Type argument
 221     go acc ss inl (ApplyTo _ arg@(Type _) se cont)
 222         = go ((arg,se,False) : acc) ss inl cont
 223                 -- NB: don't bother to instantiate the function type
 224
 225         -- Value argument
 226     go acc (s:ss) inl (ApplyTo _ arg se cont)
 227         = go ((arg,se,s) : acc) ss inl cont
 228
 229         -- An Inline continuation
 230     go acc ss inl (InlinePlease cont)
 231         = go acc ss True cont
 232
 233         -- We're run out of arguments, or else we've run out of demands
 234         -- The latter only happens if the result is guaranteed bottom
 235         -- This is the case for
 236         --      * case (error "hello") of { ... }
 237         --      * (error "Hello") arg
 238         --      * f (error "Hello") where f is strict
 239         --      etc
 240         -- Then, especially in the first of these cases, we'd like to discard
 241         -- the continuation, leaving just the bottoming expression.  But the
 242         -- type might not be right, so we may have to add a coerce.
 243     go acc ss inl cont
 244         | null ss && discardableCont cont = (reverse acc, discardCont cont, inl)
 245         | otherwise                       = (reverse acc, cont,             inl)
 246
 247     ----------------------------
 248     vanilla_stricts, computed_stricts :: [Bool]
 249     vanilla_stricts  = repeat False
 250     computed_stricts = zipWith (||) fun_stricts arg_stricts
 251
 252     ----------------------------
 253     (val_arg_tys, _) = splitFunTys (dropForAlls (idType fun))
 254     arg_stricts      = map isStrictType val_arg_tys ++ repeat False
 255         -- These argument types are used as a cheap and cheerful way to find
 256         -- unboxed arguments, which must be strict.  But it's an InType
 257         -- and so there might be a type variable where we expect a function
 258         -- type (the substitution hasn't happened yet).  And we don't bother
 259         -- doing the type applications for a polymorphic function.
 260         -- Hence the splitFunTys*IgnoringForAlls*
 261
 262     ----------------------------
 263         -- If fun_stricts is finite, it means the function returns bottom
 264         -- after that number of value args have been consumed
 265         -- Otherwise it's infinite, extended with False
 266     fun_stricts
 267       = case splitStrictSig (idNewStrictness fun) of
 268           (demands, result_info)
 269                 | not (demands `lengthExceeds` countValArgs orig_cont)
 270                 ->      -- Enough args, use the strictness given.
 271                         -- For bottoming functions we used to pretend that the arg
 272                         -- is lazy, so that we don't treat the arg as an
 273                         -- interesting context.  This avoids substituting
 274                         -- top-level bindings for (say) strings into
 275                         -- calls to error.  But now we are more careful about
 276                         -- inlining lone variables, so its ok (see SimplUtils.analyseCont)
 277                    if isBotRes result_info then
 278                         map isStrictDmd demands         -- Finite => result is bottom
 279                    else
 280                         map isStrictDmd demands ++ vanilla_stricts
 281
 282           other -> vanilla_stricts      -- Not enough args, or no strictness
 283
 284 -------------------
 285 interestingArg :: OutExpr -> Bool
 286         -- An argument is interesting if it has *some* structure
 287         -- We are here trying to avoid unfolding a function that
 288         -- is applied only to variables that have no unfolding
 289         -- (i.e. they are probably lambda bound): f x y z
 290         -- There is little point in inlining f here.
 291 interestingArg (Var v)           = hasSomeUnfolding (idUnfolding v)
 292                                         -- Was: isValueUnfolding (idUnfolding v')
 293                                         -- But that seems over-pessimistic
 294                                  || isDataConWorkId v
 295                                         -- This accounts for an argument like
 296                                         -- () or [], which is definitely interesting
 297 interestingArg (Type _)          = False
 298 interestingArg (App fn (Type _)) = interestingArg fn
 299 interestingArg (Note _ a)        = interestingArg a
 300 interestingArg other             = True
 301         -- Consider     let x = 3 in f x
 302         -- The substitution will contain (x -> ContEx 3), and we want to
 303         -- to say that x is an interesting argument.
 304         -- But consider also (\x. f x y) y
 305         -- The substitution will contain (x -> ContEx y), and we want to say
 306         -- that x is not interesting (assuming y has no unfolding)
 307 \end{code}
 308
 309 Comment about interestingCallContext
 310 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 311 We want to avoid inlining an expression where there can't possibly be
 312 any gain, such as in an argument position.  Hence, if the continuation
 313 is interesting (eg. a case scrutinee, application etc.) then we
 314 inline, otherwise we don't.
 315
 316 Previously some_benefit used to return True only if the variable was
 317 applied to some value arguments.  This didn't work:
 318
 319         let x = _coerce_ (T Int) Int (I# 3) in
 320         case _coerce_ Int (T Int) x of
 321                 I# y -> ....
 322
 323 we want to inline x, but can't see that it's a constructor in a case
 324 scrutinee position, and some_benefit is False.
 325
 326 Another example:
 327
 328 dMonadST = _/\_ t -> :Monad (g1 _@_ t, g2 _@_ t, g3 _@_ t)
 329
 330 ....  case dMonadST _@_ x0 of (a,b,c) -> ....
 331
 332 we'd really like to inline dMonadST here, but we *don't* want to
 333 inline if the case expression is just
 334
 335         case x of y { DEFAULT -> ... }
 336
 337 since we can just eliminate this case instead (x is in WHNF).  Similar
 338 applies when x is bound to a lambda expression.  Hence
 339 contIsInteresting looks for case expressions with just a single
 340 default case.
 341
 342 \begin{code}
 343 interestingCallContext :: Bool          -- False <=> no args at all
 344                        -> Bool          -- False <=> no value args
 345                        -> SimplCont -> Bool
 346         -- The "lone-variable" case is important.  I spent ages
 347         -- messing about with unsatisfactory varaints, but this is nice.
 348         -- The idea is that if a variable appear all alone
 349         --      as an arg of lazy fn, or rhs    Stop
 350         --      as scrutinee of a case          Select
 351         --      as arg of a strict fn           ArgOf
 352         -- then we should not inline it (unless there is some other reason,
 353         -- e.g. is is the sole occurrence).  We achieve this by making
 354         -- interestingCallContext return False for a lone variable.
 355         --
 356         -- Why?  At least in the case-scrutinee situation, turning
 357         --      let x = (a,b) in case x of y -> ...
 358         -- into
 359         --      let x = (a,b) in case (a,b) of y -> ...
 360         -- and thence to
 361         --      let x = (a,b) in let y = (a,b) in ...
 362         -- is bad if the binding for x will remain.
 363         --
 364         -- Another example: I discovered that strings
 365         -- were getting inlined straight back into applications of 'error'
 366         -- because the latter is strict.
 367         --      s = "foo"
 368         --      f = \x -> ...(error s)...
 369
 370         -- Fundamentally such contexts should not ecourage inlining because
 371         -- the context can ``see'' the unfolding of the variable (e.g. case or a RULE)
 372         -- so there's no gain.
 373         --
 374         -- However, even a type application or coercion isn't a lone variable.
 375         -- Consider
 376         --      case $fMonadST @ RealWorld of { :DMonad a b c -> c }
 377         -- We had better inline that sucker!  The case won't see through it.
 378         --
 379         -- For now, I'm treating treating a variable applied to types
 380         -- in a *lazy* context "lone". The motivating example was
 381         --      f = /\a. \x. BIG
 382         --      g = /\a. \y.  h (f a)
 383         -- There's no advantage in inlining f here, and perhaps
 384         -- a significant disadvantage.  Hence some_val_args in the Stop case
 385
 386 interestingCallContext some_args some_val_args cont
 387   = interesting cont
 388   where
 389     interesting (InlinePlease _)         = True
 390     interesting (Select _ _ _ _ _)       = some_args
 391     interesting (ApplyTo _ _ _ _)        = True -- Can happen if we have (coerce t (f x)) y
 392                                                 -- Perhaps True is a bit over-keen, but I've
 393                                                 -- seen (coerce f) x, where f has an INLINE prag,
 394                                                 -- So we have to give some motivaiton for inlining it
 395     interesting (ArgOf _ _ _ _)          = some_val_args
 396     interesting (Stop ty _ interesting)  = some_val_args && interesting
 397     interesting (CoerceIt _ cont)        = interesting cont
 398         -- If this call is the arg of a strict function, the context
 399         -- is a bit interesting.  If we inline here, we may get useful
 400         -- evaluation information to avoid repeated evals: e.g.
 401         --      x + (y * z)
 402         -- Here the contIsInteresting makes the '*' keener to inline,
 403         -- which in turn exposes a constructor which makes the '+' inline.
 404         -- Assuming that +,* aren't small enough to inline regardless.
 405         --
 406         -- It's also very important to inline in a strict context for things
 407         -- like
 408         --              foldr k z (f x)
 409         -- Here, the context of (f x) is strict, and if f's unfolding is
 410         -- a build it's *great* to inline it here.  So we must ensure that
 411         -- the context for (f x) is not totally uninteresting.
 412
 413
 414 -------------------
 415 interestingArgContext :: Id -> SimplCont -> Bool
 416 -- If the argument has form (f x y), where x,y are boring,
 417 -- and f is marked INLINE, then we don't want to inline f.
 418 -- But if the context of the argument is
 419 --      g (f x y)
 420 -- where g has rules, then we *do* want to inline f, in case it
 421 -- exposes a rule that might fire.  Similarly, if the context is
 422 --      h (g (f x x))
 423 -- where h has rules, then we do want to inline f.
 424 -- The interesting_arg_ctxt flag makes this happen; if it's
 425 -- set, the inliner gets just enough keener to inline f
 426 -- regardless of how boring f's arguments are, if it's marked INLINE
 427 --
 428 -- The alternative would be to *always* inline an INLINE function,
 429 -- regardless of how boring its context is; but that seems overkill
 430 -- For example, it'd mean that wrapper functions were always inlined
 431 interestingArgContext fn cont
 432   = idHasRules fn || go cont
 433   where
 434     go (InlinePlease c)       = go c
 435     go (Select {})            = False
 436     go (ApplyTo {})           = False
 437     go (ArgOf {})             = True
 438     go (CoerceIt _ c)         = go c
 439     go (Stop _ _ interesting) = interesting
 440
 441 -------------------
 442 canUpdateInPlace :: Type -> Bool
 443 -- Consider   let x = <wurble> in ...
 444 -- If <wurble> returns an explicit constructor, we might be able
 445 -- to do update in place.  So we treat even a thunk RHS context
 446 -- as interesting if update in place is possible.  We approximate
 447 -- this by seeing if the type has a single constructor with a
 448 -- small arity.  But arity zero isn't good -- we share the single copy
 449 -- for that case, so no point in sharing.
 450
 451 canUpdateInPlace ty
 452   | not opt_UF_UpdateInPlace = False
 453   | otherwise
 454   = case splitTyConApp_maybe ty of
 455         Nothing         -> False
 456         Just (tycon, _) -> case tyConDataCons_maybe tycon of
 457                                 Just [dc]  -> arity == 1 || arity == 2
 458                                            where
 459                                               arity = dataConRepArity dc
 460                                 other -> False
 461 \end{code}
 462
 463
 464
 465 %************************************************************************
 466 %*                                                                      *
 467 \subsection{Decisions about inlining}
 468 %*                                                                      *
 469 %************************************************************************
 470
 471 Inlining is controlled partly by the SimplifierMode switch.  This has two
 472 settings:
 473
 474         SimplGently     (a) Simplifying before specialiser/full laziness
 475                         (b) Simplifiying inside INLINE pragma
 476                         (c) Simplifying the LHS of a rule
 477                         (d) Simplifying a GHCi expression or Template
 478                                 Haskell splice
 479
 480         SimplPhase n    Used at all other times
 481
 482 The key thing about SimplGently is that it does no call-site inlining.
 483 Before full laziness we must be careful not to inline wrappers,
 484 because doing so inhibits floating
 485     e.g. ...(case f x of ...)...
 486     ==> ...(case (case x of I# x# -> fw x#) of ...)...
 487     ==> ...(case x of I# x# -> case fw x# of ...)...
 488 and now the redex (f x) isn't floatable any more.
 489
 490 The no-inlining thing is also important for Template Haskell.  You might be
 491 compiling in one-shot mode with -O2; but when TH compiles a splice before
 492 running it, we don't want to use -O2.  Indeed, we don't want to inline
 493 anything, because the byte-code interpreter might get confused about
 494 unboxed tuples and suchlike.
 495
 496 INLINE pragmas
 497 ~~~~~~~~~~~~~~
 498 SimplGently is also used as the mode to simplify inside an InlineMe note.
 499
 500 \begin{code}
 501 inlineMode :: SimplifierMode
 502 inlineMode = SimplGently
 503 \end{code}
 504
 505 It really is important to switch off inlinings inside such
 506 expressions.  Consider the following example
 507
 508         let f = \pq -> BIG
 509         in
 510         let g = \y -> f y y
 511             {-# INLINE g #-}
 512         in ...g...g...g...g...g...
 513
 514 Now, if that's the ONLY occurrence of f, it will be inlined inside g,
 515 and thence copied multiple times when g is inlined.
 516
 517
 518 This function may be inlinined in other modules, so we
 519 don't want to remove (by inlining) calls to functions that have
 520 specialisations, or that may have transformation rules in an importing
 521 scope.
 522
 523 E.g.    {-# INLINE f #-}
 524                 f x = ...g...
 525
 526 and suppose that g is strict *and* has specialisations.  If we inline
 527 g's wrapper, we deny f the chance of getting the specialised version
 528 of g when f is inlined at some call site (perhaps in some other
 529 module).
 530
 531 It's also important not to inline a worker back into a wrapper.
 532 A wrapper looks like
 533         wraper = inline_me (\x -> ...worker... )
 534 Normally, the inline_me prevents the worker getting inlined into
 535 the wrapper (initially, the worker's only call site!).  But,
 536 if the wrapper is sure to be called, the strictness analyser will
 537 mark it 'demanded', so when the RHS is simplified, it'll get an ArgOf
 538 continuation.  That's why the keep_inline predicate returns True for
 539 ArgOf continuations.  It shouldn't do any harm not to dissolve the
 540 inline-me note under these circumstances.
 541
 542 Note that the result is that we do very little simplification
 543 inside an InlineMe.
 544
 545         all xs = foldr (&&) True xs
 546         any p = all . map p  {-# INLINE any #-}
 547
 548 Problem: any won't get deforested, and so if it's exported and the
 549 importer doesn't use the inlining, (eg passes it as an arg) then we
 550 won't get deforestation at all.  We havn't solved this problem yet!
 551
 552
 553 preInlineUnconditionally
 554 ~~~~~~~~~~~~~~~~~~~~~~~~
 555 @preInlineUnconditionally@ examines a bndr to see if it is used just
 556 once in a completely safe way, so that it is safe to discard the
 557 binding inline its RHS at the (unique) usage site, REGARDLESS of how
 558 big the RHS might be.  If this is the case we don't simplify the RHS
 559 first, but just inline it un-simplified.
 560
 561 This is much better than first simplifying a perhaps-huge RHS and then
 562 inlining and re-simplifying it.  Indeed, it can be at least quadratically
 563 better.  Consider
 564
 565         x1 = e1
 566         x2 = e2[x1]
 567         x3 = e3[x2]
 568         ...etc...
 569         xN = eN[xN-1]
 570
 571 We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc.
 572 This can happen with cascades of functions too:
 573
 574         f1 = \x1.e1
 575         f2 = \xs.e2[f1]
 576         f3 = \xs.e3[f3]
 577         ...etc...
 578
 579 THE MAIN INVARIANT is this:
 580
 581         ----  preInlineUnconditionally invariant -----
 582    IF preInlineUnconditionally chooses to inline x = <rhs>
 583    THEN doing the inlining should not change the occurrence
 584         info for the free vars of <rhs>
 585         ----------------------------------------------
 586
 587 For example, it's tempting to look at trivial binding like
 588         x = y
 589 and inline it unconditionally.  But suppose x is used many times,
 590 but this is the unique occurrence of y.  Then inlining x would change
 591 y's occurrence info, which breaks the invariant.  It matters: y
 592 might have a BIG rhs, which will now be dup'd at every occurrenc of x.
 593
 594
 595 Evne RHSs labelled InlineMe aren't caught here, because there might be
 596 no benefit from inlining at the call site.
 597
 598 [Sept 01] Don't unconditionally inline a top-level thing, because that
 599 can simply make a static thing into something built dynamically.  E.g.
 600         x = (a,b)
 601         main = \s -> h x
 602
 603 [Remember that we treat \s as a one-shot lambda.]  No point in
 604 inlining x unless there is something interesting about the call site.
 605
 606 But watch out: if you aren't careful, some useful foldr/build fusion
 607 can be lost (most notably in spectral/hartel/parstof) because the
 608 foldr didn't see the build.  Doing the dynamic allocation isn't a big
 609 deal, in fact, but losing the fusion can be.  But the right thing here
 610 seems to be to do a callSiteInline based on the fact that there is
 611 something interesting about the call site (it's strict).  Hmm.  That
 612 seems a bit fragile.
 613
 614 Conclusion: inline top level things gaily until Phase 0 (the last
 615 phase), at which point don't.
 616
 617 \begin{code}
 618 preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
 619 preInlineUnconditionally env top_lvl bndr rhs
 620   | not active             = False
 621   | opt_SimplNoPreInlining = False
 622   | otherwise = case idOccInfo bndr of
 623                   IAmDead                    -> True    -- Happens in ((\x.1) v)
 624                   OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
 625                   other                      -> False
 626   where
 627     phase = getMode env
 628     active = case phase of
 629                    SimplGently  -> isAlwaysActive prag
 630                    SimplPhase n -> isActive n prag
 631     prag = idInlinePragma bndr
 632
 633     try_once in_lam int_cxt     -- There's one textual occurrence
 634         | not in_lam = isNotTopLevel top_lvl || early_phase
 635         | otherwise  = int_cxt && canInlineInLam rhs
 636
 637 -- Be very careful before inlining inside a lambda, becuase (a) we must not
 638 -- invalidate occurrence information, and (b) we want to avoid pushing a
 639 -- single allocation (here) into multiple allocations (inside lambda).
 640 -- Inlining a *function* with a single *saturated* call would be ok, mind you.
 641 --      || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
 642 --      where
 643 --              is_cheap = exprIsCheap rhs
 644 --              ok = is_cheap && int_cxt
 645
 646         --      int_cxt         The context isn't totally boring
 647         -- E.g. let f = \ab.BIG in \y. map f xs
 648         --      Don't want to substitute for f, because then we allocate
 649         --      its closure every time the \y is called
 650         -- But: let f = \ab.BIG in \y. map (f y) xs
 651         --      Now we do want to substitute for f, even though it's not
 652         --      saturated, because we're going to allocate a closure for
 653         --      (f y) every time round the loop anyhow.
 654
 655         -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
 656         -- so substituting rhs inside a lambda doesn't change the occ info.
 657         -- Sadly, not quite the same as exprIsHNF.
 658     canInlineInLam (Lit l)              = True
 659     canInlineInLam (Lam b e)            = isRuntimeVar b || canInlineInLam e
 660     canInlineInLam (Note _ e)           = canInlineInLam e
 661     canInlineInLam _                    = False
 662
 663     early_phase = case phase of
 664                         SimplPhase 0 -> False
 665                         other        -> True
 666 -- If we don't have this early_phase test, consider
 667 --      x = length [1,2,3]
 668 -- The full laziness pass carefully floats all the cons cells to
 669 -- top level, and preInlineUnconditionally floats them all back in.
 670 -- Result is (a) static allocation replaced by dynamic allocation
 671 --           (b) many simplifier iterations because this tickles
 672 --               a related problem; only one inlining per pass
 673 --
 674 -- On the other hand, I have seen cases where top-level fusion is
 675 -- lost if we don't inline top level thing (e.g. string constants)
 676 -- Hence the test for phase zero (which is the phase for all the final
 677 -- simplifications).  Until phase zero we take no special notice of
 678 -- top level things, but then we become more leery about inlining
 679 -- them.
 680
 681 \end{code}
 682
 683 postInlineUnconditionally
 684 ~~~~~~~~~~~~~~~~~~~~~~~~~
 685 @postInlineUnconditionally@ decides whether to unconditionally inline
 686 a thing based on the form of its RHS; in particular if it has a
 687 trivial RHS.  If so, we can inline and discard the binding altogether.
 688
 689 NB: a loop breaker has must_keep_binding = True and non-loop-breakers
 690 only have *forward* references Hence, it's safe to discard the binding
 691
 692 NOTE: This isn't our last opportunity to inline.  We're at the binding
 693 site right now, and we'll get another opportunity when we get to the
 694 ocurrence(s)
 695
 696 Note that we do this unconditional inlining only for trival RHSs.
 697 Don't inline even WHNFs inside lambdas; doing so may simply increase
 698 allocation when the function is called. This isn't the last chance; see
 699 NOTE above.
 700
 701 NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here Why?
 702 Because we don't even want to inline them into the RHS of constructor
 703 arguments. See NOTE above
 704
 705 NB: At one time even NOINLINE was ignored here: if the rhs is trivial
 706 it's best to inline it anyway.  We often get a=E; b=a from desugaring,
 707 with both a and b marked NOINLINE.  But that seems incompatible with
 708 our new view that inlining is like a RULE, so I'm sticking to the 'active'
 709 story for now.
 710
 711 \begin{code}
 712 postInlineUnconditionally :: SimplEnv -> TopLevelFlag -> OutId -> OccInfo -> OutExpr -> Unfolding -> Bool
 713 postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
 714   | not active             = False
 715   | isLoopBreaker occ_info = False
 716   | isExportedId bndr      = False
 717   | exprIsTrivial rhs      = True
 718   | otherwise
 719   = case occ_info of
 720       OneOcc in_lam one_br int_cxt
 721         ->     (one_br || smallEnoughToInline unfolding)        -- Small enough to dup
 722                         -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
 723                         --
 724                         -- NB: Do we want to inline arbitrarily big things becuase
 725                         -- one_br is True? that can lead to inline cascades.  But
 726                         -- preInlineUnconditionlly has dealt with all the common cases
 727                         -- so perhaps it's worth the risk. Here's an example
 728                         --      let f = if b then Left (\x.BIG) else Right (\y.BIG)
 729                         --      in \y. ....f....
 730                         -- We can't preInlineUnconditionally because that woud invalidate
 731                         -- the occ info for b.  Yet f is used just once, and duplicating
 732                         -- the case work is fine (exprIsCheap).
 733
 734            &&  ((isNotTopLevel top_lvl && not in_lam) ||
 735                         -- But outside a lambda, we want to be reasonably aggressive
 736                         -- about inlining into multiple branches of case
 737                         -- e.g. let x = <non-value>
 738                         --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... }
 739                         -- Inlining can be a big win if C3 is the hot-spot, even if
 740                         -- the uses in C1, C2 are not 'interesting'
 741                         -- An example that gets worse if you add int_cxt here is 'clausify'
 742
 743                 (isCheapUnfolding unfolding && int_cxt))
 744                         -- isCheap => acceptable work duplication; in_lam may be true
 745                         -- int_cxt to prevent us inlining inside a lambda without some
 746                         -- good reason.  See the notes on int_cxt in preInlineUnconditionally
 747
 748       other -> False
 749         -- The point here is that for *non-values* that occur
 750         -- outside a lambda, the call-site inliner won't have
 751         -- a chance (becuase it doesn't know that the thing
 752         -- only occurs once).   The pre-inliner won't have gotten
 753         -- it either, if the thing occurs in more than one branch
 754         -- So the main target is things like
 755         --      let x = f y in
 756         --      case v of
 757         --         True  -> case x of ...
 758         --         False -> case x of ...
 759         -- I'm not sure how important this is in practice
 760   where
 761     active = case getMode env of
 762                    SimplGently  -> isAlwaysActive prag
 763                    SimplPhase n -> isActive n prag
 764     prag = idInlinePragma bndr
 765
 766 activeInline :: SimplEnv -> OutId -> OccInfo -> Bool
 767 activeInline env id occ
 768   = case getMode env of
 769       SimplGently -> isOneOcc occ && isAlwaysActive prag
 770         -- No inlining at all when doing gentle stuff,
 771         -- except for local things that occur once
 772         -- The reason is that too little clean-up happens if you
 773         -- don't inline use-once things.   Also a bit of inlining is *good* for
 774         -- full laziness; it can expose constant sub-expressions.
 775         -- Example in spectral/mandel/Mandel.hs, where the mandelset
 776         -- function gets a useful let-float if you inline windowToViewport
 777
 778         -- NB: we used to have a second exception, for data con wrappers.
 779         -- On the grounds that we use gentle mode for rule LHSs, and
 780         -- they match better when data con wrappers are inlined.
 781         -- But that only really applies to the trivial wrappers (like (:)),
 782         -- and they are now constructed as Compulsory unfoldings (in MkId)
 783         -- so they'll happen anyway.
 784
 785       SimplPhase n -> isActive n prag
 786   where
 787     prag = idInlinePragma id
 788
 789 activeRule :: SimplEnv -> Maybe (Activation -> Bool)
 790 -- Nothing => No rules at all
 791 activeRule env
 792   | opt_RulesOff = Nothing
 793   | otherwise
 794   = case getMode env of
 795         SimplGently  -> Just isAlwaysActive
 796                         -- Used to be Nothing (no rules in gentle mode)
 797                         -- Main motivation for changing is that I wanted
 798                         --      lift String ===> ...
 799                         -- to work in Template Haskell when simplifying
 800                         -- splices, so we get simpler code for literal strings
 801         SimplPhase n -> Just (isActive n)
 802 \end{code}
 803
 804
 805 %************************************************************************
 806 %*                                                                      *
 807 \subsection{Rebuilding a lambda}
 808 %*                                                                      *
 809 %************************************************************************
 810
 811 \begin{code}
 812 mkLam :: SimplEnv -> [OutBinder] -> OutExpr -> SimplCont -> SimplM FloatsWithExpr
 813 \end{code}
 814
 815 Try three things
 816         a) eta reduction, if that gives a trivial expression
 817         b) eta expansion [only if there are some value lambdas]
 818         c) floating lets out through big lambdas
 819                 [only if all tyvar lambdas, and only if this lambda
 820                  is the RHS of a let]
 821
 822 \begin{code}
 823 mkLam env bndrs body cont
 824  = getDOptsSmpl  `thenSmpl` \dflags ->
 825    mkLam' dflags env bndrs body cont
 826  where
 827  mkLam' dflags env bndrs body cont
 828    | dopt Opt_DoEtaReduction dflags,
 829      Just etad_lam <- tryEtaReduce bndrs body
 830    = tick (EtaReduction (head bndrs))   `thenSmpl_`
 831      returnSmpl (emptyFloats env, etad_lam)
 832
 833    | dopt Opt_DoLambdaEtaExpansion dflags,
 834      any isRuntimeVar bndrs
 835    = tryEtaExpansion body               `thenSmpl` \ body' ->
 836      returnSmpl (emptyFloats env, mkLams bndrs body')
 837
 838 {-      Sept 01: I'm experimenting with getting the
 839         full laziness pass to float out past big lambdsa
 840  | all isTyVar bndrs,   -- Only for big lambdas
 841    contIsRhs cont       -- Only try the rhs type-lambda floating
 842                         -- if this is indeed a right-hand side; otherwise
 843                         -- we end up floating the thing out, only for float-in
 844                         -- to float it right back in again!
 845  = tryRhsTyLam env bndrs body           `thenSmpl` \ (floats, body') ->
 846    returnSmpl (floats, mkLams bndrs body')
 847 -}
 848
 849    | otherwise
 850    = returnSmpl (emptyFloats env, mkLams bndrs body)
 851 \end{code}
 852
 853
 854 %************************************************************************
 855 %*                                                                      *
 856 \subsection{Eta expansion and reduction}
 857 %*                                                                      *
 858 %************************************************************************
 859
 860 We try for eta reduction here, but *only* if we get all the
 861 way to an exprIsTrivial expression.
 862 We don't want to remove extra lambdas unless we are going
 863 to avoid allocating this thing altogether
 864
 865 \begin{code}
 866 tryEtaReduce :: [OutBinder] -> OutExpr -> Maybe OutExpr
 867 tryEtaReduce bndrs body
 868         -- We don't use CoreUtils.etaReduce, because we can be more
 869         -- efficient here:
 870         --  (a) we already have the binders
 871         --  (b) we can do the triviality test before computing the free vars
 872   = go (reverse bndrs) body
 873   where
 874     go (b : bs) (App fun arg) | ok_arg b arg = go bs fun        -- Loop round
 875     go []       fun           | ok_fun fun   = Just fun         -- Success!
 876     go _        _                            = Nothing          -- Failure!
 877
 878     ok_fun fun =  exprIsTrivial fun
 879                && not (any (`elemVarSet` (exprFreeVars fun)) bndrs)
 880                && (exprIsHNF fun || all ok_lam bndrs)
 881     ok_lam v = isTyVar v || isDictId v
 882         -- The exprIsHNF is because eta reduction is not
 883         -- valid in general:  \x. bot  /=  bot
 884         -- So we need to be sure that the "fun" is a value.
 885         --
 886         -- However, we always want to reduce (/\a -> f a) to f
 887         -- This came up in a RULE: foldr (build (/\a -> g a))
 888         --      did not match      foldr (build (/\b -> ...something complex...))
 889         -- The type checker can insert these eta-expanded versions,
 890         -- with both type and dictionary lambdas; hence the slightly
 891         -- ad-hoc isDictTy
 892
 893     ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
 894 \end{code}
 895
 896
 897         Try eta expansion for RHSs
 898
 899 We go for:
 900    f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
 901                                  (n >= 0)
 902
 903 where (in both cases)
 904
 905         * The xi can include type variables
 906
 907         * The yi are all value variables
 908
 909         * N is a NORMAL FORM (i.e. no redexes anywhere)
 910           wanting a suitable number of extra args.
 911
 912 We may have to sandwich some coerces between the lambdas
 913 to make the types work.   exprEtaExpandArity looks through coerces
 914 when computing arity; and etaExpand adds the coerces as necessary when
 915 actually computing the expansion.
 916
 917 \begin{code}
 918 tryEtaExpansion :: OutExpr -> SimplM OutExpr
 919 -- There is at least one runtime binder in the binders
 920 tryEtaExpansion body
 921   = getUniquesSmpl                      `thenSmpl` \ us ->
 922     returnSmpl (etaExpand fun_arity us body (exprType body))
 923   where
 924     fun_arity = exprEtaExpandArity body
 925 \end{code}
 926
 927
 928 %************************************************************************
 929 %*                                                                      *
 930 \subsection{Floating lets out of big lambdas}
 931 %*                                                                      *
 932 %************************************************************************
 933
 934 tryRhsTyLam tries this transformation, when the big lambda appears as
 935 the RHS of a let(rec) binding:
 936
 937         /\abc -> let(rec) x = e in b
 938    ==>
 939         let(rec) x' = /\abc -> let x = x' a b c in e
 940         in
 941         /\abc -> let x = x' a b c in b
 942
 943 This is good because it can turn things like:
 944
 945         let f = /\a -> letrec g = ... g ... in g
 946 into
 947         letrec g' = /\a -> ... g' a ...
 948         in
 949         let f = /\ a -> g' a
 950
 951 which is better.  In effect, it means that big lambdas don't impede
 952 let-floating.
 953
 954 This optimisation is CRUCIAL in eliminating the junk introduced by
 955 desugaring mutually recursive definitions.  Don't eliminate it lightly!
 956
 957 So far as the implementation is concerned:
 958
 959         Invariant: go F e = /\tvs -> F e
 960
 961         Equalities:
 962                 go F (Let x=e in b)
 963                 = Let x' = /\tvs -> F e
 964                   in
 965                   go G b
 966                 where
 967                     G = F . Let x = x' tvs
 968
 969                 go F (Letrec xi=ei in b)
 970                 = Letrec {xi' = /\tvs -> G ei}
 971                   in
 972                   go G b
 973                 where
 974                   G = F . Let {xi = xi' tvs}
 975
 976 [May 1999]  If we do this transformation *regardless* then we can
 977 end up with some pretty silly stuff.  For example,
 978
 979         let
 980             st = /\ s -> let { x1=r1 ; x2=r2 } in ...
 981         in ..
 982 becomes
 983         let y1 = /\s -> r1
 984             y2 = /\s -> r2
 985             st = /\s -> ...[y1 s/x1, y2 s/x2]
 986         in ..
 987
 988 Unless the "..." is a WHNF there is really no point in doing this.
 989 Indeed it can make things worse.  Suppose x1 is used strictly,
 990 and is of the form
 991
 992         x1* = case f y of { (a,b) -> e }
 993
 994 If we abstract this wrt the tyvar we then can't do the case inline
 995 as we would normally do.
 996
 997
 998 \begin{code}
 999 {-      Trying to do this in full laziness
1000
1001 tryRhsTyLam :: SimplEnv -> [OutTyVar] -> OutExpr -> SimplM FloatsWithExpr
1002 -- Call ensures that all the binders are type variables
1003
1004 tryRhsTyLam env tyvars body             -- Only does something if there's a let
1005   |  not (all isTyVar tyvars)
1006   || not (worth_it body)                -- inside a type lambda,
1007   = returnSmpl (emptyFloats env, body)  -- and a WHNF inside that
1008
1009   | otherwise
1010   = go env (\x -> x) body
1011
1012   where
1013     worth_it e@(Let _ _) = whnf_in_middle e
1014     worth_it e           = False
1015
1016     whnf_in_middle (Let (NonRec x rhs) e) | isUnLiftedType (idType x) = False
1017     whnf_in_middle (Let _ e) = whnf_in_middle e
1018     whnf_in_middle e         = exprIsCheap e
1019
1020     main_tyvar_set = mkVarSet tyvars
1021
1022     go env fn (Let bind@(NonRec var rhs) body)
1023       | exprIsTrivial rhs
1024       = go env (fn . Let bind) body
1025
1026     go env fn (Let (NonRec var rhs) body)
1027       = mk_poly tyvars_here var                                                 `thenSmpl` \ (var', rhs') ->
1028         addAuxiliaryBind env (NonRec var' (mkLams tyvars_here (fn rhs)))        $ \ env ->
1029         go env (fn . Let (mk_silly_bind var rhs')) body
1030
1031       where
1032
1033         tyvars_here = varSetElems (main_tyvar_set `intersectVarSet` exprSomeFreeVars isTyVar rhs)
1034                 -- Abstract only over the type variables free in the rhs
1035                 -- wrt which the new binding is abstracted.  But the naive
1036                 -- approach of abstract wrt the tyvars free in the Id's type
1037                 -- fails. Consider:
1038                 --      /\ a b -> let t :: (a,b) = (e1, e2)
1039                 --                    x :: a     = fst t
1040                 --                in ...
1041                 -- Here, b isn't free in x's type, but we must nevertheless
1042                 -- abstract wrt b as well, because t's type mentions b.
1043                 -- Since t is floated too, we'd end up with the bogus:
1044                 --      poly_t = /\ a b -> (e1, e2)
1045                 --      poly_x = /\ a   -> fst (poly_t a *b*)
1046                 -- So for now we adopt the even more naive approach of
1047                 -- abstracting wrt *all* the tyvars.  We'll see if that
1048                 -- gives rise to problems.   SLPJ June 98
1049
1050     go env fn (Let (Rec prs) body)
1051        = mapAndUnzipSmpl (mk_poly tyvars_here) vars     `thenSmpl` \ (vars', rhss') ->
1052          let
1053             gn body = fn (foldr Let body (zipWith mk_silly_bind vars rhss'))
1054             pairs   = vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss]
1055          in
1056          addAuxiliaryBind env (Rec pairs)               $ \ env ->
1057          go env gn body
1058        where
1059          (vars,rhss) = unzip prs
1060          tyvars_here = varSetElems (main_tyvar_set `intersectVarSet` exprsSomeFreeVars isTyVar (map snd prs))
1061                 -- See notes with tyvars_here above
1062
1063     go env fn body = returnSmpl (emptyFloats env, fn body)
1064
1065     mk_poly tyvars_here var
1066       = getUniqueSmpl           `thenSmpl` \ uniq ->
1067         let
1068             poly_name = setNameUnique (idName var) uniq         -- Keep same name
1069             poly_ty   = mkForAllTys tyvars_here (idType var)    -- But new type of course
1070             poly_id   = mkLocalId poly_name poly_ty
1071
1072                 -- In the olden days, it was crucial to copy the occInfo of the original var,
1073                 -- because we were looking at occurrence-analysed but as yet unsimplified code!
1074                 -- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
1075                 -- at already simplified code, so it doesn't matter
1076                 --
1077                 -- It's even right to retain single-occurrence or dead-var info:
1078                 -- Suppose we started with  /\a -> let x = E in B
1079                 -- where x occurs once in B. Then we transform to:
1080                 --      let x' = /\a -> E in /\a -> let x* = x' a in B
1081                 -- where x* has an INLINE prag on it.  Now, once x* is inlined,
1082                 -- the occurrences of x' will be just the occurrences originally
1083                 -- pinned on x.
1084         in
1085         returnSmpl (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tyvars_here))
1086
1087     mk_silly_bind var rhs = NonRec var (Note InlineMe rhs)
1088                 -- Suppose we start with:
1089                 --
1090                 --      x = /\ a -> let g = G in E
1091                 --
1092                 -- Then we'll float to get
1093                 --
1094                 --      x = let poly_g = /\ a -> G
1095                 --          in /\ a -> let g = poly_g a in E
1096                 --
1097                 -- But now the occurrence analyser will see just one occurrence
1098                 -- of poly_g, not inside a lambda, so the simplifier will
1099                 -- PreInlineUnconditionally poly_g back into g!  Badk to square 1!
1100                 -- (I used to think that the "don't inline lone occurrences" stuff
1101                 --  would stop this happening, but since it's the *only* occurrence,
1102                 --  PreInlineUnconditionally kicks in first!)
1103                 --
1104                 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
1105                 --           to appear many times.  (NB: mkInlineMe eliminates
1106                 --           such notes on trivial RHSs, so do it manually.)
1107 -}
1108 \end{code}
1109
1110 %************************************************************************
1111 %*                                                                      *
1112 \subsection{Case absorption and identity-case elimination}
1113 %*                                                                      *
1114 %************************************************************************
1115
1116 mkCase puts a case expression back together, trying various transformations first.
1117
1118 \begin{code}
1119 mkCase :: OutExpr -> OutId -> OutType
1120        -> [OutAlt]              -- Increasing order
1121        -> SimplM OutExpr
1122
1123 mkCase scrut case_bndr ty alts
1124   = getDOptsSmpl                        `thenSmpl` \dflags ->
1125     mkAlts dflags scrut case_bndr alts  `thenSmpl` \ better_alts ->
1126     mkCase1 scrut case_bndr ty better_alts
1127 \end{code}
1128
1129
1130 mkAlts tries these things:
1131
1132 1.  If several alternatives are identical, merge them into
1133     a single DEFAULT alternative.  I've occasionally seen this
1134     making a big difference:
1135
1136         case e of               =====>     case e of
1137           C _ -> f x                         D v -> ....v....
1138           D v -> ....v....                   DEFAULT -> f x
1139           DEFAULT -> f x
1140
1141    The point is that we merge common RHSs, at least for the DEFAULT case.
1142    [One could do something more elaborate but I've never seen it needed.]
1143    To avoid an expensive test, we just merge branches equal to the *first*
1144    alternative; this picks up the common cases
1145         a) all branches equal
1146         b) some branches equal to the DEFAULT (which occurs first)
1147
1148 2.  Case merging:
1149        case e of b {             ==>   case e of b {
1150          p1 -> rhs1                      p1 -> rhs1
1151          ...                             ...
1152          pm -> rhsm                      pm -> rhsm
1153          _  -> case b of b' {            pn -> let b'=b in rhsn
1154                      pn -> rhsn          ...
1155                      ...                 po -> let b'=b in rhso
1156                      po -> rhso          _  -> let b'=b in rhsd
1157                      _  -> rhsd
1158        }
1159
1160     which merges two cases in one case when -- the default alternative of
1161     the outer case scrutises the same variable as the outer case This
1162     transformation is called Case Merging.  It avoids that the same
1163     variable is scrutinised multiple times.
1164
1165
1166 The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs):
1167
1168         x | p `is` 1 -> e1
1169           | p `is` 2 -> e2
1170         ...etc...
1171
1172 where @is@ was something like
1173
1174         p `is` n = p /= (-1) && p == n
1175
1176 This gave rise to a horrible sequence of cases
1177
1178         case p of
1179           (-1) -> $j p
1180           1    -> e1
1181           DEFAULT -> $j p
1182
1183 and similarly in cascade for all the join points!
1184
1185
1186
1187 \begin{code}
1188 --------------------------------------------------
1189 --      1. Merge identical branches
1190 --------------------------------------------------
1191 mkAlts dflags scrut case_bndr alts@((con1,bndrs1,rhs1) : con_alts)
1192   | all isDeadBinder bndrs1,                    -- Remember the default
1193     length filtered_alts < length con_alts      -- alternative comes first
1194   = tick (AltMerge case_bndr)                   `thenSmpl_`
1195     returnSmpl better_alts
1196   where
1197     filtered_alts        = filter keep con_alts
1198     keep (con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
1199     better_alts          = (DEFAULT, [], rhs1) : filtered_alts
1200
1201
1202 --------------------------------------------------
1203 --      2.  Merge nested cases
1204 --------------------------------------------------
1205
1206 mkAlts dflags scrut outer_bndr outer_alts
1207   | dopt Opt_CaseMerge dflags,
1208     (outer_alts_without_deflt, maybe_outer_deflt)   <- findDefault outer_alts,
1209     Just (Case (Var scrut_var) inner_bndr _ inner_alts) <- maybe_outer_deflt,
1210     scruting_same_var scrut_var
1211   = let
1212         munged_inner_alts = [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts]
1213         munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
1214
1215         new_alts = mergeAlts outer_alts_without_deflt munged_inner_alts
1216                 -- The merge keeps the inner DEFAULT at the front, if there is one
1217                 -- and eliminates any inner_alts that are shadowed by the outer_alts
1218     in
1219     tick (CaseMerge outer_bndr)                         `thenSmpl_`
1220     returnSmpl new_alts
1221         -- Warning: don't call mkAlts recursively!
1222         -- Firstly, there's no point, because inner alts have already had
1223         -- mkCase applied to them, so they won't have a case in their default
1224         -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
1225         -- in munge_rhs may put a case into the DEFAULT branch!
1226   where
1227         -- We are scrutinising the same variable if it's
1228         -- the outer case-binder, or if the outer case scrutinises a variable
1229         -- (and it's the same).  Testing both allows us not to replace the
1230         -- outer scrut-var with the outer case-binder (Simplify.simplCaseBinder).
1231     scruting_same_var = case scrut of
1232                           Var outer_scrut -> \ v -> v == outer_bndr || v == outer_scrut
1233                           other           -> \ v -> v == outer_bndr
1234
1235 ------------------------------------------------
1236 --      Catch-all
1237 ------------------------------------------------
1238
1239 mkAlts dflags scrut case_bndr other_alts = returnSmpl other_alts
1240 \end{code}
1241
1242
1243
1244 =================================================================================
1245
1246 mkCase1 tries these things
1247
1248 1.  Eliminate the case altogether if possible
1249
1250 2.  Case-identity:
1251
1252         case e of               ===> e
1253                 True  -> True;
1254                 False -> False
1255
1256     and similar friends.
1257
1258
1259 Start with a simple situation:
1260
1261         case x# of      ===>   e[x#/y#]
1262           y# -> e
1263
1264 (when x#, y# are of primitive type, of course).  We can't (in general)
1265 do this for algebraic cases, because we might turn bottom into
1266 non-bottom!
1267
1268 Actually, we generalise this idea to look for a case where we're
1269 scrutinising a variable, and we know that only the default case can
1270 match.  For example:
1271 \begin{verbatim}
1272         case x of
1273           0#    -> ...
1274           other -> ...(case x of
1275                          0#    -> ...
1276                          other -> ...) ...
1277 \end{code}
1278 Here the inner case can be eliminated.  This really only shows up in
1279 eliminating error-checking code.
1280
1281 We also make sure that we deal with this very common case:
1282
1283         case e of
1284           x -> ...x...
1285
1286 Here we are using the case as a strict let; if x is used only once
1287 then we want to inline it.  We have to be careful that this doesn't
1288 make the program terminate when it would have diverged before, so we
1289 check that
1290         - x is used strictly, or
1291         - e is already evaluated (it may so if e is a variable)
1292
1293 Lastly, we generalise the transformation to handle this:
1294
1295         case e of       ===> r
1296            True  -> r
1297            False -> r
1298
1299 We only do this for very cheaply compared r's (constructors, literals
1300 and variables).  If pedantic bottoms is on, we only do it when the
1301 scrutinee is a PrimOp which can't fail.
1302
1303 We do it *here*, looking at un-simplified alternatives, because we
1304 have to check that r doesn't mention the variables bound by the
1305 pattern in each alternative, so the binder-info is rather useful.
1306
1307 So the case-elimination algorithm is:
1308
1309         1. Eliminate alternatives which can't match
1310
1311         2. Check whether all the remaining alternatives
1312                 (a) do not mention in their rhs any of the variables bound in their pattern
1313            and  (b) have equal rhss
1314
1315         3. Check we can safely ditch the case:
1316                    * PedanticBottoms is off,
1317                 or * the scrutinee is an already-evaluated variable
1318                 or * the scrutinee is a primop which is ok for speculation
1319                         -- ie we want to preserve divide-by-zero errors, and
1320                         -- calls to error itself!
1321
1322                 or * [Prim cases] the scrutinee is a primitive variable
1323
1324                 or * [Alg cases] the scrutinee is a variable and
1325                      either * the rhs is the same variable
1326                         (eg case x of C a b -> x  ===>   x)
1327                      or     * there is only one alternative, the default alternative,
1328                                 and the binder is used strictly in its scope.
1329                                 [NB this is helped by the "use default binder where
1330                                  possible" transformation; see below.]
1331
1332
1333 If so, then we can replace the case with one of the rhss.
1334
1335 Further notes about case elimination
1336 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1337 Consider:       test :: Integer -> IO ()
1338                 test = print
1339
1340 Turns out that this compiles to:
1341     Print.test
1342       = \ eta :: Integer
1343           eta1 :: State# RealWorld ->
1344           case PrelNum.< eta PrelNum.zeroInteger of wild { __DEFAULT ->
1345           case hPutStr stdout
1346                  (PrelNum.jtos eta ($w[] @ Char))
1347                  eta1
1348           of wild1 { (# new_s, a4 #) -> PrelIO.lvl23 new_s  }}
1349
1350 Notice the strange '<' which has no effect at all. This is a funny one.
1351 It started like this:
1352
1353 f x y = if x < 0 then jtos x
1354           else if y==0 then "" else jtos x
1355
1356 At a particular call site we have (f v 1).  So we inline to get
1357
1358         if v < 0 then jtos x
1359         else if 1==0 then "" else jtos x
1360
1361 Now simplify the 1==0 conditional:
1362
1363         if v<0 then jtos v else jtos v
1364
1365 Now common-up the two branches of the case:
1366
1367         case (v<0) of DEFAULT -> jtos v
1368
1369 Why don't we drop the case?  Because it's strict in v.  It's technically
1370 wrong to drop even unnecessary evaluations, and in practice they
1371 may be a result of 'seq' so we *definitely* don't want to drop those.
1372 I don't really know how to improve this situation.
1373
1374
1375 \begin{code}
1376 --------------------------------------------------
1377 --      0. Check for empty alternatives
1378 --------------------------------------------------
1379
1380 -- This isn't strictly an error.  It's possible that the simplifer might "see"
1381 -- that an inner case has no accessible alternatives before it "sees" that the
1382 -- entire branch of an outer case is inaccessible.  So we simply
1383 -- put an error case here insteadd
1384 mkCase1 scrut case_bndr ty []
1385   = pprTrace "mkCase1: null alts" (ppr case_bndr <+> ppr scrut) $
1386     return (mkApps (Var eRROR_ID)
1387                    [Type ty, Lit (mkStringLit "Impossible alternative")])
1388
1389 --------------------------------------------------
1390 --      1. Eliminate the case altogether if poss
1391 --------------------------------------------------
1392
1393 mkCase1 scrut case_bndr ty [(con,bndrs,rhs)]
1394   -- See if we can get rid of the case altogether
1395   -- See the extensive notes on case-elimination above
1396   -- mkCase made sure that if all the alternatives are equal,
1397   -- then there is now only one (DEFAULT) rhs
1398  |  all isDeadBinder bndrs,
1399
1400         -- Check that the scrutinee can be let-bound instead of case-bound
1401     exprOkForSpeculation scrut
1402                 -- OK not to evaluate it
1403                 -- This includes things like (==# a# b#)::Bool
1404                 -- so that we simplify
1405                 --      case ==# a# b# of { True -> x; False -> x }
1406                 -- to just
1407                 --      x
1408                 -- This particular example shows up in default methods for
1409                 -- comparision operations (e.g. in (>=) for Int.Int32)
1410         || exprIsHNF scrut                      -- It's already evaluated
1411         || var_demanded_later scrut             -- It'll be demanded later
1412
1413 --      || not opt_SimplPedanticBottoms)        -- Or we don't care!
1414 --      We used to allow improving termination by discarding cases, unless -fpedantic-bottoms was on,
1415 --      but that breaks badly for the dataToTag# primop, which relies on a case to evaluate
1416 --      its argument:  case x of { y -> dataToTag# y }
1417 --      Here we must *not* discard the case, because dataToTag# just fetches the tag from
1418 --      the info pointer.  So we'll be pedantic all the time, and see if that gives any
1419 --      other problems
1420 --      Also we don't want to discard 'seq's
1421   = tick (CaseElim case_bndr)                   `thenSmpl_`
1422     returnSmpl (bindCaseBndr case_bndr scrut rhs)
1423
1424   where
1425         -- The case binder is going to be evaluated later,
1426         -- and the scrutinee is a simple variable
1427     var_demanded_later (Var v) = isStrictDmd (idNewDemandInfo case_bndr)
1428     var_demanded_later other   = False
1429
1430
1431 --------------------------------------------------
1432 --      2. Identity case
1433 --------------------------------------------------
1434
1435 mkCase1 scrut case_bndr ty alts -- Identity case
1436   | all identity_alt alts
1437   = tick (CaseIdentity case_bndr)               `thenSmpl_`
1438     returnSmpl (re_note scrut)
1439   where
1440     identity_alt (con, args, rhs) = de_note rhs `cheapEqExpr` identity_rhs con args
1441
1442     identity_rhs (DataAlt con) args = mkConApp con (arg_tys ++ map varToCoreExpr args)
1443     identity_rhs (LitAlt lit)  _    = Lit lit
1444     identity_rhs DEFAULT       _    = Var case_bndr
1445
1446     arg_tys = map Type (tyConAppArgs (idType case_bndr))
1447
1448         -- We've seen this:
1449         --      case coerce T e of x { _ -> coerce T' x }
1450         -- And we definitely want to eliminate this case!
1451         -- So we throw away notes from the RHS, and reconstruct
1452         -- (at least an approximation) at the other end
1453     de_note (Note _ e) = de_note e
1454     de_note e          = e
1455
1456         -- re_note wraps a coerce if it might be necessary
1457     re_note scrut = case head alts of
1458                         (_,_,rhs1@(Note _ _)) -> mkCoerce2 (exprType rhs1) (idType case_bndr) scrut
1459                         other                 -> scrut
1460
1461
1462 --------------------------------------------------
1463 --      Catch-all
1464 --------------------------------------------------
1465 mkCase1 scrut bndr ty alts = returnSmpl (Case scrut bndr ty alts)
1466 \end{code}
1467
1468
1469 When adding auxiliary bindings for the case binder, it's worth checking if
1470 its dead, because it often is, and occasionally these mkCase transformations
1471 cascade rather nicely.
1472
1473 \begin{code}
1474 bindCaseBndr bndr rhs body
1475   | isDeadBinder bndr = body
1476   | otherwise         = bindNonRec bndr rhs body
1477 \end{code}