compiler/simplCore/OccurAnal.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 %************************************************************************
   5 %*                                                                      *
   6 \section[OccurAnal]{Occurrence analysis pass}
   7 %*                                                                      *
   8 %************************************************************************
   9
  10 The occurrence analyser re-typechecks a core expression, returning a new
  11 core expression with (hopefully) improved usage information.
  12
  13 \begin{code}
  14 module OccurAnal (
  15         occurAnalysePgm, occurAnalyseExpr
  16     ) where
  17
  18 #include "HsVersions.h"
  19
  20 import CoreSyn
  21 import CoreFVs
  22 import Type             ( tyVarsOfType )
  23 import CoreUtils        ( exprIsTrivial, isDefaultAlt, mkCoerceI, isExpandableApp )
  24 import Coercion         ( CoercionI(..), mkSymCoI )
  25 import Id
  26 import Name             ( localiseName )
  27 import BasicTypes
  28
  29 import VarSet
  30 import VarEnv
  31 import Var              ( Var, varUnique )
  32
  33 import Maybes           ( orElse )
  34 import Digraph          ( SCC(..), stronglyConnCompFromEdgedVerticesR )
  35 import PrelNames        ( buildIdKey, foldrIdKey, runSTRepIdKey, augmentIdKey )
  36 import Unique           ( Unique )
  37 import UniqFM           ( keysUFM, intersectUFM_C, foldUFM_Directly )
  38 import Util             ( mapAndUnzip, filterOut )
  39 import Bag
  40 import Outputable
  41 import FastString
  42 import Data.List
  43 \end{code}
  44
  45
  46 %************************************************************************
  47 %*                                                                      *
  48 \subsection[OccurAnal-main]{Counting occurrences: main function}
  49 %*                                                                      *
  50 %************************************************************************
  51
  52 Here's the externally-callable interface:
  53
  54 \begin{code}
  55 occurAnalysePgm :: [CoreBind] -> [CoreRule] -> [CoreBind]
  56 occurAnalysePgm binds rules
  57   = snd (go initOccEnv binds)
  58   where
  59     initial_details = addIdOccs emptyDetails (rulesFreeVars rules)
  60     -- The RULES keep things alive!
  61
  62     go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
  63     go _ []
  64         = (initial_details, [])
  65     go env (bind:binds)
  66         = (final_usage, bind' ++ binds')
  67         where
  68            (bs_usage, binds')   = go env binds
  69            (final_usage, bind') = occAnalBind env env bind bs_usage
  70
  71 occurAnalyseExpr :: CoreExpr -> CoreExpr
  72         -- Do occurrence analysis, and discard occurence info returned
  73 occurAnalyseExpr expr = snd (occAnal initOccEnv expr)
  74 \end{code}
  75
  76
  77 %************************************************************************
  78 %*                                                                      *
  79 \subsection[OccurAnal-main]{Counting occurrences: main function}
  80 %*                                                                      *
  81 %************************************************************************
  82
  83 Bindings
  84 ~~~~~~~~
  85
  86 \begin{code}
  87 occAnalBind :: OccEnv           -- The incoming OccEnv
  88             -> OccEnv           -- Same, but trimmed by (binderOf bind)
  89             -> CoreBind
  90             -> UsageDetails             -- Usage details of scope
  91             -> (UsageDetails,           -- Of the whole let(rec)
  92                 [CoreBind])
  93
  94 occAnalBind env _ (NonRec binder rhs) body_usage
  95   | isTyCoVar binder                    -- A type let; we don't gather usage info
  96   = (body_usage, [NonRec binder rhs])
  97
  98   | not (binder `usedIn` body_usage)    -- It's not mentioned
  99   = (body_usage, [])
 100
 101   | otherwise                   -- It's mentioned in the body
 102   = (body_usage' +++ addRuleUsage rhs_usage binder,     -- Note [Rules are extra RHSs]
 103      [NonRec tagged_binder rhs'])
 104   where
 105     (body_usage', tagged_binder) = tagBinder body_usage binder
 106     (rhs_usage, rhs')            = occAnalRhs env tagged_binder rhs
 107 \end{code}
 108
 109 Note [Dead code]
 110 ~~~~~~~~~~~~~~~~
 111 Dropping dead code for recursive bindings is done in a very simple way:
 112
 113         the entire set of bindings is dropped if none of its binders are
 114         mentioned in its body; otherwise none are.
 115
 116 This seems to miss an obvious improvement.
 117
 118         letrec  f = ...g...
 119                 g = ...f...
 120         in
 121         ...g...
 122 ===>
 123         letrec f = ...g...
 124                g = ...(...g...)...
 125         in
 126         ...g...
 127
 128 Now 'f' is unused! But it's OK!  Dependency analysis will sort this
 129 out into a letrec for 'g' and a 'let' for 'f', and then 'f' will get
 130 dropped.  It isn't easy to do a perfect job in one blow.  Consider
 131
 132         letrec f = ...g...
 133                g = ...h...
 134                h = ...k...
 135                k = ...m...
 136                m = ...m...
 137         in
 138         ...m...
 139
 140
 141 Note [Loop breaking and RULES]
 142 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 143 Loop breaking is surprisingly subtle.  First read the section 4 of
 144 "Secrets of the GHC inliner".  This describes our basic plan.
 145
 146 However things are made quite a bit more complicated by RULES.  Remember
 147
 148   * Note [Rules are extra RHSs]
 149     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 150     A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
 151     keeps the specialised "children" alive.  If the parent dies
 152     (because it isn't referenced any more), then the children will die
 153     too (unless they are already referenced directly).
 154
 155     To that end, we build a Rec group for each cyclic strongly
 156     connected component,
 157         *treating f's rules as extra RHSs for 'f'*.
 158
 159     When we make the Rec groups we include variables free in *either*
 160     LHS *or* RHS of the rule.  The former might seems silly, but see
 161     Note [Rule dependency info].
 162
 163     So in Example [eftInt], eftInt and eftIntFB will be put in the
 164     same Rec, even though their 'main' RHSs are both non-recursive.
 165
 166   * Note [Rules are visible in their own rec group]
 167     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 168     We want the rules for 'f' to be visible in f's right-hand side.
 169     And we'd like them to be visible in other functions in f's Rec
 170     group.  E.g. in Example [Specialisation rules] we want f' rule
 171     to be visible in both f's RHS, and fs's RHS.
 172
 173     This means that we must simplify the RULEs first, before looking
 174     at any of the definitions.  This is done by Simplify.simplRecBind,
 175     when it calls addLetIdInfo.
 176
 177   * Note [Choosing loop breakers]
 178     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 179     We avoid infinite inlinings by choosing loop breakers, and
 180     ensuring that a loop breaker cuts each loop.  But what is a
 181     "loop"?  In particular, a RULE is like an equation for 'f' that
 182     is *always* inlined if it is applicable.  We do *not* disable
 183     rules for loop-breakers.  It's up to whoever makes the rules to
 184     make sure that the rules themselves always terminate.  See Note
 185     [Rules for recursive functions] in Simplify.lhs
 186
 187     Hence, if
 188         f's RHS mentions g, and
 189         g has a RULE that mentions h, and
 190         h has a RULE that mentions f
 191
 192     then we *must* choose f to be a loop breaker.  In general, take the
 193     free variables of f's RHS, and augment it with all the variables
 194     reachable by RULES from those starting points.  That is the whole
 195     reason for computing rule_fv_env in occAnalBind.  (Of course we
 196     only consider free vars that are also binders in this Rec group.)
 197
 198     Note that when we compute this rule_fv_env, we only consider variables
 199     free in the *RHS* of the rule, in contrast to the way we build the
 200     Rec group in the first place (Note [Rule dependency info])
 201
 202     Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
 203     chosen as a loop breaker, because their RHSs don't mention each other.
 204     And indeed both can be inlined safely.
 205
 206     Note that the edges of the graph we use for computing loop breakers
 207     are not the same as the edges we use for computing the Rec blocks.
 208     That's why we compute
 209         rec_edges          for the Rec block analysis
 210         loop_breaker_edges for the loop breaker analysis
 211
 212
 213   * Note [Weak loop breakers]
 214     ~~~~~~~~~~~~~~~~~~~~~~~~~
 215     There is a last nasty wrinkle.  Suppose we have
 216
 217         Rec { f = f_rhs
 218               RULE f [] = g
 219
 220               h = h_rhs
 221               g = h
 222               ...more...
 223         }
 224
 225     Remmber that we simplify the RULES before any RHS (see Note
 226     [Rules are visible in their own rec group] above).
 227
 228     So we must *not* postInlineUnconditionally 'g', even though
 229     its RHS turns out to be trivial.  (I'm assuming that 'g' is
 230     not choosen as a loop breaker.)  Why not?  Because then we
 231     drop the binding for 'g', which leaves it out of scope in the
 232     RULE!
 233
 234     We "solve" this by making g a "weak" or "rules-only" loop breaker,
 235     with OccInfo = IAmLoopBreaker True.  A normal "strong" loop breaker
 236     has IAmLoopBreaker False.  So
 237
 238                                 Inline  postInlineUnconditionally
 239         IAmLoopBreaker False    no      no
 240         IAmLoopBreaker True     yes     no
 241         other                   yes     yes
 242
 243     The **sole** reason for this kind of loop breaker is so that
 244     postInlineUnconditionally does not fire.  Ugh.
 245
 246   * Note [Rule dependency info]
 247     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 248     The VarSet in a SpecInfo is used for dependency analysis in the
 249     occurrence analyser.  We must track free vars in *both* lhs and rhs.
 250     Hence use of idRuleVars, rather than idRuleRhsVars in addRuleUsage.
 251     Why both? Consider
 252         x = y
 253         RULE f x = 4
 254     Then if we substitute y for x, we'd better do so in the
 255     rule's LHS too, so we'd better ensure the dependency is respected
 256
 257
 258   * Note [Inline rules]
 259     ~~~~~~~~~~~~~~~~~~~
 260     None of the above stuff about RULES applies to Inline Rules,
 261     stored in a CoreUnfolding.  The unfolding, if any, is simplified
 262     at the same time as the regular RHS of the function, so it should
 263     be treated *exactly* like an extra RHS.
 264
 265
 266 Example [eftInt]
 267 ~~~~~~~~~~~~~~~
 268 Example (from GHC.Enum):
 269
 270   eftInt :: Int# -> Int# -> [Int]
 271   eftInt x y = ...(non-recursive)...
 272
 273   {-# INLINE [0] eftIntFB #-}
 274   eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
 275   eftIntFB c n x y = ...(non-recursive)...
 276
 277   {-# RULES
 278   "eftInt"  [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
 279   "eftIntList"  [1] eftIntFB  (:) [] = eftInt
 280    #-}
 281
 282 Example [Specialisation rules]
 283 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 284 Consider this group, which is typical of what SpecConstr builds:
 285
 286    fs a = ....f (C a)....
 287    f  x = ....f (C a)....
 288    {-# RULE f (C a) = fs a #-}
 289
 290 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
 291
 292 But watch out!  If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
 293         - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
 294         - fs is inlined (say it's small)
 295         - now there's another opportunity to apply the RULE
 296
 297 This showed up when compiling Control.Concurrent.Chan.getChanContents.
 298
 299
 300 \begin{code}
 301 occAnalBind _ env (Rec pairs) body_usage
 302   = foldr occAnalRec (body_usage, []) sccs
 303         -- For a recursive group, we
 304         --      * occ-analyse all the RHSs
 305         --      * compute strongly-connected components
 306         --      * feed those components to occAnalRec
 307   where
 308     -------------Dependency analysis ------------------------------
 309     bndr_set = mkVarSet (map fst pairs)
 310
 311     sccs :: [SCC (Node Details)]
 312     sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompFromEdgedVerticesR rec_edges
 313
 314     rec_edges :: [Node Details]
 315     rec_edges = {-# SCC "occAnalBind.assoc" #-}  map make_node pairs
 316
 317     make_node (bndr, rhs)
 318         = (ND bndr rhs' all_rhs_usage rhs_fvs, varUnique bndr, out_edges)
 319         where
 320           (rhs_usage, rhs') = occAnalRhs env bndr rhs
 321           all_rhs_usage = addIdOccs rhs_usage rule_vars -- Note [Rules are extra RHSs]
 322           rhs_fvs   = intersectUFM_C (\b _ -> b) bndr_set rhs_usage
 323           out_edges = keysUFM (rhs_fvs `unionVarSet` rule_vars)
 324           rule_vars = idRuleVars bndr      -- See Note [Rule dependency info]
 325         -- (a -> b) means a mentions b
 326         -- Given the usage details (a UFM that gives occ info for each free var of
 327         -- the RHS) we can get the list of free vars -- or rather their Int keys --
 328         -- by just extracting the keys from the finite map.  Grimy, but fast.
 329         -- Previously we had this:
 330         --      [ bndr | bndr <- bndrs,
 331         --               maybeToBool (lookupVarEnv rhs_usage bndr)]
 332         -- which has n**2 cost, and this meant that edges_from alone
 333         -- consumed 10% of total runtime!
 334
 335 -----------------------------
 336 occAnalRec :: SCC (Node Details) -> (UsageDetails, [CoreBind])
 337                                  -> (UsageDetails, [CoreBind])
 338
 339         -- The NonRec case is just like a Let (NonRec ...) above
 340 occAnalRec (AcyclicSCC (ND bndr rhs rhs_usage _, _, _)) (body_usage, binds)
 341   | not (bndr `usedIn` body_usage)
 342   = (body_usage, binds)
 343
 344   | otherwise                   -- It's mentioned in the body
 345   = (body_usage' +++ rhs_usage,
 346      NonRec tagged_bndr rhs : binds)
 347   where
 348     (body_usage', tagged_bndr) = tagBinder body_usage bndr
 349
 350
 351         -- The Rec case is the interesting one
 352         -- See Note [Loop breaking]
 353 occAnalRec (CyclicSCC nodes) (body_usage, binds)
 354   | not (any (`usedIn` body_usage) bndrs)       -- NB: look at body_usage, not total_usage
 355   = (body_usage, binds)                         -- Dead code
 356
 357   | otherwise   -- At this point we always build a single Rec
 358   = (final_usage, Rec pairs : binds)
 359
 360   where
 361     bndrs    = [b | (ND b _ _ _, _, _) <- nodes]
 362     bndr_set = mkVarSet bndrs
 363
 364         ----------------------------
 365         -- Tag the binders with their occurrence info
 366     total_usage = foldl add_usage body_usage nodes
 367     add_usage usage_so_far (ND _ _ rhs_usage _, _, _) = usage_so_far +++ rhs_usage
 368     (final_usage, tagged_nodes) = mapAccumL tag_node total_usage nodes
 369
 370     tag_node :: UsageDetails -> Node Details -> (UsageDetails, Node Details)
 371         -- (a) Tag the binders in the details with occ info
 372         -- (b) Mark the binder with "weak loop-breaker" OccInfo
 373         --      saying "no preInlineUnconditionally" if it is used
 374         --      in any rule (lhs or rhs) of the recursive group
 375         --      See Note [Weak loop breakers]
 376     tag_node usage (ND bndr rhs rhs_usage rhs_fvs, k, ks)
 377       = (usage `delVarEnv` bndr, (ND bndr2 rhs rhs_usage rhs_fvs, k, ks))
 378       where
 379         bndr2 | bndr `elemVarSet` all_rule_fvs = makeLoopBreaker True bndr1
 380               | otherwise                      = bndr1
 381         bndr1 = setBinderOcc usage bndr
 382     all_rule_fvs = bndr_set `intersectVarSet` foldr (unionVarSet . idRuleVars)
 383                                                     emptyVarSet bndrs
 384
 385         ----------------------------
 386         -- Now reconstruct the cycle
 387     pairs | no_rules  = reOrderCycle 0 tagged_nodes []
 388           | otherwise = foldr (reOrderRec 0) [] $
 389                         stronglyConnCompFromEdgedVerticesR loop_breaker_edges
 390
 391         -- See Note [Choosing loop breakers] for loop_breaker_edges
 392     loop_breaker_edges = map mk_node tagged_nodes
 393     mk_node (details@(ND _ _ _ rhs_fvs), k, _) = (details, k, new_ks)
 394         where
 395           new_ks = keysUFM (extendFvs rule_fv_env rhs_fvs rhs_fvs)
 396
 397     ------------------------------------
 398     rule_fv_env :: IdEnv IdSet  -- Variables from this group mentioned in RHS of rules
 399                                 -- Domain is *subset* of bound vars (others have no rule fvs)
 400     rule_fv_env = rule_loop init_rule_fvs
 401
 402     no_rules      = null init_rule_fvs
 403     init_rule_fvs = [(b, rule_fvs)
 404                     | b <- bndrs
 405                     , isId b
 406                     , let rule_fvs = idRuleRhsVars b `intersectVarSet` bndr_set
 407                     , not (isEmptyVarSet rule_fvs)]
 408
 409     rule_loop :: [(Id,IdSet)] -> IdEnv IdSet    -- Finds fixpoint
 410     rule_loop fv_list
 411         | no_change = env
 412         | otherwise = rule_loop new_fv_list
 413         where
 414           env = mkVarEnv init_rule_fvs
 415           (no_change, new_fv_list) = mapAccumL bump True fv_list
 416           bump no_change (b,fvs)
 417                 | new_fvs `subVarSet` fvs = (no_change, (b,fvs))
 418                 | otherwise               = (False,     (b,new_fvs `unionVarSet` fvs))
 419                 where
 420                   new_fvs = extendFvs env emptyVarSet fvs
 421
 422 extendFvs :: IdEnv IdSet -> IdSet -> IdSet -> IdSet
 423 -- (extendFVs env fvs s) returns (fvs `union` env(s))
 424 extendFvs env fvs id_set
 425   = foldUFM_Directly add fvs id_set
 426   where
 427     add uniq _ fvs
 428         = case lookupVarEnv_Directly env uniq  of
 429             Just fvs' -> fvs' `unionVarSet` fvs
 430             Nothing   -> fvs
 431 \end{code}
 432
 433 @reOrderRec@ is applied to the list of (binder,rhs) pairs for a cyclic
 434 strongly connected component (there's guaranteed to be a cycle).  It returns the
 435 same pairs, but
 436         a) in a better order,
 437         b) with some of the Ids having a IAmALoopBreaker pragma
 438
 439 The "loop-breaker" Ids are sufficient to break all cycles in the SCC.  This means
 440 that the simplifier can guarantee not to loop provided it never records an inlining
 441 for these no-inline guys.
 442
 443 Furthermore, the order of the binds is such that if we neglect dependencies
 444 on the no-inline Ids then the binds are topologically sorted.  This means
 445 that the simplifier will generally do a good job if it works from top bottom,
 446 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
 447
 448 ==============
 449 [June 98: I don't understand the following paragraphs, and I've
 450           changed the a=b case again so that it isn't a special case any more.]
 451
 452 Here's a case that bit me:
 453
 454         letrec
 455                 a = b
 456                 b = \x. BIG
 457         in
 458         ...a...a...a....
 459
 460 Re-ordering doesn't change the order of bindings, but there was no loop-breaker.
 461
 462 My solution was to make a=b bindings record b as Many, rather like INLINE bindings.
 463 Perhaps something cleverer would suffice.
 464 ===============
 465
 466
 467 \begin{code}
 468 type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
 469                                                 -- which is gotten from the Id.
 470 data Details = ND Id            -- Binder
 471                   CoreExpr      -- RHS
 472
 473                   UsageDetails  -- Full usage from RHS,
 474                                 -- including *both* RULES *and* InlineRule unfolding
 475
 476                   IdSet         -- Other binders *from this Rec group* mentioned in
 477                                 --   * the  RHS
 478                                 --   * any InlineRule unfolding
 479                                 -- but *excluding* any RULES
 480
 481 reOrderRec :: Int -> SCC (Node Details)
 482            -> [(Id,CoreExpr)] -> [(Id,CoreExpr)]
 483 -- Sorted into a plausible order.  Enough of the Ids have
 484 --      IAmALoopBreaker pragmas that there are no loops left.
 485 reOrderRec _ (AcyclicSCC (ND bndr rhs _ _, _, _)) pairs = (bndr, rhs) : pairs
 486 reOrderRec depth (CyclicSCC cycle)                pairs = reOrderCycle depth cycle pairs
 487
 488 reOrderCycle :: Int -> [Node Details] -> [(Id,CoreExpr)] -> [(Id,CoreExpr)]
 489 reOrderCycle _ [] _
 490   = panic "reOrderCycle"
 491 reOrderCycle _ [bind] pairs    -- Common case of simple self-recursion
 492   = (makeLoopBreaker False bndr, rhs) : pairs
 493   where
 494     (ND bndr rhs _ _, _, _) = bind
 495
 496 reOrderCycle depth (bind : binds) pairs
 497   =     -- Choose a loop breaker, mark it no-inline,
 498         -- do SCC analysis on the rest, and recursively sort them out
 499 --    pprTrace "reOrderCycle" (ppr [b | (ND b _ _ _, _, _) <- bind:binds]) $
 500     foldr (reOrderRec new_depth)
 501           ([ (makeLoopBreaker False bndr, rhs)
 502            | (ND bndr rhs _ _, _, _) <- chosen_binds] ++ pairs)
 503           (stronglyConnCompFromEdgedVerticesR unchosen)
 504   where
 505     (chosen_binds, unchosen) = choose_loop_breaker [bind] (score bind) [] binds
 506
 507     approximate_loop_breaker = depth >= 2
 508     new_depth | approximate_loop_breaker = 0
 509               | otherwise                = depth+1
 510         -- After two iterations (d=0, d=1) give up
 511         -- and approximate, returning to d=0
 512
 513         -- This loop looks for the bind with the lowest score
 514         -- to pick as the loop  breaker.  The rest accumulate in
 515     choose_loop_breaker loop_binds _loop_sc acc []
 516         = (loop_binds, acc)        -- Done
 517
 518         -- If approximate_loop_breaker is True, we pick *all*
 519         -- nodes with lowest score, else just one
 520         -- See Note [Complexity of loop breaking]
 521     choose_loop_breaker loop_binds loop_sc acc (bind : binds)
 522         | sc < loop_sc  -- Lower score so pick this new one
 523         = choose_loop_breaker [bind] sc (loop_binds ++ acc) binds
 524
 525         | approximate_loop_breaker && sc == loop_sc
 526         = choose_loop_breaker (bind : loop_binds) loop_sc acc binds
 527
 528         | otherwise     -- Higher score so don't pick it
 529         = choose_loop_breaker loop_binds loop_sc (bind : acc) binds
 530         where
 531           sc = score bind
 532
 533     score :: Node Details -> Int        -- Higher score => less likely to be picked as loop breaker
 534     score (ND bndr rhs _ _, _, _)
 535         | not (isId bndr) = 100     -- A type or cercion varialbe is never a loop breaker
 536
 537         | isDFunId bndr = 9   -- Never choose a DFun as a loop breaker
 538                               -- Note [DFuns should not be loop breakers]
 539
 540         | Just (inl_source, _) <- isStableUnfolding_maybe (idUnfolding bndr)
 541         = case inl_source of
 542              InlineWrapper {} -> 10  -- Note [INLINE pragmas]
 543              _other           ->  3  -- Data structures are more important than this
 544                                      -- so that dictionary/method recursion unravels
 545                 -- Note that this case hits all InlineRule things, so we
 546                 -- never look at 'rhs for InlineRule stuff. That's right, because
 547                 -- 'rhs' is irrelevant for inlining things with an InlineRule
 548
 549         | is_con_app rhs = 5  -- Data types help with cases: Note [Constructor applications]
 550
 551         | exprIsTrivial rhs = 10  -- Practically certain to be inlined
 552                 -- Used to have also: && not (isExportedId bndr)
 553                 -- But I found this sometimes cost an extra iteration when we have
 554                 --      rec { d = (a,b); a = ...df...; b = ...df...; df = d }
 555                 -- where df is the exported dictionary. Then df makes a really
 556                 -- bad choice for loop breaker
 557
 558
 559 -- If an Id is marked "never inline" then it makes a great loop breaker
 560 -- The only reason for not checking that here is that it is rare
 561 -- and I've never seen a situation where it makes a difference,
 562 -- so it probably isn't worth the time to test on every binder
 563 --      | isNeverActive (idInlinePragma bndr) = -10
 564
 565         | isOneOcc (idOccInfo bndr) = 2  -- Likely to be inlined
 566
 567         | canUnfold (realIdUnfolding bndr) = 1
 568                 -- The Id has some kind of unfolding
 569                 -- Ignore loop-breaker-ness here because that is what we are setting!
 570
 571         | otherwise = 0
 572
 573         -- Checking for a constructor application
 574         -- Cheap and cheerful; the simplifer moves casts out of the way
 575         -- The lambda case is important to spot x = /\a. C (f a)
 576         -- which comes up when C is a dictionary constructor and
 577         -- f is a default method.
 578         -- Example: the instance for Show (ST s a) in GHC.ST
 579         --
 580         -- However we *also* treat (\x. C p q) as a con-app-like thing,
 581         --      Note [Closure conversion]
 582     is_con_app (Var v)    = isConLikeId v
 583     is_con_app (App f _)  = is_con_app f
 584     is_con_app (Lam _ e)  = is_con_app e
 585     is_con_app (Note _ e) = is_con_app e
 586     is_con_app _          = False
 587
 588 makeLoopBreaker :: Bool -> Id -> Id
 589 -- Set the loop-breaker flag: see Note [Weak loop breakers]
 590 makeLoopBreaker weak bndr
 591   = ASSERT2( isId bndr, ppr bndr ) setIdOccInfo bndr (IAmALoopBreaker weak)
 592 \end{code}
 593
 594 Note [Complexity of loop breaking]
 595 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 596 The loop-breaking algorithm knocks out one binder at a time, and
 597 performs a new SCC analysis on the remaining binders.  That can
 598 behave very badly in tightly-coupled groups of bindings; in the
 599 worst case it can be (N**2)*log N, because it does a full SCC
 600 on N, then N-1, then N-2 and so on.
 601
 602 To avoid this, we switch plans after 2 (or whatever) attempts:
 603   Plan A: pick one binder with the lowest score, make it
 604           a loop breaker, and try again
 605   Plan B: pick *all* binders with the lowest score, make them
 606           all loop breakers, and try again
 607 Since there are only a small finite number of scores, this will
 608 terminate in a constant number of iterations, rather than O(N)
 609 iterations.
 610
 611 You might thing that it's very unlikely, but RULES make it much
 612 more likely.  Here's a real example from Trac #1969:
 613   Rec { $dm = \d.\x. op d
 614         {-# RULES forall d. $dm Int d  = $s$dm1
 615                   forall d. $dm Bool d = $s$dm2 #-}
 616
 617         dInt = MkD .... opInt ...
 618         dInt = MkD .... opBool ...
 619         opInt  = $dm dInt
 620         opBool = $dm dBool
 621
 622         $s$dm1 = \x. op dInt
 623         $s$dm2 = \x. op dBool }
 624 The RULES stuff means that we can't choose $dm as a loop breaker
 625 (Note [Choosing loop breakers]), so we must choose at least (say)
 626 opInt *and* opBool, and so on.  The number of loop breakders is
 627 linear in the number of instance declarations.
 628
 629 Note [INLINE pragmas]
 630 ~~~~~~~~~~~~~~~~~~~~~
 631 Avoid choosing a function with an INLINE pramga as the loop breaker!
 632 If such a function is mutually-recursive with a non-INLINE thing,
 633 then the latter should be the loop-breaker.
 634
 635 Usually this is just a question of optimisation. But a particularly
 636 bad case is wrappers generated by the demand analyser: if you make
 637 then into a loop breaker you may get an infinite inlining loop.  For
 638 example:
 639   rec {
 640         $wfoo x = ....foo x....
 641
 642         {-loop brk-} foo x = ...$wfoo x...
 643   }
 644 The interface file sees the unfolding for $wfoo, and sees that foo is
 645 strict (and hence it gets an auto-generated wrapper).  Result: an
 646 infinite inlining in the importing scope.  So be a bit careful if you
 647 change this.  A good example is Tree.repTree in
 648 nofib/spectral/minimax. If the repTree wrapper is chosen as the loop
 649 breaker then compiling Game.hs goes into an infinite loop.  This
 650 happened when we gave is_con_app a lower score than inline candidates:
 651
 652   Tree.repTree
 653     = __inline_me (/\a. \w w1 w2 ->
 654                    case Tree.$wrepTree @ a w w1 w2 of
 655                     { (# ww1, ww2 #) -> Branch @ a ww1 ww2 })
 656   Tree.$wrepTree
 657     = /\a w w1 w2 ->
 658       (# w2_smP, map a (Tree a) (Tree.repTree a w1 w) (w w2) #)
 659
 660 Here we do *not* want to choose 'repTree' as the loop breaker.
 661
 662 Note [DFuns should not be loop breakers]
 663 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 664 It's particularly bad to make a DFun into a loop breaker.  See
 665 Note [How instance declarations are translated] in TcInstDcls
 666
 667 We give DFuns a higher score than ordinary CONLIKE things because
 668 if there's a choice we want the DFun to be the non-looop breker. Eg
 669
 670 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
 671
 672       $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
 673       {-# DFUN #-}
 674       $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
 675     }
 676
 677 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
 678 if we can't unravel the DFun first.
 679
 680 Note [Constructor applications]
 681 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 682 It's really really important to inline dictionaries.  Real
 683 example (the Enum Ordering instance from GHC.Base):
 684
 685      rec     f = \ x -> case d of (p,q,r) -> p x
 686              g = \ x -> case d of (p,q,r) -> q x
 687              d = (v, f, g)
 688
 689 Here, f and g occur just once; but we can't inline them into d.
 690 On the other hand we *could* simplify those case expressions if
 691 we didn't stupidly choose d as the loop breaker.
 692 But we won't because constructor args are marked "Many".
 693 Inlining dictionaries is really essential to unravelling
 694 the loops in static numeric dictionaries, see GHC.Float.
 695
 696 Note [Closure conversion]
 697 ~~~~~~~~~~~~~~~~~~~~~~~~~
 698 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
 699 The immediate motivation came from the result of a closure-conversion transformation
 700 which generated code like this:
 701
 702     data Clo a b = forall c. Clo (c -> a -> b) c
 703
 704     ($:) :: Clo a b -> a -> b
 705     Clo f env $: x = f env x
 706
 707     rec { plus = Clo plus1 ()
 708
 709         ; plus1 _ n = Clo plus2 n
 710
 711         ; plus2 Zero     n = n
 712         ; plus2 (Succ m) n = Succ (plus $: m $: n) }
 713
 714 If we inline 'plus' and 'plus1', everything unravels nicely.  But if
 715 we choose 'plus1' as the loop breaker (which is entirely possible
 716 otherwise), the loop does not unravel nicely.
 717
 718
 719 @occAnalRhs@ deals with the question of bindings where the Id is marked
 720 by an INLINE pragma.  For these we record that anything which occurs
 721 in its RHS occurs many times.  This pessimistically assumes that ths
 722 inlined binder also occurs many times in its scope, but if it doesn't
 723 we'll catch it next time round.  At worst this costs an extra simplifier pass.
 724 ToDo: try using the occurrence info for the inline'd binder.
 725
 726 [March 97] We do the same for atomic RHSs.  Reason: see notes with reOrderRec.
 727 [June 98, SLPJ]  I've undone this change; I don't understand it.  See notes with reOrderRec.
 728
 729
 730 \begin{code}
 731 occAnalRhs :: OccEnv
 732            -> Id -> CoreExpr    -- Binder and rhs
 733                                 -- For non-recs the binder is alrady tagged
 734                                 -- with occurrence info
 735            -> (UsageDetails, CoreExpr)
 736               -- Returned usage details includes any INLINE rhs
 737
 738 occAnalRhs env id rhs
 739   | isId id   = (addIdOccs rhs_usage (idUnfoldingVars id), rhs')
 740   | otherwise = (rhs_usage, rhs')
 741         -- Include occurrences for the "extra RHS" from a CoreUnfolding
 742   where
 743     (rhs_usage, rhs') = occAnal ctxt rhs
 744     ctxt | certainly_inline id = env
 745          | otherwise           = rhsCtxt env
 746         -- Note that we generally use an rhsCtxt.  This tells the occ anal n
 747         -- that it's looking at an RHS, which has an effect in occAnalApp
 748         --
 749         -- But there's a problem.  Consider
 750         --      x1 = a0 : []
 751         --      x2 = a1 : x1
 752         --      x3 = a2 : x2
 753         --      g  = f x3
 754         -- First time round, it looks as if x1 and x2 occur as an arg of a
 755         -- let-bound constructor ==> give them a many-occurrence.
 756         -- But then x3 is inlined (unconditionally as it happens) and
 757         -- next time round, x2 will be, and the next time round x1 will be
 758         -- Result: multiple simplifier iterations.  Sigh.
 759         -- Crude solution: use rhsCtxt for things that occur just once...
 760
 761     certainly_inline id = case idOccInfo id of
 762                             OneOcc in_lam one_br _ -> not in_lam && one_br
 763                             _                      -> False
 764 \end{code}
 765
 766
 767
 768 \begin{code}
 769 addRuleUsage :: UsageDetails -> Var -> UsageDetails
 770 -- Add the usage from RULES in Id to the usage
 771 addRuleUsage usage var
 772   | isId var  = addIdOccs usage (idRuleVars var)
 773   | otherwise = usage
 774         -- idRuleVars here: see Note [Rule dependency info]
 775
 776 addIdOccs :: UsageDetails -> VarSet -> UsageDetails
 777 addIdOccs usage id_set = foldVarSet add usage id_set
 778   where
 779     add v u | isId v    = addOneOcc u v NoOccInfo
 780             | otherwise = u
 781         -- Give a non-committal binder info (i.e NoOccInfo) because
 782         --   a) Many copies of the specialised thing can appear
 783         --   b) We don't want to substitute a BIG expression inside a RULE
 784         --      even if that's the only occurrence of the thing
 785         --      (Same goes for INLINE.)
 786 \end{code}
 787
 788 Expressions
 789 ~~~~~~~~~~~
 790 \begin{code}
 791 occAnal :: OccEnv
 792         -> CoreExpr
 793         -> (UsageDetails,       -- Gives info only about the "interesting" Ids
 794             CoreExpr)
 795
 796 occAnal _   (Type t)  = (emptyDetails, Type t)
 797 occAnal env (Var v)   = (mkOneOcc env v False, Var v)
 798     -- At one stage, I gathered the idRuleVars for v here too,
 799     -- which in a way is the right thing to do.
 800     -- But that went wrong right after specialisation, when
 801     -- the *occurrences* of the overloaded function didn't have any
 802     -- rules in them, so the *specialised* versions looked as if they
 803     -- weren't used at all.
 804 \end{code}
 805
 806 We regard variables that occur as constructor arguments as "dangerousToDup":
 807
 808 \begin{verbatim}
 809 module A where
 810 f x = let y = expensive x in
 811       let z = (True,y) in
 812       (case z of {(p,q)->q}, case z of {(p,q)->q})
 813 \end{verbatim}
 814
 815 We feel free to duplicate the WHNF (True,y), but that means
 816 that y may be duplicated thereby.
 817
 818 If we aren't careful we duplicate the (expensive x) call!
 819 Constructors are rather like lambdas in this way.
 820
 821 \begin{code}
 822 occAnal _   expr@(Lit _) = (emptyDetails, expr)
 823 \end{code}
 824
 825 \begin{code}
 826 occAnal env (Note note@(SCC _) body)
 827   = case occAnal env body of { (usage, body') ->
 828     (mapVarEnv markInsideSCC usage, Note note body')
 829     }
 830
 831 occAnal env (Note note body)
 832   = case occAnal env body of { (usage, body') ->
 833     (usage, Note note body')
 834     }
 835
 836 occAnal env (Cast expr co)
 837   = case occAnal env expr of { (usage, expr') ->
 838       (markManyIf (isRhsEnv env) usage, Cast expr' co)
 839         -- If we see let x = y `cast` co
 840         -- then mark y as 'Many' so that we don't
 841         -- immediately inline y again.
 842     }
 843 \end{code}
 844
 845 \begin{code}
 846 occAnal env app@(App _ _)
 847   = occAnalApp env (collectArgs app)
 848
 849 -- Ignore type variables altogether
 850 --   (a) occurrences inside type lambdas only not marked as InsideLam
 851 --   (b) type variables not in environment
 852
 853 occAnal env (Lam x body) | isTyCoVar x
 854   = case occAnal env body of { (body_usage, body') ->
 855     (body_usage, Lam x body')
 856     }
 857
 858 -- For value lambdas we do a special hack.  Consider
 859 --      (\x. \y. ...x...)
 860 -- If we did nothing, x is used inside the \y, so would be marked
 861 -- as dangerous to dup.  But in the common case where the abstraction
 862 -- is applied to two arguments this is over-pessimistic.
 863 -- So instead, we just mark each binder with its occurrence
 864 -- info in the *body* of the multiple lambda.
 865 -- Then, the simplifier is careful when partially applying lambdas.
 866
 867 occAnal env expr@(Lam _ _)
 868   = case occAnal env_body body of { (body_usage, body') ->
 869     let
 870         (final_usage, tagged_binders) = tagLamBinders body_usage binders'
 871                       -- Use binders' to put one-shot info on the lambdas
 872
 873         --      URGH!  Sept 99: we don't seem to be able to use binders' here, because
 874         --      we get linear-typed things in the resulting program that we can't handle yet.
 875         --      (e.g. PrelShow)  TODO
 876
 877         really_final_usage = if linear then
 878                                 final_usage
 879                              else
 880                                 mapVarEnv markInsideLam final_usage
 881     in
 882     (really_final_usage,
 883      mkLams tagged_binders body') }
 884   where
 885     env_body        = vanillaCtxt (trimOccEnv env binders)
 886                         -- Body is (no longer) an RhsContext
 887     (binders, body) = collectBinders expr
 888     binders'        = oneShotGroup env binders
 889     linear          = all is_one_shot binders'
 890     is_one_shot b   = isId b && isOneShotBndr b
 891
 892 occAnal env (Case scrut bndr ty alts)
 893   = case occ_anal_scrut scrut alts     of { (scrut_usage, scrut') ->
 894     case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts')   ->
 895     let
 896         alts_usage  = foldr1 combineAltsUsageDetails alts_usage_s
 897         (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
 898         total_usage = scrut_usage +++ alts_usage1
 899     in
 900     total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
 901   where
 902         -- Note [Case binder usage]
 903         -- ~~~~~~~~~~~~~~~~~~~~~~~~
 904         -- The case binder gets a usage of either "many" or "dead", never "one".
 905         -- Reason: we like to inline single occurrences, to eliminate a binding,
 906         -- but inlining a case binder *doesn't* eliminate a binding.
 907         -- We *don't* want to transform
 908         --      case x of w { (p,q) -> f w }
 909         -- into
 910         --      case x of w { (p,q) -> f (p,q) }
 911     tag_case_bndr usage bndr
 912       = case lookupVarEnv usage bndr of
 913           Nothing -> (usage,                  setIdOccInfo bndr IAmDead)
 914           Just _  -> (usage `delVarEnv` bndr, setIdOccInfo bndr NoOccInfo)
 915
 916     alt_env      = mkAltEnv env scrut bndr
 917     occ_anal_alt = occAnalAlt alt_env bndr
 918
 919     occ_anal_scrut (Var v) (alt1 : other_alts)
 920         | not (null other_alts) || not (isDefaultAlt alt1)
 921         = (mkOneOcc env v True, Var v)  -- The 'True' says that the variable occurs
 922                                         -- in an interesting context; the case has
 923                                         -- at least one non-default alternative
 924     occ_anal_scrut scrut _alts
 925         = occAnal (vanillaCtxt env) scrut    -- No need for rhsCtxt
 926
 927 occAnal env (Let bind body)
 928   = case occAnal env_body body                    of { (body_usage, body') ->
 929     case occAnalBind env env_body bind body_usage of { (final_usage, new_binds) ->
 930        (final_usage, mkLets new_binds body') }}
 931   where
 932     env_body = trimOccEnv env (bindersOf bind)
 933
 934 occAnalArgs :: OccEnv -> [CoreExpr] -> (UsageDetails, [CoreExpr])
 935 occAnalArgs env args
 936   = case mapAndUnzip (occAnal arg_env) args of  { (arg_uds_s, args') ->
 937     (foldr (+++) emptyDetails arg_uds_s, args')}
 938   where
 939     arg_env = vanillaCtxt env
 940 \end{code}
 941
 942 Applications are dealt with specially because we want
 943 the "build hack" to work.
 944
 945 \begin{code}
 946 occAnalApp :: OccEnv
 947            -> (Expr CoreBndr, [Arg CoreBndr])
 948            -> (UsageDetails, Expr CoreBndr)
 949 occAnalApp env (Var fun, args)
 950   = case args_stuff of { (args_uds, args') ->
 951     let
 952        final_args_uds = markManyIf (isRhsEnv env && is_exp) args_uds
 953           -- We mark the free vars of the argument of a constructor or PAP
 954           -- as "many", if it is the RHS of a let(rec).
 955           -- This means that nothing gets inlined into a constructor argument
 956           -- position, which is what we want.  Typically those constructor
 957           -- arguments are just variables, or trivial expressions.
 958           --
 959           -- This is the *whole point* of the isRhsEnv predicate
 960     in
 961     (fun_uds +++ final_args_uds, mkApps (Var fun) args') }
 962   where
 963     fun_uniq = idUnique fun
 964     fun_uds  = mkOneOcc env fun (valArgCount args > 0)
 965     is_exp = isExpandableApp fun (valArgCount args)
 966            -- See Note [CONLIKE pragma] in BasicTypes
 967            -- The definition of is_exp should match that in
 968            -- Simplify.prepareRhs
 969
 970                 -- Hack for build, fold, runST
 971     args_stuff  | fun_uniq == buildIdKey    = appSpecial env 2 [True,True]  args
 972                 | fun_uniq == augmentIdKey  = appSpecial env 2 [True,True]  args
 973                 | fun_uniq == foldrIdKey    = appSpecial env 3 [False,True] args
 974                 | fun_uniq == runSTRepIdKey = appSpecial env 2 [True]       args
 975                         -- (foldr k z xs) may call k many times, but it never
 976                         -- shares a partial application of k; hence [False,True]
 977                         -- This means we can optimise
 978                         --      foldr (\x -> let v = ...x... in \y -> ...v...) z xs
 979                         -- by floating in the v
 980
 981                 | otherwise = occAnalArgs env args
 982
 983
 984 occAnalApp env (fun, args)
 985   = case occAnal (addAppCtxt env args) fun of   { (fun_uds, fun') ->
 986         -- The addAppCtxt is a bit cunning.  One iteration of the simplifier
 987         -- often leaves behind beta redexs like
 988         --      (\x y -> e) a1 a2
 989         -- Here we would like to mark x,y as one-shot, and treat the whole
 990         -- thing much like a let.  We do this by pushing some True items
 991         -- onto the context stack.
 992
 993     case occAnalArgs env args of        { (args_uds, args') ->
 994     let
 995         final_uds = fun_uds +++ args_uds
 996     in
 997     (final_uds, mkApps fun' args') }}
 998
 999
1000 markManyIf :: Bool              -- If this is true
1001            -> UsageDetails      -- Then do markMany on this
1002            -> UsageDetails
1003 markManyIf True  uds = mapVarEnv markMany uds
1004 markManyIf False uds = uds
1005
1006 appSpecial :: OccEnv
1007            -> Int -> CtxtTy     -- Argument number, and context to use for it
1008            -> [CoreExpr]
1009            -> (UsageDetails, [CoreExpr])
1010 appSpecial env n ctxt args
1011   = go n args
1012   where
1013     arg_env = vanillaCtxt env
1014
1015     go _ [] = (emptyDetails, [])        -- Too few args
1016
1017     go 1 (arg:args)                     -- The magic arg
1018       = case occAnal (setCtxtTy arg_env ctxt) arg of    { (arg_uds, arg') ->
1019         case occAnalArgs env args of                    { (args_uds, args') ->
1020         (arg_uds +++ args_uds, arg':args') }}
1021
1022     go n (arg:args)
1023       = case occAnal arg_env arg of     { (arg_uds, arg') ->
1024         case go (n-1) args of           { (args_uds, args') ->
1025         (arg_uds +++ args_uds, arg':args') }}
1026 \end{code}
1027
1028
1029 Note [Binders in case alternatives]
1030 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1031 Consider
1032     case x of y { (a,b) -> f y }
1033 We treat 'a', 'b' as dead, because they don't physically occur in the
1034 case alternative.  (Indeed, a variable is dead iff it doesn't occur in
1035 its scope in the output of OccAnal.)  It really helps to know when
1036 binders are unused.  See esp the call to isDeadBinder in
1037 Simplify.mkDupableAlt
1038
1039 In this example, though, the Simplifier will bring 'a' and 'b' back to
1040 life, beause it binds 'y' to (a,b) (imagine got inlined and
1041 scrutinised y).
1042
1043 \begin{code}
1044 occAnalAlt :: OccEnv
1045            -> CoreBndr
1046            -> CoreAlt
1047            -> (UsageDetails, Alt IdWithOccInfo)
1048 occAnalAlt env case_bndr (con, bndrs, rhs)
1049   = let
1050         env' = trimOccEnv env bndrs
1051     in
1052     case occAnal env' rhs of { (rhs_usage1, rhs1) ->
1053     let
1054         proxies = getProxies env' case_bndr
1055         (rhs_usage2, rhs2) = foldrBag wrapProxy (rhs_usage1, rhs1) proxies
1056         (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage2 bndrs
1057         bndrs' = tagged_bndrs      -- See Note [Binders in case alternatives]
1058     in
1059     (alt_usg, (con, bndrs', rhs2)) }
1060
1061 wrapProxy :: ProxyBind -> (UsageDetails, CoreExpr) -> (UsageDetails, CoreExpr)
1062 wrapProxy (bndr, rhs_var, co) (body_usg, body)
1063   | not (bndr `usedIn` body_usg)
1064   = (body_usg, body)
1065   | otherwise
1066   = (body_usg' +++ rhs_usg, Let (NonRec tagged_bndr rhs) body)
1067   where
1068     (body_usg', tagged_bndr) = tagBinder body_usg bndr
1069     rhs_usg = unitVarEnv rhs_var NoOccInfo      -- We don't need exact info
1070     rhs = mkCoerceI co (Var rhs_var)
1071 \end{code}
1072
1073
1074 %************************************************************************
1075 %*                                                                      *
1076                     OccEnv
1077 %*                                                                      *
1078 %************************************************************************
1079
1080 \begin{code}
1081 data OccEnv
1082   = OccEnv { occ_encl  :: !OccEncl      -- Enclosing context information
1083            , occ_ctxt  :: !CtxtTy       -- Tells about linearity
1084            , occ_proxy :: ProxyEnv }
1085
1086
1087 -----------------------------
1088 -- OccEncl is used to control whether to inline into constructor arguments
1089 -- For example:
1090 --      x = (p,q)               -- Don't inline p or q
1091 --      y = /\a -> (p a, q a)   -- Still don't inline p or q
1092 --      z = f (p,q)             -- Do inline p,q; it may make a rule fire
1093 -- So OccEncl tells enought about the context to know what to do when
1094 -- we encounter a contructor application or PAP.
1095
1096 data OccEncl
1097   = OccRhs              -- RHS of let(rec), albeit perhaps inside a type lambda
1098                         -- Don't inline into constructor args here
1099   | OccVanilla          -- Argument of function, body of lambda, scruintee of case etc.
1100                         -- Do inline into constructor args here
1101
1102 instance Outputable OccEncl where
1103   ppr OccRhs     = ptext (sLit "occRhs")
1104   ppr OccVanilla = ptext (sLit "occVanilla")
1105
1106 type CtxtTy = [Bool]
1107         -- []           No info
1108         --
1109         -- True:ctxt    Analysing a function-valued expression that will be
1110         --                      applied just once
1111         --
1112         -- False:ctxt   Analysing a function-valued expression that may
1113         --                      be applied many times; but when it is,
1114         --                      the CtxtTy inside applies
1115
1116 initOccEnv :: OccEnv
1117 initOccEnv = OccEnv { occ_encl  = OccVanilla
1118                     , occ_ctxt  = []
1119                     , occ_proxy = PE emptyVarEnv emptyVarSet }
1120
1121 vanillaCtxt :: OccEnv -> OccEnv
1122 vanillaCtxt env = OccEnv { occ_encl = OccVanilla
1123                          , occ_ctxt = []
1124                          , occ_proxy = occ_proxy env }
1125
1126 rhsCtxt :: OccEnv -> OccEnv
1127 rhsCtxt env = OccEnv { occ_encl = OccRhs, occ_ctxt = []
1128                      , occ_proxy = occ_proxy env }
1129
1130 setCtxtTy :: OccEnv -> CtxtTy -> OccEnv
1131 setCtxtTy env ctxt = env { occ_ctxt = ctxt }
1132
1133 isRhsEnv :: OccEnv -> Bool
1134 isRhsEnv (OccEnv { occ_encl = OccRhs })     = True
1135 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
1136
1137 oneShotGroup :: OccEnv -> [CoreBndr] -> [CoreBndr]
1138         -- The result binders have one-shot-ness set that they might not have had originally.
1139         -- This happens in (build (\cn -> e)).  Here the occurrence analyser
1140         -- linearity context knows that c,n are one-shot, and it records that fact in
1141         -- the binder. This is useful to guide subsequent float-in/float-out tranformations
1142
1143 oneShotGroup (OccEnv { occ_ctxt = ctxt }) bndrs
1144   = go ctxt bndrs []
1145   where
1146     go _ [] rev_bndrs = reverse rev_bndrs
1147
1148     go (lin_ctxt:ctxt) (bndr:bndrs) rev_bndrs
1149         | isId bndr = go ctxt bndrs (bndr':rev_bndrs)
1150         where
1151           bndr' | lin_ctxt  = setOneShotLambda bndr
1152                 | otherwise = bndr
1153
1154     go ctxt (bndr:bndrs) rev_bndrs = go ctxt bndrs (bndr:rev_bndrs)
1155
1156 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
1157 addAppCtxt env@(OccEnv { occ_ctxt = ctxt }) args
1158   = env { occ_ctxt = replicate (valArgCount args) True ++ ctxt }
1159 \end{code}
1160
1161 %************************************************************************
1162 %*                                                                      *
1163                     ProxyEnv
1164 %*                                                                      *
1165 %************************************************************************
1166
1167 \begin{code}
1168 data ProxyEnv
1169    = PE (IdEnv (Id, [(Id,CoercionI)])) VarSet
1170         -- Main env, and its free variables (of both range and domain)
1171 \end{code}
1172
1173 Note [ProxyEnv]
1174 ~~~~~~~~~~~~~~~
1175 The ProxyEnv keeps track of the connection between case binders and
1176 scrutinee.  Specifically, if
1177      sc |-> (sc, [...(cb, co)...])
1178 is a binding in the ProxyEnv, then
1179      cb = sc |> coi
1180 Typically we add such a binding when encountering the case expression
1181      case (sc |> coi) of cb { ... }
1182
1183 Things to note:
1184   * The domain of the ProxyEnv is the variable (or casted variable)
1185     scrutinees of enclosing cases.  This is additionally used
1186     to ensure we gather occurrence info even for GlobalId scrutinees;
1187     see Note [Binder swap for GlobalId scrutinee]
1188
1189   * The ProxyEnv is just an optimisation; you can throw away any
1190     element without losing correctness.  And we do so when pushing
1191     it inside a binding (see trimProxyEnv).
1192
1193   * Once scrutinee might map to many case binders:  Eg
1194       case sc of cb1 { DEFAULT -> ....case sc of cb2 { ... } .. }
1195
1196 INVARIANTS
1197  * If sc1 |-> (sc2, [...(cb, co)...]), then sc1==sc2
1198    It's a UniqFM and we sometimes need the domain Id
1199
1200  * Any particular case binder 'cb' occurs only once in entire range
1201
1202  * No loops
1203
1204 The Main Reason for having a ProxyEnv is so that when we encounter
1205     case e of cb { pi -> ri }
1206 we can find all the in-scope variables derivable from 'cb',
1207 and effectively add let-bindings for them thus:
1208     case e of cb { pi -> let { x = ..cb..; y = ...cb.. }
1209                          in ri }
1210 The function getProxies finds these bindings; then we
1211 add just the necessary ones, using wrapProxy.
1212
1213 More info under Note [Binder swap]
1214
1215 Note [Binder swap]
1216 ~~~~~~~~~~~~~~~~~~
1217 We do these two transformations right here:
1218
1219  (1)   case x of b { pi -> ri }
1220     ==>
1221       case x of b { pi -> let x=b in ri }
1222
1223  (2)  case (x |> co) of b { pi -> ri }
1224     ==>
1225       case (x |> co) of b { pi -> let x = b |> sym co in ri }
1226
1227     Why (2)?  See Note [Case of cast]
1228
1229 In both cases, in a particular alternative (pi -> ri), we only
1230 add the binding if
1231   (a) x occurs free in (pi -> ri)
1232         (ie it occurs in ri, but is not bound in pi)
1233   (b) the pi does not bind b (or the free vars of co)
1234 We need (a) and (b) for the inserted binding to be correct.
1235
1236 For the alternatives where we inject the binding, we can transfer
1237 all x's OccInfo to b.  And that is the point.
1238
1239 Notice that
1240   * The deliberate shadowing of 'x'.
1241   * That (a) rapidly becomes false, so no bindings are injected.
1242
1243 The reason for doing these transformations here is because it allows
1244 us to adjust the OccInfo for 'x' and 'b' as we go.
1245
1246   * Suppose the only occurrences of 'x' are the scrutinee and in the
1247     ri; then this transformation makes it occur just once, and hence
1248     get inlined right away.
1249
1250   * If we do this in the Simplifier, we don't know whether 'x' is used
1251     in ri, so we are forced to pessimistically zap b's OccInfo even
1252     though it is typically dead (ie neither it nor x appear in the
1253     ri).  There's nothing actually wrong with zapping it, except that
1254     it's kind of nice to know which variables are dead.  My nose
1255     tells me to keep this information as robustly as possible.
1256
1257 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
1258 {x=b}; it's Nothing if the binder-swap doesn't happen.
1259
1260 There is a danger though.  Consider
1261       let v = x +# y
1262       in case (f v) of w -> ...v...v...
1263 And suppose that (f v) expands to just v.  Then we'd like to
1264 use 'w' instead of 'v' in the alternative.  But it may be too
1265 late; we may have substituted the (cheap) x+#y for v in the
1266 same simplifier pass that reduced (f v) to v.
1267
1268 I think this is just too bad.  CSE will recover some of it.
1269
1270 Note [Case of cast]
1271 ~~~~~~~~~~~~~~~~~~~
1272 Consider        case (x `cast` co) of b { I# ->
1273                 ... (case (x `cast` co) of {...}) ...
1274 We'd like to eliminate the inner case.  That is the motivation for
1275 equation (2) in Note [Binder swap].  When we get to the inner case, we
1276 inline x, cancel the casts, and away we go.
1277
1278 Note [Binder swap on GlobalId scrutinees]
1279 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1280 When the scrutinee is a GlobalId we must take care in two ways
1281
1282  i) In order to *know* whether 'x' occurs free in the RHS, we need its
1283     occurrence info. BUT, we don't gather occurrence info for
1284     GlobalIds.  That's one use for the (small) occ_proxy env in OccEnv is
1285     for: it says "gather occurrence info for these.
1286
1287  ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
1288      has an External Name. See, for example, SimplEnv Note [Global Ids in
1289      the substitution].
1290
1291 Note [getProxies is subtle]
1292 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1293 The code for getProxies isn't all that obvious. Consider
1294
1295   case v |> cov  of x { DEFAULT ->
1296   case x |> cox1 of y { DEFAULT ->
1297   case x |> cox2 of z { DEFAULT -> r
1298
1299 These will give us a ProxyEnv looking like:
1300   x |-> (x, [(y, cox1), (z, cox2)])
1301   v |-> (v, [(x, cov)])
1302
1303 From this we want to extract the bindings
1304     x = z |> sym cox2
1305     v = x |> sym cov
1306     y = x |> cox1
1307
1308 Notice that later bindings may mention earlier ones, and that
1309 we need to go "both ways".
1310
1311 Historical note [no-case-of-case]
1312 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1313 We *used* to suppress the binder-swap in case expressions when
1314 -fno-case-of-case is on.  Old remarks:
1315     "This happens in the first simplifier pass,
1316     and enhances full laziness.  Here's the bad case:
1317             f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
1318     If we eliminate the inner case, we trap it inside the I# v -> arm,
1319     which might prevent some full laziness happening.  I've seen this
1320     in action in spectral/cichelli/Prog.hs:
1321              [(m,n) | m <- [1..max], n <- [1..max]]
1322     Hence the check for NoCaseOfCase."
1323 However, now the full-laziness pass itself reverses the binder-swap, so this
1324 check is no longer necessary.
1325
1326 Historical note [Suppressing the case binder-swap]
1327 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1328 This old note describes a problem that is also fixed by doing the
1329 binder-swap in OccAnal:
1330
1331     There is another situation when it might make sense to suppress the
1332     case-expression binde-swap. If we have
1333
1334         case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
1335                        ...other cases .... }
1336
1337     We'll perform the binder-swap for the outer case, giving
1338
1339         case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
1340                        ...other cases .... }
1341
1342     But there is no point in doing it for the inner case, because w1 can't
1343     be inlined anyway.  Furthermore, doing the case-swapping involves
1344     zapping w2's occurrence info (see paragraphs that follow), and that
1345     forces us to bind w2 when doing case merging.  So we get
1346
1347         case x of w1 { A -> let w2 = w1 in e1
1348                        B -> let w2 = w1 in e2
1349                        ...other cases .... }
1350
1351     This is plain silly in the common case where w2 is dead.
1352
1353     Even so, I can't see a good way to implement this idea.  I tried
1354     not doing the binder-swap if the scrutinee was already evaluated
1355     but that failed big-time:
1356
1357             data T = MkT !Int
1358
1359             case v of w  { MkT x ->
1360             case x of x1 { I# y1 ->
1361             case x of x2 { I# y2 -> ...
1362
1363     Notice that because MkT is strict, x is marked "evaluated".  But to
1364     eliminate the last case, we must either make sure that x (as well as
1365     x1) has unfolding MkT y1.  THe straightforward thing to do is to do
1366     the binder-swap.  So this whole note is a no-op.
1367
1368 It's fixed by doing the binder-swap in OccAnal because we can do the
1369 binder-swap unconditionally and still get occurrence analysis
1370 information right.
1371
1372 \begin{code}
1373 extendProxyEnv :: ProxyEnv -> Id -> CoercionI -> Id -> ProxyEnv
1374 -- (extendPE x co y) typically arises from
1375 --                case (x |> co) of y { ... }
1376 -- It extends the proxy env with the binding
1377 --                     y = x |> co
1378 extendProxyEnv pe scrut co case_bndr
1379   | scrut == case_bndr = PE env1 fvs1   -- If case_bndr shadows scrut,
1380   | otherwise          = PE env2 fvs2   --   don't extend
1381   where
1382     PE env1 fvs1 = trimProxyEnv pe [case_bndr]
1383     env2 = extendVarEnv_Acc add single env1 scrut1 (case_bndr,co)
1384     single cb_co = (scrut1, [cb_co])
1385     add cb_co (x, cb_cos) = (x, cb_co:cb_cos)
1386     fvs2 = fvs1 `unionVarSet`  freeVarsCoI co
1387                 `extendVarSet` case_bndr
1388                 `extendVarSet` scrut1
1389
1390     scrut1 = mkLocalId (localiseName (idName scrut)) (idType scrut)
1391         -- Localise the scrut_var before shadowing it; we're making a
1392         -- new binding for it, and it might have an External Name, or
1393         -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
1394         -- Also we don't want any INLILNE or NOINLINE pragmas!
1395
1396 -----------
1397 type ProxyBind = (Id, Id, CoercionI)
1398
1399 getProxies :: OccEnv -> Id -> Bag ProxyBind
1400 -- Return a bunch of bindings [...(xi,ei)...]
1401 -- such that  let { ...; xi=ei; ... } binds the xi using y alone
1402 -- See Note [getProxies is subtle]
1403 getProxies (OccEnv { occ_proxy = PE pe _ }) case_bndr
1404   = -- pprTrace "wrapProxies" (ppr case_bndr) $
1405     go_fwd case_bndr
1406   where
1407     fwd_pe :: IdEnv (Id, CoercionI)
1408     fwd_pe = foldVarEnv add1 emptyVarEnv pe
1409            where
1410              add1 (x,ycos) env = foldr (add2 x) env ycos
1411              add2 x (y,co) env = extendVarEnv env y (x,co)
1412
1413     go_fwd :: Id -> Bag ProxyBind
1414         -- Return bindings derivable from case_bndr
1415     go_fwd case_bndr = -- pprTrace "go_fwd" (vcat [ppr case_bndr, text "fwd_pe =" <+> ppr fwd_pe,
1416                        --                         text "pe =" <+> ppr pe]) $
1417                        go_fwd' case_bndr
1418
1419     go_fwd' case_bndr
1420         | Just (scrut, co) <- lookupVarEnv fwd_pe case_bndr
1421         = unitBag (scrut,  case_bndr, mkSymCoI co)
1422           `unionBags` go_fwd scrut
1423           `unionBags` go_bwd scrut [pr | pr@(cb,_) <- lookup_bwd scrut
1424                                        , cb /= case_bndr]
1425         | otherwise
1426         = emptyBag
1427
1428     lookup_bwd :: Id -> [(Id, CoercionI)]
1429         -- Return case_bndrs that are connected to scrut
1430     lookup_bwd scrut = case lookupVarEnv pe scrut of
1431                           Nothing          -> []
1432                           Just (_, cb_cos) -> cb_cos
1433
1434     go_bwd :: Id -> [(Id, CoercionI)] -> Bag ProxyBind
1435     go_bwd scrut cb_cos = foldr (unionBags . go_bwd1 scrut) emptyBag cb_cos
1436
1437     go_bwd1 :: Id -> (Id, CoercionI) -> Bag ProxyBind
1438     go_bwd1 scrut (case_bndr, co)
1439        = -- pprTrace "go_bwd1" (ppr case_bndr) $
1440          unitBag (case_bndr, scrut, co)
1441          `unionBags` go_bwd case_bndr (lookup_bwd case_bndr)
1442
1443 -----------
1444 mkAltEnv :: OccEnv -> CoreExpr -> Id -> OccEnv
1445 -- Does two things: a) makes the occ_ctxt = OccVanilla
1446 --                  b) extends the ProxyEnv if possible
1447 mkAltEnv env scrut cb
1448   = env { occ_encl  = OccVanilla, occ_proxy = pe' }
1449   where
1450     pe  = occ_proxy env
1451     pe' = case scrut of
1452              Var v           -> extendProxyEnv pe v (IdCo (idType v)) cb
1453              Cast (Var v) co -> extendProxyEnv pe v (ACo co)          cb
1454              _other          -> trimProxyEnv pe [cb]
1455
1456 -----------
1457 trimOccEnv :: OccEnv -> [CoreBndr] -> OccEnv
1458 trimOccEnv env bndrs = env { occ_proxy = trimProxyEnv (occ_proxy env) bndrs }
1459
1460 -----------
1461 trimProxyEnv :: ProxyEnv -> [CoreBndr] -> ProxyEnv
1462 -- We are about to push this ProxyEnv inside a binding for 'bndrs'
1463 -- So dump any ProxyEnv bindings which mention any of the bndrs
1464 trimProxyEnv (PE pe fvs) bndrs
1465   | not (bndr_set `intersectsVarSet` fvs)
1466   = PE pe fvs
1467   | otherwise
1468   = PE pe' (fvs `minusVarSet` bndr_set)
1469   where
1470     pe' = mapVarEnv trim pe
1471     bndr_set = mkVarSet bndrs
1472     trim (scrut, cb_cos) | scrut `elemVarSet` bndr_set = (scrut, [])
1473                          | otherwise = (scrut, filterOut discard cb_cos)
1474     discard (cb,co) = bndr_set `intersectsVarSet`
1475                       extendVarSet (freeVarsCoI co) cb
1476
1477 -----------
1478 freeVarsCoI :: CoercionI -> VarSet
1479 freeVarsCoI (IdCo t) = tyVarsOfType t
1480 freeVarsCoI (ACo co) = tyVarsOfType co
1481 \end{code}
1482
1483
1484 %************************************************************************
1485 %*                                                                      *
1486 \subsection[OccurAnal-types]{OccEnv}
1487 %*                                                                      *
1488 %************************************************************************
1489
1490 \begin{code}
1491 type UsageDetails = IdEnv OccInfo       -- A finite map from ids to their usage
1492                 -- INVARIANT: never IAmDead
1493                 -- (Deadness is signalled by not being in the map at all)
1494
1495 (+++), combineAltsUsageDetails
1496         :: UsageDetails -> UsageDetails -> UsageDetails
1497
1498 (+++) usage1 usage2
1499   = plusVarEnv_C addOccInfo usage1 usage2
1500
1501 combineAltsUsageDetails usage1 usage2
1502   = plusVarEnv_C orOccInfo usage1 usage2
1503
1504 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
1505 addOneOcc usage id info
1506   = plusVarEnv_C addOccInfo usage (unitVarEnv id info)
1507         -- ToDo: make this more efficient
1508
1509 emptyDetails :: UsageDetails
1510 emptyDetails = (emptyVarEnv :: UsageDetails)
1511
1512 usedIn :: Id -> UsageDetails -> Bool
1513 v `usedIn` details = isExportedId v || v `elemVarEnv` details
1514
1515 type IdWithOccInfo = Id
1516
1517 tagLamBinders :: UsageDetails          -- Of scope
1518               -> [Id]                  -- Binders
1519               -> (UsageDetails,        -- Details with binders removed
1520                  [IdWithOccInfo])    -- Tagged binders
1521 -- Used for lambda and case binders
1522 -- It copes with the fact that lambda bindings can have InlineRule
1523 -- unfoldings, used for join points
1524 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
1525   where
1526     (usage', bndrs') = mapAccumR tag_lam usage binders
1527     tag_lam usage bndr = (usage2, setBinderOcc usage bndr)
1528       where
1529         usage1 = usage `delVarEnv` bndr
1530         usage2 | isId bndr = addIdOccs usage1 (idUnfoldingVars bndr)
1531                | otherwise = usage1
1532
1533 tagBinder :: UsageDetails           -- Of scope
1534           -> Id                     -- Binders
1535           -> (UsageDetails,         -- Details with binders removed
1536               IdWithOccInfo)        -- Tagged binders
1537
1538 tagBinder usage binder
1539  = let
1540      usage'  = usage `delVarEnv` binder
1541      binder' = setBinderOcc usage binder
1542    in
1543    usage' `seq` (usage', binder')
1544
1545 setBinderOcc :: UsageDetails -> CoreBndr -> CoreBndr
1546 setBinderOcc usage bndr
1547   | isTyCoVar bndr    = bndr
1548   | isExportedId bndr = case idOccInfo bndr of
1549                           NoOccInfo -> bndr
1550                           _         -> setIdOccInfo bndr NoOccInfo
1551             -- Don't use local usage info for visible-elsewhere things
1552             -- BUT *do* erase any IAmALoopBreaker annotation, because we're
1553             -- about to re-generate it and it shouldn't be "sticky"
1554
1555   | otherwise = setIdOccInfo bndr occ_info
1556   where
1557     occ_info = lookupVarEnv usage bndr `orElse` IAmDead
1558 \end{code}
1559
1560
1561 %************************************************************************
1562 %*                                                                      *
1563 \subsection{Operations over OccInfo}
1564 %*                                                                      *
1565 %************************************************************************
1566
1567 \begin{code}
1568 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails
1569 mkOneOcc env id int_cxt
1570   | isLocalId id = unitVarEnv id (OneOcc False True int_cxt)
1571   | PE env _ <- occ_proxy env
1572   , id `elemVarEnv` env = unitVarEnv id NoOccInfo
1573   | otherwise           = emptyDetails
1574
1575 markMany, markInsideLam, markInsideSCC :: OccInfo -> OccInfo
1576
1577 markMany _  = NoOccInfo
1578
1579 markInsideSCC occ = markMany occ
1580
1581 markInsideLam (OneOcc _ one_br int_cxt) = OneOcc True one_br int_cxt
1582 markInsideLam occ                       = occ
1583
1584 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
1585
1586 addOccInfo a1 a2  = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
1587                     NoOccInfo   -- Both branches are at least One
1588                                 -- (Argument is never IAmDead)
1589
1590 -- (orOccInfo orig new) is used
1591 -- when combining occurrence info from branches of a case
1592
1593 orOccInfo (OneOcc in_lam1 _ int_cxt1)
1594           (OneOcc in_lam2 _ int_cxt2)
1595   = OneOcc (in_lam1 || in_lam2)
1596            False        -- False, because it occurs in both branches
1597            (int_cxt1 && int_cxt2)
1598 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
1599                   NoOccInfo
1600 \end{code}