compiler/simplCore/OccurAnal.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 %************************************************************************
   5 %*                                                                      *
   6 \section[OccurAnal]{Occurrence analysis pass}
   7 %*                                                                      *
   8 %************************************************************************
   9
  10 The occurrence analyser re-typechecks a core expression, returning a new
  11 core expression with (hopefully) improved usage information.
  12
  13 \begin{code}
  14 module OccurAnal (
  15         occurAnalysePgm, occurAnalyseExpr
  16     ) where
  17
  18 #include "HsVersions.h"
  19
  20 import CoreSyn
  21 import CoreFVs
  22 import CoreUtils        ( exprIsTrivial, isDefaultAlt )
  23 import Coercion         ( mkSymCoercion )
  24 import Id
  25 import Name             ( localiseName )
  26 import IdInfo
  27 import BasicTypes
  28
  29 import VarSet
  30 import VarEnv
  31
  32 import Maybes           ( orElse )
  33 import Digraph          ( SCC(..), stronglyConnCompFromEdgedVerticesR )
  34 import PrelNames        ( buildIdKey, foldrIdKey, runSTRepIdKey, augmentIdKey )
  35 import Unique           ( Unique )
  36 import UniqFM           ( keysUFM, intersectUFM_C, foldUFM_Directly )
  37 import Util             ( mapAndUnzip )
  38 import Outputable
  39
  40 import Data.List
  41 \end{code}
  42
  43
  44 %************************************************************************
  45 %*                                                                      *
  46 \subsection[OccurAnal-main]{Counting occurrences: main function}
  47 %*                                                                      *
  48 %************************************************************************
  49
  50 Here's the externally-callable interface:
  51
  52 \begin{code}
  53 occurAnalysePgm :: [CoreBind] -> [CoreBind]
  54         -- Occurrence-analyse a whole program.  Each binding is analysed
  55         -- against the usage details gathered from the bindings that follow
  56         -- it (its scope); occAnalBind may return zero or more bindings.
  57         -- The usage details of the program as a whole are discarded.
  58 occurAnalysePgm binds
  59   = snd (foldr analyse_one (emptyDetails, []) binds)
  60   where
  61     analyse_one :: CoreBind -> (UsageDetails, [CoreBind]) -> (UsageDetails, [CoreBind])
  62     analyse_one bind ~(scope_usage, later_binds)    -- lazy match on the result for the rest
  63         = (usage, bind' ++ later_binds)
  64         where (usage, bind') = occAnalBind initOccEnv bind scope_usage
  65
  66 occurAnalyseExpr :: CoreExpr -> CoreExpr
  67         -- Occurrence-analyse one expression; the usage details are discarded, only the new expression is kept
  68 occurAnalyseExpr = snd . occAnal initOccEnv
  69 \end{code}
  70
  71
  72 %************************************************************************
  73 %*                                                                      *
  74 \subsection[OccurAnal-main]{Counting occurrences: main function}
  75 %*                                                                      *
  76 %************************************************************************
  77
  78 Bindings
  79 ~~~~~~~~
  80
  81 \begin{code}
  82 occAnalBind :: OccEnv
  83             -> CoreBind
  84             -> UsageDetails             -- Usage details of scope
  85             -> (UsageDetails,           -- Of the whole let(rec)
  86                 [CoreBind])             -- The analysed binding(s); [] if the binding was dropped as dead
  87
  88 occAnalBind env (NonRec binder rhs) body_usage
  89   | isTyVar binder                      -- A type let; we don't gather usage info
  90   = (body_usage, [NonRec binder rhs])
  91
  92   | not (binder `usedIn` body_usage)    -- It's not mentioned, so the binding is dropped
  93   = (body_usage, [])
  94
  95   | otherwise                   -- It's mentioned in the body
  96   = (body_usage' +++ addRuleUsage rhs_usage binder,     -- Note [Rules are extra RHSs]
  97      [NonRec tagged_binder rhs'])
  98   where
  99     (body_usage', tagged_binder) = tagBinder body_usage binder       -- tag the binder from the body's usage
 100     (rhs_usage, rhs')            = occAnalRhs env tagged_binder rhs  -- occAnalRhs is given the *tagged* binder
 101 \end{code}
 102
 103 Note [Dead code]
 104 ~~~~~~~~~~~~~~~~
 105 Dropping dead code for recursive bindings is done in a very simple way:
 106
 107         the entire set of bindings is dropped if none of its binders are
 108         mentioned in its body; otherwise none are.
 109
 110 This seems to miss an obvious improvement.
 111
 112         letrec  f = ...g...
 113                 g = ...f...
 114         in
 115         ...g...
 116 ===>
 117         letrec f = ...g...
 118                g = ...(...g...)...
 119         in
 120         ...g...
 121
 122 Now 'f' is unused! But it's OK!  Dependency analysis will sort this
 123 out into a letrec for 'g' and a 'let' for 'f', and then 'f' will get
 124 dropped.  It isn't easy to do a perfect job in one blow.  Consider
 125
 126         letrec f = ...g...
 127                g = ...h...
 128                h = ...k...
 129                k = ...m...
 130                m = ...m...
 131         in
 132         ...m...
 133
 134
 135 Note [Loop breaking and RULES]
 136 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 137 Loop breaking is surprisingly subtle.  First read the section 4 of
 138 "Secrets of the GHC inliner".  This describes our basic plan.
 139
 140 However things are made quite a bit more complicated by RULES.  Remember
 141
 142   * Note [Rules are extra RHSs]
 143     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 144     A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
 145     keeps the specialised "children" alive.  If the parent dies
 146     (because it isn't referenced any more), then the children will die
 147     too (unless they are already referenced directly).
 148
 149     To that end, we build a Rec group for each cyclic strongly
 150     connected component,
 151         *treating f's rules as extra RHSs for 'f'*.
 152
 153     When we make the Rec groups we include variables free in *either*
 154     LHS *or* RHS of the rule.  The former might seem silly, but see
 155     Note [Rule dependency info].
 156
 157     So in Example [eftInt], eftInt and eftIntFB will be put in the
 158     same Rec, even though their 'main' RHSs are both non-recursive.
 159
 160   * Note [Rules are visible in their own rec group]
 161     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 162     We want the rules for 'f' to be visible in f's right-hand side.
 163     And we'd like them to be visible in other functions in f's Rec
 164     group.  E.g. in Example [Specialisation rules] we want f's rule
 165     to be visible in both f's RHS, and fs's RHS.
 166
 167     This means that we must simplify the RULEs first, before looking
 168     at any of the definitions.  This is done by Simplify.simplRecBind,
 169     when it calls addLetIdInfo.
 170
 171   * Note [Choosing loop breakers]
 172     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 173     We avoid infinite inlinings by choosing loop breakers, and
 174     ensuring that a loop breaker cuts each loop.  But what is a
 175     "loop"?  In particular, a RULE is like an equation for 'f' that
 176     is *always* inlined if it is applicable.  We do *not* disable
 177     rules for loop-breakers.  It's up to whoever makes the rules to
 178     make sure that the rules themselves always terminate.  See Note
 179     [Rules for recursive functions] in Simplify.lhs
 180
 181     Hence, if
 182         f's RHS mentions g, and
 183         g has a RULE that mentions h, and
 184         h has a RULE that mentions f
 185
 186     then we *must* choose f to be a loop breaker.  In general, take the
 187     free variables of f's RHS, and augment it with all the variables
 188     reachable by RULES from those starting points.  That is the whole
 189     reason for computing rule_fv_env in occAnalRec.  (Of course we
 190     only consider free vars that are also binders in this Rec group.)
 191
 192     Note that when we compute this rule_fv_env, we only consider variables
 193     free in the *RHS* of the rule, in contrast to the way we build the
 194     Rec group in the first place (Note [Rule dependency info])
 195
 196     Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
 197     chosen as a loop breaker, because their RHSs don't mention each other.
 198     And indeed both can be inlined safely.
 199
 200     Note that the edges of the graph we use for computing loop breakers
 201     are not the same as the edges we use for computing the Rec blocks.
 202     That's why we compute
 203         rec_edges          for the Rec block analysis
 204         loop_breaker_edges for the loop breaker analysis
 205
 206
 207   * Note [Weak loop breakers]
 208     ~~~~~~~~~~~~~~~~~~~~~~~~~
 209     There is a last nasty wrinkle.  Suppose we have
 210
 211         Rec { f = f_rhs
 212               RULE f [] = g
 213
 214               h = h_rhs
 215               g = h
 216               ...more...
 217         }
 218
 219     Remember that we simplify the RULES before any RHS (see Note
 220     [Rules are visible in their own rec group] above).
 221
 222     So we must *not* postInlineUnconditionally 'g', even though
 223     its RHS turns out to be trivial.  (I'm assuming that 'g' is
 224     not chosen as a loop breaker.)
 225
 226     We "solve" this by making g a "weak" or "rules-only" loop breaker,
 227     with OccInfo = IAmALoopBreaker True.  A normal "strong" loop breaker
 228     has IAmALoopBreaker False.  So
 229
 230                                 Inline  postInlineUnconditionally
 231         IAmALoopBreaker False   no      no
 232         IAmALoopBreaker True    yes     no
 233         other                   yes     yes
 234
 235     The **sole** reason for this kind of loop breaker is so that
 236     postInlineUnconditionally does not fire.  Ugh.
 237
 238   * Note [Rule dependency info]
 239     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 240     The VarSet in a SpecInfo is used for dependency analysis in the
 241     occurrence analyser.  We must track free vars in *both* lhs and rhs.
 242     Hence use of idRuleVars, rather than idRuleRhsVars in addRuleUsage.
 243     Why both? Consider
 244         x = y
 245         RULE f x = 4
 246     Then if we substitute y for x, we'd better do so in the
 247     rule's LHS too, so we'd better ensure the dependency is respected
 248
 249
 250 Example [eftInt]
 251 ~~~~~~~~~~~~~~~
 252 Example (from GHC.Enum):
 253
 254   eftInt :: Int# -> Int# -> [Int]
 255   eftInt x y = ...(non-recursive)...
 256
 257   {-# INLINE [0] eftIntFB #-}
 258   eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
 259   eftIntFB c n x y = ...(non-recursive)...
 260
 261   {-# RULES
 262   "eftInt"  [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
 263   "eftIntList"  [1] eftIntFB  (:) [] = eftInt
 264    #-}
 265
 266 Example [Specialisation rules]
 267 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 268 Consider this group, which is typical of what SpecConstr builds:
 269
 270    fs a = ....f (C a)....
 271    f  x = ....f (C a)....
 272    {-# RULE f (C a) = fs a #-}
 273
 274 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
 275
 276 But watch out!  If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
 277         - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify)
 278         - fs is inlined (say it's small)
 279         - now there's another opportunity to apply the RULE
 280
 281 This showed up when compiling Control.Concurrent.Chan.getChanContents.
 282
 283
 284 \begin{code}
 285 occAnalBind env (Rec pairs) body_usage
 286   = foldr occAnalRec (body_usage, []) sccs
 287         -- For a recursive group, we
 288         --      * occ-analyse all the RHSs
 289         --      * compute strongly-connected components
 290         --      * feed those components to occAnalRec (a foldr seeded with the body's usage)
 291   where
 292     -------------Dependency analysis ------------------------------
 293     bndr_set = mkVarSet (map fst pairs)
 294
 295     sccs :: [SCC (Node Details)]
 296     sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompFromEdgedVerticesR rec_edges
 297
 298     rec_edges :: [Node Details]
 299     rec_edges = {-# SCC "occAnalBind.assoc" #-}  map make_node pairs
 300
 301     make_node (bndr, rhs)
 302         = (ND bndr rhs' rhs_usage rhs_fvs, idUnique bndr, out_edges)
 303         where
 304           (rhs_usage, rhs') = occAnalRhs env bndr rhs
 305           rhs_fvs = intersectUFM_C (\b _ -> b) bndr_set rhs_usage       -- binders of this group mentioned in the RHS
 306           out_edges = keysUFM (rhs_fvs `unionVarSet` idRuleVars bndr)   -- rule vars too: Note [Rules are extra RHSs]
 307         -- (a -> b) means a mentions b
 308         -- Given the usage details (a UFM that gives occ info for each free var of
 309         -- the RHS) we can get the list of free vars -- or rather their Unique keys --
 310         -- by just extracting the keys from the finite map.  Grimy, but fast.
 311         -- Previously we had this:
 312         --      [ bndr | bndr <- bndrs,
 313         --               maybeToBool (lookupVarEnv rhs_usage bndr)]
 314         -- which has n**2 cost, and this meant that edges_from alone
 315         -- consumed 10% of total runtime!
 316
 317 -----------------------------
 318 occAnalRec :: SCC (Node Details) -> (UsageDetails, [CoreBind])   -- One SCC; (usage, bindings) accumulated so far
 319                                  -> (UsageDetails, [CoreBind])   -- The same pair, extended with this SCC
 320
 321         -- The NonRec case is just like a Let (NonRec ...) above
 322 occAnalRec (AcyclicSCC (ND bndr rhs rhs_usage _, _, _)) (body_usage, binds)
 323   | not (bndr `usedIn` body_usage)      -- Not mentioned: the binding is dropped
 324   = (body_usage, binds)
 325
 326   | otherwise                   -- It's mentioned in the body
 327   = (body_usage' +++ addRuleUsage rhs_usage bndr,       -- Note [Rules are extra RHSs]
 328      NonRec tagged_bndr rhs : binds)
 329   where
 330     (body_usage', tagged_bndr) = tagBinder body_usage bndr
 331
 332
 333         -- The Rec case is the interesting one
 334         -- See Note [Loop breaking and RULES]
 335 occAnalRec (CyclicSCC nodes) (body_usage, binds)
 336   | not (any (`usedIn` body_usage) bndrs)       -- NB: look at body_usage, not total_usage
 337   = (body_usage, binds)                         -- Dead code
 338
 339   | otherwise   -- At this point we always build a single Rec
 340   = (final_usage, Rec pairs : binds)
 341
 342   where
 343     bndrs    = [b | (ND b _ _ _, _, _) <- nodes]
 344     bndr_set = mkVarSet bndrs
 345
 346         ----------------------------
 347         -- Tag the binders with their occurrence info
 348     total_usage = foldl add_usage body_usage nodes     -- body usage + every RHS's usage + every binder's rule usage
 349     add_usage body_usage (ND bndr _ rhs_usage _, _, _)
 350         = body_usage +++ addRuleUsage rhs_usage bndr
 351     (final_usage, tagged_nodes) = mapAccumL tag_node total_usage nodes
 352
 353     tag_node :: UsageDetails -> Node Details -> (UsageDetails, Node Details)
 354         -- (a) Tag the binders in the details with occ info
 355         -- (b) Mark the binder with "weak loop-breaker" OccInfo
 356         --      saying "no postInlineUnconditionally" if it is used
 357         --      in any rule (lhs or rhs) of the recursive group
 358         --      See Note [Weak loop breakers]
 359     tag_node usage (ND bndr rhs rhs_usage rhs_fvs, k, ks)
 360       = (usage `delVarEnv` bndr, (ND bndr2 rhs rhs_usage rhs_fvs, k, ks))
 361       where
 362         bndr2 | bndr `elemVarSet` all_rule_fvs = makeLoopBreaker True bndr1
 363               | otherwise                      = bndr1
 364         bndr1 = setBinderOcc usage bndr
 365     all_rule_fvs = bndr_set `intersectVarSet` foldr (unionVarSet . idRuleVars)
 366                                                     emptyVarSet bndrs
 367
 368         ----------------------------
 369         -- Now reconstruct the cycle
 370     pairs | no_rules  = reOrderCycle 0 tagged_nodes []
 371           | otherwise = foldr (reOrderRec 0) [] $
 372                         stronglyConnCompFromEdgedVerticesR loop_breaker_edges
 373
 374         -- See Note [Choosing loop breakers] for loop_breaker_edges
 375     loop_breaker_edges = map mk_node tagged_nodes
 376     mk_node (details@(ND _ _ _ rhs_fvs), k, _) = (details, k, new_ks)
 377         where
 378           new_ks = keysUFM (extendFvs rule_fv_env rhs_fvs rhs_fvs)
 379
 380     ------------------------------------
 381     rule_fv_env :: IdEnv IdSet  -- Variables from this group reachable via the RHSs of rules
 382                                 -- Domain is *subset* of bound vars (others have no rule fvs)
 383     rule_fv_env = rule_loop init_rule_fvs
 384
 385     no_rules      = null init_rule_fvs
 386     init_rule_fvs = [(b, rule_fvs)
 387                     | b <- bndrs
 388                     , let rule_fvs = idRuleRhsVars b `intersectVarSet` bndr_set
 389                     , not (isEmptyVarSet rule_fvs)]
 390
 391     rule_loop :: [(Id,IdSet)] -> IdEnv IdSet    -- Finds fixpoint: iterates until no set grows, then returns that env
 392     rule_loop fv_list
 393         | no_change = env
 394         | otherwise = rule_loop new_fv_list
 395         where
 396           env = mkVarEnv fv_list        -- the *current* approximation (not init_rule_fvs), so the env returned is the fixpoint
 397           (no_change, new_fv_list) = mapAccumL bump True fv_list
 398           bump no_change (b,fvs)
 399                 | new_fvs `subVarSet` fvs = (no_change, (b,fvs))
 400                 | otherwise               = (False,     (b,new_fvs `unionVarSet` fvs))
 401                 where
 402                   new_fvs = extendFvs env emptyVarSet fvs      -- union of env(v) for each v in fvs
 403
 404 idRuleRhsVars :: Id -> VarSet
 405 -- Union of the free variables of the *right-hand sides* of all the Id's
 406 -- rules (the LHSs are not looked at).  See Note [Choosing loop breakers]
 407 idRuleRhsVars id = foldr (\rule acc -> ruleRhsFreeVars rule `unionVarSet` acc) emptyVarSet (idCoreRules id)
 408
 409 extendFvs :: IdEnv IdSet -> IdSet -> IdSet -> IdSet
 410 -- (extendFvs env fvs s) returns fvs unioned with env(v) for every v in s
 411 -- that has an entry in env; members of s without an entry add nothing
 412 extendFvs env fvs id_set
 413   = foldUFM_Directly step fvs id_set
 414   where
 415     step uniq _ acc
 416         = maybe acc (`unionVarSet` acc)         -- no entry => acc unchanged
 417                 (lookupVarEnv_Directly env uniq)
 418 \end{code}
 419
 420 @reOrderRec@ is applied to the list of (binder,rhs) pairs for a cyclic
 421 strongly connected component (there's guaranteed to be a cycle).  It returns the
 422 same pairs, but
 423         a) in a better order,
 424         b) with some of the Ids having a IAmALoopBreaker pragma
 425
 426 The "loop-breaker" Ids are sufficient to break all cycles in the SCC.  This means
 427 that the simplifier can guarantee not to loop provided it never records an inlining
 428 for these no-inline guys.
 429
 430 Furthermore, the order of the binds is such that if we neglect dependencies
 431 on the no-inline Ids then the binds are topologically sorted.  This means
 432 that the simplifier will generally do a good job if it works from top to bottom,
 433 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
 434
 435 ==============
 436 [June 98: I don't understand the following paragraphs, and I've
 437           changed the a=b case again so that it isn't a special case any more.]
 438
 439 Here's a case that bit me:
 440
 441         letrec
 442                 a = b
 443                 b = \x. BIG
 444         in
 445         ...a...a...a....
 446
 447 Re-ordering doesn't change the order of bindings, but there was no loop-breaker.
 448
 449 My solution was to make a=b bindings record b as Many, rather like INLINE bindings.
 450 Perhaps something cleverer would suffice.
 451 ===============
 452
 453
 454 \begin{code}
 455 type Node details = (details, Unique, [Unique]) -- (payload, this node's key, keys of the nodes it points to);
 456                                                 -- each key is the Unique of a binder's Id.
 457 data Details = ND Id            -- Binder
 458                   CoreExpr      -- RHS (already occurrence-analysed)
 459                   UsageDetails  -- Full usage from RHS (*not* including rules)
 460                   IdSet         -- Other binders from this Rec group mentioned on RHS
 461                                 -- (derivable from UsageDetails but cached here)
 462
 463 reOrderRec :: Int -> SCC (Node Details)
 464            -> [(Id,CoreExpr)] -> [(Id,CoreExpr)]
 465 -- Put one SCC's bindings in front of 'rest': an acyclic node goes on as it is, a cycle goes through reOrderCycle
 466 reOrderRec depth scc rest = case scc of
 467     AcyclicSCC (ND b e _ _, _, _) -> (b, e) : rest
 468     CyclicSCC nodes               -> reOrderCycle depth nodes rest
 469
 470 reOrderCycle :: Int -> [Node Details] -> [(Id,CoreExpr)] -> [(Id,CoreExpr)]
 471 reOrderCycle _ [] _            -- No nodes at all: panics
 472   = panic "reOrderCycle"
 473 reOrderCycle _ [bind] pairs    -- Common case of simple self-recursion
 474   = (makeLoopBreaker False bndr, rhs) : pairs
 475   where
 476     (ND bndr rhs _ _, _, _) = bind
 477
 478 reOrderCycle depth (bind : binds) pairs
 479   =     -- Choose a loop breaker (a lowest-scoring node), mark it no-inline,
 480         -- do SCC analysis on the rest, and recursively sort them out
 481 --    pprTrace "reOrderCycle" (ppr [b | (ND b _ _ _, _, _) <- bind:binds]) $
 482     foldr (reOrderRec new_depth)
 483           ([ (makeLoopBreaker False bndr, rhs)
 484            | (ND bndr rhs _ _, _, _) <- chosen_binds] ++ pairs)
 485           (stronglyConnCompFromEdgedVerticesR unchosen)
 486   where
 487     (chosen_binds, unchosen) = choose_loop_breaker [bind] (score bind) [] binds
 488
 489     approximate_loop_breaker = depth >= 2
 490     new_depth | approximate_loop_breaker = 0
 491               | otherwise                = depth+1
 492         -- After two iterations (d=0, d=1) give up
 493         -- and approximate, returning to d=0
 494
 495         -- This loop looks for the bind with the lowest score
 496         -- to pick as the loop  breaker.  The rest accumulate in 'acc'
 497     choose_loop_breaker loop_binds _loop_sc acc []
 498         = (loop_binds, acc)        -- Done
 499
 500         -- If approximate_loop_breaker is True, we pick *all*
 501         -- nodes with lowest score, else just one
 502         -- See Note [Complexity of loop breaking]
 503     choose_loop_breaker loop_binds loop_sc acc (bind : binds)
 504         | sc < loop_sc  -- Lower score so pick this new one
 505         = choose_loop_breaker [bind] sc (loop_binds ++ acc) binds
 506
 507         | approximate_loop_breaker && sc == loop_sc
 508         = choose_loop_breaker (bind : loop_binds) loop_sc acc binds
 509
 510         | otherwise     -- Higher score (or an equal one when not approximating) so don't pick it
 511         = choose_loop_breaker loop_binds loop_sc (bind : acc) binds
 512         where
 513           sc = score bind
 514
 515     score :: Node Details -> Int        -- Higher score => less likely to be picked as loop breaker
 516     score (ND bndr rhs _ _, _, _)
 517         | workerExists (idWorkerInfo bndr)      = 10
 518                 -- Note [Worker inline loop]
 519
 520         | exprIsTrivial rhs        = 5  -- Practically certain to be inlined
 521                 -- Used to have also: && not (isExportedId bndr)
 522                 -- But I found this sometimes cost an extra iteration when we have
 523                 --      rec { d = (a,b); a = ...df...; b = ...df...; df = d }
 524                 -- where df is the exported dictionary. Then df makes a really
 525                 -- bad choice for loop breaker
 526
 527         | is_con_app rhs = 3    -- Data types help with cases
 528                 -- Note [Constructor applications]
 529
 530 -- If an Id is marked "never inline" then it makes a great loop breaker
 531 -- The only reason for not checking that here is that it is rare
 532 -- and I've never seen a situation where it makes a difference,
 533 -- so it probably isn't worth the time to test on every binder
 534 --      | isNeverActive (idInlinePragma bndr) = -10
 535
 536         | inlineCandidate bndr rhs = 2  -- Likely to be inlined
 537                 -- Note [Inline candidates]  (NOTE(review): presumably Note [INLINE pragmas] is meant -- confirm)
 538
 539         | not (neverUnfold (idUnfolding bndr)) = 1
 540                 -- the Id has some kind of unfolding
 541
 542         | otherwise = 0
 543
 544     inlineCandidate :: Id -> CoreExpr -> Bool
 545     inlineCandidate _  (Note InlineMe _) = True
 546     inlineCandidate id _                 = isOneOcc (idOccInfo id)
 547
 548         -- Note [conapp]  (same text as Note [Constructor applications])
 549         --
 550         -- It's really really important to inline dictionaries.  Real
 551         -- example (the Enum Ordering instance from GHC.Base):
 552         --
 553         --      rec     f = \ x -> case d of (p,q,r) -> p x
 554         --              g = \ x -> case d of (p,q,r) -> q x
 555         --              d = (v, f, g)
 556         --
 557         -- Here, f and g occur just once; but we can't inline them into d.
 558         -- On the other hand we *could* simplify those case expressions if
 559         -- we didn't stupidly choose d as the loop breaker.
 560         -- But we won't because constructor args are marked "Many".
 561         -- Inlining dictionaries is really essential to unravelling
 562         -- the loops in static numeric dictionaries, see GHC.Float.
 563
 564         -- Cheap and cheerful; the simplifier moves casts out of the way
 565         -- The lambda case is important to spot x = /\a. C (f a)
 566         -- which comes up when C is a dictionary constructor and
 567         -- f is a default method.
 568         -- Example: the instance for Show (ST s a) in GHC.ST
 569         --
 570         -- However we *also* treat (\x. C p q) as a con-app-like thing,
 571         --      Note [Closure conversion]
 572     is_con_app (Var v)    = isDataConWorkId v
 573     is_con_app (App f _)  = is_con_app f
 574     is_con_app (Lam _ e)  = is_con_app e
 575     is_con_app (Note _ e) = is_con_app e
 576     is_con_app _          = False
 577
 578 makeLoopBreaker :: Bool -> Id -> Id
 579 -- Give the Id loop-breaker OccInfo; the Bool is the "weak" flag of Note [Weak loop breakers]
 580 makeLoopBreaker weak bndr = bndr `setIdOccInfo` IAmALoopBreaker weak
 581 \end{code}
 582
 583 Note [Complexity of loop breaking]
 584 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 585 The loop-breaking algorithm knocks out one binder at a time, and
 586 performs a new SCC analysis on the remaining binders.  That can
 587 behave very badly in tightly-coupled groups of bindings; in the
 588 worst case it can be (N**2)*log N, because it does a full SCC
 589 on N, then N-1, then N-2 and so on.
 590
 591 To avoid this, we switch plans after 2 (or whatever) attempts:
 592   Plan A: pick one binder with the lowest score, make it
 593           a loop breaker, and try again
 594   Plan B: pick *all* binders with the lowest score, make them
 595           all loop breakers, and try again
 596 Since there are only a small finite number of scores, this will
 597 terminate in a constant number of iterations, rather than O(N)
 598 iterations.
 599
 600 You might think that it's very unlikely, but RULES make it much
 601 more likely.  Here's a real example from Trac #1969:
 602   Rec { $dm = \d.\x. op d
 603         {-# RULES forall d. $dm Int d  = $s$dm1
 604                   forall d. $dm Bool d = $s$dm2 #-}
 605
 606         dInt = MkD .... opInt ...
 607         dBool = MkD .... opBool ...
 608         opInt  = $dm dInt
 609         opBool = $dm dBool
 610
 611         $s$dm1 = \x. op dInt
 612         $s$dm2 = \x. op dBool }
 613 The RULES stuff means that we can't choose $dm as a loop breaker
 614 (Note [Choosing loop breakers]), so we must choose at least (say)
 615 opInt *and* opBool, and so on.  The number of loop breakers is
 616 linear in the number of instance declarations.
 617
 618 Note [INLINE pragmas]
 619 ~~~~~~~~~~~~~~~~~~~~~
 620 Never choose a function with an INLINE pragma as the loop breaker!
 621 If such a function is mutually-recursive with a non-INLINE thing,
 622 then the latter should be the loop-breaker.
 623
 624 A particular case is wrappers generated by the demand analyser.
 625 If you make them into a loop breaker you may get an infinite
 626 inlining loop.  For example:
 627   rec {
 628         $wfoo x = ....foo x....
 629
 630         {-loop brk-} foo x = ...$wfoo x...
 631   }
 632 The interface file sees the unfolding for $wfoo, and sees that foo is
 633 strict (and hence it gets an auto-generated wrapper).  Result: an
 634 infinite inlining in the importing scope.  So be a bit careful if you
 635 change this.  A good example is Tree.repTree in
 636 nofib/spectral/minimax. If the repTree wrapper is chosen as the loop
 637 breaker then compiling Game.hs goes into an infinite loop (this
 638 happened when we gave is_con_app a lower score than inline candidates).
 639
 640 Note [Constructor applications]
 641 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 642 It's really really important to inline dictionaries.  Real
 643 example (the Enum Ordering instance from GHC.Base):
 644
 645      rec     f = \ x -> case d of (p,q,r) -> p x
 646              g = \ x -> case d of (p,q,r) -> q x
 647              d = (v, f, g)
 648
 649 Here, f and g occur just once; but we can't inline them into d.
 650 On the other hand we *could* simplify those case expressions if
 651 we didn't stupidly choose d as the loop breaker.
 652 But we won't because constructor args are marked "Many".
 653 Inlining dictionaries is really essential to unravelling
 654 the loops in static numeric dictionaries, see GHC.Float.
 655
 656 Note [Closure conversion]
 657 ~~~~~~~~~~~~~~~~~~~~~~~~~
 658 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
 659 The immediate motivation came from the result of a closure-conversion transformation
 660 which generated code like this:
 661
 662     data Clo a b = forall c. Clo (c -> a -> b) c
 663
 664     ($:) :: Clo a b -> a -> b
 665     Clo f env $: x = f env x
 666
 667     rec { plus = Clo plus1 ()
 668
 669         ; plus1 _ n = Clo plus2 n
 670
 671         ; plus2 Zero     n = n
 672         ; plus2 (Succ m) n = Succ (plus $: m $: n) }
 673
 674 If we inline 'plus' and 'plus1', everything unravels nicely.  But if
 675 we choose 'plus1' as the loop breaker (which is entirely possible
 676 otherwise), the loop does not unravel nicely.
 677
 678
 679 @occAnalRhs@ deals with the question of bindings where the Id is marked
 680 by an INLINE pragma.  For these we record that anything which occurs
 681 in its RHS occurs many times.  This pessimistically assumes that the
 682 inlined binder also occurs many times in its scope, but if it doesn't
 683 we'll catch it next time round.  At worst this costs an extra simplifier pass.
 684 ToDo: try using the occurrence info for the inline'd binder.
 685
 686 [March 97] We do the same for atomic RHSs.  Reason: see notes with reOrderRec.
 687 [June 98, SLPJ]  I've undone this change; I don't understand it.  See notes with reOrderRec.
 688
 689
 690 \begin{code}
 691 occAnalRhs :: OccEnv
 692            -> Id -> CoreExpr    -- Binder and rhs
 693                                 -- For non-recs the binder is already tagged
 694                                 -- with occurrence info
 695            -> (UsageDetails, CoreExpr)
 696
 697 occAnalRhs env id rhs
 698   = occAnal rhs_env rhs
 699   where
 700     rhs_env = if sure_to_inline then env
 701                                 else rhsCtxt env
 702         -- Note that we generally use an rhsCtxt.  This tells the occurrence analyser
 703         -- that it's looking at an RHS, which has an effect in occAnalApp
 704         --
 705         -- But there's a problem.  Consider
 706         --      x1 = a0 : []
 707         --      x2 = a1 : x1
 708         --      x3 = a2 : x2
 709         --      g  = f x3
 710         -- First time round, it looks as if x1 and x2 occur as an arg of a
 711         -- let-bound constructor ==> give them a many-occurrence.
 712         -- But then x3 is inlined (unconditionally as it happens) and
 713         -- next time round, x2 will be, and the next time round x1 will be
 714         -- Result: multiple simplifier iterations.  Sigh.
 715         -- Crude solution: no rhsCtxt (just the plain env) for a binder that occurs just once...
 716
 717     sure_to_inline = case idOccInfo id of     -- exactly one occurrence, not under a lambda, in one branch
 718                        OneOcc in_lam one_br _ -> not in_lam && one_br
 719                        _                      -> False
 720 \end{code}
 721
 722
 723
 724 \begin{code}
 725 addRuleUsage :: UsageDetails -> Id -> UsageDetails
 726 -- Add the usage from RULES in Id to the usage
 727 addRuleUsage usage id
 728   = foldVarSet add usage (idRuleVars id)
 729         -- idRuleVars here: see Note [Rule dependency info]
 730   where
 731     add v u = addOneOcc u v NoOccInfo
 732         -- Give a non-committal binder info (i.e manyOcc) because
 733         --   a) Many copies of the specialised thing can appear
 734         --   b) We don't want to substitute a BIG expression inside a RULE
 735         --      even if that's the only occurrence of the thing
 736         --      (Same goes for INLINE.)
 737 \end{code}
 738
 739 Expressions
 740 ~~~~~~~~~~~
 741 \begin{code}
 742 occAnal :: OccEnv
 743         -> CoreExpr
 744         -> (UsageDetails,       -- Gives info only about the "interesting" Ids
 745             CoreExpr)
 746
 747 occAnal _   (Type t)  = (emptyDetails, Type t)
 748 occAnal env (Var v)   = (mkOneOcc env v False, Var v)
 749     -- At one stage, I gathered the idRuleVars for v here too,
 750     -- which in a way is the right thing to do.
 751     -- But that went wrong right after specialisation, when
 752     -- the *occurrences* of the overloaded function didn't have any
 753     -- rules in them, so the *specialised* versions looked as if they
 754     -- weren't used at all.
 755 \end{code}
 756
 757 We regard variables that occur as constructor arguments as "dangerousToDup":
 758
 759 \begin{verbatim}
 760 module A where
 761 f x = let y = expensive x in
 762       let z = (True,y) in
 763       (case z of {(p,q)->q}, case z of {(p,q)->q})
 764 \end{verbatim}
 765
 766 We feel free to duplicate the WHNF (True,y), but that means
 767 that y may be duplicated thereby.
 768
 769 If we aren't careful we duplicate the (expensive x) call!
 770 Constructors are rather like lambdas in this way.
 771
 772 \begin{code}
 773 occAnal _   expr@(Lit _) = (emptyDetails, expr)
 774 \end{code}
 775
 776 \begin{code}
 777 occAnal env (Note InlineMe body)
 778   = case occAnal env body of { (usage, body') ->
 779     (mapVarEnv markMany usage, Note InlineMe body')
 780     }
 781
 782 occAnal env (Note note@(SCC _) body)
 783   = case occAnal env body of { (usage, body') ->
 784     (mapVarEnv markInsideSCC usage, Note note body')
 785     }
 786
 787 occAnal env (Note note body)
 788   = case occAnal env body of { (usage, body') ->
 789     (usage, Note note body')
 790     }
 791
 792 occAnal env (Cast expr co)
 793   = case occAnal env expr of { (usage, expr') ->
 794     (markRhsUds env True usage, Cast expr' co)
 795         -- If we see let x = y `cast` co
 796         -- then mark y as 'Many' so that we don't
 797         -- immediately inline y again.
 798     }
 799 \end{code}
 800
 801 \begin{code}
 802 occAnal env app@(App _ _)
 803   = occAnalApp env (collectArgs app)
 804
 805 -- Ignore type variables altogether
 806 --   (a) occurrences inside type lambdas only not marked as InsideLam
 807 --   (b) type variables not in environment
 808
 809 occAnal env (Lam x body) | isTyVar x
 810   = case occAnal env body of { (body_usage, body') ->
 811     (body_usage, Lam x body')
 812     }
 813
 814 -- For value lambdas we do a special hack.  Consider
 815 --      (\x. \y. ...x...)
 816 -- If we did nothing, x is used inside the \y, so would be marked
 817 -- as dangerous to dup.  But in the common case where the abstraction
 818 -- is applied to two arguments this is over-pessimistic.
 819 -- So instead, we just mark each binder with its occurrence
 820 -- info in the *body* of the multiple lambda.
 821 -- Then, the simplifier is careful when partially applying lambdas.
 822
 823 occAnal env expr@(Lam _ _)
 824   = case occAnal env_body body of { (body_usage, body') ->
 825     let
 826         (final_usage, tagged_binders) = tagBinders body_usage binders
 827         --      URGH!  Sept 99: we don't seem to be able to use binders' here, because
 828         --      we get linear-typed things in the resulting program that we can't handle yet.
 829         --      (e.g. PrelShow)  TODO
 830
 831         really_final_usage = if linear then
 832                                 final_usage
 833                              else
 834                                 mapVarEnv markInsideLam final_usage
 835     in
 836     (really_final_usage,
 837      mkLams tagged_binders body') }
 838   where
 839     env_body        = vanillaCtxt env        -- Body is (no longer) an RhsContext
 840     (binders, body) = collectBinders expr
 841     binders'        = oneShotGroup env binders
 842     linear          = all is_one_shot binders'
 843     is_one_shot b   = isId b && isOneShotBndr b
 844
 845 occAnal env (Case scrut bndr ty alts)
 846   = case occ_anal_scrut scrut alts     of { (scrut_usage, scrut') ->
 847     case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts')   ->
 848     let
 849         alts_usage  = foldr1 combineAltsUsageDetails alts_usage_s
 850         alts_usage' = addCaseBndrUsage alts_usage
 851         (alts_usage1, tagged_bndr) = tagBinder alts_usage' bndr
 852         total_usage = scrut_usage +++ alts_usage1
 853     in
 854     total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
 855   where
 856         -- Note [Case binder usage]
 857         -- ~~~~~~~~~~~~~~~~~~~~~~~~
 858         -- The case binder gets a usage of either "many" or "dead", never "one".
 859         -- Reason: we like to inline single occurrences, to eliminate a binding,
 860         -- but inlining a case binder *doesn't* eliminate a binding.
 861         -- We *don't* want to transform
 862         --      case x of w { (p,q) -> f w }
 863         -- into
 864         --      case x of w { (p,q) -> f (p,q) }
 865     addCaseBndrUsage usage = case lookupVarEnv usage bndr of
 866                                 Nothing -> usage
 867                                 Just _  -> extendVarEnv usage bndr NoOccInfo
 868
 869     alt_env = mkAltEnv env bndr_swap
 870         -- Consider     x = case v of { True -> (p,q); ... }
 871         -- Then it's fine to inline p and q
 872
 873     bndr_swap = case scrut of
 874                   Var v           -> Just (v, Var bndr)
 875                   Cast (Var v) co -> Just (v, Cast (Var bndr) (mkSymCoercion co))
 876                   _other          -> Nothing
 877
 878     occ_anal_alt = occAnalAlt alt_env bndr bndr_swap
 879
 880     occ_anal_scrut (Var v) (alt1 : other_alts)
 881         | not (null other_alts) || not (isDefaultAlt alt1)
 882         = (mkOneOcc env v True, Var v)  -- The 'True' says that the variable occurs
 883                                         -- in an interesting context; the case has
 884                                         -- at least one non-default alternative
 885     occ_anal_scrut scrut _alts
 886         = occAnal (vanillaCtxt env) scrut    -- No need for rhsCtxt
 887
 888 occAnal env (Let bind body)
 889   = case occAnal env body                of { (body_usage, body') ->
 890     case occAnalBind env bind body_usage of { (final_usage, new_binds) ->
 891        (final_usage, mkLets new_binds body') }}
 892
 893 occAnalArgs :: OccEnv -> [CoreExpr] -> (UsageDetails, [CoreExpr])
 894 occAnalArgs env args
 895   = case mapAndUnzip (occAnal arg_env) args of  { (arg_uds_s, args') ->
 896     (foldr (+++) emptyDetails arg_uds_s, args')}
 897   where
 898     arg_env = vanillaCtxt env
 899 \end{code}
 900
 901 Applications are dealt with specially because we want
 902 the "build hack" to work.
 903
 904 \begin{code}
 905 occAnalApp :: OccEnv
 906            -> (Expr CoreBndr, [Arg CoreBndr])
 907            -> (UsageDetails, Expr CoreBndr)
 908 occAnalApp env (Var fun, args)
 909   = case args_stuff of { (args_uds, args') ->
 910     let
 911         final_args_uds = markRhsUds env is_pap args_uds
 912     in
 913     (fun_uds +++ final_args_uds, mkApps (Var fun) args') }
 914   where
 915     fun_uniq = idUnique fun
 916     fun_uds  = mkOneOcc env fun (valArgCount args > 0)
 917     is_pap = isConLikeId fun || valArgCount args < idArity fun
 918
 919                 -- Hack for build, fold, runST
 920     args_stuff  | fun_uniq == buildIdKey    = appSpecial env 2 [True,True]  args
 921                 | fun_uniq == augmentIdKey  = appSpecial env 2 [True,True]  args
 922                 | fun_uniq == foldrIdKey    = appSpecial env 3 [False,True] args
 923                 | fun_uniq == runSTRepIdKey = appSpecial env 2 [True]       args
 924                         -- (foldr k z xs) may call k many times, but it never
 925                         -- shares a partial application of k; hence [False,True]
 926                         -- This means we can optimise
 927                         --      foldr (\x -> let v = ...x... in \y -> ...v...) z xs
 928                         -- by floating in the v
 929
 930                 | otherwise = occAnalArgs env args
 931
 932
 933 occAnalApp env (fun, args)
 934   = case occAnal (addAppCtxt env args) fun of   { (fun_uds, fun') ->
 935         -- The addAppCtxt is a bit cunning.  One iteration of the simplifier
 936         -- often leaves behind beta redexs like
 937         --      (\x y -> e) a1 a2
 938         -- Here we would like to mark x,y as one-shot, and treat the whole
 939         -- thing much like a let.  We do this by pushing some True items
 940         -- onto the context stack.
 941
 942     case occAnalArgs env args of        { (args_uds, args') ->
 943     let
 944         final_uds = fun_uds +++ args_uds
 945     in
 946     (final_uds, mkApps fun' args') }}
 947
 948
 949 markRhsUds :: OccEnv            -- Check if this is a RhsEnv
 950            -> Bool              -- and this is true
 951            -> UsageDetails      -- The do markMany on this
 952            -> UsageDetails
 953 -- We mark the free vars of the argument of a constructor or PAP
 954 -- as "many", if it is the RHS of a let(rec).
 955 -- This means that nothing gets inlined into a constructor argument
 956 -- position, which is what we want.  Typically those constructor
 957 -- arguments are just variables, or trivial expressions.
 958 --
 959 -- This is the *whole point* of the isRhsEnv predicate
 960 markRhsUds env is_pap arg_uds
 961   | isRhsEnv env && is_pap = mapVarEnv markMany arg_uds
 962   | otherwise              = arg_uds
 963
 964
 965 appSpecial :: OccEnv
 966            -> Int -> CtxtTy     -- Argument number, and context to use for it
 967            -> [CoreExpr]
 968            -> (UsageDetails, [CoreExpr])
 969 appSpecial env n ctxt args
 970   = go n args
 971   where
 972     arg_env = vanillaCtxt env
 973
 974     go _ [] = (emptyDetails, [])        -- Too few args
 975
 976     go 1 (arg:args)                     -- The magic arg
 977       = case occAnal (setCtxtTy arg_env ctxt) arg of    { (arg_uds, arg') ->
 978         case occAnalArgs env args of                    { (args_uds, args') ->
 979         (arg_uds +++ args_uds, arg':args') }}
 980
 981     go n (arg:args)
 982       = case occAnal arg_env arg of     { (arg_uds, arg') ->
 983         case go (n-1) args of           { (args_uds, args') ->
 984         (arg_uds +++ args_uds, arg':args') }}
 985 \end{code}
 986
 987
 988 Note [Binder swap]
 989 ~~~~~~~~~~~~~~~~~~
 990 We do these two transformations right here:
 991
 992  (1)   case x of b { pi -> ri }
 993     ==>
 994       case x of b { pi -> let x=b in ri }
 995
 996  (2)  case (x |> co) of b { pi -> ri }
 997     ==>
 998       case (x |> co) of b { pi -> let x = b |> sym co in ri }
 999
1000     Why (2)?  See Note [Case of cast]
1001
1002 In both cases, in a particular alternative (pi -> ri), we only
1003 add the binding if
1004   (a) x occurs free in (pi -> ri)
1005         (ie it occurs in ri, but is not bound in pi)
1006   (b) the pi does not bind b (or the free vars of co)
1007 We need (a) and (b) for the inserted binding to be correct.
1008
1009 For the alternatives where we inject the binding, we can transfer
1010 all x's OccInfo to b.  And that is the point.
1011
1012 Notice that
1013   * The deliberate shadowing of 'x'.
1014   * That (a) rapidly becomes false, so no bindings are injected.
1015
1016 The reason for doing these transformations here is because it allows
1017 us to adjust the OccInfo for 'x' and 'b' as we go.
1018
1019   * Suppose the only occurrences of 'x' are the scrutinee and in the
1020     ri; then this transformation makes it occur just once, and hence
1021     get inlined right away.
1022
1023   * If we do this in the Simplifier, we don't know whether 'x' is used
1024     in ri, so we are forced to pessimistically zap b's OccInfo even
1025     though it is typically dead (ie neither it nor x appear in the
1026     ri).  There's nothing actually wrong with zapping it, except that
1027     it's kind of nice to know which variables are dead.  My nose
1028     tells me to keep this information as robustly as possible.
1029
1030 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
1031 {x=b}; it's Nothing if the binder-swap doesn't happen.
1032
1033 There is a danger though.  Consider
1034       let v = x +# y
1035       in case (f v) of w -> ...v...v...
1036 And suppose that (f v) expands to just v.  Then we'd like to
1037 use 'w' instead of 'v' in the alternative.  But it may be too
1038 late; we may have substituted the (cheap) x+#y for v in the
1039 same simplifier pass that reduced (f v) to v.
1040
1041 I think this is just too bad.  CSE will recover some of it.
1042
1043 Note [Binder swap on GlobalId scrutinees]
1044 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1045 When the scrutinee is a GlobalId we must take care in two ways
1046
1047  i) In order to *know* whether 'x' occurs free in the RHS, we need its
1048     occurrence info. BUT, we don't gather occurrence info for
1049     GlobalIds.  That's what the (small) occ_scrut_ids set in OccEnv is
1050     for: it says "gather occurrence info for these".
1051
1052  ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
1053      has an External Name. See, for example, SimplEnv Note [Global Ids in
1054      the substitution].
1055
1056 Historical note [no-case-of-case]
1057 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1058 We *used* to suppress the binder-swap in case expressions when
1059 -fno-case-of-case is on.  Old remarks:
1060     "This happens in the first simplifier pass,
1061     and enhances full laziness.  Here's the bad case:
1062             f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
1063     If we eliminate the inner case, we trap it inside the I# v -> arm,
1064     which might prevent some full laziness happening.  I've seen this
1065     in action in spectral/cichelli/Prog.hs:
1066              [(m,n) | m <- [1..max], n <- [1..max]]
1067     Hence the check for NoCaseOfCase."
1068 However, now the full-laziness pass itself reverses the binder-swap, so this
1069 check is no longer necessary.
1070
1071 Historical note [Suppressing the case binder-swap]
1072 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1073 This old note describes a problem that is also fixed by doing the
1074 binder-swap in OccAnal:
1075
1076     There is another situation when it might make sense to suppress the
1077     case-expression binder-swap. If we have
1078
1079         case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
1080                        ...other cases .... }
1081
1082     We'll perform the binder-swap for the outer case, giving
1083
1084         case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
1085                        ...other cases .... }
1086
1087     But there is no point in doing it for the inner case, because w1 can't
1088     be inlined anyway.  Furthermore, doing the case-swapping involves
1089     zapping w2's occurrence info (see paragraphs that follow), and that
1090     forces us to bind w2 when doing case merging.  So we get
1091
1092         case x of w1 { A -> let w2 = w1 in e1
1093                        B -> let w2 = w1 in e2
1094                        ...other cases .... }
1095
1096     This is plain silly in the common case where w2 is dead.
1097
1098     Even so, I can't see a good way to implement this idea.  I tried
1099     not doing the binder-swap if the scrutinee was already evaluated
1100     but that failed big-time:
1101
1102             data T = MkT !Int
1103
1104             case v of w  { MkT x ->
1105             case x of x1 { I# y1 ->
1106             case x of x2 { I# y2 -> ...
1107
1108     Notice that because MkT is strict, x is marked "evaluated".  But to
1109     eliminate the last case, we must either make sure that x (as well as
1110     x1) has unfolding MkT y1.  The straightforward thing to do is to do
1111     the binder-swap.  So this whole note is a no-op.
1112
1113 It's fixed by doing the binder-swap in OccAnal because we can do the
1114 binder-swap unconditionally and still get occurrence analysis
1115 information right.
1116
1117 Note [Case of cast]
1118 ~~~~~~~~~~~~~~~~~~~
1119 Consider        case (x `cast` co) of b { I# ->
1120                 ... (case (x `cast` co) of {...}) ...
1121 We'd like to eliminate the inner case.  That is the motivation for
1122 equation (2) in Note [Binder swap].  When we get to the inner case, we
1123 inline x, cancel the casts, and away we go.
1124
1125 Note [Binders in case alternatives]
1126 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1127 Consider
1128     case x of y { (a,b) -> f y }
1129 We treat 'a', 'b' as dead, because they don't physically occur in the
1130 case alternative.  (Indeed, a variable is dead iff it doesn't occur in
1131 its scope in the output of OccAnal.)  It really
1132 helps to know when binders are unused.  See esp the call to
1133 isDeadBinder in Simplify.mkDupableAlt
1134
1135 In this example, though, the Simplifier will bring 'a' and 'b' back to
1136 life, because it binds 'y' to (a,b) (imagine f got inlined and
1137 scrutinised y).
1138
1139 \begin{code}
occAnalAlt :: OccEnv
           -> CoreBndr
           -> Maybe (Id, CoreExpr)  -- Note [Binder swap]
           -> CoreAlt
           -> (UsageDetails, Alt IdWithOccInfo)
-- Analyse one case alternative, tagging its pattern binders, and wrap the
-- right-hand side in the binder-swap let when the conditions of
-- Note [Binder swap] hold.
occAnalAlt env case_bndr mb_scrut_var (con, bndrs, rhs)
  = case occAnal env rhs of
      (rhs_usage, rhs') ->
        let
            -- See Note [Binders in case alternatives]
            (alt_usg, tagged_bndrs) = tagBinders rhs_usage bndrs
        in
        case mb_scrut_var of
          Just (scrut_var, scrut_rhs)             -- See Note [Binder swap]
            | scrut_var `localUsedIn` alt_usg     -- (a) Fast path, usually false
              && no_pat_bndr_captured             -- (b)
            -> ( addOneOcc usg_wo_scrut case_bndr NoOccInfo
                        -- See Note [Case binder usage] for the NoOccInfo
               , (con, tagged_bndrs, Let (NonRec tagged_scrut_var scrut_rhs) rhs') )
            where
              -- Localise the scrut_var before shadowing it; we're making a
              -- new binding for it, and it might have an External Name, or
              -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
              -- Also we don't want any INLINE or NOINLINE pragmas!
              local_scrut_var = mkLocalId (localiseName (idName scrut_var))
                                          (idType scrut_var)

              (usg_wo_scrut, tagged_scrut_var) = tagBinder alt_usg local_scrut_var

              -- No pattern binder may be free in the RHS of the new binding
              scrut_rhs_fvs        = exprFreeVars scrut_rhs
              no_pat_bndr_captured = not (any (`elemVarSet` scrut_rhs_fvs) bndrs)

          _ -> (alt_usg, (con, tagged_bndrs, rhs'))
1170 \end{code}
1171
1172
1173 %************************************************************************
1174 %*                                                                      *
1175 \subsection[OccurAnal-types]{OccEnv}
1176 %*                                                                      *
1177 %************************************************************************
1178
1179 \begin{code}
-- The environment carried down through the analysis
data OccEnv = OccEnv
  { occ_encl      :: !OccEncl       -- Enclosing context information
  , occ_ctxt      :: !CtxtTy        -- Tells about linearity
  , occ_scrut_ids :: !GblScrutIds   -- See the comments on GblScrutIds
  }

type GblScrutIds = IdSet  -- GlobalIds that are scrutinised, and for which
                          -- we want to gather occurrence info; see
                          -- Note [Binder swap on GlobalId scrutinees]
                          -- No need to prune this if there's a shadowing binding
                          -- because it's OK for it to be too big
1190
-- OccEncl is used to control whether to inline into constructor arguments
-- For example:
--      x = (p,q)               -- Don't inline p or q
--      y = /\a -> (p a, q a)   -- Still don't inline p or q
--      z = f (p,q)             -- Do inline p,q; it may make a rule fire
-- So OccEncl tells enough about the context to know what to do when
-- we encounter a constructor application or PAP.

data OccEncl
  = OccRhs
        -- RHS of let(rec), albeit perhaps inside a type lambda
        -- Don't inline into constructor args here

  | OccVanilla
        -- Argument of function, body of lambda, scrutinee of case etc.
        -- Do inline into constructor args here

type CtxtTy = [Bool]
        -- []           No info
        --
        -- True:ctxt    Analysing a function-valued expression that will be
        --                      applied just once
        --
        -- False:ctxt   Analysing a function-valued expression that may
        --                      be applied many times; but when it is,
        --                      the CtxtTy inside applies
1214
-- The starting environment: RHS context, no linearity info,
-- no scrutinised GlobalIds
initOccEnv :: OccEnv
initOccEnv
  = OccEnv { occ_scrut_ids = emptyVarSet
           , occ_ctxt      = []
           , occ_encl      = OccRhs }
1219
-- Switch to a vanilla context and forget the linearity info;
-- occ_scrut_ids is carried over unchanged
vanillaCtxt :: OccEnv -> OccEnv
vanillaCtxt env = env { occ_encl = OccVanilla, occ_ctxt = [] }
1223
-- Switch to an RHS context and forget the linearity info;
-- occ_scrut_ids is carried over unchanged
rhsCtxt :: OccEnv -> OccEnv
rhsCtxt env = env { occ_encl = OccRhs, occ_ctxt = [] }
1227
mkAltEnv :: OccEnv -> Maybe (Id, CoreExpr) -> OccEnv
-- Does two things: a) makes the occ_encl = OccVanilla
--                  b) extends the scrut_ids if necessary
-- The occ_ctxt is left as it was in both cases.
mkAltEnv env (Just (scrut_id, _))
  | not (isLocalId scrut_id)    -- A non-local scrutinee: start tracking it
  = env { occ_encl      = OccVanilla
        , occ_scrut_ids = extendVarSet (occ_scrut_ids env) scrut_id }
mkAltEnv env _
  = if isRhsEnv env
      then env { occ_encl = OccVanilla }
      else env                  -- Already vanilla; return it as is
1239
-- Replace the linearity context, keeping the other two fields
setCtxtTy :: OccEnv -> CtxtTy -> OccEnv
setCtxtTy env ctxt
  = OccEnv { occ_encl      = occ_encl env
           , occ_ctxt      = ctxt
           , occ_scrut_ids = occ_scrut_ids env }
1242
-- True exactly when the enclosing context is OccRhs
isRhsEnv :: OccEnv -> Bool
isRhsEnv env
  = case occ_encl env of
      OccRhs     -> True
      OccVanilla -> False
1246
oneShotGroup :: OccEnv -> [CoreBndr] -> [CoreBndr]
        -- The result binders have one-shot-ness set that they might not have had originally.
        -- This happens in (build (\cn -> e)).  Here the occurrence analyser
        -- linearity context knows that c,n are one-shot, and it records that fact in
        -- the binder. This is useful to guide subsequent float-in/float-out transformations

oneShotGroup (OccEnv { occ_ctxt = ctxt }) bndrs
  = go ctxt bndrs
  where
    go _ [] = []

    -- An Id binder consumes one item of the context; if that item is
    -- True the binder is marked one-shot
    go (one_shot : ctxt') (b : bs)
      | isId b = (if one_shot then setOneShotLambda b else b) : go ctxt' bs

    -- Context exhausted, or the binder is not an Id: the binder is
    -- passed through and the context is left as it is
    go ctxt' (b : bs) = b : go ctxt' bs
1265
-- Push one True onto the linearity context for each value argument
addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
addAppCtxt env args
  = env { occ_ctxt = one_shot_items ++ occ_ctxt env }
  where
    one_shot_items = replicate (valArgCount args) True
1269 \end{code}
1270
1271 %************************************************************************
1272 %*                                                                      *
1273 \subsection[OccurAnal-types]{UsageDetails}
1274 %*                                                                      *
1275 %************************************************************************
1276
1277 \begin{code}
type UsageDetails = IdEnv OccInfo       -- A finite map from ids to their usage
                -- INVARIANT: never IAmDead
                -- (Deadness is signalled by not being in the map at all)

(+++), combineAltsUsageDetails
        :: UsageDetails -> UsageDetails -> UsageDetails

-- Union of two usages; an Id present in both is combined with addOccInfo
(+++) = plusVarEnv_C addOccInfo

-- Union of the usages of two case branches; an Id present in both is
-- combined with orOccInfo
combineAltsUsageDetails = plusVarEnv_C orOccInfo
1290
-- Add one more occurrence of an Id, carrying the given OccInfo
addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
addOneOcc usage id info
  = usage +++ unitVarEnv id info
        -- ToDo: make this more efficient
1295
-- The usage in which no Id occurs
emptyDetails :: UsageDetails
emptyDetails = emptyVarEnv
1298
localUsedIn, usedIn :: Id -> UsageDetails -> Bool
-- localUsedIn: the Id has an entry in the usage map
localUsedIn v details = elemVarEnv v details
-- usedIn: as localUsedIn, except that an exported Id always counts as used
usedIn v details = isExportedId v || localUsedIn v details
1302
type IdWithOccInfo = Id

tagBinders :: UsageDetails          -- Of scope
           -> [Id]                  -- Binders
           -> (UsageDetails,        -- Details with binders removed
              [IdWithOccInfo])    -- Tagged binders

tagBinders usage binders
  = remaining `seq` (remaining, tagged)
        -- The trimmed usage is forced before the pair is returned
  where
    remaining = usage `delVarEnvList` binders
    tagged    = map (setBinderOcc usage) binders
1316
tagBinder :: UsageDetails           -- Of scope
          -> Id                     -- Binder
          -> (UsageDetails,         -- Details with binder removed
              IdWithOccInfo)        -- Tagged binder

tagBinder usage binder
  = remaining `seq` (remaining, tagged)
        -- The trimmed usage is forced before the pair is returned
  where
    remaining = usage `delVarEnv` binder
    tagged    = setBinderOcc usage binder
1328
-- Attach to a binder the occurrence info found for it in the usage
setBinderOcc :: UsageDetails -> CoreBndr -> CoreBndr
setBinderOcc usage bndr
  | isTyVar bndr      = bndr
  | isExportedId bndr = reset_exported
  | otherwise         = setIdOccInfo bndr (lookupVarEnv usage bndr `orElse` IAmDead)
            -- No entry in the usage map means the binder is dead
  where
    -- Don't use local usage info for visible-elsewhere things
    -- BUT *do* erase any IAmALoopBreaker annotation, because we're
    -- about to re-generate it and it shouldn't be "sticky"
    reset_exported
      = case idOccInfo bndr of
          NoOccInfo -> bndr
          _         -> setIdOccInfo bndr NoOccInfo
1342 \end{code}
1343
1344
1345 %************************************************************************
1346 %*                                                                      *
1347 \subsection{Operations over OccInfo}
1348 %*                                                                      *
1349 %************************************************************************
1350
1351 \begin{code}
-- The usage produced by a single occurrence of an Id
mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails
mkOneOcc env id int_cxt
  | isLocalId id   = record (OneOcc False True int_cxt)
        -- Not inside a lambda, one branch
  | is_scrutinised = record NoOccInfo
        -- A non-local Id that is in occ_scrut_ids
  | otherwise      = emptyDetails
        -- Any other non-local Id is not recorded at all
  where
    record         = unitVarEnv id
    is_scrutinised = id `elemVarSet` occ_scrut_ids env
1357
markMany, markInsideLam, markInsideSCC :: OccInfo -> OccInfo

-- Whatever was known before, now nothing is known
markMany = const NoOccInfo

-- Inside an SCC is treated exactly like 'many'
markInsideSCC = markMany

-- Only a OneOcc is affected: its inside-lambda flag is switched on
markInsideLam occ
  = case occ of
      OneOcc _ one_br int_cxt -> OneOcc True one_br int_cxt
      _                       -> occ
1366
addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo

-- Two occurrences added together are always 'many'
addOccInfo occ1 occ2 = ASSERT( not (isDeadOcc occ1) && not (isDeadOcc occ2) )
                       NoOccInfo   -- Both branches are at least One
                                   -- (Argument is never IAmDead)

-- (orOccInfo orig new) is used
-- when combining occurrence info from branches of a case

orOccInfo (OneOcc in_lam1 _ int_cxt1) (OneOcc in_lam2 _ int_cxt2)
  = OneOcc (in_lam1 || in_lam2)
           False        -- False, because it occurs in both branches
           (int_cxt1 && int_cxt2)
orOccInfo occ1 occ2 = ASSERT( not (isDeadOcc occ1) && not (isDeadOcc occ2) )
                      NoOccInfo
1383 \end{code}