compiler/simplCore/OccurAnal.lhs

   1 %
   2 % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
   3 %
   4 %************************************************************************
   5 %*                                                                      *
   6 \section[OccurAnal]{Occurrence analysis pass}
   7 %*                                                                      *
   8 %************************************************************************
   9
  10 The occurrence analyser re-typechecks a core expression, returning a new
  11 core expression with (hopefully) improved usage information.
  12
  13 \begin{code}
  14 module OccurAnal (
  15         occurAnalysePgm, occurAnalyseExpr
  16     ) where
  17
  18 #include "HsVersions.h"
  19
  20 import CoreSyn
  21 import CoreFVs
  22 import CoreUtils        ( exprIsTrivial, isDefaultAlt )
  23 import Coercion         ( mkSymCoercion )
  24 import Id
  25 import Name             ( localiseName )
  26 import BasicTypes
  27
  28 import VarSet
  29 import VarEnv
  30
  31 import Maybes           ( orElse )
  32 import Digraph          ( SCC(..), stronglyConnCompFromEdgedVerticesR )
  33 import PrelNames        ( buildIdKey, foldrIdKey, runSTRepIdKey, augmentIdKey )
  34 import Unique           ( Unique )
  35 import UniqFM           ( keysUFM, intersectUFM_C, foldUFM_Directly )
  36 import Util             ( mapAndUnzip )
  37 import Outputable
  38
  39 import Data.List
  40 \end{code}
  41
  42
  43 %************************************************************************
  44 %*                                                                      *
  45 \subsection[OccurAnal-main]{Counting occurrences: main function}
  46 %*                                                                      *
  47 %************************************************************************
  48
  49 Here's the externally-callable interface:
  50
  51 \begin{code}
  52 occurAnalysePgm :: [CoreBind] -> [CoreRule] -> [CoreBind]
  53 occurAnalysePgm binds rules
  54   = snd (go initOccEnv binds)
  55   where
  56     initial_details = addIdOccs emptyDetails (rulesFreeVars rules)
  57     -- The RULES keep things alive!
  58
  59     go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
  60     go _ []
  61         = (initial_details, [])
  62     go env (bind:binds)
  63         = (final_usage, bind' ++ binds')
  64         where
  65            (bs_usage, binds')   = go env binds
  66            (final_usage, bind') = occAnalBind env bind bs_usage
  67
  68 occurAnalyseExpr :: CoreExpr -> CoreExpr
  69         -- Do occurrence analysis, and discard occurence info returned
  70 occurAnalyseExpr expr = snd (occAnal initOccEnv expr)
  71 \end{code}
  72
  73
  74 %************************************************************************
  75 %*                                                                      *
  76 \subsection[OccurAnal-main]{Counting occurrences: main function}
  77 %*                                                                      *
  78 %************************************************************************
  79
  80 Bindings
  81 ~~~~~~~~
  82
  83 \begin{code}
  84 occAnalBind :: OccEnv
  85             -> CoreBind
  86             -> UsageDetails             -- Usage details of scope
  87             -> (UsageDetails,           -- Of the whole let(rec)
  88                 [CoreBind])
  89
  90 occAnalBind env (NonRec binder rhs) body_usage
  91   | isTyVar binder                      -- A type let; we don't gather usage info
  92   = (body_usage, [NonRec binder rhs])
  93
  94   | not (binder `usedIn` body_usage)    -- It's not mentioned
  95   = (body_usage, [])
  96
  97   | otherwise                   -- It's mentioned in the body
  98   = (body_usage' +++ addRuleUsage rhs_usage binder,     -- Note [Rules are extra RHSs]
  99      [NonRec tagged_binder rhs'])
 100   where
 101     (body_usage', tagged_binder) = tagBinder body_usage binder
 102     (rhs_usage, rhs')            = occAnalRhs env tagged_binder rhs
 103 \end{code}
 104
 105 Note [Dead code]
 106 ~~~~~~~~~~~~~~~~
 107 Dropping dead code for recursive bindings is done in a very simple way:
 108
 109         the entire set of bindings is dropped if none of its binders are
 110         mentioned in its body; otherwise none are.
 111
 112 This seems to miss an obvious improvement.
 113
 114         letrec  f = ...g...
 115                 g = ...f...
 116         in
 117         ...g...
 118 ===>
 119         letrec f = ...g...
 120                g = ...(...g...)...
 121         in
 122         ...g...
 123
 124 Now 'f' is unused! But it's OK!  Dependency analysis will sort this
 125 out into a letrec for 'g' and a 'let' for 'f', and then 'f' will get
 126 dropped.  It isn't easy to do a perfect job in one blow.  Consider
 127
 128         letrec f = ...g...
 129                g = ...h...
 130                h = ...k...
 131                k = ...m...
 132                m = ...m...
 133         in
 134         ...m...
 135
 136
 137 Note [Loop breaking and RULES]
 138 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 139 Loop breaking is surprisingly subtle.  First read the section 4 of
 140 "Secrets of the GHC inliner".  This describes our basic plan.
 141
 142 However things are made quite a bit more complicated by RULES.  Remember
 143
 144   * Note [Rules are extra RHSs]
 145     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 146     A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
 147     keeps the specialised "children" alive.  If the parent dies
 148     (because it isn't referenced any more), then the children will die
 149     too (unless they are already referenced directly).
 150
 151     To that end, we build a Rec group for each cyclic strongly
 152     connected component,
 153         *treating f's rules as extra RHSs for 'f'*.
 154
 155     When we make the Rec groups we include variables free in *either*
 156     LHS *or* RHS of the rule.  The former might seems silly, but see
 157     Note [Rule dependency info].
 158
 159     So in Example [eftInt], eftInt and eftIntFB will be put in the
 160     same Rec, even though their 'main' RHSs are both non-recursive.
 161
 162   * Note [Rules are visible in their own rec group]
 163     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 164     We want the rules for 'f' to be visible in f's right-hand side.
 165     And we'd like them to be visible in other functions in f's Rec
 166     group.  E.g. in Example [Specialisation rules] we want f' rule
 167     to be visible in both f's RHS, and fs's RHS.
 168
 169     This means that we must simplify the RULEs first, before looking
 170     at any of the definitions.  This is done by Simplify.simplRecBind,
 171     when it calls addLetIdInfo.
 172
 173   * Note [Choosing loop breakers]
 174     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 175     We avoid infinite inlinings by choosing loop breakers, and
 176     ensuring that a loop breaker cuts each loop.  But what is a
 177     "loop"?  In particular, a RULE is like an equation for 'f' that
 178     is *always* inlined if it is applicable.  We do *not* disable
 179     rules for loop-breakers.  It's up to whoever makes the rules to
 180     make sure that the rules themselves always terminate.  See Note
 181     [Rules for recursive functions] in Simplify.lhs
 182
 183     Hence, if
 184         f's RHS mentions g, and
 185         g has a RULE that mentions h, and
 186         h has a RULE that mentions f
 187
 188     then we *must* choose f to be a loop breaker.  In general, take the
 189     free variables of f's RHS, and augment it with all the variables
 190     reachable by RULES from those starting points.  That is the whole
 191     reason for computing rule_fv_env in occAnalBind.  (Of course we
 192     only consider free vars that are also binders in this Rec group.)
 193
 194     Note that when we compute this rule_fv_env, we only consider variables
 195     free in the *RHS* of the rule, in contrast to the way we build the
 196     Rec group in the first place (Note [Rule dependency info])
 197
 198     Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
 199     chosen as a loop breaker, because their RHSs don't mention each other.
 200     And indeed both can be inlined safely.
 201
 202     Note that the edges of the graph we use for computing loop breakers
 203     are not the same as the edges we use for computing the Rec blocks.
 204     That's why we compute
 205         rec_edges          for the Rec block analysis
 206         loop_breaker_edges for the loop breaker analysis
 207
 208
 209   * Note [Weak loop breakers]
 210     ~~~~~~~~~~~~~~~~~~~~~~~~~
 211     There is a last nasty wrinkle.  Suppose we have
 212
 213         Rec { f = f_rhs
 214               RULE f [] = g
 215
 216               h = h_rhs
 217               g = h
 218               ...more...
 219         }
 220
 221     Remmber that we simplify the RULES before any RHS (see Note
 222     [Rules are visible in their own rec group] above).
 223
 224     So we must *not* postInlineUnconditionally 'g', even though
 225     its RHS turns out to be trivial.  (I'm assuming that 'g' is
 226     not choosen as a loop breaker.)  Why not?  Because then we
 227     drop the binding for 'g', which leaves it out of scope in the
 228     RULE!
 229
 230     We "solve" this by making g a "weak" or "rules-only" loop breaker,
 231     with OccInfo = IAmLoopBreaker True.  A normal "strong" loop breaker
 232     has IAmLoopBreaker False.  So
 233
 234                                 Inline  postInlineUnconditionally
 235         IAmLoopBreaker False    no      no
 236         IAmLoopBreaker True     yes     no
 237         other                   yes     yes
 238
 239     The **sole** reason for this kind of loop breaker is so that
 240     postInlineUnconditionally does not fire.  Ugh.
 241
 242   * Note [Rule dependency info]
 243     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 244     The VarSet in a SpecInfo is used for dependency analysis in the
 245     occurrence analyser.  We must track free vars in *both* lhs and rhs.
 246     Hence use of idRuleVars, rather than idRuleRhsVars in addRuleUsage.
 247     Why both? Consider
 248         x = y
 249         RULE f x = 4
 250     Then if we substitute y for x, we'd better do so in the
 251     rule's LHS too, so we'd better ensure the dependency is respected
 252
 253
 254   * Note [Inline rules]
 255     ~~~~~~~~~~~~~~~~~~~
 256     None of the above stuff about RULES applies to Inline Rules,
 257     stored in a CoreUnfolding.  The unfolding, if any, is simplified
 258     at the same time as the regular RHS of the function, so it should
 259     be treated *exactly* like an extra RHS.
 260
 261
 262 Example [eftInt]
 263 ~~~~~~~~~~~~~~~
 264 Example (from GHC.Enum):
 265
 266   eftInt :: Int# -> Int# -> [Int]
 267   eftInt x y = ...(non-recursive)...
 268
 269   {-# INLINE [0] eftIntFB #-}
 270   eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
 271   eftIntFB c n x y = ...(non-recursive)...
 272
 273   {-# RULES
 274   "eftInt"  [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
 275   "eftIntList"  [1] eftIntFB  (:) [] = eftInt
 276    #-}
 277
 278 Example [Specialisation rules]
 279 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 280 Consider this group, which is typical of what SpecConstr builds:
 281
 282    fs a = ....f (C a)....
 283    f  x = ....f (C a)....
 284    {-# RULE f (C a) = fs a #-}
 285
 286 So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
 287
 288 But watch out!  If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
 289         - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
 290         - fs is inlined (say it's small)
 291         - now there's another opportunity to apply the RULE
 292
 293 This showed up when compiling Control.Concurrent.Chan.getChanContents.
 294
 295
 296 \begin{code}
 297 occAnalBind env (Rec pairs) body_usage
 298   = foldr occAnalRec (body_usage, []) sccs
 299         -- For a recursive group, we
 300         --      * occ-analyse all the RHSs
 301         --      * compute strongly-connected components
 302         --      * feed those components to occAnalRec
 303   where
 304     -------------Dependency analysis ------------------------------
 305     bndr_set = mkVarSet (map fst pairs)
 306
 307     sccs :: [SCC (Node Details)]
 308     sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompFromEdgedVerticesR rec_edges
 309
 310     rec_edges :: [Node Details]
 311     rec_edges = {-# SCC "occAnalBind.assoc" #-}  map make_node pairs
 312
 313     make_node (bndr, rhs)
 314         = (ND bndr rhs' all_rhs_usage rhs_fvs, idUnique bndr, out_edges)
 315         where
 316           (rhs_usage, rhs') = occAnalRhs env bndr rhs
 317           all_rhs_usage = addRuleUsage rhs_usage bndr    -- Note [Rules are extra RHSs]
 318           rhs_fvs = intersectUFM_C (\b _ -> b) bndr_set rhs_usage
 319           out_edges = keysUFM (rhs_fvs `unionVarSet` idRuleVars bndr)
 320         -- (a -> b) means a mentions b
 321         -- Given the usage details (a UFM that gives occ info for each free var of
 322         -- the RHS) we can get the list of free vars -- or rather their Int keys --
 323         -- by just extracting the keys from the finite map.  Grimy, but fast.
 324         -- Previously we had this:
 325         --      [ bndr | bndr <- bndrs,
 326         --               maybeToBool (lookupVarEnv rhs_usage bndr)]
 327         -- which has n**2 cost, and this meant that edges_from alone
 328         -- consumed 10% of total runtime!
 329
 330 -----------------------------
 331 occAnalRec :: SCC (Node Details) -> (UsageDetails, [CoreBind])
 332                                  -> (UsageDetails, [CoreBind])
 333
 334         -- The NonRec case is just like a Let (NonRec ...) above
 335 occAnalRec (AcyclicSCC (ND bndr rhs rhs_usage _, _, _)) (body_usage, binds)
 336   | not (bndr `usedIn` body_usage)
 337   = (body_usage, binds)
 338
 339   | otherwise                   -- It's mentioned in the body
 340   = (body_usage' +++ rhs_usage,
 341      NonRec tagged_bndr rhs : binds)
 342   where
 343     (body_usage', tagged_bndr) = tagBinder body_usage bndr
 344
 345
 346         -- The Rec case is the interesting one
 347         -- See Note [Loop breaking]
 348 occAnalRec (CyclicSCC nodes) (body_usage, binds)
 349   | not (any (`usedIn` body_usage) bndrs)       -- NB: look at body_usage, not total_usage
 350   = (body_usage, binds)                         -- Dead code
 351
 352   | otherwise   -- At this point we always build a single Rec
 353   = (final_usage, Rec pairs : binds)
 354
 355   where
 356     bndrs    = [b | (ND b _ _ _, _, _) <- nodes]
 357     bndr_set = mkVarSet bndrs
 358
 359         ----------------------------
 360         -- Tag the binders with their occurrence info
 361     total_usage = foldl add_usage body_usage nodes
 362     add_usage usage_so_far (ND _ _ rhs_usage _, _, _) = usage_so_far +++ rhs_usage
 363     (final_usage, tagged_nodes) = mapAccumL tag_node total_usage nodes
 364
 365     tag_node :: UsageDetails -> Node Details -> (UsageDetails, Node Details)
 366         -- (a) Tag the binders in the details with occ info
 367         -- (b) Mark the binder with "weak loop-breaker" OccInfo
 368         --      saying "no preInlineUnconditionally" if it is used
 369         --      in any rule (lhs or rhs) of the recursive group
 370         --      See Note [Weak loop breakers]
 371     tag_node usage (ND bndr rhs rhs_usage rhs_fvs, k, ks)
 372       = (usage `delVarEnv` bndr, (ND bndr2 rhs rhs_usage rhs_fvs, k, ks))
 373       where
 374         bndr2 | bndr `elemVarSet` all_rule_fvs = makeLoopBreaker True bndr1
 375               | otherwise                      = bndr1
 376         bndr1 = setBinderOcc usage bndr
 377     all_rule_fvs = bndr_set `intersectVarSet` foldr (unionVarSet . idRuleVars)
 378                                                     emptyVarSet bndrs
 379
 380         ----------------------------
 381         -- Now reconstruct the cycle
 382     pairs | no_rules  = reOrderCycle 0 tagged_nodes []
 383           | otherwise = foldr (reOrderRec 0) [] $
 384                         stronglyConnCompFromEdgedVerticesR loop_breaker_edges
 385
 386         -- See Note [Choosing loop breakers] for loop_breaker_edges
 387     loop_breaker_edges = map mk_node tagged_nodes
 388     mk_node (details@(ND _ _ _ rhs_fvs), k, _) = (details, k, new_ks)
 389         where
 390           new_ks = keysUFM (extendFvs rule_fv_env rhs_fvs rhs_fvs)
 391
 392     ------------------------------------
 393     rule_fv_env :: IdEnv IdSet  -- Variables from this group mentioned in RHS of rules
 394                                 -- Domain is *subset* of bound vars (others have no rule fvs)
 395     rule_fv_env = rule_loop init_rule_fvs
 396
 397     no_rules      = null init_rule_fvs
 398     init_rule_fvs = [(b, rule_fvs)
 399                     | b <- bndrs
 400                     , let rule_fvs = idRuleRhsVars b `intersectVarSet` bndr_set
 401                     , not (isEmptyVarSet rule_fvs)]
 402
 403     rule_loop :: [(Id,IdSet)] -> IdEnv IdSet    -- Finds fixpoint
 404     rule_loop fv_list
 405         | no_change = env
 406         | otherwise = rule_loop new_fv_list
 407         where
 408           env = mkVarEnv init_rule_fvs
 409           (no_change, new_fv_list) = mapAccumL bump True fv_list
 410           bump no_change (b,fvs)
 411                 | new_fvs `subVarSet` fvs = (no_change, (b,fvs))
 412                 | otherwise               = (False,     (b,new_fvs `unionVarSet` fvs))
 413                 where
 414                   new_fvs = extendFvs env emptyVarSet fvs
 415
 416 extendFvs :: IdEnv IdSet -> IdSet -> IdSet -> IdSet
 417 -- (extendFVs env fvs s) returns (fvs `union` env(s))
 418 extendFvs env fvs id_set
 419   = foldUFM_Directly add fvs id_set
 420   where
 421     add uniq _ fvs
 422         = case lookupVarEnv_Directly env uniq  of
 423             Just fvs' -> fvs' `unionVarSet` fvs
 424             Nothing   -> fvs
 425 \end{code}
 426
 427 @reOrderRec@ is applied to the list of (binder,rhs) pairs for a cyclic
 428 strongly connected component (there's guaranteed to be a cycle).  It returns the
 429 same pairs, but
 430         a) in a better order,
 431         b) with some of the Ids having a IAmALoopBreaker pragma
 432
 433 The "loop-breaker" Ids are sufficient to break all cycles in the SCC.  This means
 434 that the simplifier can guarantee not to loop provided it never records an inlining
 435 for these no-inline guys.
 436
 437 Furthermore, the order of the binds is such that if we neglect dependencies
 438 on the no-inline Ids then the binds are topologically sorted.  This means
 439 that the simplifier will generally do a good job if it works from top bottom,
 440 recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
 441
 442 ==============
 443 [June 98: I don't understand the following paragraphs, and I've
 444           changed the a=b case again so that it isn't a special case any more.]
 445
 446 Here's a case that bit me:
 447
 448         letrec
 449                 a = b
 450                 b = \x. BIG
 451         in
 452         ...a...a...a....
 453
 454 Re-ordering doesn't change the order of bindings, but there was no loop-breaker.
 455
 456 My solution was to make a=b bindings record b as Many, rather like INLINE bindings.
 457 Perhaps something cleverer would suffice.
 458 ===============
 459
 460
 461 \begin{code}
 462 type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique,
 463                                                 -- which is gotten from the Id.
 464 data Details = ND Id            -- Binder
 465                   CoreExpr      -- RHS
 466
 467                   UsageDetails  -- Full usage from RHS,
 468                                 -- including *both* RULES *and* InlineRule unfolding
 469
 470                   IdSet         -- Other binders *from this Rec group* mentioned in
 471                                 --   * the  RHS
 472                                 --   * any InlineRule unfolding
 473                                 -- but *excluding* any RULES
 474
 475 reOrderRec :: Int -> SCC (Node Details)
 476            -> [(Id,CoreExpr)] -> [(Id,CoreExpr)]
 477 -- Sorted into a plausible order.  Enough of the Ids have
 478 --      IAmALoopBreaker pragmas that there are no loops left.
 479 reOrderRec _ (AcyclicSCC (ND bndr rhs _ _, _, _)) pairs = (bndr, rhs) : pairs
 480 reOrderRec depth (CyclicSCC cycle)                pairs = reOrderCycle depth cycle pairs
 481
 482 reOrderCycle :: Int -> [Node Details] -> [(Id,CoreExpr)] -> [(Id,CoreExpr)]
 483 reOrderCycle _ [] _
 484   = panic "reOrderCycle"
 485 reOrderCycle _ [bind] pairs    -- Common case of simple self-recursion
 486   = (makeLoopBreaker False bndr, rhs) : pairs
 487   where
 488     (ND bndr rhs _ _, _, _) = bind
 489
 490 reOrderCycle depth (bind : binds) pairs
 491   =     -- Choose a loop breaker, mark it no-inline,
 492         -- do SCC analysis on the rest, and recursively sort them out
 493 --    pprTrace "reOrderCycle" (ppr [b | (ND b _ _ _, _, _) <- bind:binds]) $
 494     foldr (reOrderRec new_depth)
 495           ([ (makeLoopBreaker False bndr, rhs)
 496            | (ND bndr rhs _ _, _, _) <- chosen_binds] ++ pairs)
 497           (stronglyConnCompFromEdgedVerticesR unchosen)
 498   where
 499     (chosen_binds, unchosen) = choose_loop_breaker [bind] (score bind) [] binds
 500
 501     approximate_loop_breaker = depth >= 2
 502     new_depth | approximate_loop_breaker = 0
 503               | otherwise                = depth+1
 504         -- After two iterations (d=0, d=1) give up
 505         -- and approximate, returning to d=0
 506
 507         -- This loop looks for the bind with the lowest score
 508         -- to pick as the loop  breaker.  The rest accumulate in
 509     choose_loop_breaker loop_binds _loop_sc acc []
 510         = (loop_binds, acc)        -- Done
 511
 512         -- If approximate_loop_breaker is True, we pick *all*
 513         -- nodes with lowest score, else just one
 514         -- See Note [Complexity of loop breaking]
 515     choose_loop_breaker loop_binds loop_sc acc (bind : binds)
 516         | sc < loop_sc  -- Lower score so pick this new one
 517         = choose_loop_breaker [bind] sc (loop_binds ++ acc) binds
 518
 519         | approximate_loop_breaker && sc == loop_sc
 520         = choose_loop_breaker (bind : loop_binds) loop_sc acc binds
 521
 522         | otherwise     -- Higher score so don't pick it
 523         = choose_loop_breaker loop_binds loop_sc (bind : acc) binds
 524         where
 525           sc = score bind
 526
 527     score :: Node Details -> Int        -- Higher score => less likely to be picked as loop breaker
 528     score (ND bndr rhs _ _, _, _)
 529         | isDFunId bndr = 9   -- Never choose a DFun as a loop breaker
 530                               -- Note [DFuns should not be loop breakers]
 531
 532         | Just (inl_rule_info, _) <- isInlineRule_maybe (idUnfolding bndr)
 533         = case inl_rule_info of
 534              InlWrapper {} -> 10  -- Note [INLINE pragmas]
 535              _other        ->  3  -- Data structures are more important than this
 536                                   -- so that dictionary/method recursion unravels
 537                 -- Note that this case hits all InlineRule things, so we
 538                 -- never look at 'rhs for InlineRule stuff. That's right, because
 539                 -- 'rhs' is irrelevant for inlining things with an InlineRule
 540
 541         | is_con_app rhs = 5  -- Data types help with cases: Note [Constructor applications]
 542
 543         | exprIsTrivial rhs = 10  -- Practically certain to be inlined
 544                 -- Used to have also: && not (isExportedId bndr)
 545                 -- But I found this sometimes cost an extra iteration when we have
 546                 --      rec { d = (a,b); a = ...df...; b = ...df...; df = d }
 547                 -- where df is the exported dictionary. Then df makes a really
 548                 -- bad choice for loop breaker
 549
 550
 551 -- If an Id is marked "never inline" then it makes a great loop breaker
 552 -- The only reason for not checking that here is that it is rare
 553 -- and I've never seen a situation where it makes a difference,
 554 -- so it probably isn't worth the time to test on every binder
 555 --      | isNeverActive (idInlinePragma bndr) = -10
 556
 557         | isOneOcc (idOccInfo bndr) = 2  -- Likely to be inlined
 558
 559         | canUnfold (idUnfolding bndr) = 1
 560                 -- the Id has some kind of unfolding
 561
 562         | otherwise = 0
 563
 564         -- Checking for a constructor application
 565         -- Cheap and cheerful; the simplifer moves casts out of the way
 566         -- The lambda case is important to spot x = /\a. C (f a)
 567         -- which comes up when C is a dictionary constructor and
 568         -- f is a default method.
 569         -- Example: the instance for Show (ST s a) in GHC.ST
 570         --
 571         -- However we *also* treat (\x. C p q) as a con-app-like thing,
 572         --      Note [Closure conversion]
 573     is_con_app (Var v)    = isConLikeId v
 574     is_con_app (App f _)  = is_con_app f
 575     is_con_app (Lam _ e)  = is_con_app e
 576     is_con_app (Note _ e) = is_con_app e
 577     is_con_app _          = False
 578
 579 makeLoopBreaker :: Bool -> Id -> Id
 580 -- Set the loop-breaker flag: see Note [Weak loop breakers]
 581 makeLoopBreaker weak bndr = setIdOccInfo bndr (IAmALoopBreaker weak)
 582 \end{code}
 583
 584 Note [Complexity of loop breaking]
 585 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 586 The loop-breaking algorithm knocks out one binder at a time, and
 587 performs a new SCC analysis on the remaining binders.  That can
 588 behave very badly in tightly-coupled groups of bindings; in the
 589 worst case it can be (N**2)*log N, because it does a full SCC
 590 on N, then N-1, then N-2 and so on.
 591
 592 To avoid this, we switch plans after 2 (or whatever) attempts:
 593   Plan A: pick one binder with the lowest score, make it
 594           a loop breaker, and try again
 595   Plan B: pick *all* binders with the lowest score, make them
 596           all loop breakers, and try again
 597 Since there are only a small finite number of scores, this will
 598 terminate in a constant number of iterations, rather than O(N)
 599 iterations.
 600
 601 You might thing that it's very unlikely, but RULES make it much
 602 more likely.  Here's a real example from Trac #1969:
 603   Rec { $dm = \d.\x. op d
 604         {-# RULES forall d. $dm Int d  = $s$dm1
 605                   forall d. $dm Bool d = $s$dm2 #-}
 606
 607         dInt = MkD .... opInt ...
 608         dInt = MkD .... opBool ...
 609         opInt  = $dm dInt
 610         opBool = $dm dBool
 611
 612         $s$dm1 = \x. op dInt
 613         $s$dm2 = \x. op dBool }
 614 The RULES stuff means that we can't choose $dm as a loop breaker
 615 (Note [Choosing loop breakers]), so we must choose at least (say)
 616 opInt *and* opBool, and so on.  The number of loop breakders is
 617 linear in the number of instance declarations.
 618
 619 Note [INLINE pragmas]
 620 ~~~~~~~~~~~~~~~~~~~~~
 621 Avoid choosing a function with an INLINE pramga as the loop breaker!
 622 If such a function is mutually-recursive with a non-INLINE thing,
 623 then the latter should be the loop-breaker.
 624
 625 Usually this is just a question of optimisation. But a particularly
 626 bad case is wrappers generated by the demand analyser: if you make
 627 then into a loop breaker you may get an infinite inlining loop.  For
 628 example:
 629   rec {
 630         $wfoo x = ....foo x....
 631
 632         {-loop brk-} foo x = ...$wfoo x...
 633   }
 634 The interface file sees the unfolding for $wfoo, and sees that foo is
 635 strict (and hence it gets an auto-generated wrapper).  Result: an
 636 infinite inlining in the importing scope.  So be a bit careful if you
 637 change this.  A good example is Tree.repTree in
 638 nofib/spectral/minimax. If the repTree wrapper is chosen as the loop
 639 breaker then compiling Game.hs goes into an infinite loop.  This
 640 happened when we gave is_con_app a lower score than inline candidates:
 641
 642   Tree.repTree
 643     = __inline_me (/\a. \w w1 w2 ->
 644                    case Tree.$wrepTree @ a w w1 w2 of
 645                     { (# ww1, ww2 #) -> Branch @ a ww1 ww2 })
 646   Tree.$wrepTree
 647     = /\a w w1 w2 ->
 648       (# w2_smP, map a (Tree a) (Tree.repTree a w1 w) (w w2) #)
 649
 650 Here we do *not* want to choose 'repTree' as the loop breaker.
 651
 652 Note [DFuns should not be loop breakers]
 653 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 654 It's particularly bad to make a DFun into a loop breaker.  See
 655 Note [How instance declarations are translated] in TcInstDcls
 656
 657 We give DFuns a higher score than ordinary CONLIKE things because
 658 if there's a choice we want the DFun to be the non-looop breker. Eg
 659
 660 rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
 661
 662       $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
 663       {-# DFUN #-}
 664       $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
 665     }
 666
 667 Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
 668 if we can't unravel the DFun first.
 669
 670 Note [Constructor applications]
 671 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 672 It's really really important to inline dictionaries.  Real
 673 example (the Enum Ordering instance from GHC.Base):
 674
 675      rec     f = \ x -> case d of (p,q,r) -> p x
 676              g = \ x -> case d of (p,q,r) -> q x
 677              d = (v, f, g)
 678
 679 Here, f and g occur just once; but we can't inline them into d.
 680 On the other hand we *could* simplify those case expressions if
 681 we didn't stupidly choose d as the loop breaker.
 682 But we won't because constructor args are marked "Many".
 683 Inlining dictionaries is really essential to unravelling
 684 the loops in static numeric dictionaries, see GHC.Float.
 685
 686 Note [Closure conversion]
 687 ~~~~~~~~~~~~~~~~~~~~~~~~~
 688 We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
 689 The immediate motivation came from the result of a closure-conversion transformation
 690 which generated code like this:
 691
 692     data Clo a b = forall c. Clo (c -> a -> b) c
 693
 694     ($:) :: Clo a b -> a -> b
 695     Clo f env $: x = f env x
 696
 697     rec { plus = Clo plus1 ()
 698
 699         ; plus1 _ n = Clo plus2 n
 700
 701         ; plus2 Zero     n = n
 702         ; plus2 (Succ m) n = Succ (plus $: m $: n) }
 703
 704 If we inline 'plus' and 'plus1', everything unravels nicely.  But if
 705 we choose 'plus1' as the loop breaker (which is entirely possible
 706 otherwise), the loop does not unravel nicely.
 707
 708
 709 @occAnalRhs@ deals with the question of bindings where the Id is marked
 710 by an INLINE pragma.  For these we record that anything which occurs
 711 in its RHS occurs many times.  This pessimistically assumes that ths
 712 inlined binder also occurs many times in its scope, but if it doesn't
 713 we'll catch it next time round.  At worst this costs an extra simplifier pass.
 714 ToDo: try using the occurrence info for the inline'd binder.
 715
 716 [March 97] We do the same for atomic RHSs.  Reason: see notes with reOrderRec.
 717 [June 98, SLPJ]  I've undone this change; I don't understand it.  See notes with reOrderRec.
 718
 719
 720 \begin{code}
 721 occAnalRhs :: OccEnv
 722            -> Id -> CoreExpr    -- Binder and rhs
 723                                 -- For non-recs the binder is alrady tagged
 724                                 -- with occurrence info
 725            -> (UsageDetails, CoreExpr)
 726               -- Returned usage details includes any INLINE rhs
 727
 728 occAnalRhs env id rhs
 729   = (addIdOccs rhs_usage (idUnfoldingVars id), rhs')
 730         -- Include occurrences for the "extra RHS" from a CoreUnfolding
 731   where
 732     (rhs_usage, rhs') = occAnal ctxt rhs
 733     ctxt | certainly_inline id = env
 734          | otherwise           = rhsCtxt env
 735         -- Note that we generally use an rhsCtxt.  This tells the occ anal n
 736         -- that it's looking at an RHS, which has an effect in occAnalApp
 737         --
 738         -- But there's a problem.  Consider
 739         --      x1 = a0 : []
 740         --      x2 = a1 : x1
 741         --      x3 = a2 : x2
 742         --      g  = f x3
 743         -- First time round, it looks as if x1 and x2 occur as an arg of a
 744         -- let-bound constructor ==> give them a many-occurrence.
 745         -- But then x3 is inlined (unconditionally as it happens) and
 746         -- next time round, x2 will be, and the next time round x1 will be
 747         -- Result: multiple simplifier iterations.  Sigh.
 748         -- Crude solution: use rhsCtxt for things that occur just once...
 749
 750     certainly_inline id = case idOccInfo id of
 751                             OneOcc in_lam one_br _ -> not in_lam && one_br
 752                             _                      -> False
 753 \end{code}
 754
 755
 756
 757 \begin{code}
 758 addRuleUsage :: UsageDetails -> Id -> UsageDetails
 759 -- Add the usage from RULES in Id to the usage
 760 addRuleUsage usage id = addIdOccs usage (idRuleVars id)
 761         -- idRuleVars here: see Note [Rule dependency info]
 762
 763 addIdOccs :: UsageDetails -> VarSet -> UsageDetails
 764 addIdOccs usage id_set = foldVarSet add usage id_set
 765   where
 766     add v u | isId v    = addOneOcc u v NoOccInfo
 767             | otherwise = u
 768         -- Give a non-committal binder info (i.e NoOccInfo) because
 769         --   a) Many copies of the specialised thing can appear
 770         --   b) We don't want to substitute a BIG expression inside a RULE
 771         --      even if that's the only occurrence of the thing
 772         --      (Same goes for INLINE.)
 773 \end{code}
 774
 775 Expressions
 776 ~~~~~~~~~~~
 777 \begin{code}
 778 occAnal :: OccEnv
 779         -> CoreExpr
 780         -> (UsageDetails,       -- Gives info only about the "interesting" Ids
 781             CoreExpr)
 782
 783 occAnal _   (Type t)  = (emptyDetails, Type t)
 784 occAnal env (Var v)   = (mkOneOcc env v False, Var v)
 785     -- At one stage, I gathered the idRuleVars for v here too,
 786     -- which in a way is the right thing to do.
 787     -- But that went wrong right after specialisation, when
 788     -- the *occurrences* of the overloaded function didn't have any
 789     -- rules in them, so the *specialised* versions looked as if they
 790     -- weren't used at all.
 791 \end{code}
 792
 793 We regard variables that occur as constructor arguments as "dangerousToDup":
 794
 795 \begin{verbatim}
 796 module A where
 797 f x = let y = expensive x in
 798       let z = (True,y) in
 799       (case z of {(p,q)->q}, case z of {(p,q)->q})
 800 \end{verbatim}
 801
 802 We feel free to duplicate the WHNF (True,y), but that means
 803 that y may be duplicated thereby.
 804
 805 If we aren't careful we duplicate the (expensive x) call!
 806 Constructors are rather like lambdas in this way.
 807
 808 \begin{code}
 809 occAnal _   expr@(Lit _) = (emptyDetails, expr)
 810 \end{code}
 811
 812 \begin{code}
 813 occAnal env (Note note@(SCC _) body)
 814   = case occAnal env body of { (usage, body') ->
 815     (mapVarEnv markInsideSCC usage, Note note body')
 816     }
 817
 818 occAnal env (Note note body)
 819   = case occAnal env body of { (usage, body') ->
 820     (usage, Note note body')
 821     }
 822
 823 occAnal env (Cast expr co)
 824   = case occAnal env expr of { (usage, expr') ->
 825     (markRhsUds env True usage, Cast expr' co)
 826         -- If we see let x = y `cast` co
 827         -- then mark y as 'Many' so that we don't
 828         -- immediately inline y again.
 829     }
 830 \end{code}
 831
 832 \begin{code}
 833 occAnal env app@(App _ _)
 834   = occAnalApp env (collectArgs app)
 835
 836 -- Ignore type variables altogether
 837 --   (a) occurrences inside type lambdas only not marked as InsideLam
 838 --   (b) type variables not in environment
 839
 840 occAnal env (Lam x body) | isTyVar x
 841   = case occAnal env body of { (body_usage, body') ->
 842     (body_usage, Lam x body')
 843     }
 844
 845 -- For value lambdas we do a special hack.  Consider
 846 --      (\x. \y. ...x...)
 847 -- If we did nothing, x is used inside the \y, so would be marked
 848 -- as dangerous to dup.  But in the common case where the abstraction
 849 -- is applied to two arguments this is over-pessimistic.
 850 -- So instead, we just mark each binder with its occurrence
 851 -- info in the *body* of the multiple lambda.
 852 -- Then, the simplifier is careful when partially applying lambdas.
 853
 854 occAnal env expr@(Lam _ _)
 855   = case occAnal env_body body of { (body_usage, body') ->
 856     let
 857         (final_usage, tagged_binders) = tagLamBinders body_usage binders'
 858                       -- Use binders' to put one-shot info on the lambdas
 859
 860         --      URGH!  Sept 99: we don't seem to be able to use binders' here, because
 861         --      we get linear-typed things in the resulting program that we can't handle yet.
 862         --      (e.g. PrelShow)  TODO
 863
 864         really_final_usage = if linear then
 865                                 final_usage
 866                              else
 867                                 mapVarEnv markInsideLam final_usage
 868     in
 869     (really_final_usage,
 870      mkLams tagged_binders body') }
 871   where
 872     env_body        = vanillaCtxt env        -- Body is (no longer) an RhsContext
 873     (binders, body) = collectBinders expr
 874     binders'        = oneShotGroup env binders
 875     linear          = all is_one_shot binders'
 876     is_one_shot b   = isId b && isOneShotBndr b
 877
 878 occAnal env (Case scrut bndr ty alts)
 879   = case occ_anal_scrut scrut alts     of { (scrut_usage, scrut') ->
 880     case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts')   ->
 881     let
 882         alts_usage  = foldr1 combineAltsUsageDetails alts_usage_s
 883         (alts_usage1, tagged_bndr) = tag_case_bndr alts_usage bndr
 884         total_usage = scrut_usage +++ alts_usage1
 885     in
 886     total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
 887   where
 888         -- Note [Case binder usage]
 889         -- ~~~~~~~~~~~~~~~~~~~~~~~~
 890         -- The case binder gets a usage of either "many" or "dead", never "one".
 891         -- Reason: we like to inline single occurrences, to eliminate a binding,
 892         -- but inlining a case binder *doesn't* eliminate a binding.
 893         -- We *don't* want to transform
 894         --      case x of w { (p,q) -> f w }
 895         -- into
 896         --      case x of w { (p,q) -> f (p,q) }
 897     tag_case_bndr usage bndr
 898       = case lookupVarEnv usage bndr of
 899           Nothing -> (usage,                  setIdOccInfo bndr IAmDead)
 900           Just _  -> (usage `delVarEnv` bndr, setIdOccInfo bndr NoOccInfo)
 901
 902     alt_env = mkAltEnv env bndr_swap
 903         -- Consider     x = case v of { True -> (p,q); ... }
 904         -- Then it's fine to inline p and q
 905
 906     bndr_swap = case scrut of
 907                   Var v           -> Just (v, Var bndr)
 908                   Cast (Var v) co -> Just (v, Cast (Var bndr) (mkSymCoercion co))
 909                   _other          -> Nothing
 910
 911     occ_anal_alt = occAnalAlt alt_env bndr bndr_swap
 912
 913     occ_anal_scrut (Var v) (alt1 : other_alts)
 914         | not (null other_alts) || not (isDefaultAlt alt1)
 915         = (mkOneOcc env v True, Var v)  -- The 'True' says that the variable occurs
 916                                         -- in an interesting context; the case has
 917                                         -- at least one non-default alternative
 918     occ_anal_scrut scrut _alts
 919         = occAnal (vanillaCtxt env) scrut    -- No need for rhsCtxt
 920
 921 occAnal env (Let bind body)
 922   = case occAnal env body                of { (body_usage, body') ->
 923     case occAnalBind env bind body_usage of { (final_usage, new_binds) ->
 924        (final_usage, mkLets new_binds body') }}
 925
 926 occAnalArgs :: OccEnv -> [CoreExpr] -> (UsageDetails, [CoreExpr])
 927 occAnalArgs env args
 928   = case mapAndUnzip (occAnal arg_env) args of  { (arg_uds_s, args') ->
 929     (foldr (+++) emptyDetails arg_uds_s, args')}
 930   where
 931     arg_env = vanillaCtxt env
 932 \end{code}
 933
 934 Applications are dealt with specially because we want
 935 the "build hack" to work.
 936
 937 \begin{code}
 938 occAnalApp :: OccEnv
 939            -> (Expr CoreBndr, [Arg CoreBndr])
 940            -> (UsageDetails, Expr CoreBndr)
 941 occAnalApp env (Var fun, args)
 942   = case args_stuff of { (args_uds, args') ->
 943     let
 944         final_args_uds = markRhsUds env is_pap args_uds
 945     in
 946     (fun_uds +++ final_args_uds, mkApps (Var fun) args') }
 947   where
 948     fun_uniq = idUnique fun
 949     fun_uds  = mkOneOcc env fun (valArgCount args > 0)
 950     is_pap = isConLikeId fun || valArgCount args < idArity fun
 951            -- See Note [CONLIKE pragma] in BasicTypes
 952
 953                 -- Hack for build, fold, runST
 954     args_stuff  | fun_uniq == buildIdKey    = appSpecial env 2 [True,True]  args
 955                 | fun_uniq == augmentIdKey  = appSpecial env 2 [True,True]  args
 956                 | fun_uniq == foldrIdKey    = appSpecial env 3 [False,True] args
 957                 | fun_uniq == runSTRepIdKey = appSpecial env 2 [True]       args
 958                         -- (foldr k z xs) may call k many times, but it never
 959                         -- shares a partial application of k; hence [False,True]
 960                         -- This means we can optimise
 961                         --      foldr (\x -> let v = ...x... in \y -> ...v...) z xs
 962                         -- by floating in the v
 963
 964                 | otherwise = occAnalArgs env args
 965
 966
 967 occAnalApp env (fun, args)
 968   = case occAnal (addAppCtxt env args) fun of   { (fun_uds, fun') ->
 969         -- The addAppCtxt is a bit cunning.  One iteration of the simplifier
 970         -- often leaves behind beta redexs like
 971         --      (\x y -> e) a1 a2
 972         -- Here we would like to mark x,y as one-shot, and treat the whole
 973         -- thing much like a let.  We do this by pushing some True items
 974         -- onto the context stack.
 975
 976     case occAnalArgs env args of        { (args_uds, args') ->
 977     let
 978         final_uds = fun_uds +++ args_uds
 979     in
 980     (final_uds, mkApps fun' args') }}
 981
 982
 983 markRhsUds :: OccEnv            -- Check if this is a RhsEnv
 984            -> Bool              -- and this is true
 985            -> UsageDetails      -- The do markMany on this
 986            -> UsageDetails
 987 -- We mark the free vars of the argument of a constructor or PAP
 988 -- as "many", if it is the RHS of a let(rec).
 989 -- This means that nothing gets inlined into a constructor argument
 990 -- position, which is what we want.  Typically those constructor
 991 -- arguments are just variables, or trivial expressions.
 992 --
 993 -- This is the *whole point* of the isRhsEnv predicate
 994 markRhsUds env is_pap arg_uds
 995   | isRhsEnv env && is_pap = mapVarEnv markMany arg_uds
 996   | otherwise              = arg_uds
 997
 998
 999 appSpecial :: OccEnv
1000            -> Int -> CtxtTy     -- Argument number, and context to use for it
1001            -> [CoreExpr]
1002            -> (UsageDetails, [CoreExpr])
1003 appSpecial env n ctxt args
1004   = go n args
1005   where
1006     arg_env = vanillaCtxt env
1007
1008     go _ [] = (emptyDetails, [])        -- Too few args
1009
1010     go 1 (arg:args)                     -- The magic arg
1011       = case occAnal (setCtxtTy arg_env ctxt) arg of    { (arg_uds, arg') ->
1012         case occAnalArgs env args of                    { (args_uds, args') ->
1013         (arg_uds +++ args_uds, arg':args') }}
1014
1015     go n (arg:args)
1016       = case occAnal arg_env arg of     { (arg_uds, arg') ->
1017         case go (n-1) args of           { (args_uds, args') ->
1018         (arg_uds +++ args_uds, arg':args') }}
1019 \end{code}
1020
1021
1022 Note [Binder swap]
1023 ~~~~~~~~~~~~~~~~~~
1024 We do these two transformations right here:
1025
1026  (1)   case x of b { pi -> ri }
1027     ==>
1028       case x of b { pi -> let x=b in ri }
1029
1030  (2)  case (x |> co) of b { pi -> ri }
1031     ==>
1032       case (x |> co) of b { pi -> let x = b |> sym co in ri }
1033
1034     Why (2)?  See Note [Case of cast]
1035
1036 In both cases, in a particular alternative (pi -> ri), we only
1037 add the binding if
1038   (a) x occurs free in (pi -> ri)
1039         (ie it occurs in ri, but is not bound in pi)
1040   (b) the pi does not bind b (or the free vars of co)
1041 We need (a) and (b) for the inserted binding to be correct.
1042
1043 For the alternatives where we inject the binding, we can transfer
1044 all x's OccInfo to b.  And that is the point.
1045
1046 Notice that
1047   * The deliberate shadowing of 'x'.
1048   * That (a) rapidly becomes false, so no bindings are injected.
1049
1050 The reason for doing these transformations here is because it allows
1051 us to adjust the OccInfo for 'x' and 'b' as we go.
1052
1053   * Suppose the only occurrences of 'x' are the scrutinee and in the
1054     ri; then this transformation makes it occur just once, and hence
1055     get inlined right away.
1056
1057   * If we do this in the Simplifier, we don't know whether 'x' is used
1058     in ri, so we are forced to pessimistically zap b's OccInfo even
1059     though it is typically dead (ie neither it nor x appear in the
1060     ri).  There's nothing actually wrong with zapping it, except that
1061     it's kind of nice to know which variables are dead.  My nose
1062     tells me to keep this information as robustly as possible.
1063
1064 The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
1065 {x=b}; it's Nothing if the binder-swap doesn't happen.
1066
1067 There is a danger though.  Consider
1068       let v = x +# y
1069       in case (f v) of w -> ...v...v...
1070 And suppose that (f v) expands to just v.  Then we'd like to
1071 use 'w' instead of 'v' in the alternative.  But it may be too
1072 late; we may have substituted the (cheap) x+#y for v in the
1073 same simplifier pass that reduced (f v) to v.
1074
1075 I think this is just too bad.  CSE will recover some of it.
1076
1077 Note [Binder swap on GlobalId scrutinees]
1078 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1079 When the scrutinee is a GlobalId we must take care in two ways
1080
1081  i) In order to *know* whether 'x' occurs free in the RHS, we need its
1082     occurrence info. BUT, we don't gather occurrence info for
1083     GlobalIds.  That's what the (small) occ_scrut_ids set in OccEnv is
1084     for: it says "gather occurrence info for these.
1085
1086  ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
1087      has an External Name. See, for example, SimplEnv Note [Global Ids in
1088      the substitution].
1089
1090 Historical note [no-case-of-case]
1091 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1092 We *used* to suppress the binder-swap in case expressoins when
1093 -fno-case-of-case is on.  Old remarks:
1094     "This happens in the first simplifier pass,
1095     and enhances full laziness.  Here's the bad case:
1096             f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
1097     If we eliminate the inner case, we trap it inside the I# v -> arm,
1098     which might prevent some full laziness happening.  I've seen this
1099     in action in spectral/cichelli/Prog.hs:
1100              [(m,n) | m <- [1..max], n <- [1..max]]
1101     Hence the check for NoCaseOfCase."
1102 However, now the full-laziness pass itself reverses the binder-swap, so this
1103 check is no longer necessary.
1104
1105 Historical note [Suppressing the case binder-swap]
1106 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1107 This old note describes a problem that is also fixed by doing the
1108 binder-swap in OccAnal:
1109
1110     There is another situation when it might make sense to suppress the
1111     case-expression binde-swap. If we have
1112
1113         case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
1114                        ...other cases .... }
1115
1116     We'll perform the binder-swap for the outer case, giving
1117
1118         case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
1119                        ...other cases .... }
1120
1121     But there is no point in doing it for the inner case, because w1 can't
1122     be inlined anyway.  Furthermore, doing the case-swapping involves
1123     zapping w2's occurrence info (see paragraphs that follow), and that
1124     forces us to bind w2 when doing case merging.  So we get
1125
1126         case x of w1 { A -> let w2 = w1 in e1
1127                        B -> let w2 = w1 in e2
1128                        ...other cases .... }
1129
1130     This is plain silly in the common case where w2 is dead.
1131
1132     Even so, I can't see a good way to implement this idea.  I tried
1133     not doing the binder-swap if the scrutinee was already evaluated
1134     but that failed big-time:
1135
1136             data T = MkT !Int
1137
1138             case v of w  { MkT x ->
1139             case x of x1 { I# y1 ->
1140             case x of x2 { I# y2 -> ...
1141
1142     Notice that because MkT is strict, x is marked "evaluated".  But to
1143     eliminate the last case, we must either make sure that x (as well as
1144     x1) has unfolding MkT y1.  THe straightforward thing to do is to do
1145     the binder-swap.  So this whole note is a no-op.
1146
1147 It's fixed by doing the binder-swap in OccAnal because we can do the
1148 binder-swap unconditionally and still get occurrence analysis
1149 information right.
1150
1151 Note [Case of cast]
1152 ~~~~~~~~~~~~~~~~~~~
1153 Consider        case (x `cast` co) of b { I# ->
1154                 ... (case (x `cast` co) of {...}) ...
1155 We'd like to eliminate the inner case.  That is the motivation for
1156 equation (2) in Note [Binder swap].  When we get to the inner case, we
1157 inline x, cancel the casts, and away we go.
1158
1159 Note [Binders in case alternatives]
1160 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1161 Consider
1162     case x of y { (a,b) -> f y }
1163 We treat 'a', 'b' as dead, because they don't physically occur in the
1164 case alternative.  (Indeed, a variable is dead iff it doesn't occur in
1165 its scope in the output of OccAnal.)  It really helps to know when
1166 binders are unused.  See esp the call to isDeadBinder in
1167 Simplify.mkDupableAlt
1168
1169 In this example, though, the Simplifier will bring 'a' and 'b' back to
1170 life, beause it binds 'y' to (a,b) (imagine got inlined and
1171 scrutinised y).
1172
1173 \begin{code}
1174 occAnalAlt :: OccEnv
1175            -> CoreBndr
1176            -> Maybe (Id, CoreExpr)  -- Note [Binder swap]
1177            -> CoreAlt
1178            -> (UsageDetails, Alt IdWithOccInfo)
1179 occAnalAlt env case_bndr mb_scrut_var (con, bndrs, rhs)
1180   = case occAnal env rhs of { (rhs_usage, rhs') ->
1181     let
1182         (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage bndrs
1183         bndrs' = tagged_bndrs      -- See Note [Binders in case alternatives]
1184     in
1185     case mb_scrut_var of
1186         Just (scrut_var, scrut_rhs)             -- See Note [Binder swap]
1187           | scrut_var `localUsedIn` alt_usg     -- (a) Fast path, usually false
1188           , not (any shadowing bndrs)           -- (b)
1189           -> (addOneOcc usg_wo_scrut case_bndr NoOccInfo,
1190                         -- See Note [Case binder usage] for the NoOccInfo
1191               (con, bndrs', Let (NonRec scrut_var2 scrut_rhs) rhs'))
1192           where
1193            scrut_var1 = mkLocalId (localiseName (idName scrut_var)) (idType scrut_var)
1194                         -- Localise the scrut_var before shadowing it; we're making a
1195                         -- new binding for it, and it might have an External Name, or
1196                         -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
1197                         -- Also we don't want any INLILNE or NOINLINE pragmas!
1198
1199            (usg_wo_scrut, scrut_var2) = tagBinder alt_usg scrut_var1
1200            shadowing bndr = bndr `elemVarSet` rhs_fvs
1201            rhs_fvs = exprFreeVars scrut_rhs
1202
1203         _other -> (alt_usg, (con, bndrs', rhs')) }
1204 \end{code}
1205
1206
1207 %************************************************************************
1208 %*                                                                      *
1209 \subsection[OccurAnal-types]{OccEnv}
1210 %*                                                                      *
1211 %************************************************************************
1212
1213 \begin{code}
1214 data OccEnv
1215   = OccEnv { occ_encl      :: !OccEncl      -- Enclosing context information
1216            , occ_ctxt      :: !CtxtTy       -- Tells about linearity
1217            , occ_scrut_ids :: !GblScrutIds }
1218
1219 type GblScrutIds = IdSet  -- GlobalIds that are scrutinised, and for which
1220                           -- we want to gather occurence info; see
1221                           -- Note [Binder swap for GlobalId scrutinee]
1222                           -- No need to prune this if there's a shadowing binding
1223                           -- because it's OK for it to be too big
1224
1225 -- OccEncl is used to control whether to inline into constructor arguments
1226 -- For example:
1227 --      x = (p,q)               -- Don't inline p or q
1228 --      y = /\a -> (p a, q a)   -- Still don't inline p or q
1229 --      z = f (p,q)             -- Do inline p,q; it may make a rule fire
1230 -- So OccEncl tells enought about the context to know what to do when
1231 -- we encounter a contructor application or PAP.
1232
1233 data OccEncl
1234   = OccRhs              -- RHS of let(rec), albeit perhaps inside a type lambda
1235                         -- Don't inline into constructor args here
1236   | OccVanilla          -- Argument of function, body of lambda, scruintee of case etc.
1237                         -- Do inline into constructor args here
1238
1239 type CtxtTy = [Bool]
1240         -- []           No info
1241         --
1242         -- True:ctxt    Analysing a function-valued expression that will be
1243         --                      applied just once
1244         --
1245         -- False:ctxt   Analysing a function-valued expression that may
1246         --                      be applied many times; but when it is,
1247         --                      the CtxtTy inside applies
1248
1249 initOccEnv :: OccEnv
1250 initOccEnv = OccEnv { occ_encl = OccVanilla
1251                     , occ_ctxt = []
1252                     , occ_scrut_ids = emptyVarSet }
1253
1254 vanillaCtxt :: OccEnv -> OccEnv
1255 vanillaCtxt env = OccEnv { occ_encl = OccVanilla, occ_ctxt = []
1256                          , occ_scrut_ids = occ_scrut_ids env }
1257
1258 rhsCtxt :: OccEnv -> OccEnv
1259 rhsCtxt env = OccEnv { occ_encl = OccRhs, occ_ctxt = []
1260                      , occ_scrut_ids = occ_scrut_ids env }
1261
1262 mkAltEnv :: OccEnv -> Maybe (Id, CoreExpr) -> OccEnv
1263 -- Does two things: a) makes the occ_ctxt = OccVanilla
1264 --                  b) extends the scrut_ids if necessary
1265 mkAltEnv env (Just (scrut_id, _))
1266   | not (isLocalId scrut_id)
1267   = OccEnv { occ_encl      = OccVanilla
1268            , occ_scrut_ids = extendVarSet (occ_scrut_ids env) scrut_id
1269            , occ_ctxt      = occ_ctxt env }
1270 mkAltEnv env _
1271   | isRhsEnv env = env { occ_encl = OccVanilla }
1272   | otherwise    = env
1273
1274 setCtxtTy :: OccEnv -> CtxtTy -> OccEnv
1275 setCtxtTy env ctxt = env { occ_ctxt = ctxt }
1276
1277 isRhsEnv :: OccEnv -> Bool
1278 isRhsEnv (OccEnv { occ_encl = OccRhs })     = True
1279 isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
1280
1281 oneShotGroup :: OccEnv -> [CoreBndr] -> [CoreBndr]
1282         -- The result binders have one-shot-ness set that they might not have had originally.
1283         -- This happens in (build (\cn -> e)).  Here the occurrence analyser
1284         -- linearity context knows that c,n are one-shot, and it records that fact in
1285         -- the binder. This is useful to guide subsequent float-in/float-out tranformations
1286
1287 oneShotGroup (OccEnv { occ_ctxt = ctxt }) bndrs
1288   = go ctxt bndrs []
1289   where
1290     go _ [] rev_bndrs = reverse rev_bndrs
1291
1292     go (lin_ctxt:ctxt) (bndr:bndrs) rev_bndrs
1293         | isId bndr = go ctxt bndrs (bndr':rev_bndrs)
1294         where
1295           bndr' | lin_ctxt  = setOneShotLambda bndr
1296                 | otherwise = bndr
1297
1298     go ctxt (bndr:bndrs) rev_bndrs = go ctxt bndrs (bndr:rev_bndrs)
1299
1300 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
1301 addAppCtxt env@(OccEnv { occ_ctxt = ctxt }) args
1302   = env { occ_ctxt = replicate (valArgCount args) True ++ ctxt }
1303 \end{code}
1304
1305 %************************************************************************
1306 %*                                                                      *
1307 \subsection[OccurAnal-types]{OccEnv}
1308 %*                                                                      *
1309 %************************************************************************
1310
1311 \begin{code}
1312 type UsageDetails = IdEnv OccInfo       -- A finite map from ids to their usage
1313                 -- INVARIANT: never IAmDead
1314                 -- (Deadness is signalled by not being in the map at all)
1315
1316 (+++), combineAltsUsageDetails
1317         :: UsageDetails -> UsageDetails -> UsageDetails
1318
1319 (+++) usage1 usage2
1320   = plusVarEnv_C addOccInfo usage1 usage2
1321
1322 combineAltsUsageDetails usage1 usage2
1323   = plusVarEnv_C orOccInfo usage1 usage2
1324
1325 addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
1326 addOneOcc usage id info
1327   = plusVarEnv_C addOccInfo usage (unitVarEnv id info)
1328         -- ToDo: make this more efficient
1329
1330 emptyDetails :: UsageDetails
1331 emptyDetails = (emptyVarEnv :: UsageDetails)
1332
1333 localUsedIn, usedIn :: Id -> UsageDetails -> Bool
1334 v `localUsedIn` details = v `elemVarEnv` details
1335 v `usedIn`      details =  isExportedId v || v `localUsedIn` details
1336
1337 type IdWithOccInfo = Id
1338
1339 tagLamBinders :: UsageDetails          -- Of scope
1340               -> [Id]                  -- Binders
1341               -> (UsageDetails,        -- Details with binders removed
1342                  [IdWithOccInfo])    -- Tagged binders
1343 -- Used for lambda and case binders
1344 -- It copes with the fact that lambda bindings can have InlineRule
1345 -- unfoldings, used for join points
1346 tagLamBinders usage binders = usage' `seq` (usage', bndrs')
1347   where
1348     (usage', bndrs') = mapAccumR tag_lam usage binders
1349     tag_lam usage bndr = (usage2, setBinderOcc usage bndr)
1350       where
1351         usage1 = usage `delVarEnv` bndr
1352         usage2 | isId bndr = addIdOccs usage1 (idUnfoldingVars bndr)
1353                | otherwise = usage1
1354
1355 tagBinder :: UsageDetails           -- Of scope
1356           -> Id                     -- Binders
1357           -> (UsageDetails,         -- Details with binders removed
1358               IdWithOccInfo)        -- Tagged binders
1359
1360 tagBinder usage binder
1361  = let
1362      usage'  = usage `delVarEnv` binder
1363      binder' = setBinderOcc usage binder
1364    in
1365    usage' `seq` (usage', binder')
1366
1367 setBinderOcc :: UsageDetails -> CoreBndr -> CoreBndr
1368 setBinderOcc usage bndr
1369   | isTyVar bndr      = bndr
1370   | isExportedId bndr = case idOccInfo bndr of
1371                           NoOccInfo -> bndr
1372                           _         -> setIdOccInfo bndr NoOccInfo
1373             -- Don't use local usage info for visible-elsewhere things
1374             -- BUT *do* erase any IAmALoopBreaker annotation, because we're
1375             -- about to re-generate it and it shouldn't be "sticky"
1376
1377   | otherwise = setIdOccInfo bndr occ_info
1378   where
1379     occ_info = lookupVarEnv usage bndr `orElse` IAmDead
1380 \end{code}
1381
1382
1383 %************************************************************************
1384 %*                                                                      *
1385 \subsection{Operations over OccInfo}
1386 %*                                                                      *
1387 %************************************************************************
1388
1389 \begin{code}
1390 mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails
1391 mkOneOcc env id int_cxt
1392   | isLocalId id = unitVarEnv id (OneOcc False True int_cxt)
1393   | id `elemVarSet` occ_scrut_ids env = unitVarEnv id NoOccInfo
1394   | otherwise                         = emptyDetails
1395
1396 markMany, markInsideLam, markInsideSCC :: OccInfo -> OccInfo
1397
1398 markMany _  = NoOccInfo
1399
1400 markInsideSCC occ = markMany occ
1401
1402 markInsideLam (OneOcc _ one_br int_cxt) = OneOcc True one_br int_cxt
1403 markInsideLam occ                       = occ
1404
1405 addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
1406
1407 addOccInfo a1 a2  = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
1408                     NoOccInfo   -- Both branches are at least One
1409                                 -- (Argument is never IAmDead)
1410
1411 -- (orOccInfo orig new) is used
1412 -- when combining occurrence info from branches of a case
1413
1414 orOccInfo (OneOcc in_lam1 _ int_cxt1)
1415           (OneOcc in_lam2 _ int_cxt2)
1416   = OneOcc (in_lam1 || in_lam2)
1417            False        -- False, because it occurs in both branches
1418            (int_cxt1 && int_cxt2)
1419 orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
1420                   NoOccInfo
1421 \end{code}