X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FsimplCore%2FOccurAnal.lhs;h=6af776a5d231a91554c9a1e73e93285241c83ff8;hp=87444e024d9db7b9231561bc8d3ea8c0209dbdda;hb=e79c9ce01d0ce4412bd4bcd99c8c728a6a2ec569;hpb=b4696d8ac6c34ccb4e3ed833831ba8166dcce578 diff --git a/compiler/simplCore/OccurAnal.lhs b/compiler/simplCore/OccurAnal.lhs index 87444e0..6af776a 100644 --- a/compiler/simplCore/OccurAnal.lhs +++ b/compiler/simplCore/OccurAnal.lhs @@ -15,13 +15,12 @@ module OccurAnal ( occurAnalysePgm, occurAnalyseExpr ) where --- XXX This define is a bit of a hack, and should be done more nicely -#define FAST_STRING_NOT_NEEDED 1 #include "HsVersions.h" import CoreSyn import CoreFVs import CoreUtils ( exprIsTrivial, isDefaultAlt ) +import Coercion ( mkSymCoercion ) import Id import IdInfo import BasicTypes @@ -30,7 +29,7 @@ import VarSet import VarEnv import Maybes ( orElse ) -import Digraph ( stronglyConnCompR, SCC(..) ) +import Digraph ( SCC(..), stronglyConnCompFromEdgedVerticesR ) import PrelNames ( buildIdKey, foldrIdKey, runSTRepIdKey, augmentIdKey ) import Unique ( Unique ) import UniqFM ( keysUFM, intersectUFM_C, foldUFM_Directly ) @@ -86,7 +85,10 @@ occAnalBind :: OccEnv [CoreBind]) occAnalBind env (NonRec binder rhs) body_usage - | not (binder `usedIn` body_usage) -- It's not mentioned + | isTyVar binder -- A type let; we don't gather usage info + = (body_usage, [NonRec binder rhs]) + + | not (binder `usedIn` body_usage) -- It's not mentioned = (body_usage, []) | otherwise -- It's mentioned in the body @@ -169,8 +171,8 @@ However things are made quite a bit more complicated by RULES. Remember ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We avoid infinite inlinings by choosing loop breakers, and ensuring that a loop breaker cuts each loop. But what is a - "loop"? In particular, a RULES is like an equation for 'f' that - is *always* inlined if it are applicable. We do *not* disable + "loop"? In particular, a RULE is like an equation for 'f' that + is *always* inlined if it is applicable. We do *not* disable rules for loop-breakers. It's up to whoever makes the rules to make sure that the rules themselves alwasys terminate. See Note [Rules for recursive functions] in Simplify.lhs @@ -235,8 +237,9 @@ However things are made quite a bit more complicated by RULES. Remember * Note [Rule dependency info] ~~~~~~~~~~~~~~~~~~~~~~~~~~~ The VarSet in a SpecInfo is used for dependency analysis in the - occurrence analyser. We must track free vars in *both* lhs and rhs. Why both? - Consider + occurrence analyser. We must track free vars in *both* lhs and rhs. + Hence use of idRuleVars, rather than idRuleRhsVars in addRuleUsage. + Why both? Consider x = y RULE f x = 4 Then if we substitute y for x, we'd better do so in the @@ -279,51 +282,27 @@ This showed up when compiling Control.Concurrent.Chan.getChanContents. \begin{code} occAnalBind env (Rec pairs) body_usage - | not (any (`usedIn` body_usage) bndrs) -- NB: look at body_usage, not total_usage - = (body_usage, []) -- Dead code - | otherwise - = (final_usage, map ({-# SCC "occAnalBind.dofinal" #-} do_final_bind) sccs) + = foldr occAnalRec (body_usage, []) sccs + -- For a recursive group, we + -- * occ-analyse all the RHSs + -- * compute strongly-connected components + -- * feed those components to occAnalRec where - bndrs = map fst pairs - bndr_set = mkVarSet bndrs - - --------------------------------------- - -- See Note [Loop breaking] - --------------------------------------- - -------------Dependency analysis ------------------------------ - occ_anald :: [(Id, (UsageDetails, CoreExpr))] - -- The UsageDetails here are strictly those arising from the RHS - -- *not* from any rules in the Id - occ_anald = [(bndr, occAnalRhs env bndr rhs) | (bndr,rhs) <- pairs] - - total_usage = foldl add_usage body_usage occ_anald - add_usage body_usage (bndr, (rhs_usage, _)) - = body_usage +++ addRuleUsage rhs_usage bndr - - (final_usage, tagged_bndrs) = tagBinders total_usage bndrs - final_bndrs | isEmptyVarSet all_rule_fvs = tagged_bndrs - | otherwise = map tag_rule_var tagged_bndrs - - tag_rule_var bndr | bndr `elemVarSet` all_rule_fvs = makeLoopBreaker True bndr - | otherwise = bndr - all_rule_fvs = bndr_set `intersectVarSet` foldr (unionVarSet . idRuleVars) emptyVarSet bndrs - -- Mark the binder with OccInfo saying "no preInlineUnconditionally" if - -- it is used in any rule (lhs or rhs) of the recursive group - - ---- stuff for dependency analysis of binds ------------------------------- - sccs :: [SCC (Node Details)] - sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompR rec_edges - - rec_edges :: [Node Details] -- The binders are tagged with correct occ-info - rec_edges = {-# SCC "occAnalBind.assoc" #-} zipWith make_node final_bndrs occ_anald - make_node tagged_bndr (_bndr, (rhs_usage, rhs)) - = ((tagged_bndr, rhs, rhs_fvs), idUnique tagged_bndr, out_edges) - where - rhs_fvs = intersectUFM_C (\b _ -> b) bndr_set rhs_usage - out_edges = keysUFM (rhs_fvs `unionVarSet` idRuleVars tagged_bndr) - + bndr_set = mkVarSet (map fst pairs) + sccs :: [SCC (Node Details)] + sccs = {-# SCC "occAnalBind.scc" #-} stronglyConnCompFromEdgedVerticesR rec_edges + + rec_edges :: [Node Details] + rec_edges = {-# SCC "occAnalBind.assoc" #-} map make_node pairs + + make_node (bndr, rhs) + = (ND bndr rhs' rhs_usage rhs_fvs, idUnique bndr, out_edges) + where + (rhs_usage, rhs') = occAnalRhs env bndr rhs + rhs_fvs = intersectUFM_C (\b _ -> b) bndr_set rhs_usage + out_edges = keysUFM (rhs_fvs `unionVarSet` idRuleVars bndr) -- (a -> b) means a mentions b -- Given the usage details (a UFM that gives occ info for each free var of -- the RHS) we can get the list of free vars -- or rather their Int keys -- @@ -334,17 +313,67 @@ occAnalBind env (Rec pairs) body_usage -- which has n**2 cost, and this meant that edges_from alone -- consumed 10% of total runtime! - ---- Stuff to "re-constitute" bindings from dependency-analysis info ------ - do_final_bind (AcyclicSCC ((bndr, rhs, _), _, _)) = NonRec bndr rhs - do_final_bind (CyclicSCC cycle) - | no_rules = Rec (reOrderCycle cycle) - | otherwise = Rec (concatMap reOrderRec (stronglyConnCompR loop_breaker_edges)) - where -- See Note [Choosing loop breakers] for looop_breker_edges - loop_breaker_edges = map mk_node cycle - mk_node (details@(_bndr, _rhs, rhs_fvs), k, _) = (details, k, new_ks) - where - new_ks = keysUFM (extendFvs rule_fv_env rhs_fvs rhs_fvs) +----------------------------- +occAnalRec :: SCC (Node Details) -> (UsageDetails, [CoreBind]) + -> (UsageDetails, [CoreBind]) + + -- The NonRec case is just like a Let (NonRec ...) above +occAnalRec (AcyclicSCC (ND bndr rhs rhs_usage _, _, _)) (body_usage, binds) + | not (bndr `usedIn` body_usage) + = (body_usage, binds) + + | otherwise -- It's mentioned in the body + = (body_usage' +++ addRuleUsage rhs_usage bndr, -- Note [Rules are extra RHSs] + NonRec tagged_bndr rhs : binds) + where + (body_usage', tagged_bndr) = tagBinder body_usage bndr + + + -- The Rec case is the interesting one + -- See Note [Loop breaking] +occAnalRec (CyclicSCC nodes) (body_usage, binds) + | not (any (`usedIn` body_usage) bndrs) -- NB: look at body_usage, not total_usage + = (body_usage, binds) -- Dead code + + | otherwise -- At this point we always build a single Rec + = (final_usage, Rec pairs : binds) + + where + bndrs = [b | (ND b _ _ _, _, _) <- nodes] + bndr_set = mkVarSet bndrs + ---------------------------- + -- Tag the binders with their occurrence info + total_usage = foldl add_usage body_usage nodes + add_usage body_usage (ND bndr _ rhs_usage _, _, _) + = body_usage +++ addRuleUsage rhs_usage bndr + (final_usage, tagged_nodes) = mapAccumL tag_node total_usage nodes + + tag_node :: UsageDetails -> Node Details -> (UsageDetails, Node Details) + -- (a) Tag the binders in the details with occ info + -- (b) Mark the binder with "weak loop-breaker" OccInfo + -- saying "no preInlineUnconditionally" if it is used + -- in any rule (lhs or rhs) of the recursive group + -- See Note [Weak loop breakers] + tag_node usage (ND bndr rhs rhs_usage rhs_fvs, k, ks) + = (usage `delVarEnv` bndr, (ND bndr2 rhs rhs_usage rhs_fvs, k, ks)) + where + bndr2 | bndr `elemVarSet` all_rule_fvs = makeLoopBreaker True bndr1 + | otherwise = bndr1 + bndr1 = setBinderOcc usage bndr + all_rule_fvs = bndr_set `intersectVarSet` foldr (unionVarSet . idRuleVars) + emptyVarSet bndrs + + ---------------------------- + -- Now reconstruct the cycle + pairs | no_rules = reOrderCycle tagged_nodes + | otherwise = concatMap reOrderRec (stronglyConnCompFromEdgedVerticesR loop_breaker_edges) + + -- See Note [Choosing loop breakers] for looop_breaker_edges + loop_breaker_edges = map mk_node tagged_nodes + mk_node (details@(ND _ _ _ rhs_fvs), k, _) = (details, k, new_ks) + where + new_ks = keysUFM (extendFvs rule_fv_env rhs_fvs rhs_fvs) ------------------------------------ rule_fv_env :: IdEnv IdSet -- Variables from this group mentioned in RHS of rules @@ -421,18 +450,20 @@ Perhaps something cleverer would suffice. \begin{code} -type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique, - -- which is gotten from the Id. -type Details = (Id, -- Binder - CoreExpr, -- RHS - IdSet) -- RHS free vars (*not* include rules) +type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique, + -- which is gotten from the Id. +data Details = ND Id -- Binder + CoreExpr -- RHS + UsageDetails -- Full usage from RHS (*not* including rules) + IdSet -- Other binders from this Rec group mentioned on RHS + -- (derivable from UsageDetails but cached here) reOrderRec :: SCC (Node Details) -> [(Id,CoreExpr)] -- Sorted into a plausible order. Enough of the Ids have -- IAmALoopBreaker pragmas that there are no loops left. -reOrderRec (AcyclicSCC ((bndr, rhs, _), _, _)) = [(bndr, rhs)] -reOrderRec (CyclicSCC cycle) = reOrderCycle cycle +reOrderRec (AcyclicSCC (ND bndr rhs _ _, _, _)) = [(bndr, rhs)] +reOrderRec (CyclicSCC cycle) = reOrderCycle cycle reOrderCycle :: [Node Details] -> [(Id,CoreExpr)] reOrderCycle [] @@ -440,17 +471,17 @@ reOrderCycle [] reOrderCycle [bind] -- Common case of simple self-recursion = [(makeLoopBreaker False bndr, rhs)] where - ((bndr, rhs, _), _, _) = bind + (ND bndr rhs _ _, _, _) = bind reOrderCycle (bind : binds) = -- Choose a loop breaker, mark it no-inline, -- do SCC analysis on the rest, and recursively sort them out - concatMap reOrderRec (stronglyConnCompR unchosen) ++ + concatMap reOrderRec (stronglyConnCompFromEdgedVerticesR unchosen) ++ [(makeLoopBreaker False bndr, rhs)] where (chosen_bind, unchosen) = choose_loop_breaker bind (score bind) [] binds - (bndr, rhs, _) = chosen_bind + ND bndr rhs _ _ = chosen_bind -- This loop looks for the bind with the lowest score -- to pick as the loop breaker. The rest accumulate in @@ -467,23 +498,32 @@ reOrderCycle (bind : binds) sc = score bind score :: Node Details -> Int -- Higher score => less likely to be picked as loop breaker - score ((bndr, rhs, _), _, _) + score (ND bndr rhs _ _, _, _) | workerExists (idWorkerInfo bndr) = 10 -- Note [Worker inline loop] - | exprIsTrivial rhs = 4 -- Practically certain to be inlined + | exprIsTrivial rhs = 5 -- Practically certain to be inlined -- Used to have also: && not (isExportedId bndr) -- But I found this sometimes cost an extra iteration when we have -- rec { d = (a,b); a = ...df...; b = ...df...; df = d } -- where df is the exported dictionary. Then df makes a really -- bad choice for loop breaker - | is_con_app rhs = 2 -- Data types help with cases - -- Note [conapp] + | is_con_app rhs = 3 -- Data types help with cases + -- Note [Constructor applictions] + +-- If an Id is marked "never inline" then it makes a great loop breaker +-- The only reason for not checking that here is that it is rare +-- and I've never seen a situation where it makes a difference, +-- so it probably isn't worth the time to test on every binder +-- | isNeverActive (idInlinePragma bndr) = -10 - | inlineCandidate bndr rhs = 1 -- Likely to be inlined + | inlineCandidate bndr rhs = 2 -- Likely to be inlined -- Note [Inline candidates] + | not (neverUnfold (idUnfolding bndr)) = 1 + -- the Id has some kind of unfolding + | otherwise = 0 inlineCandidate :: Id -> CoreExpr -> Bool @@ -521,22 +561,24 @@ reOrderCycle (bind : binds) is_con_app _ = False makeLoopBreaker :: Bool -> Id -> Id --- Set the loop-breaker flag --- See Note [Weak loop breakers] +-- Set the loop-breaker flag: see Note [Weak loop breakers] makeLoopBreaker weak bndr = setIdOccInfo bndr (IAmALoopBreaker weak) \end{code} -Note [Worker inline loop] -~~~~~~~~~~~~~~~~~~~~~~~~ -Never choose a wrapper as the loop breaker! Because -wrappers get auto-generated inlinings when importing, and -that can lead to an infinite inlining loop. For example: +Note [INLINE pragmas] +~~~~~~~~~~~~~~~~~~~~~ +Never choose a function with an INLINE pramga as the loop breaker! +If such a function is mutually-recursive with a non-INLINE thing, +then the latter should be the loop-breaker. + +A particular case is wrappers generated by the demand analyser. +If you make then into a loop breaker you may get an infinite +inlining loop. For example: rec { $wfoo x = ....foo x.... {-loop brk-} foo x = ...$wfoo x... } - The interface file sees the unfolding for $wfoo, and sees that foo is strict (and hence it gets an auto-generated wrapper). Result: an infinite inlining in the importing scope. So be a bit careful if you @@ -545,6 +587,22 @@ nofib/spectral/minimax. If the repTree wrapper is chosen as the loop breaker then compiling Game.hs goes into an infinite loop (this happened when we gave is_con_app a lower score than inline candidates). +Note [Constructor applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's really really important to inline dictionaries. Real +example (the Enum Ordering instance from GHC.Base): + + rec f = \ x -> case d of (p,q,r) -> p x + g = \ x -> case d of (p,q,r) -> q x + d = (v, f, g) + +Here, f and g occur just once; but we can't inline them into d. +On the other hand we *could* simplify those case expressions if +we didn't stupidly choose d as the loop breaker. +But we won't because constructor args are marked "Many". +Inlining dictionaries is really essential to unravelling +the loops in static numeric dictionaries, see GHC.Float. + Note [Closure conversion] ~~~~~~~~~~~~~~~~~~~~~~~~~ We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm. @@ -590,7 +648,7 @@ occAnalRhs env id rhs = occAnal ctxt rhs where ctxt | certainly_inline id = env - | otherwise = rhsCtxt + | otherwise = rhsCtxt env -- Note that we generally use an rhsCtxt. This tells the occ anal n -- that it's looking at an RHS, which has an effect in occAnalApp -- @@ -618,10 +676,14 @@ addRuleUsage :: UsageDetails -> Id -> UsageDetails -- Add the usage from RULES in Id to the usage addRuleUsage usage id = foldVarSet add usage (idRuleVars id) + -- idRuleVars here: see Note [Rule dependency info] where - add v u = addOneOcc u v NoOccInfo -- Give a non-committal binder info - -- (i.e manyOcc) because many copies - -- of the specialised thing can appear + add v u = addOneOcc u v NoOccInfo + -- Give a non-committal binder info (i.e manyOcc) because + -- a) Many copies of the specialised thing can appear + -- b) We don't want to substitute a BIG expression inside a RULE + -- even if that's the only occurrence of the thing + -- (Same goes for INLINE.) \end{code} Expressions @@ -724,15 +786,15 @@ occAnal env expr@(Lam _ _) (really_final_usage, mkLams tagged_binders body') } where - env_body = vanillaCtxt -- Body is (no longer) an RhsContext + env_body = vanillaCtxt env -- Body is (no longer) an RhsContext (binders, body) = collectBinders expr binders' = oneShotGroup env binders linear = all is_one_shot binders' is_one_shot b = isId b && isOneShotBndr b occAnal env (Case scrut bndr ty alts) - = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') -> - case mapAndUnzip (occAnalAlt alt_env bndr) alts of { (alts_usage_s, alts') -> + = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') -> + case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') -> let alts_usage = foldr1 combineAltsUsageDetails alts_usage_s alts_usage' = addCaseBndrUsage alts_usage @@ -741,6 +803,8 @@ occAnal env (Case scrut bndr ty alts) in total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }} where + -- Note [Case binder usage] + -- ~~~~~~~~~~~~~~~~~~~~~~~~ -- The case binder gets a usage of either "many" or "dead", never "one". -- Reason: we like to inline single occurrences, to eliminate a binding, -- but inlining a case binder *doesn't* eliminate a binding. @@ -749,18 +813,27 @@ occAnal env (Case scrut bndr ty alts) -- into -- case x of w { (p,q) -> f (p,q) } addCaseBndrUsage usage = case lookupVarEnv usage bndr of - Nothing -> usage - Just occ -> extendVarEnv usage bndr (markMany occ) + Nothing -> usage + Just _ -> extendVarEnv usage bndr NoOccInfo - alt_env = setVanillaCtxt env + alt_env = mkAltEnv env bndr_swap -- Consider x = case v of { True -> (p,q); ... } -- Then it's fine to inline p and q + bndr_swap = case scrut of + Var v -> Just (v, Var bndr) + Cast (Var v) co -> Just (v, Cast (Var bndr) (mkSymCoercion co)) + _other -> Nothing + + occ_anal_alt = occAnalAlt alt_env bndr bndr_swap + occ_anal_scrut (Var v) (alt1 : other_alts) - | not (null other_alts) || not (isDefaultAlt alt1) - = (mkOneOcc env v True, Var v) - occ_anal_scrut scrut _alts = occAnal vanillaCtxt scrut - -- No need for rhsCtxt + | not (null other_alts) || not (isDefaultAlt alt1) + = (mkOneOcc env v True, Var v) -- The 'True' says that the variable occurs + -- in an interesting context; the case has + -- at least one non-default alternative + occ_anal_scrut scrut _alts + = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt occAnal env (Let bind body) = case occAnal env body of { (body_usage, body') -> @@ -768,11 +841,11 @@ occAnal env (Let bind body) (final_usage, mkLets new_binds body') }} occAnalArgs :: OccEnv -> [CoreExpr] -> (UsageDetails, [CoreExpr]) -occAnalArgs _env args +occAnalArgs env args = case mapAndUnzip (occAnal arg_env) args of { (arg_uds_s, args') -> (foldr (+++) emptyDetails arg_uds_s, args')} where - arg_env = vanillaCtxt + arg_env = vanillaCtxt env \end{code} Applications are dealt with specially because we want @@ -846,12 +919,12 @@ appSpecial :: OccEnv appSpecial env n ctxt args = go n args where - arg_env = vanillaCtxt + arg_env = vanillaCtxt env go _ [] = (emptyDetails, []) -- Too few args go 1 (arg:args) -- The magic arg - = case occAnal (setCtxt arg_env ctxt) arg of { (arg_uds, arg') -> + = case occAnal (setCtxtTy arg_env ctxt) arg of { (arg_uds, arg') -> case occAnalArgs env args of { (args_uds, args') -> (arg_uds +++ args_uds, arg':args') }} @@ -862,38 +935,114 @@ appSpecial env n ctxt args \end{code} -Case alternatives -~~~~~~~~~~~~~~~~~ -If the case binder occurs at all, the other binders effectively do too. -For example - case e of x { (a,b) -> rhs } -is rather like - let x = (a,b) in rhs -If e turns out to be (e1,e2) we indeed get something like - let a = e1; b = e2; x = (a,b) in rhs - -Note [Aug 06]: I don't think this is necessary any more, and it helpe - to know when binders are unused. See esp the call to - isDeadBinder in Simplify.mkDupableAlt +Note [Binder swap] +~~~~~~~~~~~~~~~~~~ +We do these two transformations right here: + + (1) case x of b { pi -> ri } + ==> + case x of b { pi -> let x=b in ri } + + (2) case (x |> co) of b { pi -> ri } + ==> + case (x |> co) of b { pi -> let x = b |> sym co in ri } + + Why (2)? See Note [Case of cast] + +In both cases, in a particular alternative (pi -> ri), we only +add the binding if + (a) x occurs free in (pi -> ri) + (ie it occurs in ri, but is not bound in pi) + (b) the pi does not bind b (or the free vars of co) +We need (a) and (b) for the inserted binding to be correct. + +For the alternatives where we inject the binding, we can transfer +all x's OccInfo to b. And that is the point. + +Notice that + * The deliberate shadowing of 'x'. + * That (a) rapidly becomes false, so no bindings are injected. + +The reason for doing these transformations here is because it allows +us to adjust the OccInfo for 'x' and 'b' as we go. + + * Suppose the only occurrences of 'x' are the scrutinee and in the + ri; then this transformation makes it occur just once, and hence + get inlined right away. + + * If we do this in the Simplifier, we don't know whether 'x' is used + in ri, so we are forced to pessimistically zap b's OccInfo even + though it is typically dead (ie neither it nor x appear in the + ri). There's nothing actually wrong with zapping it, except that + it's kind of nice to know which variables are dead. My nose + tells me to keep this information as robustly as possible. + +The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding +{x=b}; it's Nothing if the binder-swap doesn't happen. + +Note [Binder swap on GlobalId scrutinees] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When the scrutinee is a GlobalId we must take care in two ways + + i) In order to *know* whether 'x' occurs free in the RHS, we need its + occurrence info. BUT, we don't gather occurrence info for + GlobalIds. That's what the (small) occ_scrut_ids set in OccEnv is + for: it says "gather occurrence info for these. + + ii) We must call localiseId on 'x' first, in case it's a GlobalId, or + has an External Name. See, for example, SimplEnv Note [Global Ids in + the substitution]. + +Note [Case of cast] +~~~~~~~~~~~~~~~~~~~ +Consider case (x `cast` co) of b { I# -> + ... (case (x `cast` co) of {...}) ... +We'd like to eliminate the inner case. That is the motivation for +equation (2) in Note [Binder swap]. When we get to the inner case, we +inline x, cancel the casts, and away we go. + +Note [Binders in case alternatives] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + case x of y { (a,b) -> f y } +We treat 'a', 'b' as dead, because they don't physically occur in the +case alternative. (Indeed, a variable is dead iff it doesn't occur in +its scope in the output of OccAnal.) This invariant is It really +helpe to know when binders are unused. See esp the call to +isDeadBinder in Simplify.mkDupableAlt + +In this example, though, the Simplifier will bring 'a' and 'b' back to +life, beause it binds 'y' to (a,b) (imagine got inlined and +scrutinised y). \begin{code} occAnalAlt :: OccEnv -> CoreBndr + -> Maybe (Id, CoreExpr) -- Note [Binder swap] -> CoreAlt -> (UsageDetails, Alt IdWithOccInfo) -occAnalAlt env _case_bndr (con, bndrs, rhs) +occAnalAlt env case_bndr mb_scrut_var (con, bndrs, rhs) = case occAnal env rhs of { (rhs_usage, rhs') -> let - (final_usage, tagged_bndrs) = tagBinders rhs_usage bndrs - final_bndrs = tagged_bndrs -- See Note [Aug06] above -{- - final_bndrs | case_bndr `elemVarEnv` final_usage = bndrs - | otherwise = tagged_bndrs - -- Leave the binders untagged if the case - -- binder occurs at all; see note above --} + (alt_usg, tagged_bndrs) = tagBinders rhs_usage bndrs + bndrs' = tagged_bndrs -- See Note [Binders in case alternatives] in - (final_usage, (con, final_bndrs, rhs')) } + case mb_scrut_var of + Just (scrut_var, scrut_rhs) -- See Note [Binder swap] + | scrut_var `localUsedIn` alt_usg -- (a) Fast path, usually false + , not (any shadowing bndrs) -- (b) + -> (addOneOcc usg_wo_scrut case_bndr NoOccInfo, + -- See Note [Case binder usage] for the NoOccInfo + (con, bndrs', Let (NonRec scrut_var' scrut_rhs) rhs')) + where + (usg_wo_scrut, scrut_var') = tagBinder alt_usg (localiseId scrut_var) + -- Note the localiseId; we're making a new binding + -- for it, and it might have an External Name, or + -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees] + shadowing bndr = bndr `elemVarSet` rhs_fvs + rhs_fvs = exprFreeVars scrut_rhs + + _other -> (alt_usg, (con, bndrs', rhs')) } \end{code} @@ -905,8 +1054,15 @@ occAnalAlt env _case_bndr (con, bndrs, rhs) \begin{code} data OccEnv - = OccEnv OccEncl -- Enclosing context information - CtxtTy -- Tells about linearity + = OccEnv { occ_encl :: !OccEncl -- Enclosing context information + , occ_ctxt :: !CtxtTy -- Tells about linearity + , occ_scrut_ids :: !GblScrutIds } + +type GblScrutIds = IdSet -- GlobalIds that are scrutinised, and for which + -- we want to gather occurence info; see + -- Note [Binder swap for GlobalId scrutinee] + -- No need to prune this if there's a shadowing binding + -- because it's OK for it to be too big -- OccEncl is used to control whether to inline into constructor arguments -- For example: @@ -933,24 +1089,36 @@ type CtxtTy = [Bool] -- the CtxtTy inside applies initOccEnv :: OccEnv -initOccEnv = OccEnv OccRhs [] - -vanillaCtxt :: OccEnv -vanillaCtxt = OccEnv OccVanilla [] - -rhsCtxt :: OccEnv -rhsCtxt = OccEnv OccRhs [] +initOccEnv = OccEnv { occ_encl = OccRhs + , occ_ctxt = [] + , occ_scrut_ids = emptyVarSet } + +vanillaCtxt :: OccEnv -> OccEnv +vanillaCtxt env = OccEnv { occ_encl = OccVanilla, occ_ctxt = [] + , occ_scrut_ids = occ_scrut_ids env } + +rhsCtxt :: OccEnv -> OccEnv +rhsCtxt env = OccEnv { occ_encl = OccRhs, occ_ctxt = [] + , occ_scrut_ids = occ_scrut_ids env } + +mkAltEnv :: OccEnv -> Maybe (Id, CoreExpr) -> OccEnv +-- Does two things: a) makes the occ_ctxt = OccVanilla +-- b) extends the scrut_ids if necessary +mkAltEnv env (Just (scrut_id, _)) + | not (isLocalId scrut_id) + = OccEnv { occ_encl = OccVanilla + , occ_scrut_ids = extendVarSet (occ_scrut_ids env) scrut_id + , occ_ctxt = occ_ctxt env } +mkAltEnv env _ + | isRhsEnv env = env { occ_encl = OccVanilla } + | otherwise = env + +setCtxtTy :: OccEnv -> CtxtTy -> OccEnv +setCtxtTy env ctxt = env { occ_ctxt = ctxt } isRhsEnv :: OccEnv -> Bool -isRhsEnv (OccEnv OccRhs _) = True -isRhsEnv (OccEnv OccVanilla _) = False - -setVanillaCtxt :: OccEnv -> OccEnv -setVanillaCtxt (OccEnv OccRhs ctxt_ty) = OccEnv OccVanilla ctxt_ty -setVanillaCtxt other_env = other_env - -setCtxt :: OccEnv -> CtxtTy -> OccEnv -setCtxt (OccEnv encl _) ctxt = OccEnv encl ctxt +isRhsEnv (OccEnv { occ_encl = OccRhs }) = True +isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False oneShotGroup :: OccEnv -> [CoreBndr] -> [CoreBndr] -- The result binders have one-shot-ness set that they might not have had originally. @@ -958,7 +1126,7 @@ oneShotGroup :: OccEnv -> [CoreBndr] -> [CoreBndr] -- linearity context knows that c,n are one-shot, and it records that fact in -- the binder. This is useful to guide subsequent float-in/float-out tranformations -oneShotGroup (OccEnv _encl ctxt) bndrs +oneShotGroup (OccEnv { occ_ctxt = ctxt }) bndrs = go ctxt bndrs [] where go _ [] rev_bndrs = reverse rev_bndrs @@ -972,8 +1140,8 @@ oneShotGroup (OccEnv _encl ctxt) bndrs go ctxt (bndr:bndrs) rev_bndrs = go ctxt bndrs (bndr:rev_bndrs) addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv -addAppCtxt (OccEnv encl ctxt) args - = OccEnv encl (replicate (valArgCount args) True ++ ctxt) +addAppCtxt env@(OccEnv { occ_ctxt = ctxt }) args + = env { occ_ctxt = replicate (valArgCount args) True ++ ctxt } \end{code} %************************************************************************ @@ -984,6 +1152,8 @@ addAppCtxt (OccEnv encl ctxt) args \begin{code} type UsageDetails = IdEnv OccInfo -- A finite map from ids to their usage + -- INVARIANT: never IAmDead + -- (Deadness is signalled by not being in the map at all) (+++), combineAltsUsageDetails :: UsageDetails -> UsageDetails -> UsageDetails @@ -1002,8 +1172,9 @@ addOneOcc usage id info emptyDetails :: UsageDetails emptyDetails = (emptyVarEnv :: UsageDetails) -usedIn :: Id -> UsageDetails -> Bool -v `usedIn` details = isExportedId v || v `elemVarEnv` details +localUsedIn, usedIn :: Id -> UsageDetails -> Bool +v `localUsedIn` details = v `elemVarEnv` details +v `usedIn` details = isExportedId v || v `localUsedIn` details type IdWithOccInfo = Id @@ -1055,14 +1226,14 @@ setBinderOcc usage bndr \begin{code} mkOneOcc :: OccEnv -> Id -> InterestingCxt -> UsageDetails -mkOneOcc _env id int_cxt +mkOneOcc env id int_cxt | isLocalId id = unitVarEnv id (OneOcc False True int_cxt) - | otherwise = emptyDetails + | id `elemVarSet` occ_scrut_ids env = unitVarEnv id NoOccInfo + | otherwise = emptyDetails markMany, markInsideLam, markInsideSCC :: OccInfo -> OccInfo -markMany IAmDead = IAmDead -markMany _ = NoOccInfo +markMany _ = NoOccInfo markInsideSCC occ = markMany occ @@ -1071,19 +1242,18 @@ markInsideLam occ = occ addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo -addOccInfo IAmDead info2 = info2 -addOccInfo info1 IAmDead = info1 -addOccInfo _ _ = NoOccInfo +addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) ) + NoOccInfo -- Both branches are at least One + -- (Argument is never IAmDead) -- (orOccInfo orig new) is used -- when combining occurrence info from branches of a case -orOccInfo IAmDead info2 = info2 -orOccInfo info1 IAmDead = info1 orOccInfo (OneOcc in_lam1 _ int_cxt1) (OneOcc in_lam2 _ int_cxt2) = OneOcc (in_lam1 || in_lam2) False -- False, because it occurs in both branches (int_cxt1 && int_cxt2) -orOccInfo _ _ = NoOccInfo +orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) ) + NoOccInfo \end{code}