From 805edf6e400001f6e11b4721b285ecd51e0c2445 Mon Sep 17 00:00:00 2001 From: "simonpj@microsoft.com" Date: Thu, 5 Oct 2006 12:10:23 +0000 Subject: [PATCH] Take 2 on the recursive-rule fix This is another attempt to fix the interaction between recursion and RULES. I just had it wrong before! Now the significance of the flag on IAmALoopBreaker is given in BasicTypes | IAmALoopBreaker -- Used by the occurrence analyser to mark loop-breakers -- in a group of recursive definitions !RulesOnly -- True <=> This loop breaker mentions the other binders -- in its recursive group only in its RULES, not -- in its rhs -- See OccurAnal Note [RulesOnly] --- compiler/basicTypes/BasicTypes.lhs | 32 ++----- compiler/coreSyn/CoreUnfold.lhs | 2 +- compiler/simplCore/OccurAnal.lhs | 166 ++++++++++++++++++++---------------- 3 files changed, 100 insertions(+), 100 deletions(-) diff --git a/compiler/basicTypes/BasicTypes.lhs b/compiler/basicTypes/BasicTypes.lhs index d1ad0be..c43280f 100644 --- a/compiler/basicTypes/BasicTypes.lhs +++ b/compiler/basicTypes/BasicTypes.lhs @@ -367,8 +367,6 @@ defn of OccInfo here, safely at the bottom data OccInfo = NoOccInfo -- Many occurrences, or unknown - | RulesOnly -- Occurs only in the RHS of one or more rules - | IAmDead -- Marks unused variables. Sometimes useful for -- lambda and case-bound variables. @@ -379,31 +377,14 @@ data OccInfo | IAmALoopBreaker -- Used by the occurrence analyser to mark loop-breakers -- in a group of recursive definitions - !Bool -- True <=> This loop breaker occurs only the RHS of a RULE -\end{code} - -Note [RulesOnly] -~~~~~~~~~~~~~~~~ -The RulesOnly constructor records if an Id occurs only in the RHS of a Rule. -Similarly, the boolean in IAmLoopbreaker True if the only reason the Id is a -loop-breaker only because of recursion through a RULE. In that case, -we can ignore the loop-breaker-ness for inlining purposes. Example -(from GHC.Enum): + !RulesOnly -- True <=> This loop breaker mentions the other binders + -- in its recursive group only in its RULES, not + -- in its rhs + -- See OccurAnal Note [RulesOnly] - eftInt :: Int# -> Int# -> [Int] - eftInt x y = ...(non-recursive)... - - {-# INLINE [0] eftIntFB #-} - eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r - eftIntFB c n x y = ...(non-recursive)... - - {-# RULES - "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y) - "eftIntList" [1] eftIntFB (:) [] = eftInt - #-} +type RulesOnly = Bool +\end{code} -The two look mutually recursive only because of their RULES; -we don't want that to inhibit inlining! \begin{code} isNoOcc :: OccInfo -> Bool @@ -455,7 +436,6 @@ isFragileOcc other = False instance Outputable OccInfo where -- only used for debugging; never parsed. KSW 1999-07 ppr NoOccInfo = empty - ppr RulesOnly = ptext SLIT("RulesOnly") ppr (IAmALoopBreaker ro) = ptext SLIT("LoopBreaker") <> if ro then char '!' else empty ppr IAmDead = ptext SLIT("Dead") ppr (OneOcc inside_lam one_branch int_cxt) diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index 6bb778d..2a2751e 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -533,7 +533,7 @@ callSiteInline dflags active_inline occ id arg_infos interesting_cont | not active_inline = False | otherwise = case occ of IAmDead -> pprTrace "callSiteInline: dead" (ppr id) False - IAmALoopBreaker False -> False -- Note [RulesOnly] in BasicTypes + IAmALoopBreaker False -> False -- Note [RulesOnly] in OccurAnal --OneOcc in_lam _ _ -> (not in_lam || is_cheap) && consider_safe True other -> is_cheap && consider_safe False -- We consider even the once-in-one-branch diff --git a/compiler/simplCore/OccurAnal.lhs b/compiler/simplCore/OccurAnal.lhs index d0bc385..d13fa3b 100644 --- a/compiler/simplCore/OccurAnal.lhs +++ b/compiler/simplCore/OccurAnal.lhs @@ -35,8 +35,8 @@ import Maybes ( orElse ) import Digraph ( stronglyConnCompR, SCC(..) ) import PrelNames ( buildIdKey, foldrIdKey, runSTRepIdKey, augmentIdKey ) import Unique ( Unique ) -import UniqFM ( keysUFM ) -import Util ( zipWithEqual, mapAndUnzip ) +import UniqFM ( keysUFM, intersectsUFM ) +import Util ( mapAndUnzip, mapAccumL ) import Outputable \end{code} @@ -90,12 +90,11 @@ occAnalBind env (NonRec binder rhs) body_usage = (body_usage, []) | otherwise -- It's mentioned in the body - = (final_body_usage `combineUsageDetails` rhs_usage, + = (body_usage' +++ addRuleUsage rhs_usage binder, -- Note [RulesOnly] [NonRec tagged_binder rhs']) - where - (final_body_usage, tagged_binder) = tagBinder body_usage binder - (rhs_usage, rhs') = occAnalRhs env tagged_binder rhs + (body_usage', tagged_binder) = tagBinder body_usage binder + (rhs_usage, rhs') = occAnalRhs env tagged_binder rhs \end{code} Dropping dead code for recursive bindings is done in a very simple way: @@ -137,20 +136,20 @@ It isn't easy to do a perfect job in one blow. Consider occAnalBind env (Rec pairs) body_usage = foldr (_scc_ "occAnalBind.dofinal" do_final_bind) (body_usage, []) sccs where - analysed_pairs :: [Details1] + analysed_pairs :: [Details] analysed_pairs = [ (bndr, rhs_usage, rhs') | (bndr, rhs) <- pairs, let (rhs_usage, rhs') = occAnalRhs env bndr rhs ] - sccs :: [SCC (Node Details1)] + sccs :: [SCC (Node Details)] sccs = _scc_ "occAnalBind.scc" stronglyConnCompR edges ---- stuff for dependency analysis of binds ------------------------------- - edges :: [Node Details1] + edges :: [Node Details] edges = _scc_ "occAnalBind.assoc" - [ (details, idUnique id, edges_from rhs_usage) + [ (details, idUnique id, edges_from id rhs_usage) | details@(id, rhs_usage, rhs) <- analysed_pairs ] @@ -163,46 +162,43 @@ occAnalBind env (Rec pairs) body_usage -- maybeToBool (lookupVarEnv rhs_usage bndr)] -- which has n**2 cost, and this meant that edges_from alone -- consumed 10% of total runtime! - edges_from :: UsageDetails -> [Unique] - edges_from rhs_usage = _scc_ "occAnalBind.edges_from" - keysUFM rhs_usage + edges_from :: Id -> UsageDetails -> [Unique] + edges_from bndr rhs_usage = _scc_ "occAnalBind.edges_from" + keysUFM (addRuleUsage rhs_usage bndr) - ---- stuff to "re-constitute" bindings from dependency-analysis info ------ + ---- Stuff to "re-constitute" bindings from dependency-analysis info ------ -- Non-recursive SCC do_final_bind (AcyclicSCC ((bndr, rhs_usage, rhs'), _, _)) (body_usage, binds_so_far) | not (bndr `usedIn` body_usage) = (body_usage, binds_so_far) -- Dead code | otherwise - = (combined_usage, new_bind : binds_so_far) + = (body_usage' +++ addRuleUsage rhs_usage bndr, new_bind : binds_so_far) where (body_usage', tagged_bndr) = tagBinder body_usage bndr - combined_usage = combineUsageDetails body_usage' rhs_usage new_bind = NonRec tagged_bndr rhs' -- Recursive SCC do_final_bind (CyclicSCC cycle) (body_usage, binds_so_far) | not (any (`usedIn` body_usage) bndrs) -- NB: look at body_usage, not total_usage = (body_usage, binds_so_far) -- Dead code - | otherwise - = (combined_usage, final_bind:binds_so_far) + | otherwise -- If any is used, they all are + = (final_usage, final_bind : binds_so_far) where - details = [details | (details, _, _) <- cycle] - bndrs = [bndr | (bndr, _, _) <- details] - rhs_usages = [rhs_usage | (_, rhs_usage, _) <- details] - rhs_usage = foldr1 combineUsageDetails rhs_usages - total_usage = rhs_usage `combineUsageDetails` body_usage - (combined_usage, tagged_bndrs) = tagBinders total_usage bndrs - - new_cycle :: [Node Details2] - new_cycle = zipWithEqual "reorder" mk_node tagged_bndrs cycle - final_bind = Rec (reOrderCycle rhs_usage new_cycle) - mk_node tagged_bndr ((_, _, rhs'), key, keys) = ((tagged_bndr, rhs'), key, keys) + details = [details | (details, _, _) <- cycle] + bndrs = [bndr | (bndr, _, _) <- details] + bndr_usages = [addRuleUsage rhs_usage bndr | (bndr, rhs_usage, _) <- details] + total_usage = foldr (+++) body_usage bndr_usages + (final_usage, tagged_cycle) = mapAccumL tag_bind total_usage cycle + tag_bind usg ((bndr,rhs_usg,rhs),k,ks) = (usg', ((bndr',rhs_usg,rhs),k,ks)) + where + (usg', bndr') = tagBinder usg bndr + final_bind = Rec (reOrderCycle (mkVarSet bndrs) tagged_cycle) {- An alternative; rebuild the edges. No semantic difference, but perf might change -- Hopefully 'bndrs' is a relatively small group now - -- Now get ready for the loop-breaking phase, this time ignoring RulesOnly references + -- Now get ready for the loop-breaking phase -- We've done dead-code elimination already, so no worries about un-referenced binders keys = map idUnique bndrs mk_node tagged_bndr (_, rhs_usage, rhs') @@ -252,36 +248,35 @@ Perhaps something cleverer would suffice. \begin{code} -type IdWithOccInfo = Id -- An Id with fresh PragmaInfo attached - type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique, -- which is gotten from the Id. -type Details1 = (Id, UsageDetails, CoreExpr) -type Details2 = (IdWithOccInfo, CoreExpr) +type Details = (Id, UsageDetails, CoreExpr) -reOrderRec :: UsageDetails -> SCC (Node Details2) -> [Details2] +reOrderRec :: IdSet -- Binders of this group + -> SCC (Node Details) + -> [(Id,CoreExpr)] -- Sorted into a plausible order. Enough of the Ids have -- IAmALoopBreaker pragmas that there are no loops left. -reOrderRec rhs_usg (AcyclicSCC (bind, _, _)) = [bind] -reOrderRec rhs_usg (CyclicSCC cycle) = reOrderCycle rhs_usg cycle +reOrderRec bndrs (AcyclicSCC ((bndr, _, rhs), _, _)) = [(bndr, rhs)] +reOrderRec bndrs (CyclicSCC cycle) = reOrderCycle bndrs cycle -reOrderCycle :: UsageDetails -> [Node Details2] -> [Details2] -reOrderCycle rhs_usg [] +reOrderCycle :: IdSet -> [Node Details] -> [(Id,CoreExpr)] +reOrderCycle bndrs [] = panic "reOrderCycle" -reOrderCycle rhs_usg [bind] -- Common case of simple self-recursion - = [(makeLoopBreaker rhs_usg tagged_bndr, rhs)] +reOrderCycle bndrs [bind] -- Common case of simple self-recursion + = [(makeLoopBreaker bndrs rhs_usg bndr, rhs)] where - ((tagged_bndr, rhs), _, _) = bind + ((bndr, rhs_usg, rhs), _, _) = bind -reOrderCycle rhs_usg (bind : binds) +reOrderCycle bndrs (bind : binds) = -- Choose a loop breaker, mark it no-inline, -- do SCC analysis on the rest, and recursively sort them out - concatMap (reOrderRec rhs_usg) (stronglyConnCompR unchosen) ++ - [(makeLoopBreaker rhs_usg tagged_bndr, rhs)] + concatMap (reOrderRec bndrs) (stronglyConnCompR unchosen) ++ + [(makeLoopBreaker bndrs rhs_usg bndr, rhs)] where - (chosen_pair, unchosen) = choose_loop_breaker bind (score bind) [] binds - (tagged_bndr, rhs) = chosen_pair + (chosen_bind, unchosen) = choose_loop_breaker bind (score bind) [] binds + (bndr, rhs_usg, rhs) = chosen_bind -- This loop looks for the bind with the lowest score -- to pick as the loop breaker. The rest accumulate in @@ -297,8 +292,8 @@ reOrderCycle rhs_usg (bind : binds) where sc = score bind - score :: Node Details2 -> Int -- Higher score => less likely to be picked as loop breaker - score ((bndr, rhs), _, _) + score :: Node Details -> Int -- Higher score => less likely to be picked as loop breaker + score ((bndr, _, rhs), _, _) | exprIsTrivial rhs = 4 -- Practically certain to be inlined -- Used to have also: && not (isExportedId bndr) -- But I found this sometimes cost an extra iteration when we have @@ -335,15 +330,15 @@ reOrderCycle rhs_usg (bind : binds) not_fun_ty ty = not (isFunTy (dropForAlls ty)) -makeLoopBreaker :: UsageDetails -> Id -> Id +makeLoopBreaker :: VarSet -- Binders of this group + -> UsageDetails -- Usage of this rhs (neglecting rules) + -> Id -> Id -- Set the loop-breaker flag, recording whether the thing occurs only in -- the RHS of a RULE (in this recursive group) -makeLoopBreaker rhs_usg bndr +makeLoopBreaker bndrs rhs_usg bndr = setIdOccInfo bndr (IAmALoopBreaker rules_only) where - rules_only = case lookupVarEnv rhs_usg bndr of - Just RulesOnly -> True - other -> False + rules_only = bndrs `intersectsUFM` rhs_usg \end{code} @occAnalRhs@ deals with the question of bindings where the Id is marked @@ -365,9 +360,8 @@ occAnalRhs :: OccEnv -> (UsageDetails, CoreExpr) occAnalRhs env id rhs - = (final_usage, rhs') + = occAnal ctxt rhs where - (rhs_usage, rhs') = occAnal ctxt rhs ctxt | certainly_inline id = env | otherwise = rhsCtxt -- Note that we generally use an rhsCtxt. This tells the occ anal n @@ -388,21 +382,47 @@ occAnalRhs env id rhs certainly_inline id = case idOccInfo id of OneOcc in_lam one_br _ -> not in_lam && one_br other -> False +\end{code} + +Note [RulesOnly] +~~~~~~~~~~~~~~~~~~ +If the binder has RULES inside it then we count the specialised Ids as +"extra rhs's". That way the "parent" keeps the specialised "children" +alive. If the parent dies (because it isn't referenced any more), +then the children will die too unless they are already referenced +directly. + +That's the basic idea. However in a recursive situation we want to be a bit +cleverer. Example (from GHC.Enum): + + eftInt :: Int# -> Int# -> [Int] + eftInt x y = ...(non-recursive)... - -- [March 98] A new wrinkle is that if the binder has specialisations inside - -- it then we count the specialised Ids as "extra rhs's". That way - -- the "parent" keeps the specialised "children" alive. If the parent - -- dies (because it isn't referenced any more), then the children will - -- die too unless they are already referenced directly. + {-# INLINE [0] eftIntFB #-} + eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r + eftIntFB c n x y = ...(non-recursive)... - final_usage = addRuleUsage rhs_usage id + {-# RULES + "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y) + "eftIntList" [1] eftIntFB (:) [] = eftInt + #-} + +The two look mutually recursive only because of their RULES; we don't want +that to inhibit inlining! + +So when we identify a LoopBreaker, we mark it to say whether it only mentions +the other binders in its recursive group in a RULE. If so, we can inline it, +because doing so will not expose new occurrences of binders in its group. + + +\begin{code} addRuleUsage :: UsageDetails -> Id -> UsageDetails -- Add the usage from RULES in Id to the usage addRuleUsage usage id = foldVarSet add usage (idRuleVars id) where - add v u = addOneOcc u v RulesOnly -- Give a non-committal binder info + add v u = addOneOcc u v NoOccInfo -- Give a non-committal binder info -- (i.e manyOcc) because many copies -- of the specialised thing can appear \end{code} @@ -517,7 +537,7 @@ occAnal env (Case scrut bndr ty alts) alts_usage = foldr1 combineAltsUsageDetails alts_usage_s alts_usage' = addCaseBndrUsage alts_usage (alts_usage1, tagged_bndr) = tagBinder alts_usage' bndr - total_usage = scrut_usage `combineUsageDetails` alts_usage1 + total_usage = scrut_usage +++ alts_usage1 in total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }} where @@ -549,7 +569,7 @@ occAnal env (Let bind body) occAnalArgs env args = case mapAndUnzip (occAnal arg_env) args of { (arg_uds_s, args') -> - (foldr combineUsageDetails emptyDetails arg_uds_s, args')} + (foldr (+++) emptyDetails arg_uds_s, args')} where arg_env = vanillaCtxt \end{code} @@ -574,7 +594,7 @@ occAnalApp env (Var fun, args) is_rhs = mapVarEnv markMany args_uds | otherwise = args_uds in - (fun_uds `combineUsageDetails` final_args_uds, mkApps (Var fun) args') } + (fun_uds +++ final_args_uds, mkApps (Var fun) args') } where fun_uniq = idUnique fun fun_uds = mkOneOcc env fun (valArgCount args > 0) @@ -604,7 +624,7 @@ occAnalApp env (fun, args) is_rhs case occAnalArgs env args of { (args_uds, args') -> let - final_uds = fun_uds `combineUsageDetails` args_uds + final_uds = fun_uds +++ args_uds in (final_uds, mkApps fun' args') }} @@ -622,12 +642,12 @@ appSpecial env n ctxt args go 1 (arg:args) -- The magic arg = case occAnal (setCtxt arg_env ctxt) arg of { (arg_uds, arg') -> case occAnalArgs env args of { (args_uds, args') -> - (combineUsageDetails arg_uds args_uds, arg':args') }} + (arg_uds +++ args_uds, arg':args') }} go n (arg:args) = case occAnal arg_env arg of { (arg_uds, arg') -> case go (n-1) args of { (args_uds, args') -> - (combineUsageDetails arg_uds args_uds, arg':args') }} + (arg_uds +++ args_uds, arg':args') }} \end{code} @@ -745,10 +765,10 @@ addAppCtxt (OccEnv encl ctxt) args \begin{code} type UsageDetails = IdEnv OccInfo -- A finite map from ids to their usage -combineUsageDetails, combineAltsUsageDetails +(+++), combineAltsUsageDetails :: UsageDetails -> UsageDetails -> UsageDetails -combineUsageDetails usage1 usage2 +(+++) usage1 usage2 = plusVarEnv_C addOccInfo usage1 usage2 combineAltsUsageDetails usage1 usage2 @@ -764,6 +784,8 @@ emptyDetails = (emptyVarEnv :: UsageDetails) usedIn :: Id -> UsageDetails -> Bool v `usedIn` details = isExportedId v || v `elemVarEnv` details +type IdWithOccInfo = Id + tagBinders :: UsageDetails -- Of scope -> [Id] -- Binders -> (UsageDetails, -- Details with binders removed @@ -830,7 +852,6 @@ addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo addOccInfo IAmDead info2 = info2 addOccInfo info1 IAmDead = info1 -addOccInfo RulesOnly RulesOnly = RulesOnly addOccInfo info1 info2 = NoOccInfo -- (orOccInfo orig new) is used @@ -838,7 +859,6 @@ addOccInfo info1 info2 = NoOccInfo orOccInfo IAmDead info2 = info2 orOccInfo info1 IAmDead = info1 -orOccInfo RulesOnly RulesOnly = RulesOnly orOccInfo (OneOcc in_lam1 one_branch1 int_cxt1) (OneOcc in_lam2 one_branch2 int_cxt2) = OneOcc (in_lam1 || in_lam2) -- 1.7.10.4