X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FsimplCore%2FOccurAnal.lhs;h=fc9104fb22b318b9bd0c15db78df1bae8bd7bf56;hb=84923cc7de2a93c22a2f72daf9ac863959efae13;hp=00fdebe234b3049f89743d13cb2e6028f3681a94;hpb=b7d8dffaf1fefdf2f6b52fcf039a06843a28d586;p=ghc-hetmet.git diff --git a/compiler/simplCore/OccurAnal.lhs b/compiler/simplCore/OccurAnal.lhs index 00fdebe..fc9104f 100644 --- a/compiler/simplCore/OccurAnal.lhs +++ b/compiler/simplCore/OccurAnal.lhs @@ -23,20 +23,19 @@ import CoreUtils ( exprIsTrivial, isDefaultAlt ) import Id ( isDataConWorkId, isOneShotBndr, setOneShotLambda, idOccInfo, setIdOccInfo, isLocalId, isExportedId, idArity, idHasRules, - idType, idUnique, Id + idUnique, Id ) import BasicTypes ( OccInfo(..), isOneOcc, InterestingCxt ) import VarSet import VarEnv -import Type ( isFunTy, dropForAlls ) import Maybes ( orElse ) import Digraph ( stronglyConnCompR, SCC(..) ) import PrelNames ( buildIdKey, foldrIdKey, runSTRepIdKey, augmentIdKey ) import Unique ( Unique ) -import UniqFM ( keysUFM ) -import Util ( zipWithEqual, mapAndUnzip ) +import UniqFM ( keysUFM, intersectsUFM ) +import Util ( mapAndUnzip, mapAccumL ) import Outputable \end{code} @@ -79,14 +78,6 @@ Bindings ~~~~~~~~ \begin{code} -type IdWithOccInfo = Id -- An Id with fresh PragmaInfo attached - -type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique, - -- which is gotten from the Id. -type Details1 = (Id, UsageDetails, CoreExpr) -type Details2 = (IdWithOccInfo, CoreExpr) - - occAnalBind :: OccEnv -> CoreBind -> UsageDetails -- Usage details of scope @@ -98,12 +89,11 @@ occAnalBind env (NonRec binder rhs) body_usage = (body_usage, []) | otherwise -- It's mentioned in the body - = (final_body_usage `combineUsageDetails` rhs_usage, + = (body_usage' +++ addRuleUsage rhs_usage binder, -- Note [RulesOnly] [NonRec tagged_binder rhs']) - where - (final_body_usage, tagged_binder) = tagBinder body_usage binder - (rhs_usage, rhs') = occAnalRhs env tagged_binder rhs + (body_usage', tagged_binder) = tagBinder body_usage binder + (rhs_usage, rhs') = occAnalRhs env tagged_binder rhs \end{code} Dropping dead code for recursive bindings is done in a very simple way: @@ -145,20 +135,20 @@ It isn't easy to do a perfect job in one blow. Consider occAnalBind env (Rec pairs) body_usage = foldr (_scc_ "occAnalBind.dofinal" do_final_bind) (body_usage, []) sccs where - analysed_pairs :: [Details1] + analysed_pairs :: [Details] analysed_pairs = [ (bndr, rhs_usage, rhs') | (bndr, rhs) <- pairs, let (rhs_usage, rhs') = occAnalRhs env bndr rhs ] - sccs :: [SCC (Node Details1)] + sccs :: [SCC (Node Details)] sccs = _scc_ "occAnalBind.scc" stronglyConnCompR edges ---- stuff for dependency analysis of binds ------------------------------- - edges :: [Node Details1] + edges :: [Node Details] edges = _scc_ "occAnalBind.assoc" - [ (details, idUnique id, edges_from rhs_usage) + [ (details, idUnique id, edges_from id rhs_usage) | details@(id, rhs_usage, rhs) <- analysed_pairs ] @@ -171,48 +161,64 @@ occAnalBind env (Rec pairs) body_usage -- maybeToBool (lookupVarEnv rhs_usage bndr)] -- which has n**2 cost, and this meant that edges_from alone -- consumed 10% of total runtime! - edges_from :: UsageDetails -> [Unique] - edges_from rhs_usage = _scc_ "occAnalBind.edges_from" - keysUFM rhs_usage + edges_from :: Id -> UsageDetails -> [Unique] + edges_from bndr rhs_usage = _scc_ "occAnalBind.edges_from" + keysUFM (addRuleUsage rhs_usage bndr) - ---- stuff to "re-constitute" bindings from dependency-analysis info ------ + ---- Stuff to "re-constitute" bindings from dependency-analysis info ------ -- Non-recursive SCC do_final_bind (AcyclicSCC ((bndr, rhs_usage, rhs'), _, _)) (body_usage, binds_so_far) | not (bndr `usedIn` body_usage) = (body_usage, binds_so_far) -- Dead code | otherwise - = (combined_usage, new_bind : binds_so_far) + = (body_usage' +++ addRuleUsage rhs_usage bndr, new_bind : binds_so_far) where - total_usage = combineUsageDetails body_usage rhs_usage - (combined_usage, tagged_bndr) = tagBinder total_usage bndr - new_bind = NonRec tagged_bndr rhs' + (body_usage', tagged_bndr) = tagBinder body_usage bndr + new_bind = NonRec tagged_bndr rhs' -- Recursive SCC do_final_bind (CyclicSCC cycle) (body_usage, binds_so_far) | not (any (`usedIn` body_usage) bndrs) -- NB: look at body_usage, not total_usage = (body_usage, binds_so_far) -- Dead code - | otherwise - = (combined_usage, final_bind:binds_so_far) + | otherwise -- If any is used, they all are + = (final_usage, final_bind : binds_so_far) where - details = [details | (details, _, _) <- cycle] - bndrs = [bndr | (bndr, _, _) <- details] - rhs_usages = [rhs_usage | (_, rhs_usage, _) <- details] - total_usage = foldr combineUsageDetails body_usage rhs_usages - (combined_usage, tagged_bndrs) = tagBinders total_usage bndrs - final_bind = Rec (reOrderRec env new_cycle) - - new_cycle = CyclicSCC (zipWithEqual "occAnalBind" mk_new_bind tagged_bndrs cycle) - mk_new_bind tagged_bndr ((_, _, rhs'), key, keys) = ((tagged_bndr, rhs'), key, keys) + details = [details | (details, _, _) <- cycle] + bndrs = [bndr | (bndr, _, _) <- details] + bndr_usages = [addRuleUsage rhs_usage bndr | (bndr, rhs_usage, _) <- details] + total_usage = foldr (+++) body_usage bndr_usages + (final_usage, tagged_cycle) = mapAccumL tag_bind total_usage cycle + tag_bind usg ((bndr,rhs_usg,rhs),k,ks) = (usg', ((bndr',rhs_usg,rhs),k,ks)) + where + (usg', bndr') = tagBinder usg bndr + final_bind = Rec (reOrderCycle (mkVarSet bndrs) tagged_cycle) + +{- An alternative; rebuild the edges. No semantic difference, but perf might change + + -- Hopefully 'bndrs' is a relatively small group now + -- Now get ready for the loop-breaking phase + -- We've done dead-code elimination already, so no worries about un-referenced binders + keys = map idUnique bndrs + mk_node tagged_bndr (_, rhs_usage, rhs') + = ((tagged_bndr, rhs'), idUnique tagged_bndr, used) + where + used = [key | key <- keys, used_outside_rule rhs_usage key ] + + used_outside_rule usage uniq = case lookupUFM_Directly usage uniq of + Nothing -> False + Just RulesOnly -> False -- Ignore rules + other -> True +-} \end{code} @reOrderRec@ is applied to the list of (binder,rhs) pairs for a cyclic strongly connected component (there's guaranteed to be a cycle). It returns the same pairs, but a) in a better order, - b) with some of the Ids having a IMustNotBeINLINEd pragma + b) with some of the Ids having a IAmALoopBreaker pragma -The "no-inline" Ids are sufficient to break all cycles in the SCC. This means +The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means that the simplifier can guarantee not to loop provided it never records an inlining for these no-inline guys. @@ -239,55 +245,37 @@ My solution was to make a=b bindings record b as Many, rather like INLINE bindin Perhaps something cleverer would suffice. =============== -You might think that you can prevent non-termination simply by making -sure that we simplify a recursive binding's RHS in an environment that -simply clones the recursive Id. But no. Consider - - letrec f = \x -> let z = f x' in ... - - in - let n = f y - in - case n of { ... } - -We bind n to its *simplified* RHS, we then *re-simplify* it when -we inline n. Then we may well inline f; and then the same thing -happens with z! - -I don't think it's possible to prevent non-termination by environment -manipulation in this way. Apart from anything else, successive -iterations of the simplifier may unroll recursive loops in cases like -that above. The idea of beaking every recursive loop with an -IMustNotBeINLINEd pragma is much much better. - \begin{code} -reOrderRec - :: OccEnv - -> SCC (Node Details2) - -> [Details2] - -- Sorted into a plausible order. Enough of the Ids have - -- dontINLINE pragmas that there are no loops left. - - -- Non-recursive case -reOrderRec env (AcyclicSCC (bind, _, _)) = [bind] - - -- Common case of simple self-recursion -reOrderRec env (CyclicSCC [bind]) - = [(setIdOccInfo tagged_bndr IAmALoopBreaker, rhs)] +type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique, + -- which is gotten from the Id. +type Details = (Id, UsageDetails, CoreExpr) + +reOrderRec :: IdSet -- Binders of this group + -> SCC (Node Details) + -> [(Id,CoreExpr)] +-- Sorted into a plausible order. Enough of the Ids have +-- IAmALoopBreaker pragmas that there are no loops left. +reOrderRec bndrs (AcyclicSCC ((bndr, _, rhs), _, _)) = [(bndr, rhs)] +reOrderRec bndrs (CyclicSCC cycle) = reOrderCycle bndrs cycle + +reOrderCycle :: IdSet -> [Node Details] -> [(Id,CoreExpr)] +reOrderCycle bndrs [] + = panic "reOrderCycle" +reOrderCycle bndrs [bind] -- Common case of simple self-recursion + = [(makeLoopBreaker bndrs rhs_usg bndr, rhs)] where - ((tagged_bndr, rhs), _, _) = bind + ((bndr, rhs_usg, rhs), _, _) = bind -reOrderRec env (CyclicSCC (bind : binds)) +reOrderCycle bndrs (bind : binds) = -- Choose a loop breaker, mark it no-inline, -- do SCC analysis on the rest, and recursively sort them out - concat (map (reOrderRec env) (stronglyConnCompR unchosen)) - ++ - [(setIdOccInfo tagged_bndr IAmALoopBreaker, rhs)] + concatMap (reOrderRec bndrs) (stronglyConnCompR unchosen) ++ + [(makeLoopBreaker bndrs rhs_usg bndr, rhs)] where - (chosen_pair, unchosen) = choose_loop_breaker bind (score bind) [] binds - (tagged_bndr, rhs) = chosen_pair + (chosen_bind, unchosen) = choose_loop_breaker bind (score bind) [] binds + (bndr, rhs_usg, rhs) = chosen_bind -- This loop looks for the bind with the lowest score -- to pick as the loop breaker. The rest accumulate in @@ -303,8 +291,8 @@ reOrderRec env (CyclicSCC (bind : binds)) where sc = score bind - score :: Node Details2 -> Int -- Higher score => less likely to be picked as loop breaker - score ((bndr, rhs), _, _) + score :: Node Details -> Int -- Higher score => less likely to be picked as loop breaker + score ((bndr, _, rhs), _, _) | exprIsTrivial rhs = 4 -- Practically certain to be inlined -- Used to have also: && not (isExportedId bndr) -- But I found this sometimes cost an extra iteration when we have @@ -312,7 +300,7 @@ reOrderRec env (CyclicSCC (bind : binds)) -- where df is the exported dictionary. Then df makes a really -- bad choice for loop breaker - | not_fun_ty (idType bndr) = 3 -- Data types help with cases + | is_con_app rhs = 3 -- Data types help with cases -- This used to have a lower score than inlineCandidate, but -- it's *really* helpful if dictionaries get inlined fast, -- so I'm experimenting with giving higher priority to data-typed things @@ -339,7 +327,26 @@ reOrderRec env (CyclicSCC (bind : binds)) -- we didn't stupidly choose d as the loop breaker. -- But we won't because constructor args are marked "Many". - not_fun_ty ty = not (isFunTy (dropForAlls ty)) + -- Cheap and cheerful; the simplifer moves casts out of the way + -- The lambda case is important to spot x = /\a. C (f a) + -- which comes up when C is a dictionary constructor and + -- f is a default method. + -- Example: the instance for Show (ST s a) in GHC.ST + is_con_app (Var v) = isDataConWorkId v + is_con_app (App f _) = is_con_app f + is_con_app (Lam b e) | isTyVar b = is_con_app e + is_con_app (Note _ e) = is_con_app e + is_con_app other = False + +makeLoopBreaker :: VarSet -- Binders of this group + -> UsageDetails -- Usage of this rhs (neglecting rules) + -> Id -> Id +-- Set the loop-breaker flag, recording whether the thing occurs only in +-- the RHS of a RULE (in this recursive group) +makeLoopBreaker bndrs rhs_usg bndr + = setIdOccInfo bndr (IAmALoopBreaker rules_only) + where + rules_only = bndrs `intersectsUFM` rhs_usg \end{code} @occAnalRhs@ deals with the question of bindings where the Id is marked @@ -361,9 +368,8 @@ occAnalRhs :: OccEnv -> (UsageDetails, CoreExpr) occAnalRhs env id rhs - = (final_usage, rhs') + = occAnal ctxt rhs where - (rhs_usage, rhs') = occAnal ctxt rhs ctxt | certainly_inline id = env | otherwise = rhsCtxt -- Note that we generally use an rhsCtxt. This tells the occ anal n @@ -384,14 +390,40 @@ occAnalRhs env id rhs certainly_inline id = case idOccInfo id of OneOcc in_lam one_br _ -> not in_lam && one_br other -> False +\end{code} + +Note [RulesOnly] +~~~~~~~~~~~~~~~~~~ +If the binder has RULES inside it then we count the specialised Ids as +"extra rhs's". That way the "parent" keeps the specialised "children" +alive. If the parent dies (because it isn't referenced any more), +then the children will die too unless they are already referenced +directly. + +That's the basic idea. However in a recursive situation we want to be a bit +cleverer. Example (from GHC.Enum): - -- [March 98] A new wrinkle is that if the binder has specialisations inside - -- it then we count the specialised Ids as "extra rhs's". That way - -- the "parent" keeps the specialised "children" alive. If the parent - -- dies (because it isn't referenced any more), then the children will - -- die too unless they are already referenced directly. + eftInt :: Int# -> Int# -> [Int] + eftInt x y = ...(non-recursive)... - final_usage = addRuleUsage rhs_usage id + {-# INLINE [0] eftIntFB #-} + eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r + eftIntFB c n x y = ...(non-recursive)... + + {-# RULES + "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y) + "eftIntList" [1] eftIntFB (:) [] = eftInt + #-} + +The two look mutually recursive only because of their RULES; we don't want +that to inhibit inlining! + +So when we identify a LoopBreaker, we mark it to say whether it only mentions +the other binders in its recursive group in a RULE. If so, we can inline it, +because doing so will not expose new occurrences of binders in its group. + + +\begin{code} addRuleUsage :: UsageDetails -> Id -> UsageDetails -- Add the usage from RULES in Id to the usage @@ -455,6 +487,14 @@ occAnal env (Note note body) = case occAnal env body of { (usage, body') -> (usage, Note note body') } + +occAnal env (Cast expr co) + = case occAnal env expr of { (usage, expr') -> + (markRhsUds env True usage, Cast expr' co) + -- If we see let x = y `cast` co + -- then mark y as 'Many' so that we don't + -- immediately inline y again. + } \end{code} \begin{code} @@ -502,13 +542,13 @@ occAnal env expr@(Lam _ _) is_one_shot b = isId b && isOneShotBndr b occAnal env (Case scrut bndr ty alts) - = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') -> - case mapAndUnzip (occAnalAlt env bndr) alts of { (alts_usage_s, alts') -> + = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') -> + case mapAndUnzip (occAnalAlt alt_env bndr) alts of { (alts_usage_s, alts') -> let alts_usage = foldr1 combineAltsUsageDetails alts_usage_s alts_usage' = addCaseBndrUsage alts_usage (alts_usage1, tagged_bndr) = tagBinder alts_usage' bndr - total_usage = scrut_usage `combineUsageDetails` alts_usage1 + total_usage = scrut_usage +++ alts_usage1 in total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }} where @@ -523,6 +563,10 @@ occAnal env (Case scrut bndr ty alts) Nothing -> usage Just occ -> extendVarEnv usage bndr (markMany occ) + alt_env = setVanillaCtxt env + -- Consider x = case v of { True -> (p,q); ... } + -- Then it's fine to inline p and q + occ_anal_scrut (Var v) (alt1 : other_alts) | not (null other_alts) || not (isDefaultAlt alt1) = (mkOneOcc env v True, Var v) @@ -536,7 +580,7 @@ occAnal env (Let bind body) occAnalArgs env args = case mapAndUnzip (occAnal arg_env) args of { (arg_uds_s, args') -> - (foldr combineUsageDetails emptyDetails arg_uds_s, args')} + (foldr (+++) emptyDetails arg_uds_s, args')} where arg_env = vanillaCtxt \end{code} @@ -545,27 +589,18 @@ Applications are dealt with specially because we want the "build hack" to work. \begin{code} --- Hack for build, fold, runST occAnalApp env (Var fun, args) is_rhs = case args_stuff of { (args_uds, args') -> let - -- We mark the free vars of the argument of a constructor or PAP - -- as "many", if it is the RHS of a let(rec). - -- This means that nothing gets inlined into a constructor argument - -- position, which is what we want. Typically those constructor - -- arguments are just variables, or trivial expressions. - -- - -- This is the *whole point* of the isRhsEnv predicate - final_args_uds - | isRhsEnv env, - isDataConWorkId fun || valArgCount args < idArity fun - = mapVarEnv markMany args_uds - | otherwise = args_uds + final_args_uds = markRhsUds env is_pap args_uds in - (fun_uds `combineUsageDetails` final_args_uds, mkApps (Var fun) args') } + (fun_uds +++ final_args_uds, mkApps (Var fun) args') } where fun_uniq = idUnique fun fun_uds = mkOneOcc env fun (valArgCount args > 0) + is_pap = isDataConWorkId fun || valArgCount args < idArity fun + + -- Hack for build, fold, runST args_stuff | fun_uniq == buildIdKey = appSpecial env 2 [True,True] args | fun_uniq == augmentIdKey = appSpecial env 2 [True,True] args | fun_uniq == foldrIdKey = appSpecial env 3 [False,True] args @@ -590,10 +625,27 @@ occAnalApp env (fun, args) is_rhs case occAnalArgs env args of { (args_uds, args') -> let - final_uds = fun_uds `combineUsageDetails` args_uds + final_uds = fun_uds +++ args_uds in (final_uds, mkApps fun' args') }} + +markRhsUds :: OccEnv -- Check if this is a RhsEnv + -> Bool -- and this is true + -> UsageDetails -- The do markMany on this + -> UsageDetails +-- We mark the free vars of the argument of a constructor or PAP +-- as "many", if it is the RHS of a let(rec). +-- This means that nothing gets inlined into a constructor argument +-- position, which is what we want. Typically those constructor +-- arguments are just variables, or trivial expressions. +-- +-- This is the *whole point* of the isRhsEnv predicate +markRhsUds env is_pap arg_uds + | isRhsEnv env && is_pap = mapVarEnv markMany arg_uds + | otherwise = arg_uds + + appSpecial :: OccEnv -> Int -> CtxtTy -- Argument number, and context to use for it -> [CoreExpr] @@ -608,12 +660,12 @@ appSpecial env n ctxt args go 1 (arg:args) -- The magic arg = case occAnal (setCtxt arg_env ctxt) arg of { (arg_uds, arg') -> case occAnalArgs env args of { (args_uds, args') -> - (combineUsageDetails arg_uds args_uds, arg':args') }} + (arg_uds +++ args_uds, arg':args') }} go n (arg:args) = case occAnal arg_env arg of { (arg_uds, arg') -> case go (n-1) args of { (args_uds, args') -> - (combineUsageDetails arg_uds args_uds, arg':args') }} + (arg_uds +++ args_uds, arg':args') }} \end{code} @@ -627,15 +679,22 @@ is rather like If e turns out to be (e1,e2) we indeed get something like let a = e1; b = e2; x = (a,b) in rhs +Note [Aug 06]: I don't think this is necessary any more, and it helpe + to know when binders are unused. See esp the call to + isDeadBinder in Simplify.mkDupableAlt + \begin{code} occAnalAlt env case_bndr (con, bndrs, rhs) = case occAnal env rhs of { (rhs_usage, rhs') -> let (final_usage, tagged_bndrs) = tagBinders rhs_usage bndrs + final_bndrs = tagged_bndrs -- See Note [Aug06] above +{- final_bndrs | case_bndr `elemVarEnv` final_usage = bndrs | otherwise = tagged_bndrs -- Leave the binders untagged if the case -- binder occurs at all; see note above +-} in (final_usage, (con, final_bndrs, rhs')) } \end{code} @@ -685,6 +744,10 @@ rhsCtxt = OccEnv OccRhs [] isRhsEnv (OccEnv OccRhs _) = True isRhsEnv (OccEnv OccVanilla _) = False +setVanillaCtxt :: OccEnv -> OccEnv +setVanillaCtxt (OccEnv OccRhs ctxt_ty) = OccEnv OccVanilla ctxt_ty +setVanillaCtxt other_env = other_env + setCtxt :: OccEnv -> CtxtTy -> OccEnv setCtxt (OccEnv encl _) ctxt = OccEnv encl ctxt @@ -720,10 +783,10 @@ addAppCtxt (OccEnv encl ctxt) args \begin{code} type UsageDetails = IdEnv OccInfo -- A finite map from ids to their usage -combineUsageDetails, combineAltsUsageDetails +(+++), combineAltsUsageDetails :: UsageDetails -> UsageDetails -> UsageDetails -combineUsageDetails usage1 usage2 +(+++) usage1 usage2 = plusVarEnv_C addOccInfo usage1 usage2 combineAltsUsageDetails usage1 usage2 @@ -739,6 +802,8 @@ emptyDetails = (emptyVarEnv :: UsageDetails) usedIn :: Id -> UsageDetails -> Bool v `usedIn` details = isExportedId v || v `elemVarEnv` details +type IdWithOccInfo = Id + tagBinders :: UsageDetails -- Of scope -> [Id] -- Binders -> (UsageDetails, -- Details with binders removed @@ -803,9 +868,9 @@ markInsideLam occ = occ addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo -addOccInfo IAmDead info2 = info2 -addOccInfo info1 IAmDead = info1 -addOccInfo info1 info2 = NoOccInfo +addOccInfo IAmDead info2 = info2 +addOccInfo info1 IAmDead = info1 +addOccInfo info1 info2 = NoOccInfo -- (orOccInfo orig new) is used -- when combining occurrence info from branches of a case @@ -817,6 +882,5 @@ orOccInfo (OneOcc in_lam1 one_branch1 int_cxt1) = OneOcc (in_lam1 || in_lam2) False -- False, because it occurs in both branches (int_cxt1 && int_cxt2) - orOccInfo info1 info2 = NoOccInfo \end{code}