X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Fcompiler%2FsimplCore%2FOccurAnal.lhs;h=ae09f03adfcf76d0f8b6ce86050404809f01e15b;hb=ca0b7c66f2e8e50f15a03c406408d9e86455f8eb;hp=9bb19b9be0504c3fdb9fec4dd20c148888c89a7d;hpb=62514f77fc32d5381708474142b5bbc1b2c3b033;p=ghc-hetmet.git diff --git a/ghc/compiler/simplCore/OccurAnal.lhs b/ghc/compiler/simplCore/OccurAnal.lhs index 9bb19b9..ae09f03 100644 --- a/ghc/compiler/simplCore/OccurAnal.lhs +++ b/ghc/compiler/simplCore/OccurAnal.lhs @@ -12,35 +12,31 @@ core expression with (hopefully) improved usage information. \begin{code} module OccurAnal ( - occurAnalyseBinds, occurAnalyseExpr, occurAnalyseGlobalExpr, - markBinderInsideLambda + occurAnalyseBinds, occurAnalyseGlobalExpr, occurAnalyseRule ) where #include "HsVersions.h" -import BinderInfo -import CmdLineOpts ( SimplifierSwitch(..) ) import CoreSyn -import CoreUtils ( exprIsTrivial, idSpecVars ) -import Const ( Con(..), Literal(..) ) -import Id ( idWantsToBeINLINEd, isSpecPragmaId, - getInlinePragma, setInlinePragma, - omitIfaceSigForId, - getIdSpecialisation, +import CoreFVs ( idRuleVars ) +import CoreUtils ( exprIsTrivial ) +import Id ( isDataConWorkId, isOneShotLambda, setOneShotLambda, + idOccInfo, setIdOccInfo, + isExportedId, modifyIdInfo, idInfo, idArity, + idSpecialisation, isLocalId, idType, idUnique, Id ) -import IdInfo ( InlinePragInfo(..), OccInfo(..) ) -import SpecEnv ( isEmptySpecEnv ) +import IdInfo ( copyIdInfo ) +import BasicTypes ( OccInfo(..), isOneOcc ) import VarSet import VarEnv -import PrelInfo ( noRepStrIds, noRepIntegerIds ) -import Name ( isExported, isLocallyDefined ) -import Type ( splitFunTy_maybe, splitForAllTys ) -import Maybes ( maybeToBool ) +import Type ( isFunTy, dropForAlls ) +import Maybes ( orElse ) import Digraph ( stronglyConnCompR, SCC(..) ) -import Unique ( u2i ) +import PrelNames ( buildIdKey, foldrIdKey, runSTRepIdKey, augmentIdKey ) +import Unique ( Unique ) import UniqFM ( keysUFM ) import Util ( zipWithEqual, mapAndUnzip ) import Outputable @@ -56,40 +52,20 @@ import Outputable Here's the externally-callable interface: \begin{code} -occurAnalyseBinds - :: (SimplifierSwitch -> Bool) - -> [CoreBind] - -> [CoreBind] - -occurAnalyseBinds simplifier_sw_chkr binds - = binds' - where - (_, _, binds') = occAnalTop initial_env binds - - initial_env = OccEnv (simplifier_sw_chkr IgnoreINLINEPragma) - (\id -> isLocallyDefined id) -- Anything local is interesting - emptyVarSet -\end{code} - - -\begin{code} -occurAnalyseExpr :: (Id -> Bool) -- Tells if a variable is interesting - -> CoreExpr - -> (IdEnv BinderInfo, -- Occ info for interesting free vars - CoreExpr) - -occurAnalyseExpr interesting expr - = occAnal initial_env expr - where - initial_env = OccEnv False {- Do not ignore INLINE Pragma -} - interesting - emptyVarSet - occurAnalyseGlobalExpr :: CoreExpr -> CoreExpr occurAnalyseGlobalExpr expr = -- Top level expr, so no interesting free vars, and -- discard occurence info returned - snd (occurAnalyseExpr (\_ -> False) expr) + snd (occAnal (initOccEnv emptyVarSet) expr) + +occurAnalyseRule :: CoreRule -> CoreRule +occurAnalyseRule rule@(BuiltinRule _ _) = rule +occurAnalyseRule (Rule str act tpl_vars tpl_args rhs) + -- Add occ info to tpl_vars, rhs + = Rule str act tpl_vars' tpl_args rhs' + where + (rhs_uds, rhs') = occAnal (initOccEnv (mkVarSet tpl_vars)) rhs + (_, tpl_vars') = tagBinders rhs_uds tpl_vars \end{code} @@ -101,22 +77,20 @@ occurAnalyseGlobalExpr expr In @occAnalTop@ we do indirection-shorting. That is, if we have this: - loc = + x_local = ... - exp = loc + x_exported = loc where exp is exported, and loc is not, then we replace it with this: - loc = exp - exp = + x_local = x_exported + x_exported = ... -Without this we never get rid of the exp = loc thing. -This save a gratuitous jump -(from \tr{x_exported} to \tr{x_local}), and makes strictness -information propagate better. -This used to happen in the final phase, but its tidier to do it here. - +Without this we never get rid of the x_exported = x_local thing. This +save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and +makes strictness information propagate better. This used to happen in +the final phase, but it's tidier to do it here. If more than one exported thing is equal to a local thing (i.e., the local thing really is shared), then we do one only: @@ -147,82 +121,94 @@ and it's dangerous to do this fiddling in STG land because we might elminate a binding that's mentioned in the unfolding for something. - \begin{code} -occAnalTop :: OccEnv -- What's in scope - -> [CoreBind] - -> (IdEnv BinderInfo, -- Occurrence info - IdEnv Id, -- Indirection elimination info - [CoreBind] - ) - -occAnalTop env [] = (emptyDetails, emptyVarEnv, []) - --- Special case for eliminating indirections --- Note: it's a shortcoming that this only works for --- non-recursive bindings. Elminating indirections --- makes perfect sense for recursive bindings too, but --- it's more complicated to implement, so I haven't done so - -occAnalTop env (bind : binds) - = case bind of - NonRec exported_id (Var local_id) | shortMeOut ind_env exported_id local_id - -> -- Aha! An indirection; let's eliminate it! - (scope_usage, ind_env', binds') +occurAnalyseBinds :: [CoreBind] -> [CoreBind] + +occurAnalyseBinds binds + = binds' + where + (_, _, binds') = go (initOccEnv emptyVarSet) binds + + go :: OccEnv -> [CoreBind] + -> (UsageDetails, -- Occurrence info + IdEnv Id, -- Indirection elimination info + -- Maps local-id -> exported-id, but it embodies + -- bindings of the form exported-id = local-id in + -- the argument to go + [CoreBind]) -- Occ-analysed bindings, less the exported-id=local-id ones + + go env [] = (emptyDetails, emptyVarEnv, []) + + go env (bind : binds) + = let + new_env = env `addNewCands` (bindersOf bind) + (scope_usage, ind_env, binds') = go new_env binds + (final_usage, new_binds) = occAnalBind env (zapBind ind_env bind) scope_usage + -- NB: I zap before occur-analysing, so + -- I don't need to worry about getting the + -- occ info on the new bindings right. + in + case bind of + NonRec exported_id (Var local_id) + | shortMeOut ind_env exported_id local_id + -- Special case for eliminating indirections + -- Note: it's a shortcoming that this only works for + -- non-recursive bindings. Elminating indirections + -- makes perfect sense for recursive bindings too, but + -- it's more complicated to implement, so I haven't done so + -> (scope_usage, ind_env', binds') where ind_env' = extendVarEnv ind_env local_id exported_id - other -> -- Ho ho! The normal case - (final_usage, ind_env, new_binds ++ binds') - where - (final_usage, new_binds) = occAnalBind env (zap_bind bind) scope_usage - where - new_env = env `addNewCands` (bindersOf bind) - (scope_usage, ind_env, binds') = occAnalTop new_env binds + other -> -- Ho ho! The normal case + (final_usage, ind_env, new_binds ++ binds') + - -- Deal with any indirections - zap_bind (NonRec bndr rhs) - | bndr `elemVarEnv` ind_env = Rec (zap (bndr,rhs)) +-- Deal with any indirections +zapBind ind_env (NonRec bndr rhs) + | bndr `elemVarEnv` ind_env = Rec (zap ind_env (bndr,rhs)) -- The Rec isn't strictly necessary, but it's convenient - zap_bind (Rec pairs) - | or [id `elemVarEnv` ind_env | (id,_) <- pairs] = Rec (concat (map zap pairs)) - - zap_bind bind = bind - - zap pair@(bndr,rhs) = case lookupVarEnv ind_env bndr of - Nothing -> [pair] - Just exported_id -> [(bndr, Var exported_id), - (exported_id, rhs)] - +zapBind ind_env (Rec pairs) + | or [id `elemVarEnv` ind_env | (id,_) <- pairs] = Rec (concat (map (zap ind_env) pairs)) + +zapBind ind_env bind = bind + +zap ind_env pair@(local_id,rhs) + = case lookupVarEnv ind_env local_id of + Nothing -> [pair] + Just exported_id -> [(local_id, Var exported_id), + (exported_id', rhs)] + where + exported_id' = modifyIdInfo (copyIdInfo (idInfo local_id)) exported_id + shortMeOut ind_env exported_id local_id - = isExported exported_id && -- Only if this is exported - - isLocallyDefined local_id && -- Only if this one is defined in this - -- module, so that we *can* change its - -- binding to be the exported thing! - - not (isExported local_id) && -- Only if this one is not itself exported, - -- since the transformation will nuke it - - not (omitIfaceSigForId local_id) && -- Don't do the transformation if rhs_id is - -- something like a constructor, whose - -- definition is implicitly exported and - -- which must not vanish. - -- To illustrate the preceding check consider - -- data T = MkT Int - -- mkT = MkT - -- f x = MkT (x+1) - -- Here, we'll make a local, non-exported, defn for MkT, and without the - -- above condition we'll transform it to: - -- mkT = \x. MkT [x] - -- f = \y. mkT (y+1) - -- This is bad because mkT will get the IdDetails of MkT, and won't - -- be exported. Also the code generator won't make a definition for - -- the MkT constructor. - -- Slightly gruesome, this. - - - not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for +-- The if-then-else stuff is just so I can get a pprTrace to see +-- how often I don't get shorting out becuase of IdInfo stuff + = if isExportedId exported_id && -- Only if this is exported + + isLocalId local_id && -- Only if this one is defined in this + -- module, so that we *can* change its + -- binding to be the exported thing! + + not (isExportedId local_id) && -- Only if this one is not itself exported, + -- since the transformation will nuke it + + not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for + then + True + +{- No longer needed + if shortableIdInfo (idInfo exported_id) -- Only if its IdInfo is 'shortable' + -- (see the defn of IdInfo.shortableIdInfo) + then True + else +#ifdef DEBUG + pprTrace "shortMeOut:" (ppr exported_id) +#endif + False +-} + else + False \end{code} @@ -238,7 +224,7 @@ Bindings \begin{code} type IdWithOccInfo = Id -- An Id with fresh PragmaInfo attached -type Node details = (details, Int, [Int]) -- The Ints are gotten from the Unique, +type Node details = (details, Unique, [Unique]) -- The Ints are gotten from the Unique, -- which is gotten from the Id. type Details1 = (Id, UsageDetails, CoreExpr) type Details2 = (IdWithOccInfo, CoreExpr) @@ -260,7 +246,7 @@ occAnalBind env (NonRec binder rhs) body_usage where (final_body_usage, tagged_binder) = tagBinder body_usage binder - (rhs_usage, rhs') = occAnalRhs env binder rhs + (rhs_usage, rhs') = occAnalRhs env tagged_binder rhs \end{code} Dropping dead code for recursive bindings is done in a very simple way: @@ -302,15 +288,13 @@ It isn't easy to do a perfect job in one blow. Consider occAnalBind env (Rec pairs) body_usage = foldr (_scc_ "occAnalBind.dofinal" do_final_bind) (body_usage, []) sccs where - pp_item (_, bndr, _) = ppr bndr - binders = map fst pairs - new_env = env `addNewCands` binders + rhs_env = env `addNewCands` binders analysed_pairs :: [Details1] analysed_pairs = [ (bndr, rhs_usage, rhs') | (bndr, rhs) <- pairs, - let (rhs_usage, rhs') = occAnalRhs new_env bndr rhs + let (rhs_usage, rhs') = occAnalRhs rhs_env bndr rhs ] sccs :: [SCC (Node Details1)] @@ -320,7 +304,7 @@ occAnalBind env (Rec pairs) body_usage ---- stuff for dependency analysis of binds ------------------------------- edges :: [Node Details1] edges = _scc_ "occAnalBind.assoc" - [ (details, IBOX(u2i (idUnique id)), edges_from rhs_usage) + [ (details, idUnique id, edges_from rhs_usage) | details@(id, rhs_usage, rhs) <- analysed_pairs ] @@ -333,7 +317,7 @@ occAnalBind env (Rec pairs) body_usage -- maybeToBool (lookupVarEnv rhs_usage bndr)] -- which has n**2 cost, and this meant that edges_from alone -- consumed 10% of total runtime! - edges_from :: UsageDetails -> [Int] + edges_from :: UsageDetails -> [Unique] edges_from rhs_usage = _scc_ "occAnalBind.edges_from" keysUFM rhs_usage @@ -436,7 +420,7 @@ reOrderRec env (AcyclicSCC (bind, _, _)) = [bind] -- Common case of simple self-recursion reOrderRec env (CyclicSCC [bind]) - = [(setInlinePragma tagged_bndr IAmALoopBreaker, rhs)] + = [(setIdOccInfo tagged_bndr IAmALoopBreaker, rhs)] where ((tagged_bndr, rhs), _, _) = bind @@ -445,7 +429,7 @@ reOrderRec env (CyclicSCC (bind : binds)) -- do SCC analysis on the rest, and recursively sort them out concat (map (reOrderRec env) (stronglyConnCompR unchosen)) ++ - [(setInlinePragma tagged_bndr IAmALoopBreaker, rhs)] + [(setIdOccInfo tagged_bndr IAmALoopBreaker, rhs)] where (chosen_pair, unchosen) = choose_loop_breaker bind (score bind) [] binds @@ -467,22 +451,29 @@ reOrderRec env (CyclicSCC (bind : binds)) score :: Node Details2 -> Int -- Higher score => less likely to be picked as loop breaker score ((bndr, rhs), _, _) - | exprIsTrivial rhs && - not (isExported bndr) = 3 -- Practically certain to be inlined - | inlineCandidate bndr = 3 -- Likely to be inlined - | not_fun_ty (idType bndr) = 2 -- Data types help with cases - | not (isEmptySpecEnv (getIdSpecialisation bndr)) = 1 - -- Avoid things with a SpecEnv; we'd like - -- to take advantage of the SpecEnv in the subsequent bindings + | exprIsTrivial rhs = 4 -- Practically certain to be inlined + -- Used to have also: && not (isExportedId bndr) + -- But I found this sometimes cost an extra iteration when we have + -- rec { d = (a,b); a = ...df...; b = ...df...; df = d } + -- where df is the exported dictionary. Then df makes a really + -- bad choice for loop breaker + + | not_fun_ty (idType bndr) = 3 -- Data types help with cases + -- This used to have a lower score than inlineCandidate, but + -- it's *really* helpful if dictionaries get inlined fast, + -- so I'm experimenting with giving higher priority to data-typed things + + | inlineCandidate bndr rhs = 2 -- Likely to be inlined + + | not (isEmptyCoreRules (idSpecialisation bndr)) = 1 + -- Avoid things with specialisations; we'd like + -- to take advantage of them in the subsequent bindings + | otherwise = 0 - inlineCandidate :: Id -> Bool - inlineCandidate id - = case getInlinePragma id of - IWantToBeINLINEd -> True - IMustBeINLINEd -> True - ICanSafelyBeINLINEd _ _ -> True - other -> False + inlineCandidate :: Id -> CoreExpr -> Bool + inlineCandidate id (Note InlineMe _) = True + inlineCandidate id rhs = isOneOcc (idOccInfo id) -- Real example (the Enum Ordering instance from PrelBase): -- rec f = \ x -> case d of (p,q,r) -> p x @@ -494,9 +485,7 @@ reOrderRec env (CyclicSCC (bind : binds)) -- we didn't stupidly choose d as the loop breaker. -- But we won't because constructor args are marked "Many". - not_fun_ty ty = not (maybeToBool (splitFunTy_maybe rho_ty)) - where - (_, rho_ty) = splitForAllTys ty + not_fun_ty ty = not (isFunTy (dropForAlls ty)) \end{code} @occAnalRhs@ deals with the question of bindings where the Id is marked @@ -509,42 +498,49 @@ ToDo: try using the occurrence info for the inline'd binder. [March 97] We do the same for atomic RHSs. Reason: see notes with reOrderRec. [June 98, SLPJ] I've undone this change; I don't understand it. See notes with reOrderRec. -[March 98] A new wrinkle is that if the binder has specialisations inside -it then we count the specialised Ids as "extra rhs's". That way -the "parent" keeps the specialised "children" alive. If the parent -dies (because it isn't referenced any more), then the children will -die too unless they are already referenced directly. \begin{code} occAnalRhs :: OccEnv -> Id -> CoreExpr -- Binder and rhs + -- For non-recs the binder is alrady tagged + -- with occurrence info -> (UsageDetails, CoreExpr) -{- DELETED SLPJ June 98: seems quite bogus to me -occAnalRhs env id (Var v) - | isCandidate env v - = (unitVarEnv v (markMany (funOccurrence 0)), Var v) - - | otherwise - = (emptyDetails, Var v) --} - occAnalRhs env id rhs - | idWantsToBeINLINEd id - = (mapVarEnv markMany total_usage, rhs') - - | otherwise - = (total_usage, rhs') - + = (final_usage, rhs') where - (rhs_usage, rhs') = occAnal env rhs - lazy_rhs_usage = mapVarEnv markLazy rhs_usage - total_usage = foldVarSet add lazy_rhs_usage spec_ids - add v u = addOneOcc u v noBinderInfo -- Give a non-committal binder info - -- (i.e manyOcc) because many copies - -- of the specialised thing can appear - spec_ids = idSpecVars id -\end{code} + (rhs_usage, rhs') = occAnal ctxt rhs + ctxt | certainly_inline id = env + | otherwise = rhsCtxt env + -- Note that we generally use an rhsCtxt. This tells the occ anal n + -- that it's looking at an RHS, which has an effect in occAnalApp + -- + -- But there's a problem. Consider + -- x1 = a0 : [] + -- x2 = a1 : x1 + -- x3 = a2 : x2 + -- g = f x3 + -- First time round, it looks as if x1 and x2 occur as an arg of a + -- let-bound constructor ==> give them a many-occurrence. + -- But then x3 is inlined (unconditionally as it happens) and + -- next time round, x2 will be, and the next time round x1 will be + -- Result: multiple simplifier iterations. Sigh. + -- Crude solution: use rhsCtxt for things that occur just once... + + certainly_inline id = case idOccInfo id of + OneOcc in_lam one_br -> not in_lam && one_br + other -> False + + -- [March 98] A new wrinkle is that if the binder has specialisations inside + -- it then we count the specialised Ids as "extra rhs's". That way + -- the "parent" keeps the specialised "children" alive. If the parent + -- dies (because it isn't referenced any more), then the children will + -- die too unless they are already referenced directly. + + final_usage = foldVarSet add rhs_usage (idRuleVars id) + add v u = addOneOcc u v NoOccInfo -- Give a non-committal binder info + -- (i.e manyOcc) because many copies + -- of the specialised thing can appear \end{code} @@ -558,9 +554,19 @@ occAnal :: OccEnv occAnal env (Type t) = (emptyDetails, Type t) -occAnal env (Var v) - | isCandidate env v = (unitVarEnv v funOccZero, Var v) - | otherwise = (emptyDetails, Var v) +occAnal env (Var v) + = (var_uds, Var v) + where + var_uds | isCandidate env v = unitVarEnv v oneOcc + | otherwise = emptyDetails + + -- At one stage, I gathered the idRuleVars for v here too, + -- which in a way is the right thing to do. + -- But that went wrong right after specialisation, when + -- the *occurrences* of the overloaded function didn't have any + -- rules in them, so the *specialised* versions looked as if they + -- weren't used at all. + \end{code} We regard variables that occur as constructor arguments as "dangerousToDup": @@ -579,40 +585,15 @@ If we aren't careful we duplicate the (expensive x) call! Constructors are rather like lambdas in this way. \begin{code} - -- For NoRep literals we have to report an occurrence of - -- the things which tidyCore will later add, so that when - -- we are compiling the very module in which those thin-air Ids - -- are defined we have them in scope! -occAnal env expr@(Con (Literal lit) args) - = ASSERT( null args ) - (mk_lit_uds lit, expr) - where - mk_lit_uds (NoRepStr _ _) = try noRepStrIds - mk_lit_uds (NoRepInteger _ _) = try noRepIntegerIds - mk_lit_uds lit = emptyDetails - - try vs = foldr add emptyDetails vs - add v uds | isCandidate env v = extendVarEnv uds v funOccZero - | otherwise = uds - -occAnal env (Con con args) - = case mapAndUnzip (occAnal env) args of { (arg_uds_s, args') -> - let - arg_uds = foldr combineUsageDetails emptyDetails arg_uds_s - - -- We mark the free vars of the argument of a constructor as "many" - -- This means that nothing gets inlined into a constructor argument - -- position, which is what we want. Typically those constructor - -- arguments are just variables, or trivial expressions. - final_arg_uds = case con of - DataCon _ -> mapVarEnv markMany arg_uds - other -> arg_uds - in - (final_arg_uds, Con con args') - } +occAnal env expr@(Lit lit) = (emptyDetails, expr) \end{code} \begin{code} +occAnal env (Note InlineMe body) + = case occAnal env body of { (usage, body') -> + (mapVarEnv markMany usage, Note InlineMe body') + } + occAnal env (Note note@(SCC cc) body) = case occAnal env body of { (usage, body') -> (mapVarEnv markInsideSCC usage, Note note body') @@ -625,12 +606,17 @@ occAnal env (Note note body) \end{code} \begin{code} -occAnal env (App fun arg) - = case occAnal env fun of { (fun_usage, fun') -> - case occAnal env arg of { (arg_usage, arg') -> - (fun_usage `combineUsageDetails` mapVarEnv markLazy arg_usage, App fun' arg') - }} - +occAnal env app@(App fun arg) + = occAnalApp env (collectArgs app) False + +-- Ignore type variables altogether +-- (a) occurrences inside type lambdas only not marked as InsideLam +-- (b) type variables not in environment + +occAnal env expr@(Lam x body) | isTyVar x + = case occAnal env body of { (body_usage, body') -> + (body_usage, Lam x body') + } -- For value lambdas we do a special hack. Consider -- (\x. \y. ...x...) @@ -642,103 +628,284 @@ occAnal env (App fun arg) -- Then, the simplifier is careful when partially applying lambdas. occAnal env expr@(Lam _ _) - = case occAnal (env `addNewCands` binders) body of { (body_usage, body') -> + = case occAnal env_body body of { (body_usage, body') -> let (final_usage, tagged_binders) = tagBinders body_usage binders + -- URGH! Sept 99: we don't seem to be able to use binders' here, because + -- we get linear-typed things in the resulting program that we can't handle yet. + -- (e.g. PrelShow) TODO + + really_final_usage = if linear then + final_usage + else + mapVarEnv markInsideLam final_usage in - (mapVarEnv markInsideLam final_usage, + (really_final_usage, mkLams tagged_binders body') } where - (binders, body) = collectBinders expr - + (binders, body) = collectBinders expr + (linear, env1, _) = oneShotGroup env binders + env2 = env1 `addNewCands` binders -- Add in-scope binders + env_body = vanillaCtxt env2 -- Body is (no longer) an RhsContext occAnal env (Case scrut bndr alts) - = case mapAndUnzip (occAnalAlt alt_env) alts of { (alts_usage_s, alts') -> - case occAnal env scrut of { (scrut_usage, scrut') -> + = case mapAndUnzip (occAnalAlt alt_env bndr) alts of { (alts_usage_s, alts') -> + case occAnal (vanillaCtxt env) scrut of { (scrut_usage, scrut') -> + -- No need for rhsCtxt let alts_usage = foldr1 combineAltsUsageDetails alts_usage_s - (alts_usage1, tagged_bndr) = tagBinder alts_usage bndr + alts_usage' = addCaseBndrUsage alts_usage + (alts_usage1, tagged_bndr) = tagBinder alts_usage' bndr total_usage = scrut_usage `combineUsageDetails` alts_usage1 in total_usage `seq` (total_usage, Case scrut' tagged_bndr alts') }} where alt_env = env `addNewCand` bndr + -- The case binder gets a usage of either "many" or "dead", never "one". + -- Reason: we like to inline single occurrences, to eliminate a binding, + -- but inlining a case binder *doesn't* eliminate a binding. + -- We *don't* want to transform + -- case x of w { (p,q) -> f w } + -- into + -- case x of w { (p,q) -> f (p,q) } + addCaseBndrUsage usage = case lookupVarEnv usage bndr of + Nothing -> usage + Just occ -> extendVarEnv usage bndr (markMany occ) + occAnal env (Let bind body) = case occAnal new_env body of { (body_usage, body') -> case occAnalBind env bind body_usage of { (final_usage, new_binds) -> (final_usage, mkLets new_binds body') }} where new_env = env `addNewCands` (bindersOf bind) + +occAnalArgs env args + = case mapAndUnzip (occAnal arg_env) args of { (arg_uds_s, args') -> + (foldr combineUsageDetails emptyDetails arg_uds_s, args')} + where + arg_env = vanillaCtxt env \end{code} +Applications are dealt with specially because we want +the "build hack" to work. + +\begin{code} +-- Hack for build, fold, runST +occAnalApp env (Var fun, args) is_rhs + = case args_stuff of { (args_uds, args') -> + let + -- We mark the free vars of the argument of a constructor or PAP + -- as "many", if it is the RHS of a let(rec). + -- This means that nothing gets inlined into a constructor argument + -- position, which is what we want. Typically those constructor + -- arguments are just variables, or trivial expressions. + -- + -- This is the *whole point* of the isRhsEnv predicate + final_args_uds + | isRhsEnv env, + isDataConWorkId fun || valArgCount args < idArity fun + = mapVarEnv markMany args_uds + | otherwise = args_uds + in + (fun_uds `combineUsageDetails` final_args_uds, mkApps (Var fun) args') } + where + fun_uniq = idUnique fun + + fun_uds | isCandidate env fun = unitVarEnv fun oneOcc + | otherwise = emptyDetails + + args_stuff | fun_uniq == buildIdKey = appSpecial env 2 [True,True] args + | fun_uniq == augmentIdKey = appSpecial env 2 [True,True] args + | fun_uniq == foldrIdKey = appSpecial env 3 [False,True] args + | fun_uniq == runSTRepIdKey = appSpecial env 2 [True] args + -- (foldr k z xs) may call k many times, but it never + -- shares a partial application of k; hence [False,True] + -- This means we can optimise + -- foldr (\x -> let v = ...x... in \y -> ...v...) z xs + -- by floating in the v + + | otherwise = occAnalArgs env args + + +occAnalApp env (fun, args) is_rhs + = case occAnal (addAppCtxt env args) fun of { (fun_uds, fun') -> + -- The addAppCtxt is a bit cunning. One iteration of the simplifier + -- often leaves behind beta redexs like + -- (\x y -> e) a1 a2 + -- Here we would like to mark x,y as one-shot, and treat the whole + -- thing much like a let. We do this by pushing some True items + -- onto the context stack. + + case occAnalArgs env args of { (args_uds, args') -> + let + final_uds = fun_uds `combineUsageDetails` args_uds + in + (final_uds, mkApps fun' args') }} + +appSpecial :: OccEnv + -> Int -> CtxtTy -- Argument number, and context to use for it + -> [CoreExpr] + -> (UsageDetails, [CoreExpr]) +appSpecial env n ctxt args + = go n args + where + arg_env = vanillaCtxt env + + go n [] = (emptyDetails, []) -- Too few args + + go 1 (arg:args) -- The magic arg + = case occAnal (setCtxt arg_env ctxt) arg of { (arg_uds, arg') -> + case occAnalArgs env args of { (args_uds, args') -> + (combineUsageDetails arg_uds args_uds, arg':args') }} + + go n (arg:args) + = case occAnal arg_env arg of { (arg_uds, arg') -> + case go (n-1) args of { (args_uds, args') -> + (combineUsageDetails arg_uds args_uds, arg':args') }} +\end{code} + + Case alternatives ~~~~~~~~~~~~~~~~~ +If the case binder occurs at all, the other binders effectively do too. +For example + case e of x { (a,b) -> rhs } +is rather like + let x = (a,b) in rhs +If e turns out to be (e1,e2) we indeed get something like + let a = e1; b = e2; x = (a,b) in rhs + \begin{code} -occAnalAlt env (con, bndrs, rhs) +occAnalAlt env case_bndr (con, bndrs, rhs) = case occAnal (env `addNewCands` bndrs) rhs of { (rhs_usage, rhs') -> let (final_usage, tagged_bndrs) = tagBinders rhs_usage bndrs + final_bndrs | case_bndr `elemVarEnv` final_usage = bndrs + | otherwise = tagged_bndrs + -- Leave the binders untagged if the case + -- binder occurs at all; see note above in - (final_usage, (con, tagged_bndrs, rhs')) } + (final_usage, (con, final_bndrs, rhs')) } \end{code} %************************************************************************ %* * -\subsection[OccurAnal-types]{Data types} +\subsection[OccurAnal-types]{OccEnv} %* * %************************************************************************ \begin{code} -data OccEnv = - OccEnv - Bool -- IgnoreINLINEPragma flag - -- False <=> OK to use INLINEPragma information - -- True <=> ignore INLINEPragma information +data OccEnv + = OccEnv IdSet -- In-scope Ids; we gather info about these only + OccEncl -- Enclosing context information + CtxtTy -- Tells about linearity + +-- OccEncl is used to control whether to inline into constructor arguments +-- For example: +-- x = (p,q) -- Don't inline p or q +-- y = /\a -> (p a, q a) -- Still don't inline p or q +-- z = f (p,q) -- Do inline p,q; it may make a rule fire +-- So OccEncl tells enought about the context to know what to do when +-- we encounter a contructor application or PAP. + +data OccEncl + = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda + -- Don't inline into constructor args here + | OccVanilla -- Argument of function, body of lambda, scruintee of case etc. + -- Do inline into constructor args here + +type CtxtTy = [Bool] + -- [] No info + -- + -- True:ctxt Analysing a function-valued expression that will be + -- applied just once + -- + -- False:ctxt Analysing a function-valued expression that may + -- be applied many times; but when it is, + -- the CtxtTy inside applies - (Id -> Bool) -- Tells whether an Id occurrence is interesting, - -- given the set of in-scope variables +initOccEnv :: VarSet -> OccEnv +initOccEnv vars = OccEnv vars OccRhs [] - IdSet -- In-scope Ids +isRhsEnv (OccEnv _ OccRhs _) = True +isRhsEnv (OccEnv _ OccVanilla _) = False +isCandidate :: OccEnv -> Id -> Bool +isCandidate (OccEnv cands encl _) id = id `elemVarSet` cands addNewCands :: OccEnv -> [Id] -> OccEnv -addNewCands (OccEnv ip ifun cands) ids - = OccEnv ip ifun (cands `unionVarSet` mkVarSet ids) +addNewCands (OccEnv cands encl ctxt) ids + = OccEnv (extendVarSetList cands ids) encl ctxt addNewCand :: OccEnv -> Id -> OccEnv -addNewCand (OccEnv ip ifun cands) id - = OccEnv ip ifun (extendVarSet cands id) +addNewCand (OccEnv cands encl ctxt) id + = OccEnv (extendVarSet cands id) encl ctxt -isCandidate :: OccEnv -> Id -> Bool -isCandidate (OccEnv _ ifun cands) id = id `elemVarSet` cands || ifun id +setCtxt :: OccEnv -> CtxtTy -> OccEnv +setCtxt (OccEnv cands encl _) ctxt = OccEnv cands encl ctxt + +oneShotGroup :: OccEnv -> [CoreBndr] -> (Bool, OccEnv, [CoreBndr]) + -- True <=> this is a one-shot linear lambda group + -- The [CoreBndr] are the binders. + + -- The result binders have one-shot-ness set that they might not have had originally. + -- This happens in (build (\cn -> e)). Here the occurrence analyser + -- linearity context knows that c,n are one-shot, and it records that fact in + -- the binder. This is useful to guide subsequent float-in/float-out tranformations + +oneShotGroup (OccEnv cands encl ctxt) bndrs + = case go ctxt bndrs [] of + (new_ctxt, new_bndrs) -> (all is_one_shot new_bndrs, OccEnv cands encl new_ctxt, new_bndrs) + where + is_one_shot b = isId b && isOneShotLambda b + + go ctxt [] rev_bndrs = (ctxt, reverse rev_bndrs) + + go (lin_ctxt:ctxt) (bndr:bndrs) rev_bndrs + | isId bndr = go ctxt bndrs (bndr':rev_bndrs) + where + bndr' | lin_ctxt = setOneShotLambda bndr + | otherwise = bndr + + go ctxt (bndr:bndrs) rev_bndrs = go ctxt bndrs (bndr:rev_bndrs) -type UsageDetails = IdEnv BinderInfo -- A finite map from ids to their usage +vanillaCtxt (OccEnv cands _ _) = OccEnv cands OccVanilla [] +rhsCtxt (OccEnv cands _ _) = OccEnv cands OccRhs [] + +addAppCtxt (OccEnv cands encl ctxt) args + = OccEnv cands encl (replicate (valArgCount args) True ++ ctxt) +\end{code} + +%************************************************************************ +%* * +\subsection[OccurAnal-types]{OccEnv} +%* * +%************************************************************************ + +\begin{code} +type UsageDetails = IdEnv OccInfo -- A finite map from ids to their usage combineUsageDetails, combineAltsUsageDetails :: UsageDetails -> UsageDetails -> UsageDetails combineUsageDetails usage1 usage2 - = plusVarEnv_C addBinderInfo usage1 usage2 + = plusVarEnv_C addOccInfo usage1 usage2 combineAltsUsageDetails usage1 usage2 - = plusVarEnv_C orBinderInfo usage1 usage2 + = plusVarEnv_C orOccInfo usage1 usage2 -addOneOcc :: UsageDetails -> Id -> BinderInfo -> UsageDetails +addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails addOneOcc usage id info - = plusVarEnv_C addBinderInfo usage (unitVarEnv id info) + = plusVarEnv_C addOccInfo usage (unitVarEnv id info) -- ToDo: make this more efficient emptyDetails = (emptyVarEnv :: UsageDetails) -unitDetails id info = (unitVarEnv id info :: UsageDetails) - usedIn :: Id -> UsageDetails -> Bool -v `usedIn` details = isExported v - || v `elemVarEnv` details - || isSpecPragmaId v +v `usedIn` details = isExportedId v || v `elemVarEnv` details tagBinders :: UsageDetails -- Of scope -> [Id] -- Binders @@ -748,7 +915,7 @@ tagBinders :: UsageDetails -- Of scope tagBinders usage binders = let usage' = usage `delVarEnvList` binders - uss = map (setBinderPrag usage) binders + uss = map (setBinderOcc usage) binders in usage' `seq` (usage', uss) @@ -760,58 +927,61 @@ tagBinder :: UsageDetails -- Of scope tagBinder usage binder = let usage' = usage `delVarEnv` binder - binder' = setBinderPrag usage binder + binder' = setBinderOcc usage binder in usage' `seq` (usage', binder') +setBinderOcc :: UsageDetails -> CoreBndr -> CoreBndr +setBinderOcc usage bndr + | isTyVar bndr = bndr + | isExportedId bndr = case idOccInfo bndr of + NoOccInfo -> bndr + other -> setIdOccInfo bndr NoOccInfo + -- Don't use local usage info for visible-elsewhere things + -- BUT *do* erase any IAmALoopBreaker annotation, because we're + -- about to re-generate it and it shouldn't be "sticky" + + | otherwise = setIdOccInfo bndr occ_info + where + occ_info = lookupVarEnv usage bndr `orElse` IAmDead +\end{code} -setBinderPrag :: UsageDetails -> CoreBndr -> CoreBndr -setBinderPrag usage bndr - | isTyVar bndr - = bndr - | otherwise - = case old_prag of - NoInlinePragInfo -> new_bndr - IAmDead -> new_bndr -- The next three are annotations - ICanSafelyBeINLINEd _ _ -> new_bndr -- from the previous iteration of - IAmALoopBreaker -> new_bndr -- the occurrence analyser +%************************************************************************ +%* * +\subsection{Operations over OccInfo} +%* * +%************************************************************************ - IAmASpecPragmaId -> bndr -- Don't ever overwrite or drop these as dead +\begin{code} +oneOcc :: OccInfo +oneOcc = OneOcc False True - other | its_now_dead -> new_bndr -- Overwrite the others iff it's now dead - | otherwise -> bndr +markMany, markInsideLam, markInsideSCC :: OccInfo -> OccInfo - where - old_prag = getInlinePragma bndr - new_bndr = setInlinePragma bndr new_prag +markMany IAmDead = IAmDead +markMany other = NoOccInfo - its_now_dead = case new_prag of - IAmDead -> True - other -> False +markInsideSCC occ = markMany occ - new_prag = occInfoToInlinePrag occ_info +markInsideLam (OneOcc _ one_br) = OneOcc True one_br +markInsideLam occ = occ - occ_info - | isExported bndr = noBinderInfo - -- Don't use local usage info for visible-elsewhere things - -- But NB that we do set NoInlinePragma for exported things - -- thereby nuking any IAmALoopBreaker from a previous pass. +addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo - | otherwise = case lookupVarEnv usage bndr of - Nothing -> deadOccurrence - Just info -> info +addOccInfo IAmDead info2 = info2 +addOccInfo info1 IAmDead = info1 +addOccInfo info1 info2 = NoOccInfo -markBinderInsideLambda :: CoreBndr -> CoreBndr -markBinderInsideLambda bndr - | isTyVar bndr - = bndr +-- (orOccInfo orig new) is used +-- when combining occurrence info from branches of a case - | otherwise - = case getInlinePragma bndr of - ICanSafelyBeINLINEd not_in_lam nalts - -> bndr `setInlinePragma` ICanSafelyBeINLINEd InsideLam nalts - other -> bndr +orOccInfo IAmDead info2 = info2 +orOccInfo info1 IAmDead = info1 +orOccInfo (OneOcc in_lam1 one_branch1) + (OneOcc in_lam2 one_branch2) + = OneOcc (in_lam1 || in_lam2) + False -- False, because it occurs in both branches -funOccZero = funOccurrence 0 +orOccInfo info1 info2 = NoOccInfo \end{code}