X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fspecialise%2FSpecialise.lhs;h=f6f85a114099ce6c91ae90cdc4b5aee8286a51ac;hp=3564c27380d1d7229bcb74dfc7a454718e6d6461;hb=707ea5881703d680155aab268bdbf7edc113e3b1;hpb=10f18550c3684368b9d8e5b7adcccc14994cf170 diff --git a/compiler/specialise/Specialise.lhs b/compiler/specialise/Specialise.lhs index 3564c27..f6f85a1 100644 --- a/compiler/specialise/Specialise.lhs +++ b/compiler/specialise/Specialise.lhs @@ -4,48 +4,35 @@ \section[Specialise]{Stamping out overloading, and (optionally) polymorphism} \begin{code} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - module Specialise ( specProgram ) where #include "HsVersions.h" -import DynFlags ( DynFlags, DynFlag(..) ) -import Id ( Id, idName, idType, mkUserLocal, - idInlinePragma, setInlinePragma ) -import TcType ( Type, mkTyVarTy, tcSplitSigmaTy, - tyVarsOfTypes, tyVarsOfTheta, isClassPred, - tcCmpType, isUnLiftedType - ) -import CoreSubst ( Subst, mkEmptySubst, extendTvSubstList, lookupIdSubst, - substBndr, substBndrs, substTy, substInScope, - cloneIdBndr, cloneIdBndrs, cloneRecIdBndrs - ) +import Id +import TcType +import CoreMonad +import CoreSubst +import CoreUnfold import VarSet import VarEnv import CoreSyn import Rules -import CoreUtils ( applyTypeToArgs, mkPiTypes ) +import CoreUtils ( exprIsTrivial, applyTypeToArgs, mkPiTypes ) import CoreFVs ( exprFreeVars, exprsFreeVars, idFreeVars ) -import CoreLint ( showPass, endPass ) -import UniqSupply ( UniqSupply, - UniqSM, initUs_, - MonadUnique(..) - ) +import UniqSupply ( UniqSM, initUs_, MonadUnique(..) ) import Name import MkId ( voidArgId, realWorldPrimId ) -import FiniteMap -import Maybes ( catMaybes, maybeToBool ) -import ErrUtils ( dumpIfSet_dyn ) +import Maybes ( catMaybes, isJust ) +import BasicTypes +import HscTypes import Bag import Util import Outputable import FastString +import Data.Map (Map) +import qualified Data.Map as Map +import qualified FiniteMap as Map \end{code} %************************************************************************ @@ -486,8 +473,6 @@ of this is permanently ruled out. Still, this is no great hardship, because we intend to eliminate overloading altogether anyway! - - A note about non-tyvar dictionaries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some Ids have types like @@ -512,7 +497,7 @@ Should we specialise wrt this compound-type dictionary? We used to say But it is simpler and more uniform to specialise wrt these dicts too; and in future GHC is likely to support full fledged type signatures like - f ;: Eq [(a,b)] => ... + f :: Eq [(a,b)] => ... %************************************************************************ @@ -575,34 +560,98 @@ Hence, the invariant is this: %************************************************************************ \begin{code} -specProgram :: DynFlags -> UniqSupply -> [CoreBind] -> IO [CoreBind] -specProgram dflags us binds = do - - showPass dflags "Specialise" +specProgram :: ModGuts -> CoreM ModGuts +specProgram guts + = do { hpt_rules <- getRuleBase + ; let local_rules = mg_rules guts + rule_base = extendRuleBaseList hpt_rules (mg_rules guts) - let binds' = initSM us (do (binds', uds') <- go binds - return (dumpAllDictBinds uds' binds')) + -- Specialise the bindings of this module + ; (binds', uds) <- runSpecM (go (mg_binds guts)) - endPass dflags "Specialise" Opt_D_dump_spec binds' + -- Specialise imported functions + ; (new_rules, spec_binds) <- specImports emptyVarSet rule_base uds - dumpIfSet_dyn dflags Opt_D_dump_rules "Top-level specialisations" - (pprRulesForUser (rulesOfBinds binds')) - - return binds' + ; return (guts { mg_binds = spec_binds ++ binds' + , mg_rules = local_rules ++ new_rules }) } where -- We need to start with a Subst that knows all the things -- that are in scope, so that the substitution engine doesn't -- accidentally re-use a unique that's already in use -- Easiest thing is to do it all at once, as if all the top-level -- decls were mutually recursive - top_subst = mkEmptySubst (mkInScopeSet (mkVarSet (bindersOfBinds binds))) + top_subst = mkEmptySubst $ mkInScopeSet $ mkVarSet $ + bindersOfBinds $ mg_binds guts go [] = return ([], emptyUDs) go (bind:binds) = do (binds', uds) <- go binds (bind', uds') <- specBind top_subst bind uds return (bind' ++ binds', uds') + +specImports :: VarSet -- Don't specialise these ones + -- See Note [Avoiding recursive specialisation] + -> RuleBase -- Rules from this module and the home package + -- (but not external packages, which can change) + -> UsageDetails -- Calls for imported things, and floating bindings + -> CoreM ( [CoreRule] -- New rules + , [CoreBind] ) -- Specialised bindings and floating bindings +specImports done rb uds + = do { let import_calls = varEnvElts (ud_calls uds) + ; (rules, spec_binds) <- go rb import_calls + ; return (rules, wrapDictBinds (ud_binds uds) spec_binds) } + where + go _ [] = return ([], []) + go rb (CIS fn calls_for_fn : other_calls) + = do { (rules1, spec_binds1) <- specImport done rb fn (Map.toList calls_for_fn) + ; (rules2, spec_binds2) <- go (extendRuleBaseList rb rules1) other_calls + ; return (rules1 ++ rules2, spec_binds1 ++ spec_binds2) } + +specImport :: VarSet -- Don't specialise these + -- See Note [Avoiding recursive specialisation] + -> RuleBase -- Rules from this module + -> Id -> [CallInfo] -- Imported function and calls for it + -> CoreM ( [CoreRule] -- New rules + , [CoreBind] ) -- Specialised bindings +specImport done rb fn calls_for_fn + | not (fn `elemVarSet` done) + , isInlinablePragma (idInlinePragma fn) + , Just rhs <- maybeUnfoldingTemplate (realIdUnfolding fn) + = do { -- Get rules from the external package state + -- We keep doing this in case we "page-fault in" + -- more rules as we go along + ; hsc_env <- getHscEnv + ; eps <- liftIO $ hscEPS hsc_env + ; let full_rb = unionRuleBase rb (eps_rule_base eps) + rules_for_fn = getRules full_rb fn + + ; (rules1, spec_pairs, uds) <- runSpecM $ + specCalls emptySubst rules_for_fn calls_for_fn fn rhs + ; let spec_binds1 = [NonRec b r | (b,r) <- spec_pairs] + -- After the rules kick in we may get recursion, but + -- we rely on a global GlomBinds to sort that out later + + -- Now specialise any cascaded calls + ; (rules2, spec_binds2) <- specImports (extendVarSet done fn) + (extendRuleBaseList rb rules1) + uds + + ; return (rules2 ++ rules1, spec_binds2 ++ spec_binds1) } + + | otherwise + = WARN( True, ptext (sLit "specImport discard") <+> ppr fn <+> ppr calls_for_fn ) + return ([], []) \end{code} +Avoiding recursive specialisation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we specialise 'f' we may find new overloaded calls to 'g', 'h' in +'f's RHS. So we want to specialise g,h. But we don't want to +specialise f any more! It's possible that f's RHS might have a +recursive yet-more-specialised call, so we'd diverge in that case. +And if the call is to the same type, one specialisation is enough. +Avoiding this recursive specialisation loop is the reason for the +'done' VarSet passed to specImports and specImport. + %************************************************************************ %* * \subsubsection{@specExpr@: the main function} @@ -611,22 +660,22 @@ specProgram dflags us binds = do \begin{code} specVar :: Subst -> Id -> CoreExpr -specVar subst v = lookupIdSubst subst v +specVar subst v = lookupIdSubst (text "specVar") subst v specExpr :: Subst -> CoreExpr -> SpecM (CoreExpr, UsageDetails) -- We carry a substitution down: --- a) we must clone any binding that might flaot outwards, +-- a) we must clone any binding that might float outwards, -- to avoid name clashes -- b) we carry a type substitution to use when analysing -- the RHS of specialised bindings (no type-let!) ---------------- First the easy cases -------------------- -specExpr subst (Type ty) = return (Type (substTy subst ty), emptyUDs) +specExpr subst (Type ty) = return (Type (CoreSubst.substTy subst ty), emptyUDs) specExpr subst (Var v) = return (specVar subst v, emptyUDs) specExpr _ (Lit lit) = return (Lit lit, emptyUDs) specExpr subst (Cast e co) = do (e', uds) <- specExpr subst e - return ((Cast e' (substTy subst co)), uds) + return ((Cast e' (CoreSubst.substTy subst co)), uds) specExpr subst (Note note body) = do (body', uds) <- specExpr subst body return (Note (specNote subst note) body', uds) @@ -641,35 +690,27 @@ specExpr subst expr@(App {}) return (App fun' arg', uds_arg `plusUDs` uds_app) go (Var f) args = case specVar subst f of - Var f' -> return (Var f', mkCallUDs subst f' args) + Var f' -> return (Var f', mkCallUDs f' args) e' -> return (e', emptyUDs) -- I don't expect this! go other _ = specExpr subst other ---------------- Lambda/case require dumping of usage details -------------------- specExpr subst e@(Lam _ _) = do (body', uds) <- specExpr subst' body - let (filtered_uds, body'') = dumpUDs bndrs' uds body' - return (mkLams bndrs' body'', filtered_uds) + let (free_uds, dumped_dbs) = dumpUDs bndrs' uds + return (mkLams bndrs' (wrapDictBindsE dumped_dbs body'), free_uds) where (bndrs, body) = collectBinders e (subst', bndrs') = substBndrs subst bndrs -- More efficient to collect a group of binders together all at once -- and we don't want to split a lambda group with dumped bindings -specExpr subst (Case scrut case_bndr ty alts) = do - (scrut', uds_scrut) <- specExpr subst scrut - (alts', uds_alts) <- mapAndCombineSM spec_alt alts - return (Case scrut' case_bndr' (substTy subst ty) alts', uds_scrut `plusUDs` uds_alts) - where - (subst_alt, case_bndr') = substBndr subst case_bndr - -- No need to clone case binder; it can't float like a let(rec) - - spec_alt (con, args, rhs) = do - (rhs', uds) <- specExpr subst_rhs rhs - let (uds', rhs'') = dumpUDs args uds rhs' - return ((con, args', rhs''), uds') - where - (subst_rhs, args') = substBndrs subst_alt args +specExpr subst (Case scrut case_bndr ty alts) + = do { (scrut', scrut_uds) <- specExpr subst scrut + ; (scrut'', case_bndr', alts', alts_uds) + <- specCase subst scrut' case_bndr alts + ; return (Case scrut'' case_bndr' (CoreSubst.substTy subst ty) alts' + , scrut_uds `plusUDs` alts_uds) } ---------------- Finally, let is the interesting case -------------------- specExpr subst (Let bind body) = do @@ -688,11 +729,117 @@ specExpr subst (Let bind body) = do -- Must apply the type substitution to coerceions specNote :: Subst -> Note -> Note specNote _ note = note + + +specCase :: Subst + -> CoreExpr -- Scrutinee, already done + -> Id -> [CoreAlt] + -> SpecM ( CoreExpr -- New scrutinee + , Id + , [CoreAlt] + , UsageDetails) +specCase subst scrut' case_bndr [(con, args, rhs)] + | isDictId case_bndr -- See Note [Floating dictionaries out of cases] + , interestingDict scrut' + , not (isDeadBinder case_bndr && null sc_args') + = do { (case_bndr_flt : sc_args_flt) <- mapM clone_me (case_bndr' : sc_args') + + ; let sc_rhss = [ Case (Var case_bndr_flt) case_bndr' (idType sc_arg') + [(con, args', Var sc_arg')] + | sc_arg' <- sc_args' ] + + -- Extend the substitution for RHS to map the *original* binders + -- to their floated verions. Attach an unfolding to these floated + -- binders so they look interesting to interestingDict + mb_sc_flts :: [Maybe DictId] + mb_sc_flts = map (lookupVarEnv clone_env) args' + clone_env = zipVarEnv sc_args' (zipWith add_unf sc_args_flt sc_rhss) + subst_prs = (case_bndr, Var (add_unf case_bndr_flt scrut')) + : [ (arg, Var sc_flt) + | (arg, Just sc_flt) <- args `zip` mb_sc_flts ] + subst_rhs' = extendIdSubstList subst_rhs subst_prs + + ; (rhs', rhs_uds) <- specExpr subst_rhs' rhs + ; let scrut_bind = mkDB (NonRec case_bndr_flt scrut') + case_bndr_set = unitVarSet case_bndr_flt + sc_binds = [(NonRec sc_arg_flt sc_rhs, case_bndr_set) + | (sc_arg_flt, sc_rhs) <- sc_args_flt `zip` sc_rhss ] + flt_binds = scrut_bind : sc_binds + (free_uds, dumped_dbs) = dumpUDs (case_bndr':args') rhs_uds + all_uds = flt_binds `addDictBinds` free_uds + alt' = (con, args', wrapDictBindsE dumped_dbs rhs') + ; return (Var case_bndr_flt, case_bndr', [alt'], all_uds) } + where + (subst_rhs, (case_bndr':args')) = substBndrs subst (case_bndr:args) + sc_args' = filter is_flt_sc_arg args' + + clone_me bndr = do { uniq <- getUniqueM + ; return (mkUserLocal occ uniq ty loc) } + where + name = idName bndr + ty = idType bndr + occ = nameOccName name + loc = getSrcSpan name + + add_unf sc_flt sc_rhs -- Sole purpose: make sc_flt respond True to interestingDictId + = setIdUnfolding sc_flt (mkSimpleUnfolding sc_rhs) + + arg_set = mkVarSet args' + is_flt_sc_arg var = isId var + && not (isDeadBinder var) + && isDictTy var_ty + && not (tyVarsOfType var_ty `intersectsVarSet` arg_set) + where + var_ty = idType var + + +specCase subst scrut case_bndr alts + = do { (alts', uds_alts) <- mapAndCombineSM spec_alt alts + ; return (scrut, case_bndr', alts', uds_alts) } + where + (subst_alt, case_bndr') = substBndr subst case_bndr + spec_alt (con, args, rhs) = do + (rhs', uds) <- specExpr subst_rhs rhs + let (free_uds, dumped_dbs) = dumpUDs (case_bndr' : args') uds + return ((con, args', wrapDictBindsE dumped_dbs rhs'), free_uds) + where + (subst_rhs, args') = substBndrs subst_alt args \end{code} +Note [Floating dictionaries out of cases] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + g = \d. case d of { MkD sc ... -> ...(f sc)... } +Naively we can't float d2's binding out of the case expression, +because 'sc' is bound by the case, and that in turn means we can't +specialise f, which seems a pity. + +So we invert the case, by floating out a binding +for 'sc_flt' thus: + sc_flt = case d of { MkD sc ... -> sc } +Now we can float the call instance for 'f'. Indeed this is just +what'll happen if 'sc' was originally bound with a let binding, +but case is more efficient, and necessary with equalities. So it's +good to work with both. + +You might think that this won't make any difference, because the +call instance will only get nuked by the \d. BUT if 'g' itself is +specialised, then transitively we should be able to specialise f. + +In general, given + case e of cb { MkD sc ... -> ...(f sc)... } +we transform to + let cb_flt = e + sc_flt = case cb_flt of { MkD sc ... -> sc } + in + case cb_flt of bg { MkD sc ... -> ....(f sc_flt)... } + +The "_flt" things are the floated binds; we use the current substitution +to substitute sc -> sc_flt in the RHS + %************************************************************************ %* * -\subsubsection{Dealing with a binding} + Dealing with a binding %* * %************************************************************************ @@ -703,215 +850,445 @@ specBind :: Subst -- Use this for RHSs -> SpecM ([CoreBind], -- New bindings UsageDetails) -- And info to pass upstream -specBind rhs_subst bind body_uds - = do { (bind', bind_uds) <- specBindItself rhs_subst bind (calls body_uds) - ; return (finishSpecBind bind' bind_uds body_uds) } - -finishSpecBind :: CoreBind -> UsageDetails -> UsageDetails -> ([CoreBind], UsageDetails) -finishSpecBind bind - (MkUD { dict_binds = rhs_dbs, calls = rhs_calls, ud_fvs = rhs_fvs }) - (MkUD { dict_binds = body_dbs, calls = body_calls, ud_fvs = body_fvs }) - | not (mkVarSet bndrs `intersectsVarSet` all_fvs) - -- Common case 1: the bound variables are not - -- mentioned in the dictionary bindings - = ([bind], MkUD { dict_binds = body_dbs `unionBags` rhs_dbs - -- It's important that the `unionBags` is this way round, - -- because body_uds may bind dictionaries that are - -- used in the calls passed to specDefn. So the - -- dictionary bindings in rhs_uds may mention - -- dictionaries bound in body_uds. - , calls = all_calls - , ud_fvs = all_fvs }) - - | case bind of { NonRec {} -> True; Rec {} -> False } - -- Common case 2: no specialisation happened, and binding - -- is non-recursive. But the binding may be - -- mentioned in body_dbs, so we should put it first - = ([], MkUD { dict_binds = rhs_dbs `unionBags` ((bind, b_fvs) `consBag` body_dbs) - , calls = all_calls - , ud_fvs = all_fvs `unionVarSet` b_fvs }) - - | otherwise -- General case: make a huge Rec (sigh) - = ([], MkUD { dict_binds = unitBag (Rec all_db_prs, all_db_fvs) - , calls = all_calls - , ud_fvs = all_fvs `unionVarSet` b_fvs }) - where - all_fvs = rhs_fvs `unionVarSet` body_fvs - all_calls = zapCalls bndrs (rhs_calls `unionCalls` body_calls) - - bndrs = bindersOf bind - b_fvs = bind_fvs bind - - (all_db_prs, all_db_fvs) = add (bind, b_fvs) $ - foldrBag add ([], emptyVarSet) $ - rhs_dbs `unionBags` body_dbs - add (NonRec b r, b_fvs) (prs, fvs) = ((b,r) : prs, b_fvs `unionVarSet` fvs) - add (Rec b_prs, b_fvs) (prs, fvs) = (b_prs ++ prs, b_fvs `unionVarSet` fvs) - -specBindItself :: Subst -> CoreBind -> CallDetails -> SpecM (CoreBind, UsageDetails) - --- specBindItself deals with the RHS, specialising it according --- to the calls found in the body (if any) -specBindItself rhs_subst (NonRec bndr rhs) call_info = do - ((bndr',rhs'), spec_defns, spec_uds) <- specDefn rhs_subst call_info (bndr,rhs) - let - new_bind | null spec_defns = NonRec bndr' rhs' - | otherwise = Rec ((bndr',rhs'):spec_defns) - -- bndr' mentions the spec_defns in its SpecEnv - -- Not sure why we couln't just put the spec_defns first - return (new_bind, spec_uds) - -specBindItself rhs_subst (Rec pairs) call_info = do - stuff <- mapM (specDefn rhs_subst call_info) pairs - let - (pairs', spec_defns_s, spec_uds_s) = unzip3 stuff - spec_defns = concat spec_defns_s - spec_uds = plusUDList spec_uds_s - new_bind = Rec (spec_defns ++ pairs') - return (new_bind, spec_uds) - - -specDefn :: Subst -- Subst to use for RHS - -> CallDetails -- Info on how it is used in its scope - -> (Id, CoreExpr) -- The thing being bound and its un-processed RHS - -> SpecM ((Id, CoreExpr), -- The thing and its processed RHS - -- the Id may now have specialisations attached +-- Returned UsageDetails: +-- No calls for binders of this bind +specBind rhs_subst (NonRec fn rhs) body_uds + = do { (rhs', rhs_uds) <- specExpr rhs_subst rhs + ; (fn', spec_defns, body_uds1) <- specDefn rhs_subst body_uds fn rhs + + ; let pairs = spec_defns ++ [(fn', rhs')] + -- fn' mentions the spec_defns in its rules, + -- so put the latter first + + combined_uds = body_uds1 `plusUDs` rhs_uds + -- This way round a call in rhs_uds of a function f + -- at type T will override a call of f at T in body_uds1; and + -- that is good because it'll tend to keep "earlier" calls + -- See Note [Specialisation of dictionary functions] + + (free_uds, dump_dbs, float_all) = dumpBindUDs [fn] combined_uds + -- See Note [From non-recursive to recursive] + + final_binds | isEmptyBag dump_dbs = [NonRec b r | (b,r) <- pairs] + | otherwise = [Rec (flattenDictBinds dump_dbs pairs)] + + ; if float_all then + -- Rather than discard the calls mentioning the bound variables + -- we float this binding along with the others + return ([], free_uds `snocDictBinds` final_binds) + else + -- No call in final_uds mentions bound variables, + -- so we can just leave the binding here + return (final_binds, free_uds) } + + +specBind rhs_subst (Rec pairs) body_uds + -- Note [Specialising a recursive group] + = do { let (bndrs,rhss) = unzip pairs + ; (rhss', rhs_uds) <- mapAndCombineSM (specExpr rhs_subst) rhss + ; let scope_uds = body_uds `plusUDs` rhs_uds + -- Includes binds and calls arising from rhss + + ; (bndrs1, spec_defns1, uds1) <- specDefns rhs_subst scope_uds pairs + + ; (bndrs3, spec_defns3, uds3) + <- if null spec_defns1 -- Common case: no specialisation + then return (bndrs1, [], uds1) + else do { -- Specialisation occurred; do it again + (bndrs2, spec_defns2, uds2) + <- specDefns rhs_subst uds1 (bndrs1 `zip` rhss) + ; return (bndrs2, spec_defns2 ++ spec_defns1, uds2) } + + ; let (final_uds, dumped_dbs, float_all) = dumpBindUDs bndrs uds3 + bind = Rec (flattenDictBinds dumped_dbs $ + spec_defns3 ++ zip bndrs3 rhss') + + ; if float_all then + return ([], final_uds `snocDictBind` bind) + else + return ([bind], final_uds) } + + +--------------------------- +specDefns :: Subst + -> UsageDetails -- Info on how it is used in its scope + -> [(Id,CoreExpr)] -- The things being bound and their un-processed RHS + -> SpecM ([Id], -- Original Ids with RULES added + [(Id,CoreExpr)], -- Extra, specialised bindings + UsageDetails) -- Stuff to fling upwards from the specialised versions + +-- Specialise a list of bindings (the contents of a Rec), but flowing usages +-- upwards binding by binding. Example: { f = ...g ...; g = ...f .... } +-- Then if the input CallDetails has a specialised call for 'g', whose specialisation +-- in turn generates a specialised call for 'f', we catch that in this one sweep. +-- But not vice versa (it's a fixpoint problem). + +specDefns _subst uds [] + = return ([], [], uds) +specDefns subst uds ((bndr,rhs):pairs) + = do { (bndrs1, spec_defns1, uds1) <- specDefns subst uds pairs + ; (bndr1, spec_defns2, uds2) <- specDefn subst uds1 bndr rhs + ; return (bndr1 : bndrs1, spec_defns1 ++ spec_defns2, uds2) } + +--------------------------- +specDefn :: Subst + -> UsageDetails -- Info on how it is used in its scope + -> Id -> CoreExpr -- The thing being bound and its un-processed RHS + -> SpecM (Id, -- Original Id with added RULES [(Id,CoreExpr)], -- Extra, specialised bindings - UsageDetails -- Stuff to fling upwards from the RHS and its - ) -- specialised versions - -specDefn subst calls (fn, rhs) + UsageDetails) -- Stuff to fling upwards from the specialised versions + +specDefn subst body_uds fn rhs + = do { let (body_uds_without_me, calls_for_me) = callsForMe fn body_uds + rules_for_me = idCoreRules fn + ; (rules, spec_defns, spec_uds) <- specCalls subst rules_for_me + calls_for_me fn rhs + ; return ( fn `addIdSpecialisations` rules + , spec_defns + , body_uds_without_me `plusUDs` spec_uds) } + -- It's important that the `plusUDs` is this way + -- round, because body_uds_without_me may bind + -- dictionaries that are used in calls_for_me passed + -- to specDefn. So the dictionary bindings in + -- spec_uds may mention dictionaries bound in + -- body_uds_without_me + +--------------------------- +specCalls :: Subst + -> [CoreRule] -- Existing RULES for the fn + -> [CallInfo] + -> Id -> CoreExpr + -> SpecM ([CoreRule], -- New RULES for the fn + [(Id,CoreExpr)], -- Extra, specialised bindings + UsageDetails) -- New usage details from the specialised RHSs + +-- This function checks existing rules, and does not create +-- duplicate ones. So the caller does not nneed to do this filtering. +-- See 'already_covered' + +specCalls subst rules_for_me calls_for_me fn rhs -- The first case is the interesting one | rhs_tyvars `lengthIs` n_tyvars -- Rhs of fn's defn has right number of big lambdas && rhs_ids `lengthAtLeast` n_dicts -- and enough dict args && notNull calls_for_me -- And there are some calls to specialise + && not (isNeverActive (idInlineActivation fn)) + -- Don't specialise NOINLINE things + -- See Note [Auto-specialisation and RULES] -- && not (certainlyWillInline (idUnfolding fn)) -- And it's not small -- See Note [Inline specialisation] for why we do not --- switch off specialisation for inline functions = do - = do - -- Specialise the body of the function - (rhs', rhs_uds) <- specExpr subst rhs - - -- Make a specialised version for each call in calls_for_me - stuff <- mapM spec_call calls_for_me - let - (spec_defns, spec_uds, spec_rules) = unzip3 stuff - - fn' = addIdSpecialisations fn spec_rules +-- switch off specialisation for inline functions - return ((fn',rhs'), - spec_defns, - rhs_uds `plusUDs` plusUDList spec_uds) + = -- pprTrace "specDefn: some" (ppr fn $$ ppr calls_for_me $$ ppr rules_for_me) $ + do { stuff <- mapM spec_call calls_for_me + ; let (spec_defns, spec_uds, spec_rules) = unzip3 (catMaybes stuff) + ; return (spec_rules, spec_defns, plusUDList spec_uds) } | otherwise -- No calls or RHS doesn't fit our preconceptions = WARN( notNull calls_for_me, ptext (sLit "Missed specialisation opportunity for") <+> ppr fn ) -- Note [Specialisation shape] - (do { (rhs', rhs_uds) <- specExpr subst rhs - ; return ((fn, rhs'), [], rhs_uds) }) + -- pprTrace "specDefn: none" (ppr fn $$ ppr calls_for_me) $ + return ([], [], emptyUDs) where fn_type = idType fn + fn_arity = idArity fn + fn_unf = realIdUnfolding fn -- Ignore loop-breaker-ness here (tyvars, theta, _) = tcSplitSigmaTy fn_type n_tyvars = length tyvars n_dicts = length theta - inline_prag = idInlinePragma fn + inl_prag = idInlinePragma fn + inl_act = inlinePragmaActivation inl_prag + is_local = isLocalId fn - -- It's important that we "see past" any INLINE pragma - -- else we'll fail to specialise an INLINE thing - (inline_rhs, rhs_inside) = dropInline rhs - (rhs_tyvars, rhs_ids, rhs_body) = collectTyAndValBinders rhs_inside + -- Figure out whether the function has an INLINE pragma + -- See Note [Inline specialisations] - rhs_dicts = take n_dicts rhs_ids - body = mkLams (drop n_dicts rhs_ids) rhs_body + spec_arity = unfoldingArity fn_unf - n_dicts -- Arity of the *specialised* inline rule + + (rhs_tyvars, rhs_ids, rhs_body) = collectTyAndValBinders rhs + + rhs_dict_ids = take n_dicts rhs_ids + body = mkLams (drop n_dicts rhs_ids) rhs_body -- Glue back on the non-dict lambdas - calls_for_me = case lookupFM calls fn of - Nothing -> [] - Just cs -> fmToList cs + already_covered :: [CoreExpr] -> Bool + already_covered args -- Note [Specialisations already covered] + = isJust (lookupRule (const True) realIdUnfolding + (substInScope subst) + fn args rules_for_me) + + mk_ty_args :: [Maybe Type] -> [CoreExpr] + mk_ty_args call_ts = zipWithEqual "spec_call" mk_ty_arg rhs_tyvars call_ts + where + mk_ty_arg rhs_tyvar Nothing = Type (mkTyVarTy rhs_tyvar) + mk_ty_arg _ (Just ty) = Type ty ---------------------------------------------------------- -- Specialise to one particular call pattern - spec_call :: (CallKey, ([DictExpr], VarSet)) -- Call instance - -> SpecM ((Id,CoreExpr), -- Specialised definition - UsageDetails, -- Usage details from specialised body - CoreRule) -- Info for the Id's SpecEnv + spec_call :: CallInfo -- Call instance + -> SpecM (Maybe ((Id,CoreExpr), -- Specialised definition + UsageDetails, -- Usage details from specialised body + CoreRule)) -- Info for the Id's SpecEnv spec_call (CallKey call_ts, (call_ds, _)) - = ASSERT( call_ts `lengthIs` n_tyvars && call_ds `lengthIs` n_dicts ) do - -- Calls are only recorded for properly-saturated applications + = ASSERT( call_ts `lengthIs` n_tyvars && call_ds `lengthIs` n_dicts ) - -- Suppose f's defn is f = /\ a b c d -> \ d1 d2 -> rhs - -- Supppose the call is for f [Just t1, Nothing, Just t3, Nothing] [dx1, dx2] + -- Suppose f's defn is f = /\ a b c -> \ d1 d2 -> rhs + -- Supppose the call is for f [Just t1, Nothing, Just t3] [dx1, dx2] -- Construct the new binding - -- f1 = SUBST[a->t1,c->t3, d1->d1', d2->d2'] (/\ b d -> rhs) + -- f1 = SUBST[a->t1,c->t3, d1->d1', d2->d2'] (/\ b -> rhs) -- PLUS the usage-details -- { d1' = dx1; d2' = dx2 } - -- where d1', d2' are cloned versions of d1,d2, with the type substitution applied. + -- where d1', d2' are cloned versions of d1,d2, with the type substitution + -- applied. These auxiliary bindings just avoid duplication of dx1, dx2 -- -- Note that the substitution is applied to the whole thing. -- This is convenient, but just slightly fragile. Notably: - -- * There had better be no name clashes in a/b/c/d - -- - let - -- poly_tyvars = [b,d] in the example above + -- * There had better be no name clashes in a/b/c + do { let + -- poly_tyvars = [b] in the example above -- spec_tyvars = [a,c] - -- ty_args = [t1,b,t3,d] - poly_tyvars = [tv | (tv, Nothing) <- rhs_tyvars `zip` call_ts] - spec_tyvars = [tv | (tv, Just _) <- rhs_tyvars `zip` call_ts] - ty_args = zipWithEqual "spec_call" mk_ty_arg rhs_tyvars call_ts - where - mk_ty_arg rhs_tyvar Nothing = Type (mkTyVarTy rhs_tyvar) - mk_ty_arg _ (Just ty) = Type ty - - spec_ty_args = [ty | Just ty <- call_ts] - rhs_subst = extendTvSubstList subst (spec_tyvars `zip` spec_ty_args) - - (rhs_subst', rhs_dicts') <- cloneBinders rhs_subst rhs_dicts - let - inst_args = ty_args ++ map Var rhs_dicts' - - -- Figure out the type of the specialised function - body_ty = applyTypeToArgs rhs fn_type inst_args - (lam_args, app_args) -- Add a dummy argument if body_ty is unlifted - | isUnLiftedType body_ty -- C.f. WwLib.mkWorkerArgs - = (poly_tyvars ++ [voidArgId], poly_tyvars ++ [realWorldPrimId]) - | otherwise = (poly_tyvars, poly_tyvars) - spec_id_ty = mkPiTypes lam_args body_ty - - spec_f <- newIdSM fn spec_id_ty - (spec_rhs, rhs_uds) <- specExpr rhs_subst' (mkLams lam_args body) - let + -- ty_args = [t1,b,t3] + poly_tyvars = [tv | (tv, Nothing) <- rhs_tyvars `zip` call_ts] + spec_tv_binds = [(tv,ty) | (tv, Just ty) <- rhs_tyvars `zip` call_ts] + spec_ty_args = map snd spec_tv_binds + ty_args = mk_ty_args call_ts + rhs_subst = CoreSubst.extendTvSubstList subst spec_tv_binds + + ; (rhs_subst1, inst_dict_ids) <- newDictBndrs rhs_subst rhs_dict_ids + -- Clone rhs_dicts, including instantiating their types + + ; let (rhs_subst2, dx_binds) = bindAuxiliaryDicts rhs_subst1 $ + (my_zipEqual rhs_dict_ids inst_dict_ids call_ds) + inst_args = ty_args ++ map Var inst_dict_ids + + ; if already_covered inst_args then + return Nothing + else do + { -- Figure out the type of the specialised function + let body_ty = applyTypeToArgs rhs fn_type inst_args + (lam_args, app_args) -- Add a dummy argument if body_ty is unlifted + | isUnLiftedType body_ty -- C.f. WwLib.mkWorkerArgs + = (poly_tyvars ++ [voidArgId], poly_tyvars ++ [realWorldPrimId]) + | otherwise = (poly_tyvars, poly_tyvars) + spec_id_ty = mkPiTypes lam_args body_ty + + ; spec_f <- newSpecIdSM fn spec_id_ty + ; (spec_rhs, rhs_uds) <- specExpr rhs_subst2 (mkLams lam_args body) + ; let -- The rule to put in the function's specialisation is: - -- forall b,d, d1',d2'. f t1 b t3 d d1' d2' = f1 b d - rule_name = mkFastString ("SPEC " ++ showSDoc (ppr fn <+> ppr spec_ty_args)) - spec_env_rule = mkLocalRule - rule_name - inline_prag -- Note [Auto-specialisation and RULES] - (idName fn) - (poly_tyvars ++ rhs_dicts') - inst_args - (mkVarApps (Var spec_f) app_args) + -- forall b, d1',d2'. f t1 b t3 d1' d2' = f1 b + rule_name = mkFastString ("SPEC " ++ showSDoc (ppr fn <+> ppr spec_ty_args)) + spec_env_rule = mkRule True {- Auto generated -} is_local + rule_name + inl_act -- Note [Auto-specialisation and RULES] + (idName fn) + (poly_tyvars ++ inst_dict_ids) + inst_args + (mkVarApps (Var spec_f) app_args) -- Add the { d1' = dx1; d2' = dx2 } usage stuff - final_uds = foldr addDictBind rhs_uds (my_zipEqual "spec_call" rhs_dicts' call_ds) + final_uds = foldr consDictBind rhs_uds dx_binds + + -- Add an InlineRule if the parent has one + -- See Note [Inline specialisations] + spec_unf + = case inlinePragmaSpec inl_prag of + Inline -> mkInlineUnfolding (Just spec_arity) spec_rhs + Inlinable -> mkInlinableUnfolding spec_rhs + _ -> NoUnfolding + + -- Adding arity information just propagates it a bit faster + -- See Note [Arity decrease] in Simplify + -- Copy InlinePragma information from the parent Id. + -- So if f has INLINE[1] so does spec_f + spec_f_w_arity = spec_f `setIdArity` max 0 (fn_arity - n_dicts) + `setInlinePragma` inl_prag + `setIdUnfolding` spec_unf + + ; return (Just ((spec_f_w_arity, spec_rhs), final_uds, spec_env_rule)) } } + where + my_zipEqual xs ys zs + | debugIsOn && not (equalLength xs ys && equalLength ys zs) + = pprPanic "my_zipEqual" (vcat [ ppr xs, ppr ys + , ppr fn <+> ppr call_ts + , ppr (idType fn), ppr theta + , ppr n_dicts, ppr rhs_dict_ids + , ppr rhs]) + | otherwise = zip3 xs ys zs + +bindAuxiliaryDicts + :: Subst + -> [(DictId,DictId,CoreExpr)] -- (orig_dict, inst_dict, dx) + -> (Subst, -- Substitute for all orig_dicts + [CoreBind]) -- Auxiliary bindings +-- Bind any dictionary arguments to fresh names, to preserve sharing +-- Substitution already substitutes orig_dict -> inst_dict +bindAuxiliaryDicts subst triples = go subst [] triples + where + go subst binds [] = (subst, binds) + go subst binds ((d, dx_id, dx) : pairs) + | exprIsTrivial dx = go (extendIdSubst subst d dx) binds pairs + -- No auxiliary binding necessary + -- Note that we bind the *original* dict in the substitution, + -- overriding any d->dx_id binding put there by substBndrs + + | otherwise = go subst_w_unf (NonRec dx_id dx : binds) pairs + where + dx_id1 = dx_id `setIdUnfolding` mkSimpleUnfolding dx + subst_w_unf = extendIdSubst subst d (Var dx_id1) + -- Important! We're going to substitute dx_id1 for d + -- and we want it to look "interesting", else we won't gather *any* + -- consequential calls. E.g. + -- f d = ...g d.... + -- If we specialise f for a call (f (dfun dNumInt)), we'll get + -- a consequent call (g d') with an auxiliary definition + -- d' = df dNumInt + -- We want that consequent call to look interesting + -- + -- Again, note that we bind the *original* dict in the substitution, + -- overriding any d->dx_id binding put there by substBndrs +\end{code} - spec_pr | inline_rhs = (spec_f `setInlinePragma` inline_prag, Note InlineMe spec_rhs) - | otherwise = (spec_f, spec_rhs) +Note [From non-recursive to recursive] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Even in the non-recursive case, if any dict-binds depend on 'fn' we might +have built a recursive knot - return (spec_pr, final_uds, spec_env_rule) + f a d x = + MkUD { ud_binds = d7 = MkD ..f.. + , ud_calls = ...(f T d7)... } - where - my_zipEqual doc xs ys - | debugIsOn && not (equalLength xs ys) - = pprPanic "my_zipEqual" (vcat - [ ppr xs, ppr ys - , ppr fn <+> ppr call_ts - , ppr (idType fn), ppr theta - , ppr n_dicts, ppr rhs_dicts - , ppr rhs]) - | otherwise = zipEqual doc xs ys -\end{code} +The we generate + + Rec { fs x = [T/a, d7/d] + f a d x = + RULE f T _ = fs + d7 = ...f... } + +Here the recursion is only through the RULE. + + +Note [Specialisation of dictionary functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Here is a nasty example that bit us badly: see Trac #3591 + + dfun a d = MkD a d (meth d) + d4 = + d2 = dfun T d4 + d1 = $p1 d2 + d3 = dfun T d1 + +None of these definitions is recursive. What happened was that we +generated a specialisation: + + RULE forall d. dfun T d = dT + dT = (MkD a d (meth d)) [T/a, d1/d] + = MkD T d1 (meth d1) + +But now we use the RULE on the RHS of d2, to get + + d2 = dT = MkD d1 (meth d1) + d1 = $p1 d2 + +and now d1 is bottom! The problem is that when specialising 'dfun' we +should first dump "below" the binding all floated dictionary bindings +that mention 'dfun' itself. So d2 and d3 (and hence d1) must be +placed below 'dfun', and thus unavailable to it when specialising +'dfun'. That in turn means that the call (dfun T d1) must be +discarded. On the other hand, the call (dfun T d4) is fine, assuming +d4 doesn't mention dfun. + +But look at this: + + class C a where { foo,bar :: [a] -> [a] } + + instance C Int where + foo x = r_bar x + bar xs = reverse xs + + r_bar :: C a => [a] -> [a] + r_bar xs = bar (xs ++ xs) + +That translates to: + + r_bar a (c::C a) (xs::[a]) = bar a d (xs ++ xs) + + Rec { $fCInt :: C Int = MkC foo_help reverse + foo_help (xs::[Int]) = r_bar Int $fCInt xs } + +The call (r_bar $fCInt) mentions $fCInt, + which mentions foo_help, + which mentions r_bar +But we DO want to specialise r_bar at Int: + + Rec { $fCInt :: C Int = MkC foo_help reverse + foo_help (xs::[Int]) = r_bar Int $fCInt xs + + r_bar a (c::C a) (xs::[a]) = bar a d (xs ++ xs) + RULE r_bar Int _ = r_bar_Int + + r_bar_Int xs = bar Int $fCInt (xs ++ xs) + } + +Note that, because of its RULE, r_bar joins the recursive +group. (In this case it'll unravel a short moment later.) + + +Conclusion: we catch the nasty case using filter_dfuns in +callsForMe. To be honest I'm not 100% certain that this is 100% +right, but it works. Sigh. + + +Note [Specialising a recursive group] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + let rec { f x = ...g x'... + ; g y = ...f y'.... } + in f 'a' +Here we specialise 'f' at Char; but that is very likely to lead to +a specialisation of 'g' at Char. We must do the latter, else the +whole point of specialisation is lost. + +But we do not want to keep iterating to a fixpoint, because in the +presence of polymorphic recursion we might generate an infinite number +of specialisations. + +So we use the following heuristic: + * Arrange the rec block in dependency order, so far as possible + (the occurrence analyser already does this) + + * Specialise it much like a sequence of lets + + * Then go through the block a second time, feeding call-info from + the RHSs back in the bottom, as it were + +In effect, the ordering maxmimises the effectiveness of each sweep, +and we do just two sweeps. This should catch almost every case of +monomorphic recursion -- the exception could be a very knotted-up +recursion with multiple cycles tied up together. + +This plan is implemented in the Rec case of specBindItself. + +Note [Specialisations already covered] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We obviously don't want to generate two specialisations for the same +argument pattern. There are two wrinkles + +1. We do the already-covered test in specDefn, not when we generate +the CallInfo in mkCallUDs. We used to test in the latter place, but +we now iterate the specialiser somewhat, and the Id at the call site +might therefore not have all the RULES that we can see in specDefn + +2. What about two specialisations where the second is an *instance* +of the first? If the more specific one shows up first, we'll generate +specialisations for both. If the *less* specific one shows up first, +we *don't* currently generate a specialisation for the more specific +one. (See the call to lookupRule in already_covered.) Reasons: + (a) lookupRule doesn't say which matches are exact (bad reason) + (b) if the earlier specialisation is user-provided, it's + far from clear that we should auto-specialise further Note [Auto-specialisation and RULES] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -935,10 +1312,14 @@ also add RULE f g_spec = 0 But that's a bit complicated. For now we ask the programmer's help, -by *copying the INLINE activation pragma* to the auto-specialised rule. -So if g says {-# NOINLINE[2] g #-}, then the auto-spec rule will also -not be active until phase 2. +by *copying the INLINE activation pragma* to the auto-specialised +rule. So if g says {-# NOINLINE[2] g #-}, then the auto-spec rule +will also not be active until phase 2. And that's what programmers +should jolly well do anyway, even aside from specialisation, to ensure +that g doesn't inline too early. +This in turn means that the RULE would never fire for a NOINLINE +thing so not much point in generating a specialisation at all. Note [Specialisation shape] ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -951,7 +1332,7 @@ then its body must look like Reason: when specialising the body for a call (f ty dexp), we want to substitute dexp for d, and pick up specialised calls in the body of f. -This doesn't always work. One example I came across was htis: +This doesn't always work. One example I came across was this: newtype Gen a = MkGen{ unGen :: Int -> a } choose :: Eq a => a -> Gen a @@ -964,12 +1345,12 @@ It's a silly exapmle, but we get where choose doesn't have any dict arguments. Thus far I have not tried to fix this (wait till there's a real example). - Note [Inline specialisations] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We transfer to the specialised function any INLINE stuff from the -original. This means (a) the Activation in the IdInfo, and (b) any -InlineMe on the RHS. +original. This means + (a) the Activation for its inlining (from its InlinePragma) + (b) any InlineRule This is a change (Jun06). Previously the idea is that the point of inlining was precisely to specialise the function at its call site, @@ -988,14 +1369,6 @@ arguments alone are enough to specialise (even though the args are too boring to trigger inlining), and it's certainly better to call the specialised version. -A case in point is dictionary functions, which are current marked -INLINE, but which are worth specialising. - -\begin{code} -dropInline :: CoreExpr -> (Bool, CoreExpr) -dropInline (Note InlineMe rhs) = (True, rhs) -dropInline rhs = (False, rhs) -\end{code} %************************************************************************ %* * @@ -1006,24 +1379,24 @@ dropInline rhs = (False, rhs) \begin{code} data UsageDetails = MkUD { - dict_binds :: !(Bag DictBind), + ud_binds :: !(Bag DictBind), -- Floated dictionary bindings -- The order is important; -- in ds1 `union` ds2, bindings in ds2 can depend on those in ds1 -- (Remember, Bags preserve order in GHC.) - calls :: !CallDetails, + ud_calls :: !CallDetails - ud_fvs :: !VarSet -- A superset of the variables mentioned in - -- either dict_binds or calls + -- INVARIANT: suppose bs = bindersOf ud_binds + -- Then 'calls' may *mention* 'bs', + -- but there should be no calls *for* bs } instance Outputable UsageDetails where - ppr (MkUD { dict_binds = dbs, calls = calls, ud_fvs = fvs }) + ppr (MkUD { ud_binds = dbs, ud_calls = calls }) = ptext (sLit "MkUD") <+> braces (sep (punctuate comma [ptext (sLit "binds") <+> equals <+> ppr dbs, - ptext (sLit "calls") <+> equals <+> ppr calls, - ptext (sLit "fvs") <+> equals <+> ppr fvs])) + ptext (sLit "calls") <+> equals <+> ppr calls])) type DictBind = (CoreBind, VarSet) -- The set is the free vars of the binding @@ -1032,18 +1405,27 @@ type DictBind = (CoreBind, VarSet) type DictExpr = CoreExpr emptyUDs :: UsageDetails -emptyUDs = MkUD { dict_binds = emptyBag, calls = emptyFM, ud_fvs = emptyVarSet } +emptyUDs = MkUD { ud_binds = emptyBag, ud_calls = emptyVarEnv } ------------------------------------------------------------ -type CallDetails = FiniteMap Id CallInfo +type CallDetails = IdEnv CallInfoSet newtype CallKey = CallKey [Maybe Type] -- Nothing => unconstrained type argument -type CallInfo = FiniteMap CallKey - ([DictExpr], VarSet) -- Dict args and the vars of the whole - -- call (including tyvars) - -- [*not* include the main id itself, of course] - -- The finite maps eliminate duplicates - -- The list of types and dictionaries is guaranteed to - -- match the type of f + +-- CallInfo uses a Map, thereby ensuring that +-- we record only one call instance for any key +-- +-- The list of types and dictionaries is guaranteed to +-- match the type of f +data CallInfoSet = CIS Id (Map CallKey ([DictExpr], VarSet)) + -- Range is dict args and the vars of the whole + -- call (including tyvars) + -- [*not* include the main id itself, of course] + +type CallInfo = (CallKey, ([DictExpr], VarSet)) + +instance Outputable CallInfoSet where + ppr (CIS fn map) = hang (ptext (sLit "CIS") <+> ppr fn) + 2 (ppr map) instance Outputable CallKey where ppr (CallKey ts) = ppr ts @@ -1062,13 +1444,23 @@ instance Ord CallKey where cmp (Just t1) (Just t2) = tcCmpType t1 t2 unionCalls :: CallDetails -> CallDetails -> CallDetails -unionCalls c1 c2 = plusFM_C plusFM c1 c2 +unionCalls c1 c2 = plusVarEnv_C unionCallInfoSet c1 c2 +unionCallInfoSet :: CallInfoSet -> CallInfoSet -> CallInfoSet +unionCallInfoSet (CIS f calls1) (CIS _ calls2) = CIS f (calls1 `Map.union` calls2) + +callDetailsFVs :: CallDetails -> VarSet +callDetailsFVs calls = foldVarEnv (unionVarSet . callInfoFVs) emptyVarSet calls + +callInfoFVs :: CallInfoSet -> VarSet +callInfoFVs (CIS _ call_info) = Map.foldRight (\(_,fv) vs -> unionVarSet fv vs) emptyVarSet call_info + +------------------------------------------------------------ singleCall :: Id -> [Maybe Type] -> [DictExpr] -> UsageDetails singleCall id tys dicts - = MkUD {dict_binds = emptyBag, - calls = unitFM id (unitFM (CallKey tys) (dicts, call_fvs)), - ud_fvs = call_fvs } + = MkUD {ud_binds = emptyBag, + ud_calls = unitVarEnv id $ CIS id $ + Map.singleton (CallKey tys) (dicts, call_fvs) } where call_fvs = exprsFreeVars dicts `unionVarSet` tys_fvs tys_fvs = tyVarsOfTypes (catMaybes tys) @@ -1082,24 +1474,24 @@ singleCall id tys dicts -- -- We don't include the 'id' itself. -mkCallUDs :: Subst -> Id -> [CoreExpr] -> UsageDetails -mkCallUDs subst f args - | null theta +mkCallUDs :: Id -> [CoreExpr] -> UsageDetails +mkCallUDs f args + | not (want_calls_for f) -- Imported from elsewhere + || null theta -- Not overloaded || not (all isClassPred theta) -- Only specialise if all overloading is on class params. -- In ptic, with implicit params, the type args -- *don't* say what the value of the implicit param is! || not (spec_tys `lengthIs` n_tyvars) || not ( dicts `lengthIs` n_dicts) - || maybeToBool (lookupRule (\_act -> True) (substInScope subst) emptyRuleBase f args) - -- There's already a rule covering this call. A typical case - -- is where there's an explicit user-provided rule. Then - -- we don't want to create a specialised version - -- of the function that overlaps. - = emptyUDs -- Not overloaded, or no specialisation wanted + || not (any interestingDict dicts) -- Note [Interesting dictionary arguments] + -- See also Note [Specialisations already covered] + = -- pprTrace "mkCallUDs: discarding" (vcat [ppr f, ppr args, ppr n_tyvars, ppr n_dicts, ppr (map interestingDict dicts)]) + emptyUDs -- Not overloaded, or no specialisation wanted | otherwise - = singleCall f spec_tys dicts + = -- pprTrace "mkCallUDs: keeping" (vcat [ppr f, ppr args, ppr n_tyvars, ppr n_dicts, ppr (map interestingDict dicts)]) + singleCall f spec_tys dicts where (tyvars, theta, _) = tcSplitSigmaTy (idType f) constrained_tyvars = tyVarsOfTheta theta @@ -1113,21 +1505,44 @@ mkCallUDs subst f args | tyvar `elemVarSet` constrained_tyvars = Just ty | otherwise = Nothing ------------------------------------------------------------- + want_calls_for f = isLocalId f || isInlinablePragma (idInlinePragma f) +\end{code} + +Note [Interesting dictionary arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this + \a.\d:Eq a. let f = ... in ...(f d)... +There really is not much point in specialising f wrt the dictionary d, +because the code for the specialised f is not improved at all, because +d is lambda-bound. We simply get junk specialisations. + +What is "interesting"? Just that it has *some* structure. + +\begin{code} +interestingDict :: CoreExpr -> Bool +-- A dictionary argument is interesting if it has *some* structure +interestingDict (Var v) = hasSomeUnfolding (idUnfolding v) + || isDataConWorkId v +interestingDict (Type _) = False +interestingDict (App fn (Type _)) = interestingDict fn +interestingDict (Note _ a) = interestingDict a +interestingDict (Cast e _) = interestingDict e +interestingDict _ = True +\end{code} + +\begin{code} plusUDs :: UsageDetails -> UsageDetails -> UsageDetails -plusUDs (MkUD {dict_binds = db1, calls = calls1, ud_fvs = fvs1}) - (MkUD {dict_binds = db2, calls = calls2, ud_fvs = fvs2}) - = MkUD {dict_binds = d, calls = c, ud_fvs = fvs1 `unionVarSet` fvs2} - where - d = db1 `unionBags` db2 - c = calls1 `unionCalls` calls2 +plusUDs (MkUD {ud_binds = db1, ud_calls = calls1}) + (MkUD {ud_binds = db2, ud_calls = calls2}) + = MkUD { ud_binds = db1 `unionBags` db2 + , ud_calls = calls1 `unionCalls` calls2 } plusUDList :: [UsageDetails] -> UsageDetails plusUDList = foldr plusUDs emptyUDs --- zapCalls deletes calls to ids from uds -zapCalls :: [Id] -> CallDetails -> CallDetails -zapCalls ids calls = delListFromFM calls ids +----------------------------- +_dictBindBndrs :: Bag DictBind -> [Id] +_dictBindBndrs dbs = foldrBag ((++) . bindersOf . fst) [] dbs mkDB :: CoreBind -> DictBind mkDB bind = (bind, bind_fvs bind) @@ -1147,45 +1562,102 @@ pair_fvs (bndr, rhs) = exprFreeVars rhs `unionVarSet` idFreeVars bndr -- type T a = Int -- x :: T a = 3 -addDictBind :: (Id,CoreExpr) -> UsageDetails -> UsageDetails -addDictBind (dict,rhs) uds - = uds { dict_binds = db `consBag` dict_binds uds - , ud_fvs = ud_fvs uds `unionVarSet` fvs } +flattenDictBinds :: Bag DictBind -> [(Id,CoreExpr)] -> [(Id,CoreExpr)] +flattenDictBinds dbs pairs + = foldrBag add pairs dbs where - db@(_, fvs) = mkDB (NonRec dict rhs) + add (NonRec b r,_) pairs = (b,r) : pairs + add (Rec prs1, _) pairs = prs1 ++ pairs + +snocDictBinds :: UsageDetails -> [CoreBind] -> UsageDetails +-- Add ud_binds to the tail end of the bindings in uds +snocDictBinds uds dbs + = uds { ud_binds = ud_binds uds `unionBags` + foldr (consBag . mkDB) emptyBag dbs } + +consDictBind :: CoreBind -> UsageDetails -> UsageDetails +consDictBind bind uds = uds { ud_binds = mkDB bind `consBag` ud_binds uds } -dumpAllDictBinds :: UsageDetails -> [CoreBind] -> [CoreBind] -dumpAllDictBinds (MkUD {dict_binds = dbs}) binds +addDictBinds :: [DictBind] -> UsageDetails -> UsageDetails +addDictBinds binds uds = uds { ud_binds = listToBag binds `unionBags` ud_binds uds } + +snocDictBind :: UsageDetails -> CoreBind -> UsageDetails +snocDictBind uds bind = uds { ud_binds = ud_binds uds `snocBag` mkDB bind } + +wrapDictBinds :: Bag DictBind -> [CoreBind] -> [CoreBind] +wrapDictBinds dbs binds = foldrBag add binds dbs where add (bind,_) binds = bind : binds -dumpUDs :: [CoreBndr] - -> UsageDetails -> CoreExpr - -> (UsageDetails, CoreExpr) -dumpUDs bndrs (MkUD { dict_binds = orig_dbs - , calls = orig_calls - , ud_fvs = fvs}) body - = (new_uds, foldrBag add_let body dump_dbs) - -- This may delete fewer variables - -- than in priciple possible +wrapDictBindsE :: Bag DictBind -> CoreExpr -> CoreExpr +wrapDictBindsE dbs expr + = foldrBag add expr dbs where - new_uds = - MkUD { dict_binds = free_dbs - , calls = free_calls - , ud_fvs = fvs `minusVarSet` bndr_set} - + add (bind,_) expr = Let bind expr + +---------------------- +dumpUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, Bag DictBind) +-- Used at a lambda or case binder; just dump anything mentioning the binder +dumpUDs bndrs uds@(MkUD { ud_binds = orig_dbs, ud_calls = orig_calls }) + | null bndrs = (uds, emptyBag) -- Common in case alternatives + | otherwise = -- pprTrace "dumpUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $ + (free_uds, dump_dbs) + where + free_uds = MkUD { ud_binds = free_dbs, ud_calls = free_calls } + bndr_set = mkVarSet bndrs + (free_dbs, dump_dbs, dump_set) = splitDictBinds orig_dbs bndr_set + free_calls = deleteCallsMentioning dump_set $ -- Drop calls mentioning bndr_set on the floor + deleteCallsFor bndrs orig_calls -- Discard calls for bndr_set; there should be + -- no calls for any of the dicts in dump_dbs + +dumpBindUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, Bag DictBind, Bool) +-- Used at a lambda or case binder; just dump anything mentioning the binder +dumpBindUDs bndrs (MkUD { ud_binds = orig_dbs, ud_calls = orig_calls }) + = -- pprTrace "dumpBindUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $ + (free_uds, dump_dbs, float_all) + where + free_uds = MkUD { ud_binds = free_dbs, ud_calls = free_calls } bndr_set = mkVarSet bndrs - add_let (bind,_) body = Let bind body + (free_dbs, dump_dbs, dump_set) = splitDictBinds orig_dbs bndr_set + free_calls = deleteCallsFor bndrs orig_calls + float_all = dump_set `intersectsVarSet` callDetailsFVs free_calls + +callsForMe :: Id -> UsageDetails -> (UsageDetails, [CallInfo]) +callsForMe fn (MkUD { ud_binds = orig_dbs, ud_calls = orig_calls }) + = -- pprTrace ("callsForMe") + -- (vcat [ppr fn, + -- text "Orig dbs =" <+> ppr (_dictBindBndrs orig_dbs), + -- text "Orig calls =" <+> ppr orig_calls, + -- text "Dep set =" <+> ppr dep_set, + -- text "Calls for me =" <+> ppr calls_for_me]) $ + (uds_without_me, calls_for_me) + where + uds_without_me = MkUD { ud_binds = orig_dbs, ud_calls = delVarEnv orig_calls fn } + calls_for_me = case lookupVarEnv orig_calls fn of + Nothing -> [] + Just (CIS _ calls) -> filter_dfuns (Map.toList calls) - (free_dbs, dump_dbs, dump_set) - = foldlBag dump_db (emptyBag, emptyBag, bndr_set) orig_dbs - -- Important that it's foldl not foldr; - -- we're accumulating the set of dumped ids in dump_set + dep_set = foldlBag go (unitVarSet fn) orig_dbs + go dep_set (db,fvs) | fvs `intersectsVarSet` dep_set + = extendVarSetList dep_set (bindersOf db) + | otherwise = dep_set - free_calls = filterCalls dump_set orig_calls + -- Note [Specialisation of dictionary functions] + filter_dfuns | isDFunId fn = filter ok_call + | otherwise = \cs -> cs - dump_db (free_dbs, dump_dbs, dump_idset) db@(bind, fvs) + ok_call (_, (_,fvs)) = not (fvs `intersectsVarSet` dep_set) + +---------------------- +splitDictBinds :: Bag DictBind -> IdSet -> (Bag DictBind, Bag DictBind, IdSet) +-- Returns (free_dbs, dump_dbs, dump_set) +splitDictBinds dbs bndr_set + = foldlBag split_db (emptyBag, emptyBag, bndr_set) dbs + -- Important that it's foldl not foldr; + -- we're accumulating the set of dumped ids in dump_set + where + split_db (free_dbs, dump_dbs, dump_idset) db@(bind, fvs) | dump_idset `intersectsVarSet` fvs -- Dump it = (free_dbs, dump_dbs `snocBag` db, extendVarSetList dump_idset (bindersOf bind)) @@ -1193,14 +1665,20 @@ dumpUDs bndrs (MkUD { dict_binds = orig_dbs | otherwise -- Don't dump it = (free_dbs `snocBag` db, dump_dbs, dump_idset) -filterCalls :: VarSet -> CallDetails -> CallDetails --- Remove any calls that mention the variables -filterCalls bs calls - = mapFM (\_ cs -> filter_calls cs) $ - filterFM (\k _ -> not (k `elemVarSet` bs)) calls + +---------------------- +deleteCallsMentioning :: VarSet -> CallDetails -> CallDetails +-- Remove calls *mentioning* bs +deleteCallsMentioning bs calls + = mapVarEnv filter_calls calls where - filter_calls :: CallInfo -> CallInfo - filter_calls = filterFM (\_ (_, fvs) -> not (fvs `intersectsVarSet` bs)) + filter_calls :: CallInfoSet -> CallInfoSet + filter_calls (CIS f calls) = CIS f (Map.filter keep_call calls) + keep_call (_, fvs) = not (fvs `intersectsVarSet` bs) + +deleteCallsFor :: [Id] -> CallDetails -> CallDetails +-- Remove calls *for* bs +deleteCallsFor bs calls = delVarEnvList calls bs \end{code} @@ -1213,8 +1691,9 @@ filterCalls bs calls \begin{code} type SpecM a = UniqSM a -initSM :: UniqSupply -> SpecM a -> a -initSM = initUs_ +runSpecM:: SpecM a -> CoreM a +runSpecM spec = do { us <- getUniqueSupplyM + ; return (initUs_ us spec) } mapAndCombineSM :: (a -> SpecM (b, UsageDetails)) -> [a] -> SpecM ([b], UsageDetails) mapAndCombineSM _ [] = return ([], emptyUDs) @@ -1235,19 +1714,28 @@ cloneBindSM subst (Rec pairs) = do let (subst', bndrs') = cloneRecIdBndrs subst us (map fst pairs) return (subst', subst', Rec (bndrs' `zip` map snd pairs)) -cloneBinders :: Subst -> [CoreBndr] -> SpecM (Subst, [CoreBndr]) -cloneBinders subst bndrs = do - us <- getUniqueSupplyM - return (cloneIdBndrs subst us bndrs) - -newIdSM :: Id -> Type -> SpecM Id -newIdSM old_id new_ty = do - uniq <- getUniqueM - let - -- Give the new Id a similar occurrence name to the old one - name = idName old_id - new_id = mkUserLocal (mkSpecOcc (nameOccName name)) uniq new_ty (getSrcSpan name) - return new_id +newDictBndrs :: Subst -> [CoreBndr] -> SpecM (Subst, [CoreBndr]) +-- Make up completely fresh binders for the dictionaries +-- Their bindings are going to float outwards +newDictBndrs subst bndrs + = do { bndrs' <- mapM new bndrs + ; let subst' = extendIdSubstList subst + [(d, Var d') | (d,d') <- bndrs `zip` bndrs'] + ; return (subst', bndrs' ) } + where + new b = do { uniq <- getUniqueM + ; let n = idName b + ty' = CoreSubst.substTy subst (idType b) + ; return (mkUserLocal (nameOccName n) uniq ty' (getSrcSpan n)) } + +newSpecIdSM :: Id -> Type -> SpecM Id + -- Give the new Id a similar occurrence name to the old one +newSpecIdSM old_id new_ty + = do { uniq <- getUniqueM + ; let name = idName old_id + new_occ = mkSpecOcc (nameOccName name) + new_id = mkUserLocal new_occ uniq new_ty (getSrcSpan name) + ; return new_id } \end{code}