#include "HsVersions.h"
-import DynFlags ( DynFlags, DynFlag(..) )
-import Id ( Id, idName, idType, mkUserLocal )
-import TcType ( Type, mkTyVarTy, tcSplitSigmaTy,
- tyVarsOfTypes, tyVarsOfTheta, isClassPred,
- tcCmpType, isUnLiftedType
- )
-import CoreSubst ( Subst, mkEmptySubst, extendTvSubstList, lookupIdSubst,
- substBndr, substBndrs, substTy, substInScope,
- cloneIdBndr, cloneIdBndrs, cloneRecIdBndrs
- )
+import Id
+import TcType
+import CoreMonad
+import CoreSubst
+import CoreUnfold
import VarSet
import VarEnv
import CoreSyn
-import CoreUtils ( applyTypeToArgs, mkPiTypes )
-import CoreFVs ( exprFreeVars, exprsFreeVars, idRuleVars )
-import CoreTidy ( tidyRules )
-import CoreLint ( showPass, endPass )
-import Rules ( addIdSpecialisations, mkLocalRule, lookupRule, emptyRuleBase, rulesOfBinds )
-import PprCore ( pprRules )
-import UniqSupply ( UniqSupply,
- UniqSM, initUs_, thenUs, returnUs, getUniqueUs,
- getUs, mapUs
- )
-import Name ( nameOccName, mkSpecOcc, getSrcLoc )
+import Rules
+import CoreUtils ( exprIsTrivial, applyTypeToArgs, mkPiTypes )
+import CoreFVs ( exprFreeVars, exprsFreeVars, idFreeVars )
+import UniqSupply ( UniqSM, initUs_, MonadUnique(..) )
+import Name
import MkId ( voidArgId, realWorldPrimId )
-import FiniteMap
-import Maybes ( catMaybes, maybeToBool )
-import ErrUtils ( dumpIfSet_dyn )
-import BasicTypes ( Activation( AlwaysActive ) )
+import Maybes ( catMaybes, isJust )
+import BasicTypes
+import HscTypes
import Bag
-import List ( partition )
-import Util ( zipEqual, zipWithEqual, cmpList, lengthIs,
- equalLength, lengthAtLeast, notNull )
+import Util
import Outputable
import FastString
-infixr 9 `thenSM`
+import Data.Map (Map)
+import qualified Data.Map as Map
+import qualified FiniteMap as Map
\end{code}
%************************************************************************
Still, this is no great hardship, because we intend to eliminate
overloading altogether anyway!
-
-
A note about non-tyvar dictionaries
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Some Ids have types like
But it is simpler and more uniform to specialise wrt these dicts too;
and in future GHC is likely to support full fledged type signatures
like
- f ;: Eq [(a,b)] => ...
+ f :: Eq [(a,b)] => ...
%************************************************************************
%************************************************************************
\begin{code}
-specProgram :: DynFlags -> UniqSupply -> [CoreBind] -> IO [CoreBind]
-specProgram dflags us binds
- = do
- showPass dflags "Specialise"
+specProgram :: ModGuts -> CoreM ModGuts
+specProgram guts
+ = do { hpt_rules <- getRuleBase
+ ; let local_rules = mg_rules guts
+ rule_base = extendRuleBaseList hpt_rules (mg_rules guts)
- let binds' = initSM us (go binds `thenSM` \ (binds', uds') ->
- returnSM (dumpAllDictBinds uds' binds'))
+ -- Specialise the bindings of this module
+ ; (binds', uds) <- runSpecM (go (mg_binds guts))
- endPass dflags "Specialise" Opt_D_dump_spec binds'
+ -- Specialise imported functions
+ ; (new_rules, spec_binds) <- specImports emptyVarSet rule_base uds
- dumpIfSet_dyn dflags Opt_D_dump_rules "Top-level specialisations"
- (pprRules (tidyRules emptyTidyEnv (rulesOfBinds binds')))
-
- return binds'
+ ; return (guts { mg_binds = spec_binds ++ binds'
+ , mg_rules = local_rules ++ new_rules }) }
where
-- We need to start with a Subst that knows all the things
-- that are in scope, so that the substitution engine doesn't
-- accidentally re-use a unique that's already in use
-- Easiest thing is to do it all at once, as if all the top-level
-- decls were mutually recursive
- top_subst = mkEmptySubst (mkInScopeSet (mkVarSet (bindersOfBinds binds)))
+ top_subst = mkEmptySubst $ mkInScopeSet $ mkVarSet $
+ bindersOfBinds $ mg_binds guts
+
+ go [] = return ([], emptyUDs)
+ go (bind:binds) = do (binds', uds) <- go binds
+ (bind', uds') <- specBind top_subst bind uds
+ return (bind' ++ binds', uds')
+
+specImports :: VarSet -- Don't specialise these ones
+ -- See Note [Avoiding recursive specialisation]
+ -> RuleBase -- Rules from this module and the home package
+ -- (but not external packages, which can change)
+ -> UsageDetails -- Calls for imported things, and floating bindings
+ -> CoreM ( [CoreRule] -- New rules
+ , [CoreBind] ) -- Specialised bindings and floating bindings
+specImports done rb uds
+ = do { let import_calls = varEnvElts (ud_calls uds)
+ ; (rules, spec_binds) <- go rb import_calls
+ ; return (rules, wrapDictBinds (ud_binds uds) spec_binds) }
+ where
+ go _ [] = return ([], [])
+ go rb (CIS fn calls_for_fn : other_calls)
+ = do { (rules1, spec_binds1) <- specImport done rb fn (Map.toList calls_for_fn)
+ ; (rules2, spec_binds2) <- go (extendRuleBaseList rb rules1) other_calls
+ ; return (rules1 ++ rules2, spec_binds1 ++ spec_binds2) }
+
+specImport :: VarSet -- Don't specialise these
+ -- See Note [Avoiding recursive specialisation]
+ -> RuleBase -- Rules from this module
+ -> Id -> [CallInfo] -- Imported function and calls for it
+ -> CoreM ( [CoreRule] -- New rules
+ , [CoreBind] ) -- Specialised bindings
+specImport done rb fn calls_for_fn
+ | not (fn `elemVarSet` done)
+ , isInlinablePragma (idInlinePragma fn)
+ , Just rhs <- maybeUnfoldingTemplate (realIdUnfolding fn)
+ = do { -- Get rules from the external package state
+ -- We keep doing this in case we "page-fault in"
+ -- more rules as we go along
+ ; hsc_env <- getHscEnv
+ ; eps <- liftIO $ hscEPS hsc_env
+ ; let full_rb = unionRuleBase rb (eps_rule_base eps)
+ rules_for_fn = getRules full_rb fn
+
+ ; (rules1, spec_pairs, uds) <- runSpecM $
+ specCalls emptySubst rules_for_fn calls_for_fn fn rhs
+ ; let spec_binds1 = [NonRec b r | (b,r) <- spec_pairs]
+ -- After the rules kick in we may get recursion, but
+ -- we rely on a global GlomBinds to sort that out later
+
+ -- Now specialise any cascaded calls
+ ; (rules2, spec_binds2) <- specImports (extendVarSet done fn)
+ (extendRuleBaseList rb rules1)
+ uds
+
+ ; return (rules2 ++ rules1, spec_binds2 ++ spec_binds1) }
- go [] = returnSM ([], emptyUDs)
- go (bind:binds) = go binds `thenSM` \ (binds', uds) ->
- specBind top_subst bind uds `thenSM` \ (bind', uds') ->
- returnSM (bind' ++ binds', uds')
+ | otherwise
+ = WARN( True, ptext (sLit "specImport discard") <+> ppr fn <+> ppr calls_for_fn )
+ return ([], [])
\end{code}
+Avoiding recursive specialisation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we specialise 'f' we may find new overloaded calls to 'g', 'h' in
+'f's RHS. So we want to specialise g,h. But we don't want to
+specialise f any more! It's possible that f's RHS might have a
+recursive yet-more-specialised call, so we'd diverge in that case.
+And if the call is to the same type, one specialisation is enough.
+Avoiding this recursive specialisation loop is the reason for the
+'done' VarSet passed to specImports and specImport.
+
%************************************************************************
%* *
\subsubsection{@specExpr@: the main function}
\begin{code}
specVar :: Subst -> Id -> CoreExpr
-specVar subst v = lookupIdSubst subst v
+specVar subst v = lookupIdSubst (text "specVar") subst v
specExpr :: Subst -> CoreExpr -> SpecM (CoreExpr, UsageDetails)
-- We carry a substitution down:
--- a) we must clone any binding that might flaot outwards,
+-- a) we must clone any binding that might float outwards,
-- to avoid name clashes
-- b) we carry a type substitution to use when analysing
-- the RHS of specialised bindings (no type-let!)
---------------- First the easy cases --------------------
-specExpr subst (Type ty) = returnSM (Type (substTy subst ty), emptyUDs)
-specExpr subst (Var v) = returnSM (specVar subst v, emptyUDs)
-specExpr subst (Lit lit) = returnSM (Lit lit, emptyUDs)
-
-specExpr subst (Note note body)
- = specExpr subst body `thenSM` \ (body', uds) ->
- returnSM (Note (specNote subst note) body', uds)
+specExpr subst (Type ty) = return (Type (CoreSubst.substTy subst ty), emptyUDs)
+specExpr subst (Var v) = return (specVar subst v, emptyUDs)
+specExpr _ (Lit lit) = return (Lit lit, emptyUDs)
+specExpr subst (Cast e co) = do
+ (e', uds) <- specExpr subst e
+ return ((Cast e' (CoreSubst.substTy subst co)), uds)
+specExpr subst (Note note body) = do
+ (body', uds) <- specExpr subst body
+ return (Note (specNote subst note) body', uds)
---------------- Applications might generate a call instance --------------------
-specExpr subst expr@(App fun arg)
+specExpr subst expr@(App {})
= go expr []
where
- go (App fun arg) args = specExpr subst arg `thenSM` \ (arg', uds_arg) ->
- go fun (arg':args) `thenSM` \ (fun', uds_app) ->
- returnSM (App fun' arg', uds_arg `plusUDs` uds_app)
+ go (App fun arg) args = do (arg', uds_arg) <- specExpr subst arg
+ (fun', uds_app) <- go fun (arg':args)
+ return (App fun' arg', uds_arg `plusUDs` uds_app)
go (Var f) args = case specVar subst f of
- Var f' -> returnSM (Var f', mkCallUDs subst f' args)
- e' -> returnSM (e', emptyUDs) -- I don't expect this!
- go other args = specExpr subst other
+ Var f' -> return (Var f', mkCallUDs f' args)
+ e' -> return (e', emptyUDs) -- I don't expect this!
+ go other _ = specExpr subst other
---------------- Lambda/case require dumping of usage details --------------------
-specExpr subst e@(Lam _ _)
- = specExpr subst' body `thenSM` \ (body', uds) ->
- let
- (filtered_uds, body'') = dumpUDs bndrs' uds body'
- in
- returnSM (mkLams bndrs' body'', filtered_uds)
+specExpr subst e@(Lam _ _) = do
+ (body', uds) <- specExpr subst' body
+ let (free_uds, dumped_dbs) = dumpUDs bndrs' uds
+ return (mkLams bndrs' (wrapDictBindsE dumped_dbs body'), free_uds)
where
(bndrs, body) = collectBinders e
(subst', bndrs') = substBndrs subst bndrs
-- More efficient to collect a group of binders together all at once
-- and we don't want to split a lambda group with dumped bindings
-specExpr subst (Case scrut case_bndr ty alts)
- = specExpr subst scrut `thenSM` \ (scrut', uds_scrut) ->
- mapAndCombineSM spec_alt alts `thenSM` \ (alts', uds_alts) ->
- returnSM (Case scrut' case_bndr' (substTy subst ty) alts', uds_scrut `plusUDs` uds_alts)
- where
- (subst_alt, case_bndr') = substBndr subst case_bndr
- -- No need to clone case binder; it can't float like a let(rec)
-
- spec_alt (con, args, rhs)
- = specExpr subst_rhs rhs `thenSM` \ (rhs', uds) ->
- let
- (uds', rhs'') = dumpUDs args uds rhs'
- in
- returnSM ((con, args', rhs''), uds')
- where
- (subst_rhs, args') = substBndrs subst_alt args
+specExpr subst (Case scrut case_bndr ty alts)
+ = do { (scrut', scrut_uds) <- specExpr subst scrut
+ ; (scrut'', case_bndr', alts', alts_uds)
+ <- specCase subst scrut' case_bndr alts
+ ; return (Case scrut'' case_bndr' (CoreSubst.substTy subst ty) alts'
+ , scrut_uds `plusUDs` alts_uds) }
---------------- Finally, let is the interesting case --------------------
-specExpr subst (Let bind body)
- = -- Clone binders
- cloneBindSM subst bind `thenSM` \ (rhs_subst, body_subst, bind') ->
-
- -- Deal with the body
- specExpr body_subst body `thenSM` \ (body', body_uds) ->
+specExpr subst (Let bind body) = do
+ -- Clone binders
+ (rhs_subst, body_subst, bind') <- cloneBindSM subst bind
+
+ -- Deal with the body
+ (body', body_uds) <- specExpr body_subst body
- -- Deal with the bindings
- specBind rhs_subst bind' body_uds `thenSM` \ (binds', uds) ->
+ -- Deal with the bindings
+ (binds', uds) <- specBind rhs_subst bind' body_uds
- -- All done
- returnSM (foldr Let body' binds', uds)
+ -- All done
+ return (foldr Let body' binds', uds)
-- Must apply the type substitution to coerceions
-specNote subst (Coerce t1 t2) = Coerce (substTy subst t1) (substTy subst t2)
-specNote subst note = note
+specNote :: Subst -> Note -> Note
+specNote _ note = note
+
+
+specCase :: Subst
+ -> CoreExpr -- Scrutinee, already done
+ -> Id -> [CoreAlt]
+ -> SpecM ( CoreExpr -- New scrutinee
+ , Id
+ , [CoreAlt]
+ , UsageDetails)
+specCase subst scrut' case_bndr [(con, args, rhs)]
+ | isDictId case_bndr -- See Note [Floating dictionaries out of cases]
+ , interestingDict scrut'
+ , not (isDeadBinder case_bndr && null sc_args')
+ = do { (case_bndr_flt : sc_args_flt) <- mapM clone_me (case_bndr' : sc_args')
+
+ ; let sc_rhss = [ Case (Var case_bndr_flt) case_bndr' (idType sc_arg')
+ [(con, args', Var sc_arg')]
+ | sc_arg' <- sc_args' ]
+
+ -- Extend the substitution for RHS to map the *original* binders
+ -- to their floated verions. Attach an unfolding to these floated
+ -- binders so they look interesting to interestingDict
+ mb_sc_flts :: [Maybe DictId]
+ mb_sc_flts = map (lookupVarEnv clone_env) args'
+ clone_env = zipVarEnv sc_args' (zipWith add_unf sc_args_flt sc_rhss)
+ subst_prs = (case_bndr, Var (add_unf case_bndr_flt scrut'))
+ : [ (arg, Var sc_flt)
+ | (arg, Just sc_flt) <- args `zip` mb_sc_flts ]
+ subst_rhs' = extendIdSubstList subst_rhs subst_prs
+
+ ; (rhs', rhs_uds) <- specExpr subst_rhs' rhs
+ ; let scrut_bind = mkDB (NonRec case_bndr_flt scrut')
+ case_bndr_set = unitVarSet case_bndr_flt
+ sc_binds = [(NonRec sc_arg_flt sc_rhs, case_bndr_set)
+ | (sc_arg_flt, sc_rhs) <- sc_args_flt `zip` sc_rhss ]
+ flt_binds = scrut_bind : sc_binds
+ (free_uds, dumped_dbs) = dumpUDs (case_bndr':args') rhs_uds
+ all_uds = flt_binds `addDictBinds` free_uds
+ alt' = (con, args', wrapDictBindsE dumped_dbs rhs')
+ ; return (Var case_bndr_flt, case_bndr', [alt'], all_uds) }
+ where
+ (subst_rhs, (case_bndr':args')) = substBndrs subst (case_bndr:args)
+ sc_args' = filter is_flt_sc_arg args'
+
+ clone_me bndr = do { uniq <- getUniqueM
+ ; return (mkUserLocal occ uniq ty loc) }
+ where
+ name = idName bndr
+ ty = idType bndr
+ occ = nameOccName name
+ loc = getSrcSpan name
+
+ add_unf sc_flt sc_rhs -- Sole purpose: make sc_flt respond True to interestingDictId
+ = setIdUnfolding sc_flt (mkSimpleUnfolding sc_rhs)
+
+ arg_set = mkVarSet args'
+ is_flt_sc_arg var = isId var
+ && not (isDeadBinder var)
+ && isDictTy var_ty
+ && not (tyVarsOfType var_ty `intersectsVarSet` arg_set)
+ where
+ var_ty = idType var
+
+
+specCase subst scrut case_bndr alts
+ = do { (alts', uds_alts) <- mapAndCombineSM spec_alt alts
+ ; return (scrut, case_bndr', alts', uds_alts) }
+ where
+ (subst_alt, case_bndr') = substBndr subst case_bndr
+ spec_alt (con, args, rhs) = do
+ (rhs', uds) <- specExpr subst_rhs rhs
+ let (free_uds, dumped_dbs) = dumpUDs (case_bndr' : args') uds
+ return ((con, args', wrapDictBindsE dumped_dbs rhs'), free_uds)
+ where
+ (subst_rhs, args') = substBndrs subst_alt args
\end{code}
+Note [Floating dictionaries out of cases]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ g = \d. case d of { MkD sc ... -> ...(f sc)... }
+Naively we can't float d2's binding out of the case expression,
+because 'sc' is bound by the case, and that in turn means we can't
+specialise f, which seems a pity.
+
+So we invert the case, by floating out a binding
+for 'sc_flt' thus:
+ sc_flt = case d of { MkD sc ... -> sc }
+Now we can float the call instance for 'f'. Indeed this is just
+what'll happen if 'sc' was originally bound with a let binding,
+but case is more efficient, and necessary with equalities. So it's
+good to work with both.
+
+You might think that this won't make any difference, because the
+call instance will only get nuked by the \d. BUT if 'g' itself is
+specialised, then transitively we should be able to specialise f.
+
+In general, given
+ case e of cb { MkD sc ... -> ...(f sc)... }
+we transform to
+ let cb_flt = e
+ sc_flt = case cb_flt of { MkD sc ... -> sc }
+ in
+ case cb_flt of bg { MkD sc ... -> ....(f sc_flt)... }
+
+The "_flt" things are the floated binds; we use the current substitution
+to substitute sc -> sc_flt in the RHS
+
%************************************************************************
%* *
-\subsubsection{Dealing with a binding}
+ Dealing with a binding
%* *
%************************************************************************
-> SpecM ([CoreBind], -- New bindings
UsageDetails) -- And info to pass upstream
-specBind rhs_subst bind body_uds
- = specBindItself rhs_subst bind (calls body_uds) `thenSM` \ (bind', bind_uds) ->
- let
- bndrs = bindersOf bind
- all_uds = zapCalls bndrs (body_uds `plusUDs` bind_uds)
- -- It's important that the `plusUDs` is this way round,
- -- because body_uds may bind dictionaries that are
- -- used in the calls passed to specDefn. So the
- -- dictionary bindings in bind_uds may mention
- -- dictionaries bound in body_uds.
- in
- case splitUDs bndrs all_uds of
-
- (_, ([],[])) -- This binding doesn't bind anything needed
- -- in the UDs, so put the binding here
- -- This is the case for most non-dict bindings, except
- -- for the few that are mentioned in a dict binding
- -- that is floating upwards in body_uds
- -> returnSM ([bind'], all_uds)
-
- (float_uds, (dict_binds, calls)) -- This binding is needed in the UDs, so float it out
- -> returnSM ([], float_uds `plusUDs` mkBigUD bind' dict_binds calls)
-
-
--- A truly gruesome function
-mkBigUD bind@(NonRec _ _) dbs calls
- = -- Common case: non-recursive and no specialisations
- -- (if there were any specialistions it would have been made recursive)
- MkUD { dict_binds = listToBag (mkDB bind : dbs),
- calls = listToCallDetails calls }
-
-mkBigUD bind dbs calls
- = -- General case
- MkUD { dict_binds = unitBag (mkDB (Rec (bind_prs bind ++ dbsToPairs dbs))),
- -- Make a huge Rec
- calls = listToCallDetails calls }
- where
- bind_prs (NonRec b r) = [(b,r)]
- bind_prs (Rec prs) = prs
-
- dbsToPairs [] = []
- dbsToPairs ((bind,_):dbs) = bind_prs bind ++ dbsToPairs dbs
-
--- specBindItself deals with the RHS, specialising it according
--- to the calls found in the body (if any)
-specBindItself rhs_subst (NonRec bndr rhs) call_info
- = specDefn rhs_subst call_info (bndr,rhs) `thenSM` \ ((bndr',rhs'), spec_defns, spec_uds) ->
- let
- new_bind | null spec_defns = NonRec bndr' rhs'
- | otherwise = Rec ((bndr',rhs'):spec_defns)
- -- bndr' mentions the spec_defns in its SpecEnv
- -- Not sure why we couln't just put the spec_defns first
- in
- returnSM (new_bind, spec_uds)
-
-specBindItself rhs_subst (Rec pairs) call_info
- = mapSM (specDefn rhs_subst call_info) pairs `thenSM` \ stuff ->
- let
- (pairs', spec_defns_s, spec_uds_s) = unzip3 stuff
- spec_defns = concat spec_defns_s
- spec_uds = plusUDList spec_uds_s
- new_bind = Rec (spec_defns ++ pairs')
- in
- returnSM (new_bind, spec_uds)
-
-
-specDefn :: Subst -- Subst to use for RHS
- -> CallDetails -- Info on how it is used in its scope
- -> (Id, CoreExpr) -- The thing being bound and its un-processed RHS
- -> SpecM ((Id, CoreExpr), -- The thing and its processed RHS
- -- the Id may now have specialisations attached
+-- Returned UsageDetails:
+-- No calls for binders of this bind
+specBind rhs_subst (NonRec fn rhs) body_uds
+ = do { (rhs', rhs_uds) <- specExpr rhs_subst rhs
+ ; (fn', spec_defns, body_uds1) <- specDefn rhs_subst body_uds fn rhs
+
+ ; let pairs = spec_defns ++ [(fn', rhs')]
+ -- fn' mentions the spec_defns in its rules,
+ -- so put the latter first
+
+ combined_uds = body_uds1 `plusUDs` rhs_uds
+ -- This way round a call in rhs_uds of a function f
+ -- at type T will override a call of f at T in body_uds1; and
+ -- that is good because it'll tend to keep "earlier" calls
+ -- See Note [Specialisation of dictionary functions]
+
+ (free_uds, dump_dbs, float_all) = dumpBindUDs [fn] combined_uds
+ -- See Note [From non-recursive to recursive]
+
+ final_binds | isEmptyBag dump_dbs = [NonRec b r | (b,r) <- pairs]
+ | otherwise = [Rec (flattenDictBinds dump_dbs pairs)]
+
+ ; if float_all then
+ -- Rather than discard the calls mentioning the bound variables
+ -- we float this binding along with the others
+ return ([], free_uds `snocDictBinds` final_binds)
+ else
+ -- No call in final_uds mentions bound variables,
+ -- so we can just leave the binding here
+ return (final_binds, free_uds) }
+
+
+specBind rhs_subst (Rec pairs) body_uds
+ -- Note [Specialising a recursive group]
+ = do { let (bndrs,rhss) = unzip pairs
+ ; (rhss', rhs_uds) <- mapAndCombineSM (specExpr rhs_subst) rhss
+ ; let scope_uds = body_uds `plusUDs` rhs_uds
+ -- Includes binds and calls arising from rhss
+
+ ; (bndrs1, spec_defns1, uds1) <- specDefns rhs_subst scope_uds pairs
+
+ ; (bndrs3, spec_defns3, uds3)
+ <- if null spec_defns1 -- Common case: no specialisation
+ then return (bndrs1, [], uds1)
+ else do { -- Specialisation occurred; do it again
+ (bndrs2, spec_defns2, uds2)
+ <- specDefns rhs_subst uds1 (bndrs1 `zip` rhss)
+ ; return (bndrs2, spec_defns2 ++ spec_defns1, uds2) }
+
+ ; let (final_uds, dumped_dbs, float_all) = dumpBindUDs bndrs uds3
+ bind = Rec (flattenDictBinds dumped_dbs $
+ spec_defns3 ++ zip bndrs3 rhss')
+
+ ; if float_all then
+ return ([], final_uds `snocDictBind` bind)
+ else
+ return ([bind], final_uds) }
+
+
+---------------------------
+specDefns :: Subst
+ -> UsageDetails -- Info on how it is used in its scope
+ -> [(Id,CoreExpr)] -- The things being bound and their un-processed RHS
+ -> SpecM ([Id], -- Original Ids with RULES added
+ [(Id,CoreExpr)], -- Extra, specialised bindings
+ UsageDetails) -- Stuff to fling upwards from the specialised versions
+
+-- Specialise a list of bindings (the contents of a Rec), but flowing usages
+-- upwards binding by binding. Example: { f = ...g ...; g = ...f .... }
+-- Then if the input CallDetails has a specialised call for 'g', whose specialisation
+-- in turn generates a specialised call for 'f', we catch that in this one sweep.
+-- But not vice versa (it's a fixpoint problem).
+
+specDefns _subst uds []
+ = return ([], [], uds)
+specDefns subst uds ((bndr,rhs):pairs)
+ = do { (bndrs1, spec_defns1, uds1) <- specDefns subst uds pairs
+ ; (bndr1, spec_defns2, uds2) <- specDefn subst uds1 bndr rhs
+ ; return (bndr1 : bndrs1, spec_defns1 ++ spec_defns2, uds2) }
+
+---------------------------
+specDefn :: Subst
+ -> UsageDetails -- Info on how it is used in its scope
+ -> Id -> CoreExpr -- The thing being bound and its un-processed RHS
+ -> SpecM (Id, -- Original Id with added RULES
[(Id,CoreExpr)], -- Extra, specialised bindings
- UsageDetails -- Stuff to fling upwards from the RHS and its
- ) -- specialised versions
-
-specDefn subst calls (fn, rhs)
+ UsageDetails) -- Stuff to fling upwards from the specialised versions
+
+specDefn subst body_uds fn rhs
+ = do { let (body_uds_without_me, calls_for_me) = callsForMe fn body_uds
+ rules_for_me = idCoreRules fn
+ ; (rules, spec_defns, spec_uds) <- specCalls subst rules_for_me
+ calls_for_me fn rhs
+ ; return ( fn `addIdSpecialisations` rules
+ , spec_defns
+ , body_uds_without_me `plusUDs` spec_uds) }
+ -- It's important that the `plusUDs` is this way
+ -- round, because body_uds_without_me may bind
+ -- dictionaries that are used in calls_for_me passed
+ -- to specDefn. So the dictionary bindings in
+ -- spec_uds may mention dictionaries bound in
+ -- body_uds_without_me
+
+---------------------------
+specCalls :: Subst
+ -> [CoreRule] -- Existing RULES for the fn
+ -> [CallInfo]
+ -> Id -> CoreExpr
+ -> SpecM ([CoreRule], -- New RULES for the fn
+ [(Id,CoreExpr)], -- Extra, specialised bindings
+ UsageDetails) -- New usage details from the specialised RHSs
+
+-- This function checks existing rules, and does not create
+-- duplicate ones. So the caller does not nneed to do this filtering.
+-- See 'already_covered'
+
+specCalls subst rules_for_me calls_for_me fn rhs
-- The first case is the interesting one
- | rhs_tyvars `lengthIs` n_tyvars -- Rhs of fn's defn has right number of big lambdas
- && rhs_bndrs `lengthAtLeast` n_dicts -- and enough dict args
+ | rhs_tyvars `lengthIs` n_tyvars -- Rhs of fn's defn has right number of big lambdas
+ && rhs_ids `lengthAtLeast` n_dicts -- and enough dict args
&& notNull calls_for_me -- And there are some calls to specialise
+ && not (isNeverActive (idInlineActivation fn))
+ -- Don't specialise NOINLINE things
+ -- See Note [Auto-specialisation and RULES]
+
+-- && not (certainlyWillInline (idUnfolding fn)) -- And it's not small
+-- See Note [Inline specialisation] for why we do not
+-- switch off specialisation for inline functions
--- At one time I tried not specialising small functions
--- but sometimes there are big functions marked INLINE
--- that we'd like to specialise. In particular, dictionary
--- functions, which Marcin is keen to inline
--- && not (certainlyWillInline fn) -- And it's not small
- -- If it's small, it's better just to inline
- -- it than to construct lots of specialisations
- = -- Specialise the body of the function
- specExpr subst rhs `thenSM` \ (rhs', rhs_uds) ->
-
- -- Make a specialised version for each call in calls_for_me
- mapSM spec_call calls_for_me `thenSM` \ stuff ->
- let
- (spec_defns, spec_uds, spec_rules) = unzip3 stuff
-
- fn' = addIdSpecialisations fn spec_rules
- in
- returnSM ((fn',rhs'),
- spec_defns,
- rhs_uds `plusUDs` plusUDList spec_uds)
+ = -- pprTrace "specDefn: some" (ppr fn $$ ppr calls_for_me $$ ppr rules_for_me) $
+ do { stuff <- mapM spec_call calls_for_me
+ ; let (spec_defns, spec_uds, spec_rules) = unzip3 (catMaybes stuff)
+ ; return (spec_rules, spec_defns, plusUDList spec_uds) }
| otherwise -- No calls or RHS doesn't fit our preconceptions
- = specExpr subst rhs `thenSM` \ (rhs', rhs_uds) ->
- returnSM ((fn, rhs'), [], rhs_uds)
+ = WARN( notNull calls_for_me, ptext (sLit "Missed specialisation opportunity for") <+> ppr fn )
+ -- Note [Specialisation shape]
+ -- pprTrace "specDefn: none" (ppr fn $$ ppr calls_for_me) $
+ return ([], [], emptyUDs)
where
fn_type = idType fn
+ fn_arity = idArity fn
+ fn_unf = realIdUnfolding fn -- Ignore loop-breaker-ness here
(tyvars, theta, _) = tcSplitSigmaTy fn_type
n_tyvars = length tyvars
n_dicts = length theta
+ inl_prag = idInlinePragma fn
+ inl_act = inlinePragmaActivation inl_prag
+ is_local = isLocalId fn
- (rhs_tyvars, rhs_ids, rhs_body)
- = collectTyAndValBinders (dropInline rhs)
- -- It's important that we "see past" any INLINE pragma
- -- else we'll fail to specialise an INLINE thing
+ -- Figure out whether the function has an INLINE pragma
+ -- See Note [Inline specialisations]
- rhs_dicts = take n_dicts rhs_ids
- rhs_bndrs = rhs_tyvars ++ rhs_dicts
- body = mkLams (drop n_dicts rhs_ids) rhs_body
+ spec_arity = unfoldingArity fn_unf - n_dicts -- Arity of the *specialised* inline rule
+
+ (rhs_tyvars, rhs_ids, rhs_body) = collectTyAndValBinders rhs
+
+ rhs_dict_ids = take n_dicts rhs_ids
+ body = mkLams (drop n_dicts rhs_ids) rhs_body
-- Glue back on the non-dict lambdas
- calls_for_me = case lookupFM calls fn of
- Nothing -> []
- Just cs -> fmToList cs
+ already_covered :: [CoreExpr] -> Bool
+ already_covered args -- Note [Specialisations already covered]
+ = isJust (lookupRule (const True) realIdUnfolding
+ (substInScope subst)
+ fn args rules_for_me)
+
+ mk_ty_args :: [Maybe Type] -> [CoreExpr]
+ mk_ty_args call_ts = zipWithEqual "spec_call" mk_ty_arg rhs_tyvars call_ts
+ where
+ mk_ty_arg rhs_tyvar Nothing = Type (mkTyVarTy rhs_tyvar)
+ mk_ty_arg _ (Just ty) = Type ty
----------------------------------------------------------
-- Specialise to one particular call pattern
- spec_call :: (CallKey, ([DictExpr], VarSet)) -- Call instance
- -> SpecM ((Id,CoreExpr), -- Specialised definition
- UsageDetails, -- Usage details from specialised body
- CoreRule) -- Info for the Id's SpecEnv
- spec_call (CallKey call_ts, (call_ds, call_fvs))
+ spec_call :: CallInfo -- Call instance
+ -> SpecM (Maybe ((Id,CoreExpr), -- Specialised definition
+ UsageDetails, -- Usage details from specialised body
+ CoreRule)) -- Info for the Id's SpecEnv
+ spec_call (CallKey call_ts, (call_ds, _))
= ASSERT( call_ts `lengthIs` n_tyvars && call_ds `lengthIs` n_dicts )
- -- Calls are only recorded for properly-saturated applications
- -- Suppose f's defn is f = /\ a b c d -> \ d1 d2 -> rhs
- -- Supppose the call is for f [Just t1, Nothing, Just t3, Nothing] [dx1, dx2]
+ -- Suppose f's defn is f = /\ a b c -> \ d1 d2 -> rhs
+ -- Supppose the call is for f [Just t1, Nothing, Just t3] [dx1, dx2]
-- Construct the new binding
- -- f1 = SUBST[a->t1,c->t3, d1->d1', d2->d2'] (/\ b d -> rhs)
+ -- f1 = SUBST[a->t1,c->t3, d1->d1', d2->d2'] (/\ b -> rhs)
-- PLUS the usage-details
-- { d1' = dx1; d2' = dx2 }
- -- where d1', d2' are cloned versions of d1,d2, with the type substitution applied.
+ -- where d1', d2' are cloned versions of d1,d2, with the type substitution
+ -- applied. These auxiliary bindings just avoid duplication of dx1, dx2
--
-- Note that the substitution is applied to the whole thing.
-- This is convenient, but just slightly fragile. Notably:
- -- * There had better be no name clashes in a/b/c/d
- --
- let
- -- poly_tyvars = [b,d] in the example above
+ -- * There had better be no name clashes in a/b/c
+ do { let
+ -- poly_tyvars = [b] in the example above
-- spec_tyvars = [a,c]
- -- ty_args = [t1,b,t3,d]
- poly_tyvars = [tv | (tv, Nothing) <- rhs_tyvars `zip` call_ts]
- spec_tyvars = [tv | (tv, Just _) <- rhs_tyvars `zip` call_ts]
- ty_args = zipWithEqual "spec_call" mk_ty_arg rhs_tyvars call_ts
- where
- mk_ty_arg rhs_tyvar Nothing = Type (mkTyVarTy rhs_tyvar)
- mk_ty_arg rhs_tyvar (Just ty) = Type ty
- rhs_subst = extendTvSubstList subst (spec_tyvars `zip` [ty | Just ty <- call_ts])
- in
- cloneBinders rhs_subst rhs_dicts `thenSM` \ (rhs_subst', rhs_dicts') ->
- let
- inst_args = ty_args ++ map Var rhs_dicts'
-
- -- Figure out the type of the specialised function
- body_ty = applyTypeToArgs rhs fn_type inst_args
- (lam_args, app_args) -- Add a dummy argument if body_ty is unlifted
- | isUnLiftedType body_ty -- C.f. WwLib.mkWorkerArgs
- = (poly_tyvars ++ [voidArgId], poly_tyvars ++ [realWorldPrimId])
- | otherwise = (poly_tyvars, poly_tyvars)
- spec_id_ty = mkPiTypes lam_args body_ty
- in
- newIdSM fn spec_id_ty `thenSM` \ spec_f ->
- specExpr rhs_subst' (mkLams lam_args body) `thenSM` \ (spec_rhs, rhs_uds) ->
- let
+ -- ty_args = [t1,b,t3]
+ poly_tyvars = [tv | (tv, Nothing) <- rhs_tyvars `zip` call_ts]
+ spec_tv_binds = [(tv,ty) | (tv, Just ty) <- rhs_tyvars `zip` call_ts]
+ spec_ty_args = map snd spec_tv_binds
+ ty_args = mk_ty_args call_ts
+ rhs_subst = CoreSubst.extendTvSubstList subst spec_tv_binds
+
+ ; (rhs_subst1, inst_dict_ids) <- newDictBndrs rhs_subst rhs_dict_ids
+ -- Clone rhs_dicts, including instantiating their types
+
+ ; let (rhs_subst2, dx_binds) = bindAuxiliaryDicts rhs_subst1 $
+ (my_zipEqual rhs_dict_ids inst_dict_ids call_ds)
+ inst_args = ty_args ++ map Var inst_dict_ids
+
+ ; if already_covered inst_args then
+ return Nothing
+ else do
+ { -- Figure out the type of the specialised function
+ let body_ty = applyTypeToArgs rhs fn_type inst_args
+ (lam_args, app_args) -- Add a dummy argument if body_ty is unlifted
+ | isUnLiftedType body_ty -- C.f. WwLib.mkWorkerArgs
+ = (poly_tyvars ++ [voidArgId], poly_tyvars ++ [realWorldPrimId])
+ | otherwise = (poly_tyvars, poly_tyvars)
+ spec_id_ty = mkPiTypes lam_args body_ty
+
+ ; spec_f <- newSpecIdSM fn spec_id_ty
+ ; (spec_rhs, rhs_uds) <- specExpr rhs_subst2 (mkLams lam_args body)
+ ; let
-- The rule to put in the function's specialisation is:
- -- forall b,d, d1',d2'. f t1 b t3 d d1' d2' = f1 b d
- spec_env_rule = mkLocalRule (mkFastString ("SPEC " ++ showSDoc (ppr fn)))
- AlwaysActive (idName fn)
- (poly_tyvars ++ rhs_dicts')
- inst_args
- (mkVarApps (Var spec_f) app_args)
+ -- forall b, d1',d2'. f t1 b t3 d1' d2' = f1 b
+ rule_name = mkFastString ("SPEC " ++ showSDoc (ppr fn <+> ppr spec_ty_args))
+ spec_env_rule = mkRule True {- Auto generated -} is_local
+ rule_name
+ inl_act -- Note [Auto-specialisation and RULES]
+ (idName fn)
+ (poly_tyvars ++ inst_dict_ids)
+ inst_args
+ (mkVarApps (Var spec_f) app_args)
-- Add the { d1' = dx1; d2' = dx2 } usage stuff
- final_uds = foldr addDictBind rhs_uds (my_zipEqual "spec_call" rhs_dicts' call_ds)
+ final_uds = foldr consDictBind rhs_uds dx_binds
+
+ -- Add an InlineRule if the parent has one
+ -- See Note [Inline specialisations]
+ spec_unf
+ = case inlinePragmaSpec inl_prag of
+ Inline -> mkInlineUnfolding (Just spec_arity) spec_rhs
+ Inlinable -> mkInlinableUnfolding spec_rhs
+ _ -> NoUnfolding
+
+ -- Adding arity information just propagates it a bit faster
+ -- See Note [Arity decrease] in Simplify
+ -- Copy InlinePragma information from the parent Id.
+ -- So if f has INLINE[1] so does spec_f
+ spec_f_w_arity = spec_f `setIdArity` max 0 (fn_arity - n_dicts)
+ `setInlinePragma` inl_prag
+ `setIdUnfolding` spec_unf
+
+ ; return (Just ((spec_f_w_arity, spec_rhs), final_uds, spec_env_rule)) } }
+ where
+ my_zipEqual xs ys zs
+ | debugIsOn && not (equalLength xs ys && equalLength ys zs)
+ = pprPanic "my_zipEqual" (vcat [ ppr xs, ppr ys
+ , ppr fn <+> ppr call_ts
+ , ppr (idType fn), ppr theta
+ , ppr n_dicts, ppr rhs_dict_ids
+ , ppr rhs])
+ | otherwise = zip3 xs ys zs
+
+bindAuxiliaryDicts
+ :: Subst
+ -> [(DictId,DictId,CoreExpr)] -- (orig_dict, inst_dict, dx)
+ -> (Subst, -- Substitute for all orig_dicts
+ [CoreBind]) -- Auxiliary bindings
+-- Bind any dictionary arguments to fresh names, to preserve sharing
+-- Substitution already substitutes orig_dict -> inst_dict
+bindAuxiliaryDicts subst triples = go subst [] triples
+ where
+ go subst binds [] = (subst, binds)
+ go subst binds ((d, dx_id, dx) : pairs)
+ | exprIsTrivial dx = go (extendIdSubst subst d dx) binds pairs
+ -- No auxiliary binding necessary
+ -- Note that we bind the *original* dict in the substitution,
+ -- overriding any d->dx_id binding put there by substBndrs
+
+ | otherwise = go subst_w_unf (NonRec dx_id dx : binds) pairs
+ where
+ dx_id1 = dx_id `setIdUnfolding` mkSimpleUnfolding dx
+ subst_w_unf = extendIdSubst subst d (Var dx_id1)
+ -- Important! We're going to substitute dx_id1 for d
+ -- and we want it to look "interesting", else we won't gather *any*
+ -- consequential calls. E.g.
+ -- f d = ...g d....
+ -- If we specialise f for a call (f (dfun dNumInt)), we'll get
+ -- a consequent call (g d') with an auxiliary definition
+ -- d' = df dNumInt
+ -- We want that consequent call to look interesting
+ --
+ -- Again, note that we bind the *original* dict in the substitution,
+ -- overriding any d->dx_id binding put there by substBndrs
+\end{code}
- -- NOTE: we don't add back in any INLINE pragma on the RHS, so even if
- -- the original function said INLINE, the specialised copies won't.
- -- The idea is that the point of inlining was precisely to specialise
- -- the function at its call site, and that's not so important for the
- -- specialised copies. But it still smells like an ad hoc decision.
+Note [From non-recursive to recursive]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in the non-recursive case, if any dict-binds depend on 'fn' we might
+have built a recursive knot
- in
- returnSM ((spec_f, spec_rhs),
- final_uds,
- spec_env_rule)
+ f a d x = <blah>
+ MkUD { ud_binds = d7 = MkD ..f..
+ , ud_calls = ...(f T d7)... }
- where
- my_zipEqual doc xs ys
- | not (equalLength xs ys) = pprPanic "my_zipEqual" (ppr xs $$ ppr ys $$ (ppr fn <+> ppr call_ts) $$ ppr rhs)
- | otherwise = zipEqual doc xs ys
+The we generate
+
+ Rec { fs x = <blah>[T/a, d7/d]
+ f a d x = <blah>
+ RULE f T _ = fs
+ d7 = ...f... }
+
+Here the recursion is only through the RULE.
+
+
+Note [Specialisation of dictionary functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Here is a nasty example that bit us badly: see Trac #3591
+
+ dfun a d = MkD a d (meth d)
+ d4 = <blah>
+ d2 = dfun T d4
+ d1 = $p1 d2
+ d3 = dfun T d1
+
+None of these definitions is recursive. What happened was that we
+generated a specialisation:
+
+ RULE forall d. dfun T d = dT
+ dT = (MkD a d (meth d)) [T/a, d1/d]
+ = MkD T d1 (meth d1)
+
+But now we use the RULE on the RHS of d2, to get
+
+ d2 = dT = MkD d1 (meth d1)
+ d1 = $p1 d2
+
+and now d1 is bottom! The problem is that when specialising 'dfun' we
+should first dump "below" the binding all floated dictionary bindings
+that mention 'dfun' itself. So d2 and d3 (and hence d1) must be
+placed below 'dfun', and thus unavailable to it when specialising
+'dfun'. That in turn means that the call (dfun T d1) must be
+discarded. On the other hand, the call (dfun T d4) is fine, assuming
+d4 doesn't mention dfun.
+
+But look at this:
+
+ class C a where { foo,bar :: [a] -> [a] }
+
+ instance C Int where
+ foo x = r_bar x
+ bar xs = reverse xs
+
+ r_bar :: C a => [a] -> [a]
+ r_bar xs = bar (xs ++ xs)
+
+That translates to:
+
+ r_bar a (c::C a) (xs::[a]) = bar a d (xs ++ xs)
+
+ Rec { $fCInt :: C Int = MkC foo_help reverse
+ foo_help (xs::[Int]) = r_bar Int $fCInt xs }
+
+The call (r_bar $fCInt) mentions $fCInt,
+ which mentions foo_help,
+ which mentions r_bar
+But we DO want to specialise r_bar at Int:
+
+ Rec { $fCInt :: C Int = MkC foo_help reverse
+ foo_help (xs::[Int]) = r_bar Int $fCInt xs
+
+ r_bar a (c::C a) (xs::[a]) = bar a d (xs ++ xs)
+ RULE r_bar Int _ = r_bar_Int
+
+ r_bar_Int xs = bar Int $fCInt (xs ++ xs)
+ }
+
+Note that, because of its RULE, r_bar joins the recursive
+group. (In this case it'll unravel a short moment later.)
+
+
+Conclusion: we catch the nasty case using filter_dfuns in
+callsForMe. To be honest I'm not 100% certain that this is 100%
+right, but it works. Sigh.
+
+
+Note [Specialising a recursive group]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ let rec { f x = ...g x'...
+ ; g y = ...f y'.... }
+ in f 'a'
+Here we specialise 'f' at Char; but that is very likely to lead to
+a specialisation of 'g' at Char. We must do the latter, else the
+whole point of specialisation is lost.
+
+But we do not want to keep iterating to a fixpoint, because in the
+presence of polymorphic recursion we might generate an infinite number
+of specialisations.
+
+So we use the following heuristic:
+ * Arrange the rec block in dependency order, so far as possible
+ (the occurrence analyser already does this)
+
+ * Specialise it much like a sequence of lets
+
+ * Then go through the block a second time, feeding call-info from
+ the RHSs back in the bottom, as it were
+
+In effect, the ordering maxmimises the effectiveness of each sweep,
+and we do just two sweeps. This should catch almost every case of
+monomorphic recursion -- the exception could be a very knotted-up
+recursion with multiple cycles tied up together.
+
+This plan is implemented in the Rec case of specBindItself.
+
+Note [Specialisations already covered]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We obviously don't want to generate two specialisations for the same
+argument pattern. There are two wrinkles
+
+1. We do the already-covered test in specDefn, not when we generate
+the CallInfo in mkCallUDs. We used to test in the latter place, but
+we now iterate the specialiser somewhat, and the Id at the call site
+might therefore not have all the RULES that we can see in specDefn
+
+2. What about two specialisations where the second is an *instance*
+of the first? If the more specific one shows up first, we'll generate
+specialisations for both. If the *less* specific one shows up first,
+we *don't* currently generate a specialisation for the more specific
+one. (See the call to lookupRule in already_covered.) Reasons:
+ (a) lookupRule doesn't say which matches are exact (bad reason)
+ (b) if the earlier specialisation is user-provided, it's
+ far from clear that we should auto-specialise further
+
+Note [Auto-specialisation and RULES]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider:
+ g :: Num a => a -> a
+ g = ...
+
+ f :: (Int -> Int) -> Int
+ f w = ...
+ {-# RULE f g = 0 #-}
+
+Suppose that auto-specialisation makes a specialised version of
+g::Int->Int That version won't appear in the LHS of the RULE for f.
+So if the specialisation rule fires too early, the rule for f may
+never fire.
+
+It might be possible to add new rules, to "complete" the rewrite system.
+Thus when adding
+ RULE forall d. g Int d = g_spec
+also add
+ RULE f g_spec = 0
+
+But that's a bit complicated. For now we ask the programmer's help,
+by *copying the INLINE activation pragma* to the auto-specialised
+rule. So if g says {-# NOINLINE[2] g #-}, then the auto-spec rule
+will also not be active until phase 2. And that's what programmers
+should jolly well do anyway, even aside from specialisation, to ensure
+that g doesn't inline too early.
+
+This in turn means that the RULE would never fire for a NOINLINE
+thing so not much point in generating a specialisation at all.
+
+Note [Specialisation shape]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We only specialise a function if it has visible top-level lambdas
+corresponding to its overloading. E.g. if
+ f :: forall a. Eq a => ....
+then its body must look like
+ f = /\a. \d. ...
+
+Reason: when specialising the body for a call (f ty dexp), we want to
+substitute dexp for d, and pick up specialised calls in the body of f.
+
+This doesn't always work. One example I came across was this:
+ newtype Gen a = MkGen{ unGen :: Int -> a }
+
+ choose :: Eq a => a -> Gen a
+ choose n = MkGen (\r -> n)
+
+ oneof = choose (1::Int)
+
+It's a silly exapmle, but we get
+ choose = /\a. g `cast` co
+where choose doesn't have any dict arguments. Thus far I have not
+tried to fix this (wait till there's a real example).
+
+Note [Inline specialisations]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We transfer to the specialised function any INLINE stuff from the
+original. This means
+ (a) the Activation for its inlining (from its InlinePragma)
+ (b) any InlineRule
+
+This is a change (Jun06). Previously the idea is that the point of
+inlining was precisely to specialise the function at its call site,
+and that's not so important for the specialised copies. But
+*pragma-directed* specialisation now takes place in the
+typechecker/desugarer, with manually specified INLINEs. The
+specialiation here is automatic. It'd be very odd if a function
+marked INLINE was specialised (because of some local use), and then
+forever after (including importing modules) the specialised version
+wasn't INLINEd. After all, the programmer said INLINE!
+
+You might wonder why we don't just not specialise INLINE functions.
+It's because even INLINE functions are sometimes not inlined, when
+they aren't applied to interesting arguments. But perhaps the type
+arguments alone are enough to specialise (even though the args are too
+boring to trigger inlining), and it's certainly better to call the
+specialised version.
-dropInline :: CoreExpr -> CoreExpr
-dropInline (Note InlineMe rhs) = rhs
-dropInline rhs = rhs
-\end{code}
%************************************************************************
%* *
\begin{code}
data UsageDetails
= MkUD {
- dict_binds :: !(Bag DictBind),
+ ud_binds :: !(Bag DictBind),
-- Floated dictionary bindings
-- The order is important;
-- in ds1 `union` ds2, bindings in ds2 can depend on those in ds1
-- (Remember, Bags preserve order in GHC.)
- calls :: !CallDetails
+ ud_calls :: !CallDetails
+
+ -- INVARIANT: suppose bs = bindersOf ud_binds
+ -- Then 'calls' may *mention* 'bs',
+ -- but there should be no calls *for* bs
}
+instance Outputable UsageDetails where
+ ppr (MkUD { ud_binds = dbs, ud_calls = calls })
+ = ptext (sLit "MkUD") <+> braces (sep (punctuate comma
+ [ptext (sLit "binds") <+> equals <+> ppr dbs,
+ ptext (sLit "calls") <+> equals <+> ppr calls]))
+
type DictBind = (CoreBind, VarSet)
-- The set is the free vars of the binding
-- both tyvars and dicts
type DictExpr = CoreExpr
-emptyUDs = MkUD { dict_binds = emptyBag, calls = emptyFM }
-
-type ProtoUsageDetails = ([DictBind],
- [(Id, CallKey, ([DictExpr], VarSet))]
- )
+emptyUDs :: UsageDetails
+emptyUDs = MkUD { ud_binds = emptyBag, ud_calls = emptyVarEnv }
------------------------------------------------------------
-type CallDetails = FiniteMap Id CallInfo
+type CallDetails = IdEnv CallInfoSet
newtype CallKey = CallKey [Maybe Type] -- Nothing => unconstrained type argument
-type CallInfo = FiniteMap CallKey
- ([DictExpr], VarSet) -- Dict args and the vars of the whole
- -- call (including tyvars)
- -- [*not* include the main id itself, of course]
- -- The finite maps eliminate duplicates
- -- The list of types and dictionaries is guaranteed to
- -- match the type of f
+
+-- CallInfo uses a Map, thereby ensuring that
+-- we record only one call instance for any key
+--
+-- The list of types and dictionaries is guaranteed to
+-- match the type of f
+data CallInfoSet = CIS Id (Map CallKey ([DictExpr], VarSet))
+ -- Range is dict args and the vars of the whole
+ -- call (including tyvars)
+ -- [*not* include the main id itself, of course]
+
+type CallInfo = (CallKey, ([DictExpr], VarSet))
+
+instance Outputable CallInfoSet where
+ ppr (CIS fn map) = hang (ptext (sLit "CIS") <+> ppr fn)
+ 2 (ppr map)
+
+instance Outputable CallKey where
+ ppr (CallKey ts) = ppr ts
-- Type isn't an instance of Ord, so that we can control which
-- instance we use. That's tiresome here. Oh well
instance Eq CallKey where
- k1 == k2 = case k1 `compare` k2 of { EQ -> True; other -> False }
+ k1 == k2 = case k1 `compare` k2 of { EQ -> True; _ -> False }
instance Ord CallKey where
compare (CallKey k1) (CallKey k2) = cmpList cmp k1 k2
where
- cmp Nothing Nothing = EQ
- cmp Nothing (Just t2) = LT
- cmp (Just t1) Nothing = GT
+ cmp Nothing Nothing = EQ
+ cmp Nothing (Just _) = LT
+ cmp (Just _) Nothing = GT
cmp (Just t1) (Just t2) = tcCmpType t1 t2
unionCalls :: CallDetails -> CallDetails -> CallDetails
-unionCalls c1 c2 = plusFM_C plusFM c1 c2
+unionCalls c1 c2 = plusVarEnv_C unionCallInfoSet c1 c2
+
+unionCallInfoSet :: CallInfoSet -> CallInfoSet -> CallInfoSet
+unionCallInfoSet (CIS f calls1) (CIS _ calls2) = CIS f (calls1 `Map.union` calls2)
+
+callDetailsFVs :: CallDetails -> VarSet
+callDetailsFVs calls = foldVarEnv (unionVarSet . callInfoFVs) emptyVarSet calls
+
+callInfoFVs :: CallInfoSet -> VarSet
+callInfoFVs (CIS _ call_info) = Map.foldRight (\(_,fv) vs -> unionVarSet fv vs) emptyVarSet call_info
-singleCall :: Id -> [Maybe Type] -> [DictExpr] -> CallDetails
+------------------------------------------------------------
+singleCall :: Id -> [Maybe Type] -> [DictExpr] -> UsageDetails
singleCall id tys dicts
- = unitFM id (unitFM (CallKey tys) (dicts, call_fvs))
+ = MkUD {ud_binds = emptyBag,
+ ud_calls = unitVarEnv id $ CIS id $
+ Map.singleton (CallKey tys) (dicts, call_fvs) }
where
call_fvs = exprsFreeVars dicts `unionVarSet` tys_fvs
tys_fvs = tyVarsOfTypes (catMaybes tys)
--
-- We don't include the 'id' itself.
-listToCallDetails calls
- = foldr (unionCalls . mk_call) emptyFM calls
- where
- mk_call (id, tys, dicts_w_fvs) = unitFM id (unitFM tys dicts_w_fvs)
- -- NB: the free vars of the call are provided
-
-callDetailsToList calls = [ (id,tys,dicts)
- | (id,fm) <- fmToList calls,
- (tys, dicts) <- fmToList fm
- ]
-
-mkCallUDs subst f args
- | null theta
+mkCallUDs :: Id -> [CoreExpr] -> UsageDetails
+mkCallUDs f args
+ | not (want_calls_for f) -- Imported from elsewhere
+ || null theta -- Not overloaded
|| not (all isClassPred theta)
-- Only specialise if all overloading is on class params.
-- In ptic, with implicit params, the type args
-- *don't* say what the value of the implicit param is!
|| not (spec_tys `lengthIs` n_tyvars)
|| not ( dicts `lengthIs` n_dicts)
- || maybeToBool (lookupRule (\act -> True) (substInScope subst) emptyRuleBase f args)
- -- There's already a rule covering this call. A typical case
- -- is where there's an explicit user-provided rule. Then
- -- we don't want to create a specialised version
- -- of the function that overlaps.
- = emptyUDs -- Not overloaded, or no specialisation wanted
+ || not (any interestingDict dicts) -- Note [Interesting dictionary arguments]
+ -- See also Note [Specialisations already covered]
+ = -- pprTrace "mkCallUDs: discarding" (vcat [ppr f, ppr args, ppr n_tyvars, ppr n_dicts, ppr (map interestingDict dicts)])
+ emptyUDs -- Not overloaded, or no specialisation wanted
| otherwise
- = MkUD {dict_binds = emptyBag,
- calls = singleCall f spec_tys dicts
- }
+ = -- pprTrace "mkCallUDs: keeping" (vcat [ppr f, ppr args, ppr n_tyvars, ppr n_dicts, ppr (map interestingDict dicts)])
+ singleCall f spec_tys dicts
where
(tyvars, theta, _) = tcSplitSigmaTy (idType f)
constrained_tyvars = tyVarsOfTheta theta
| tyvar `elemVarSet` constrained_tyvars = Just ty
| otherwise = Nothing
-------------------------------------------------------------
+ want_calls_for f = isLocalId f || isInlinablePragma (idInlinePragma f)
+\end{code}
+
+Note [Interesting dictionary arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this
+ \a.\d:Eq a. let f = ... in ...(f d)...
+There really is not much point in specialising f wrt the dictionary d,
+because the code for the specialised f is not improved at all, because
+d is lambda-bound. We simply get junk specialisations.
+
+What is "interesting"? Just that it has *some* structure.
+
+\begin{code}
+interestingDict :: CoreExpr -> Bool
+-- A dictionary argument is interesting if it has *some* structure
+interestingDict (Var v) = hasSomeUnfolding (idUnfolding v)
+ || isDataConWorkId v
+interestingDict (Type _) = False
+interestingDict (App fn (Type _)) = interestingDict fn
+interestingDict (Note _ a) = interestingDict a
+interestingDict (Cast e _) = interestingDict e
+interestingDict _ = True
+\end{code}
+
+\begin{code}
plusUDs :: UsageDetails -> UsageDetails -> UsageDetails
-plusUDs (MkUD {dict_binds = db1, calls = calls1})
- (MkUD {dict_binds = db2, calls = calls2})
- = MkUD {dict_binds = d, calls = c}
- where
- d = db1 `unionBags` db2
- c = calls1 `unionCalls` calls2
+plusUDs (MkUD {ud_binds = db1, ud_calls = calls1})
+ (MkUD {ud_binds = db2, ud_calls = calls2})
+ = MkUD { ud_binds = db1 `unionBags` db2
+ , ud_calls = calls1 `unionCalls` calls2 }
+plusUDList :: [UsageDetails] -> UsageDetails
plusUDList = foldr plusUDs emptyUDs
--- zapCalls deletes calls to ids from uds
-zapCalls ids uds = uds {calls = delListFromFM (calls uds) ids}
+-----------------------------
+_dictBindBndrs :: Bag DictBind -> [Id]
+_dictBindBndrs dbs = foldrBag ((++) . bindersOf . fst) [] dbs
+mkDB :: CoreBind -> DictBind
mkDB bind = (bind, bind_fvs bind)
+bind_fvs :: CoreBind -> VarSet
bind_fvs (NonRec bndr rhs) = pair_fvs (bndr,rhs)
bind_fvs (Rec prs) = foldl delVarSet rhs_fvs bndrs
where
bndrs = map fst prs
rhs_fvs = unionVarSets (map pair_fvs prs)
-pair_fvs (bndr, rhs) = exprFreeVars rhs `unionVarSet` idRuleVars bndr
+pair_fvs :: (Id, CoreExpr) -> VarSet
+pair_fvs (bndr, rhs) = exprFreeVars rhs `unionVarSet` idFreeVars bndr
-- Don't forget variables mentioned in the
-- rules of the bndr. C.f. OccAnal.addRuleUsage
+ -- Also tyvars mentioned in its type; they may not appear in the RHS
+ -- type T a = Int
+ -- x :: T a = 3
+
+flattenDictBinds :: Bag DictBind -> [(Id,CoreExpr)] -> [(Id,CoreExpr)]
+flattenDictBinds dbs pairs
+ = foldrBag add pairs dbs
+ where
+ add (NonRec b r,_) pairs = (b,r) : pairs
+ add (Rec prs1, _) pairs = prs1 ++ pairs
+
+snocDictBinds :: UsageDetails -> [CoreBind] -> UsageDetails
+-- Add ud_binds to the tail end of the bindings in uds
+snocDictBinds uds dbs
+ = uds { ud_binds = ud_binds uds `unionBags`
+ foldr (consBag . mkDB) emptyBag dbs }
+consDictBind :: CoreBind -> UsageDetails -> UsageDetails
+consDictBind bind uds = uds { ud_binds = mkDB bind `consBag` ud_binds uds }
-addDictBind (dict,rhs) uds = uds { dict_binds = mkDB (NonRec dict rhs) `consBag` dict_binds uds }
+addDictBinds :: [DictBind] -> UsageDetails -> UsageDetails
+addDictBinds binds uds = uds { ud_binds = listToBag binds `unionBags` ud_binds uds }
-dumpAllDictBinds (MkUD {dict_binds = dbs}) binds
+snocDictBind :: UsageDetails -> CoreBind -> UsageDetails
+snocDictBind uds bind = uds { ud_binds = ud_binds uds `snocBag` mkDB bind }
+
+wrapDictBinds :: Bag DictBind -> [CoreBind] -> [CoreBind]
+wrapDictBinds dbs binds
= foldrBag add binds dbs
where
add (bind,_) binds = bind : binds
-dumpUDs :: [CoreBndr]
- -> UsageDetails -> CoreExpr
- -> (UsageDetails, CoreExpr)
-dumpUDs bndrs uds body
- = (free_uds, foldr add_let body dict_binds)
+wrapDictBindsE :: Bag DictBind -> CoreExpr -> CoreExpr
+wrapDictBindsE dbs expr
+ = foldrBag add expr dbs
where
- (free_uds, (dict_binds, _)) = splitUDs bndrs uds
- add_let (bind,_) body = Let bind body
-
-splitUDs :: [CoreBndr]
- -> UsageDetails
- -> (UsageDetails, -- These don't mention the binders
- ProtoUsageDetails) -- These do
-
-splitUDs bndrs uds@(MkUD {dict_binds = orig_dbs,
- calls = orig_calls})
-
- = if isEmptyBag dump_dbs && null dump_calls then
- -- Common case: binder doesn't affect floats
- (uds, ([],[]))
-
- else
- -- Binders bind some of the fvs of the floats
- (MkUD {dict_binds = free_dbs,
- calls = listToCallDetails free_calls},
- (bagToList dump_dbs, dump_calls)
- )
-
+ add (bind,_) expr = Let bind expr
+
+----------------------
+dumpUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, Bag DictBind)
+-- Used at a lambda or case binder; just dump anything mentioning the binder
+dumpUDs bndrs uds@(MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
+ | null bndrs = (uds, emptyBag) -- Common in case alternatives
+ | otherwise = -- pprTrace "dumpUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $
+ (free_uds, dump_dbs)
where
+ free_uds = MkUD { ud_binds = free_dbs, ud_calls = free_calls }
bndr_set = mkVarSet bndrs
+ (free_dbs, dump_dbs, dump_set) = splitDictBinds orig_dbs bndr_set
+ free_calls = deleteCallsMentioning dump_set $ -- Drop calls mentioning bndr_set on the floor
+ deleteCallsFor bndrs orig_calls -- Discard calls for bndr_set; there should be
+ -- no calls for any of the dicts in dump_dbs
+
+dumpBindUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, Bag DictBind, Bool)
+-- Used at a lambda or case binder; just dump anything mentioning the binder
+dumpBindUDs bndrs (MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
+ = -- pprTrace "dumpBindUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $
+ (free_uds, dump_dbs, float_all)
+ where
+ free_uds = MkUD { ud_binds = free_dbs, ud_calls = free_calls }
+ bndr_set = mkVarSet bndrs
+ (free_dbs, dump_dbs, dump_set) = splitDictBinds orig_dbs bndr_set
+ free_calls = deleteCallsFor bndrs orig_calls
+ float_all = dump_set `intersectsVarSet` callDetailsFVs free_calls
+
+callsForMe :: Id -> UsageDetails -> (UsageDetails, [CallInfo])
+callsForMe fn (MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
+ = -- pprTrace ("callsForMe")
+ -- (vcat [ppr fn,
+ -- text "Orig dbs =" <+> ppr (_dictBindBndrs orig_dbs),
+ -- text "Orig calls =" <+> ppr orig_calls,
+ -- text "Dep set =" <+> ppr dep_set,
+ -- text "Calls for me =" <+> ppr calls_for_me]) $
+ (uds_without_me, calls_for_me)
+ where
+ uds_without_me = MkUD { ud_binds = orig_dbs, ud_calls = delVarEnv orig_calls fn }
+ calls_for_me = case lookupVarEnv orig_calls fn of
+ Nothing -> []
+ Just (CIS _ calls) -> filter_dfuns (Map.toList calls)
- (free_dbs, dump_dbs, dump_idset)
- = foldlBag dump_db (emptyBag, emptyBag, bndr_set) orig_dbs
- -- Important that it's foldl not foldr;
- -- we're accumulating the set of dumped ids in dump_set
+ dep_set = foldlBag go (unitVarSet fn) orig_dbs
+ go dep_set (db,fvs) | fvs `intersectsVarSet` dep_set
+ = extendVarSetList dep_set (bindersOf db)
+ | otherwise = dep_set
+
+ -- Note [Specialisation of dictionary functions]
+ filter_dfuns | isDFunId fn = filter ok_call
+ | otherwise = \cs -> cs
- -- Filter out any calls that mention things that are being dumped
- orig_call_list = callDetailsToList orig_calls
- (dump_calls, free_calls) = partition captured orig_call_list
- captured (id,tys,(dicts, fvs)) = fvs `intersectsVarSet` dump_idset
- || id `elemVarSet` dump_idset
+ ok_call (_, (_,fvs)) = not (fvs `intersectsVarSet` dep_set)
- dump_db (free_dbs, dump_dbs, dump_idset) db@(bind, fvs)
+----------------------
+splitDictBinds :: Bag DictBind -> IdSet -> (Bag DictBind, Bag DictBind, IdSet)
+-- Returns (free_dbs, dump_dbs, dump_set)
+splitDictBinds dbs bndr_set
+ = foldlBag split_db (emptyBag, emptyBag, bndr_set) dbs
+ -- Important that it's foldl not foldr;
+ -- we're accumulating the set of dumped ids in dump_set
+ where
+ split_db (free_dbs, dump_dbs, dump_idset) db@(bind, fvs)
| dump_idset `intersectsVarSet` fvs -- Dump it
= (free_dbs, dump_dbs `snocBag` db,
extendVarSetList dump_idset (bindersOf bind))
| otherwise -- Don't dump it
= (free_dbs `snocBag` db, dump_dbs, dump_idset)
+
+
+----------------------
+deleteCallsMentioning :: VarSet -> CallDetails -> CallDetails
+-- Remove calls *mentioning* bs
+deleteCallsMentioning bs calls
+ = mapVarEnv filter_calls calls
+ where
+ filter_calls :: CallInfoSet -> CallInfoSet
+ filter_calls (CIS f calls) = CIS f (Map.filter keep_call calls)
+ keep_call (_, fvs) = not (fvs `intersectsVarSet` bs)
+
+deleteCallsFor :: [Id] -> CallDetails -> CallDetails
+-- Remove calls *for* bs
+deleteCallsFor bs calls = delVarEnvList calls bs
\end{code}
\begin{code}
type SpecM a = UniqSM a
-thenSM = thenUs
-returnSM = returnUs
-getUniqSM = getUniqueUs
-mapSM = mapUs
-initSM = initUs_
+runSpecM:: SpecM a -> CoreM a
+runSpecM spec = do { us <- getUniqueSupplyM
+ ; return (initUs_ us spec) }
-mapAndCombineSM f [] = returnSM ([], emptyUDs)
-mapAndCombineSM f (x:xs) = f x `thenSM` \ (y, uds1) ->
- mapAndCombineSM f xs `thenSM` \ (ys, uds2) ->
- returnSM (y:ys, uds1 `plusUDs` uds2)
+mapAndCombineSM :: (a -> SpecM (b, UsageDetails)) -> [a] -> SpecM ([b], UsageDetails)
+mapAndCombineSM _ [] = return ([], emptyUDs)
+mapAndCombineSM f (x:xs) = do (y, uds1) <- f x
+ (ys, uds2) <- mapAndCombineSM f xs
+ return (y:ys, uds1 `plusUDs` uds2)
cloneBindSM :: Subst -> CoreBind -> SpecM (Subst, Subst, CoreBind)
-- Clone the binders of the bind; return new bind with the cloned binders
-- Return the substitution to use for RHSs, and the one to use for the body
-cloneBindSM subst (NonRec bndr rhs)
- = getUs `thenUs` \ us ->
- let
- (subst', bndr') = cloneIdBndr subst us bndr
- in
- returnUs (subst, subst', NonRec bndr' rhs)
-
-cloneBindSM subst (Rec pairs)
- = getUs `thenUs` \ us ->
- let
- (subst', bndrs') = cloneRecIdBndrs subst us (map fst pairs)
- in
- returnUs (subst', subst', Rec (bndrs' `zip` map snd pairs))
-
-cloneBinders subst bndrs
- = getUs `thenUs` \ us ->
- returnUs (cloneIdBndrs subst us bndrs)
-
-newIdSM old_id new_ty
- = getUniqSM `thenSM` \ uniq ->
- let
- -- Give the new Id a similar occurrence name to the old one
- name = idName old_id
- new_id = mkUserLocal (mkSpecOcc (nameOccName name)) uniq new_ty (getSrcLoc name)
- in
- returnSM new_id
+cloneBindSM subst (NonRec bndr rhs) = do
+ us <- getUniqueSupplyM
+ let (subst', bndr') = cloneIdBndr subst us bndr
+ return (subst, subst', NonRec bndr' rhs)
+
+cloneBindSM subst (Rec pairs) = do
+ us <- getUniqueSupplyM
+ let (subst', bndrs') = cloneRecIdBndrs subst us (map fst pairs)
+ return (subst', subst', Rec (bndrs' `zip` map snd pairs))
+
+newDictBndrs :: Subst -> [CoreBndr] -> SpecM (Subst, [CoreBndr])
+-- Make up completely fresh binders for the dictionaries
+-- Their bindings are going to float outwards
+newDictBndrs subst bndrs
+ = do { bndrs' <- mapM new bndrs
+ ; let subst' = extendIdSubstList subst
+ [(d, Var d') | (d,d') <- bndrs `zip` bndrs']
+ ; return (subst', bndrs' ) }
+ where
+ new b = do { uniq <- getUniqueM
+ ; let n = idName b
+ ty' = CoreSubst.substTy subst (idType b)
+ ; return (mkUserLocal (nameOccName n) uniq ty' (getSrcSpan n)) }
+
+newSpecIdSM :: Id -> Type -> SpecM Id
+ -- Give the new Id a similar occurrence name to the old one
+newSpecIdSM old_id new_ty
+ = do { uniq <- getUniqueM
+ ; let name = idName old_id
+ new_occ = mkSpecOcc (nameOccName name)
+ new_id = mkUserLocal new_occ uniq new_ty (getSrcSpan name)
+ ; return new_id }
\end{code}