X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fspecialise%2FSpecConstr.lhs;h=f214f0cae8d2e1d10f9bdf3fb17c3203407a7a3a;hp=abf53604db27411b72d9f6e081cb195a9de1e2d4;hb=e95ee1f718c6915c478005aad8af81705357d6ab;hpb=7a327c1297615a9498e7117a0017b09ff2458d53 diff --git a/compiler/specialise/SpecConstr.lhs b/compiler/specialise/SpecConstr.lhs index abf5360..f214f0c 100644 --- a/compiler/specialise/SpecConstr.lhs +++ b/compiler/specialise/SpecConstr.lhs @@ -4,41 +4,54 @@ \section[SpecConstr]{Specialise over constructors} \begin{code} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + module SpecConstr( - specConstrProgram + specConstrProgram, SpecConstrAnnotation(..) ) where #include "HsVersions.h" import CoreSyn -import CoreLint ( showPass, endPass ) -import CoreUtils ( exprType, mkPiTypes ) +import CoreSubst +import CoreUtils +import CoreUnfold ( couldBeSmallEnoughToInline ) import CoreFVs ( exprsFreeVars ) -import CoreTidy ( tidyRules ) -import PprCore ( pprRules ) +import CoreMonad +import HscTypes ( ModGuts(..) ) import WwLib ( mkWorkerArgs ) -import DataCon ( dataConRepArity, dataConUnivTyVars ) -import Type ( Type, tyConAppArgs ) -import Coercion ( coercionKind ) -import Rules ( matchN ) -import Id ( Id, idName, idType, isDataConWorkId_maybe, - mkUserLocal, mkSysLocal, idUnfolding, isLocalId ) -import Var ( Var ) +import DataCon ( dataConTyCon, dataConRepArity, dataConUnivTyVars ) +import TyCon ( TyCon ) +import Literal ( literalType ) +import Coercion +import Rules +import Type hiding( substTy ) +import Id +import MkCore ( mkImpossibleExpr ) +import Var import VarEnv import VarSet -import Name ( nameOccName, nameSrcLoc ) -import Rules ( addIdSpecialisations, mkLocalRule, rulesOfBinds ) -import OccName ( mkSpecOcc ) -import ErrUtils ( dumpIfSet_dyn ) -import DynFlags ( DynFlags, DynFlag(..) ) -import BasicTypes ( Activation(..) ) -import Maybes ( orElse, catMaybes, isJust ) -import Util ( zipWithEqual, lengthAtLeast, notNull ) -import List ( nubBy, partition ) +import Name +import BasicTypes +import DynFlags ( DynFlags(..) ) +import StaticFlags ( opt_PprStyle_Debug ) +import Maybes ( orElse, catMaybes, isJust, isNothing ) +import Demand +import DmdAnal ( both ) +import Serialized ( deserializeWithData ) +import Util import UniqSupply import Outputable import FastString import UniqFM +import MonadUtils +import Control.Monad ( zipWithM ) +import Data.List +import Data.Data ( Data, Typeable ) \end{code} ----------------------------------------------------- @@ -115,7 +128,7 @@ This happens if Hence the "OR" part of Note [Good arguments] below. -ALTERNATIVE: pass both boxed and unboxed versions. This no longer saves +ALTERNATIVE 2: pass both boxed and unboxed versions. This no longer saves allocation, but does perhaps save evals. In the RULE we'd have something like @@ -125,6 +138,25 @@ If at the call site the (I# x) was an unfolding, then we'd have to rely on CSE to eliminate the duplicate allocation.... This alternative doesn't look attractive enough to pursue. +ALTERNATIVE 3: ignore the reboxing problem. The trouble is that +the conservative reboxing story prevents many useful functions from being +specialised. Example: + foo :: Maybe Int -> Int -> Int + foo (Just m) 0 = 0 + foo x@(Just m) n = foo x (n-m) +Here the use of 'x' will clearly not require boxing in the specialised function. + +The strictness analyser has the same problem, in fact. Example: + f p@(a,b) = ... +If we pass just 'a' and 'b' to the worker, it might need to rebox the +pair to create (a,b). A more sophisticated analysis might figure out +precisely the cases in which this could happen, but the strictness +analyser does no such analysis; it just passes 'a' and 'b', and hopes +for the best. + +So my current choice is to make SpecConstr similarly aggressive, and +ignore the bad potential of reboxing. + Note [Good arguments] ~~~~~~~~~~~~~~~~~~~~~ @@ -316,6 +348,74 @@ The recursive call ends up looking like So we want to spot the construtor application inside the cast. That's why we have the Cast case in argToPat +Note [Local recursive groups] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For a *local* recursive group, we can see all the calls to the +function, so we seed the specialisation loop from the calls in the +body, not from the calls in the RHS. Consider: + + bar m n = foo n (n,n) (n,n) (n,n) (n,n) + where + foo n p q r s + | n == 0 = m + | n > 3000 = case p of { (p1,p2) -> foo (n-1) (p2,p1) q r s } + | n > 2000 = case q of { (q1,q2) -> foo (n-1) p (q2,q1) r s } + | n > 1000 = case r of { (r1,r2) -> foo (n-1) p q (r2,r1) s } + | otherwise = case s of { (s1,s2) -> foo (n-1) p q r (s2,s1) } + +If we start with the RHSs of 'foo', we get lots and lots of specialisations, +most of which are not needed. But if we start with the (single) call +in the rhs of 'bar' we get exactly one fully-specialised copy, and all +the recursive calls go to this fully-specialised copy. Indeed, the original +function is later collected as dead code. This is very important in +specialising the loops arising from stream fusion, for example in NDP where +we were getting literally hundreds of (mostly unused) specialisations of +a local function. + +Note [Do not specialise diverging functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Specialising a function that just diverges is a waste of code. +Furthermore, it broke GHC (simpl014) thus: + {-# STR Sb #-} + f = \x. case x of (a,b) -> f x +If we specialise f we get + f = \x. case x of (a,b) -> fspec a b +But fspec doesn't have decent strictnes info. As it happened, +(f x) :: IO t, so the state hack applied and we eta expanded fspec, +and hence f. But now f's strictness is less than its arity, which +breaks an invariant. + +Note [Forcing specialisation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +With stream fusion and in other similar cases, we want to fully specialise +some (but not necessarily all!) loops regardless of their size and the +number of specialisations. We allow a library to specify this by annotating +a type with ForceSpecConstr and then adding a parameter of that type to the +loop. Here is a (simplified) example from the vector library: + + data SPEC = SPEC | SPEC2 + {-# ANN type SPEC ForceSpecConstr #-} + + foldl :: (a -> b -> a) -> a -> Stream b -> a + {-# INLINE foldl #-} + foldl f z (Stream step s _) = foldl_loop SPEC z s + where + foldl_loop SPEC z s = case step s of + Yield x s' -> foldl_loop SPEC (f z x) s' + Skip -> foldl_loop SPEC z s' + Done -> z + +SpecConstr will spot the SPEC parameter and always fully specialise +foldl_loop. Note that we can't just annotate foldl_loop since it isn't a +top-level function but even if we could, inlining etc. could easily drop the +annotation. We also have to prevent the SPEC argument from being removed by +w/w which is why SPEC is a sum type. This is all quite ugly; we ought to come +up with a better design. + +ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set +force_spec to True when calling specLoop. This flag makes specLoop and +specialise ignore specConstrCount and specConstrThreshold when deciding +whether to specialise a function. ----------------------------------------------------- Stuff not yet handled @@ -392,7 +492,19 @@ But perhaps the first one isn't good. After all, we know that tpl_B2 is a T (I# x) really, because T is strict and Int has one constructor. (We can't unbox the strict fields, becuase T is polymorphic!) +%************************************************************************ +%* * +\subsection{Annotations} +%* * +%************************************************************************ + +Annotating a type with NoSpecConstr will make SpecConstr not specialise +for arguments of that type. +\begin{code} +data SpecConstrAnnotation = NoSpecConstr | ForceSpecConstr + deriving( Data, Typeable, Eq ) +\end{code} %************************************************************************ %* * @@ -401,24 +513,19 @@ unbox the strict fields, becuase T is polymorphic!) %************************************************************************ \begin{code} -specConstrProgram :: DynFlags -> UniqSupply -> [CoreBind] -> IO [CoreBind] -specConstrProgram dflags us binds +specConstrProgram :: ModGuts -> CoreM ModGuts +specConstrProgram guts = do - showPass dflags "SpecConstr" - - let (binds', _) = initUs us (go emptyScEnv binds) - - endPass dflags "SpecConstr" Opt_D_dump_spec binds' - - dumpIfSet_dyn dflags Opt_D_dump_rules "Top-level specialisations" - (pprRules (tidyRules emptyTidyEnv (rulesOfBinds binds'))) - - return binds' + dflags <- getDynFlags + us <- getUniqueSupplyM + annos <- getFirstAnnotations deserializeWithData guts + let binds' = fst $ initUs us (go (initScEnv dflags annos) (mg_binds guts)) + return (guts { mg_binds = binds' }) where - go env [] = returnUs [] - go env (bind:binds) = scBind env bind `thenUs` \ (env', _, bind') -> - go env' binds `thenUs` \ binds' -> - returnUs (bind' : binds') + go _ [] = return [] + go env (bind:binds) = do (env', bind') <- scTopBind env bind + binds' <- go env' binds + return (bind' : binds') \end{code} @@ -429,24 +536,54 @@ specConstrProgram dflags us binds %************************************************************************ \begin{code} -data ScEnv = SCE { scope :: InScopeEnv, - -- Binds all non-top-level variables in scope +data ScEnv = SCE { sc_size :: Maybe Int, -- Size threshold + sc_count :: Maybe Int, -- Max # of specialisations for any one fn + -- See Note [Avoiding exponential blowup] - cons :: ConstrEnv + sc_subst :: Subst, -- Current substitution + -- Maps InIds to OutExprs + + sc_how_bound :: HowBoundEnv, + -- Binds interesting non-top-level variables + -- Domain is OutVars (*after* applying the substitution) + + sc_vals :: ValueEnv, + -- Domain is OutIds (*after* applying the substitution) + -- Used even for top-level bindings (but not imported ones) + + sc_annotations :: UniqFM SpecConstrAnnotation } -type InScopeEnv = VarEnv HowBound +--------------------- +-- As we go, we apply a substitution (sc_subst) to the current term +type InExpr = CoreExpr -- _Before_ applying the subst +type InVar = Var + +type OutExpr = CoreExpr -- _After_ applying the subst +type OutId = Id +type OutVar = Var -type ConstrEnv = IdEnv ConValue -data ConValue = CV AltCon [CoreArg] - -- Variables known to be bound to a constructor - -- in a particular case alternative +--------------------- +type HowBoundEnv = VarEnv HowBound -- Domain is OutVars +--------------------- +type ValueEnv = IdEnv Value -- Domain is OutIds +data Value = ConVal AltCon [CoreArg] -- _Saturated_ constructors + | LambdaVal -- Inlinable lambdas or PAPs -instance Outputable ConValue where - ppr (CV con args) = ppr con <+> interpp'SP args +instance Outputable Value where + ppr (ConVal con args) = ppr con <+> interpp'SP args + ppr LambdaVal = ptext (sLit "") -emptyScEnv = SCE { scope = emptyVarEnv, cons = emptyVarEnv } +--------------------- +initScEnv :: DynFlags -> UniqFM SpecConstrAnnotation -> ScEnv +initScEnv dflags anns + = SCE { sc_size = specConstrThreshold dflags, + sc_count = specConstrCount dflags, + sc_subst = emptySubst, + sc_how_bound = emptyVarEnv, + sc_vals = emptyVarEnv, + sc_annotations = anns } data HowBound = RecFun -- These are the recursive functions for which -- we seek interesting call patterns @@ -454,74 +591,163 @@ data HowBound = RecFun -- These are the recursive functions for which | RecArg -- These are those functions' arguments, or their sub-components; -- we gather occurrence information for these - | Other -- We track all others so we know what's in scope - -- This is used in spec_one to check what needs to be - -- passed as a parameter and what is in scope at the - -- function definition site - instance Outputable HowBound where ppr RecFun = text "RecFun" ppr RecArg = text "RecArg" - ppr Other = text "Other" - -lookupScopeEnv env v = lookupVarEnv (scope env) v - -extendBndrs env bndrs = env { scope = extendVarEnvList (scope env) [(b,Other) | b <- bndrs] } -extendBndr env bndr = env { scope = extendVarEnv (scope env) bndr Other } - - -- When we encounter - -- case scrut of b - -- C x y -> ... - -- we want to bind b, and perhaps scrut too, to (C x y) -extendCaseBndrs :: ScEnv -> Id -> CoreExpr -> AltCon -> [Var] -> ScEnv -extendCaseBndrs env case_bndr scrut con alt_bndrs - = case con of - DEFAULT -> env1 - LitAlt lit -> extendCons env1 scrut case_bndr (CV con []) - DataAlt dc -> extend_data_con dc + +lookupHowBound :: ScEnv -> Id -> Maybe HowBound +lookupHowBound env id = lookupVarEnv (sc_how_bound env) id + +scSubstId :: ScEnv -> Id -> CoreExpr +scSubstId env v = lookupIdSubst (text "scSubstId") (sc_subst env) v + +scSubstTy :: ScEnv -> Type -> Type +scSubstTy env ty = substTy (sc_subst env) ty + +zapScSubst :: ScEnv -> ScEnv +zapScSubst env = env { sc_subst = zapSubstEnv (sc_subst env) } + +extendScInScope :: ScEnv -> [Var] -> ScEnv + -- Bring the quantified variables into scope +extendScInScope env qvars = env { sc_subst = extendInScopeList (sc_subst env) qvars } + + -- Extend the substitution +extendScSubst :: ScEnv -> Var -> OutExpr -> ScEnv +extendScSubst env var expr = env { sc_subst = extendSubst (sc_subst env) var expr } + +extendScSubstList :: ScEnv -> [(Var,OutExpr)] -> ScEnv +extendScSubstList env prs = env { sc_subst = extendSubstList (sc_subst env) prs } + +extendHowBound :: ScEnv -> [Var] -> HowBound -> ScEnv +extendHowBound env bndrs how_bound + = env { sc_how_bound = extendVarEnvList (sc_how_bound env) + [(bndr,how_bound) | bndr <- bndrs] } + +extendBndrsWith :: HowBound -> ScEnv -> [Var] -> (ScEnv, [Var]) +extendBndrsWith how_bound env bndrs + = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndrs') where - cur_scope = scope env - env1 = env { scope = extendVarEnvList cur_scope - [(b,how_bound) | b <- case_bndr:alt_bndrs] } - - -- Record RecArg for the components iff the scrutinee is RecArg - -- I think the only reason for this is to keep the usage envt small - -- so is it worth it at all? - -- [This comment looks plain wrong to me, so I'm ignoring it - -- "Also forget if the scrutinee is a RecArg, because we're - -- now in the branch of a case, and we don't want to - -- record a non-scrutinee use of v if we have - -- case v of { (a,b) -> ...(f v)... }" ] - how_bound = get_how scrut - where - get_how (Var v) = lookupVarEnv cur_scope v `orElse` Other - get_how (Cast e _) = get_how e - get_how (Note _ e) = get_how e - get_how other = Other - - extend_data_con data_con = - extendCons env1 scrut case_bndr (CV con vanilla_args) - where - vanilla_args = map Type (tyConAppArgs (idType case_bndr)) ++ - varsToCoreExprs alt_bndrs - -extendCons :: ScEnv -> CoreExpr -> Id -> ConValue -> ScEnv -extendCons env scrut case_bndr val - = case scrut of - Var v -> env { cons = extendVarEnv cons1 v val } - other -> env { cons = cons1 } + (subst', bndrs') = substBndrs (sc_subst env) bndrs + hb_env' = sc_how_bound env `extendVarEnvList` + [(bndr,how_bound) | bndr <- bndrs'] + +extendBndrWith :: HowBound -> ScEnv -> Var -> (ScEnv, Var) +extendBndrWith how_bound env bndr + = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndr') where - cons1 = extendVarEnv (cons env) case_bndr val - - -- When we encounter a recursive function binding - -- f = \x y -> ... - -- we want to extend the scope env with bindings - -- that record that f is a RecFn and x,y are RecArgs -extendRecBndr env fn bndrs - = env { scope = scope env `extendVarEnvList` - ((fn,RecFun): [(bndr,RecArg) | bndr <- bndrs]) } + (subst', bndr') = substBndr (sc_subst env) bndr + hb_env' = extendVarEnv (sc_how_bound env) bndr' how_bound + +extendRecBndrs :: ScEnv -> [Var] -> (ScEnv, [Var]) +extendRecBndrs env bndrs = (env { sc_subst = subst' }, bndrs') + where + (subst', bndrs') = substRecBndrs (sc_subst env) bndrs + +extendBndr :: ScEnv -> Var -> (ScEnv, Var) +extendBndr env bndr = (env { sc_subst = subst' }, bndr') + where + (subst', bndr') = substBndr (sc_subst env) bndr + +extendValEnv :: ScEnv -> Id -> Maybe Value -> ScEnv +extendValEnv env _ Nothing = env +extendValEnv env id (Just cv) = env { sc_vals = extendVarEnv (sc_vals env) id cv } + +extendCaseBndrs :: ScEnv -> Id -> AltCon -> [Var] -> (ScEnv, [Var]) +-- When we encounter +-- case scrut of b +-- C x y -> ... +-- we want to bind b, to (C x y) +-- NB1: Extends only the sc_vals part of the envt +-- NB2: Kill the dead-ness info on the pattern binders x,y, since +-- they are potentially made alive by the [b -> C x y] binding +extendCaseBndrs env case_bndr con alt_bndrs + | isDeadBinder case_bndr + = (env, alt_bndrs) + | otherwise + = (env1, map zap alt_bndrs) + -- NB: We used to bind v too, if scrut = (Var v); but + -- the simplifer has already done this so it seems + -- redundant to do so here + -- case scrut of + -- Var v -> extendValEnv env1 v cval + -- _other -> env1 + where + zap v | isTyCoVar v = v -- See NB2 above + | otherwise = zapIdOccInfo v + env1 = extendValEnv env case_bndr cval + cval = case con of + DEFAULT -> Nothing + LitAlt {} -> Just (ConVal con []) + DataAlt {} -> Just (ConVal con vanilla_args) + where + vanilla_args = map Type (tyConAppArgs (idType case_bndr)) ++ + varsToCoreExprs alt_bndrs + +ignoreTyCon :: ScEnv -> TyCon -> Bool +ignoreTyCon env tycon + = lookupUFM (sc_annotations env) tycon == Just NoSpecConstr + +ignoreType :: ScEnv -> Type -> Bool +ignoreType env ty + = case splitTyConApp_maybe ty of + Just (tycon, _) -> ignoreTyCon env tycon + _ -> False + +ignoreAltCon :: ScEnv -> AltCon -> Bool +ignoreAltCon env (DataAlt dc) = ignoreTyCon env (dataConTyCon dc) +ignoreAltCon env (LitAlt lit) = ignoreType env (literalType lit) +ignoreAltCon _ DEFAULT = True + +forceSpecBndr :: ScEnv -> Var -> Bool +forceSpecBndr env var = forceSpecFunTy env . snd . splitForAllTys . varType $ var + +forceSpecFunTy :: ScEnv -> Type -> Bool +forceSpecFunTy env = any (forceSpecArgTy env) . fst . splitFunTys + +forceSpecArgTy :: ScEnv -> Type -> Bool +forceSpecArgTy env ty + | Just ty' <- coreView ty = forceSpecArgTy env ty' + +forceSpecArgTy env ty + | Just (tycon, tys) <- splitTyConApp_maybe ty + , tycon /= funTyCon + = lookupUFM (sc_annotations env) tycon == Just ForceSpecConstr + || any (forceSpecArgTy env) tys + +forceSpecArgTy _ _ = False + +decreaseSpecCount :: ScEnv -> Int -> ScEnv +-- See Note [Avoiding exponential blowup] +decreaseSpecCount env n_specs + = env { sc_count = case sc_count env of + Nothing -> Nothing + Just n -> Just (n `div` (n_specs + 1)) } + -- The "+1" takes account of the original function; + -- See Note [Avoiding exponential blowup] \end{code} +Note [Avoiding exponential blowup] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The sc_count field of the ScEnv says how many times we are prepared to +duplicate a single function. But we must take care with recursive +specialiations. Consider + + let $j1 = let $j2 = let $j3 = ... + in + ...$j3... + in + ...$j2... + in + ...$j1... + +If we specialise $j1 then in each specialisation (as well as the original) +we can specialise $j2, and similarly $j3. Even if we make just *one* +specialisation of each, becuase we also have the original we'll get 2^n +copies of $j3, which is not good. + +So when recursively specialising we divide the sc_count by the number of +copies we are making at this level, including the original. + %************************************************************************ %* * @@ -532,34 +758,40 @@ extendRecBndr env fn bndrs \begin{code} data ScUsage = SCU { - calls :: !(IdEnv ([Call])), -- Calls + scu_calls :: CallEnv, -- Calls -- The functions are a subset of the -- RecFuns in the ScEnv - occs :: !(IdEnv ArgOcc) -- Information on argument occurrences - } -- The variables are a subset of the - -- RecArg in the ScEnv + scu_occs :: !(IdEnv ArgOcc) -- Information on argument occurrences + } -- The domain is OutIds -type Call = (ConstrEnv, [CoreArg]) +type CallEnv = IdEnv [Call] +type Call = (ValueEnv, [CoreArg]) -- The arguments of the call, together with the -- env giving the constructor bindings at the call site -nullUsage = SCU { calls = emptyVarEnv, occs = emptyVarEnv } +nullUsage :: ScUsage +nullUsage = SCU { scu_calls = emptyVarEnv, scu_occs = emptyVarEnv } + +combineCalls :: CallEnv -> CallEnv -> CallEnv +combineCalls = plusVarEnv_C (++) -combineUsage u1 u2 = SCU { calls = plusVarEnv_C (++) (calls u1) (calls u2), - occs = plusVarEnv_C combineOcc (occs u1) (occs u2) } +combineUsage :: ScUsage -> ScUsage -> ScUsage +combineUsage u1 u2 = SCU { scu_calls = combineCalls (scu_calls u1) (scu_calls u2), + scu_occs = plusVarEnv_C combineOcc (scu_occs u1) (scu_occs u2) } +combineUsages :: [ScUsage] -> ScUsage combineUsages [] = nullUsage combineUsages us = foldr1 combineUsage us -lookupOcc :: ScUsage -> Var -> (ScUsage, ArgOcc) -lookupOcc (SCU { calls = sc_calls, occs = sc_occs }) bndr - = (SCU {calls = sc_calls, occs = delVarEnv sc_occs bndr}, +lookupOcc :: ScUsage -> OutVar -> (ScUsage, ArgOcc) +lookupOcc (SCU { scu_calls = sc_calls, scu_occs = sc_occs }) bndr + = (SCU {scu_calls = sc_calls, scu_occs = delVarEnv sc_occs bndr}, lookupVarEnv sc_occs bndr `orElse` NoOcc) -lookupOccs :: ScUsage -> [Var] -> (ScUsage, [ArgOcc]) -lookupOccs (SCU { calls = sc_calls, occs = sc_occs }) bndrs - = (SCU {calls = sc_calls, occs = delVarEnvList sc_occs bndrs}, +lookupOccs :: ScUsage -> [OutVar] -> (ScUsage, [ArgOcc]) +lookupOccs (SCU { scu_calls = sc_calls, scu_occs = sc_occs }) bndrs + = (SCU {scu_calls = sc_calls, scu_occs = delVarEnvList sc_occs bndrs}, [lookupVarEnv sc_occs b `orElse` NoOcc | b <- bndrs]) data ArgOcc = NoOcc -- Doesn't occur at all; or a type argument @@ -588,32 +820,49 @@ A pattern binds b, x::a, y::b, z::b->a, but not 'a'! -} instance Outputable ArgOcc where - ppr (ScrutOcc xs) = ptext SLIT("scrut-occ") <> ppr xs - ppr UnkOcc = ptext SLIT("unk-occ") - ppr BothOcc = ptext SLIT("both-occ") - ppr NoOcc = ptext SLIT("no-occ") - + ppr (ScrutOcc xs) = ptext (sLit "scrut-occ") <> ppr xs + ppr UnkOcc = ptext (sLit "unk-occ") + ppr BothOcc = ptext (sLit "both-occ") + ppr NoOcc = ptext (sLit "no-occ") + +-- Experimentally, this vesion of combineOcc makes ScrutOcc "win", so +-- that if the thing is scrutinised anywhere then we get to see that +-- in the overall result, even if it's also used in a boxed way +-- This might be too agressive; see Note [Reboxing] Alternative 3 +combineOcc :: ArgOcc -> ArgOcc -> ArgOcc combineOcc NoOcc occ = occ combineOcc occ NoOcc = occ combineOcc (ScrutOcc xs) (ScrutOcc ys) = ScrutOcc (plusUFM_C combineOccs xs ys) +combineOcc _occ (ScrutOcc ys) = ScrutOcc ys +combineOcc (ScrutOcc xs) _occ = ScrutOcc xs combineOcc UnkOcc UnkOcc = UnkOcc combineOcc _ _ = BothOcc combineOccs :: [ArgOcc] -> [ArgOcc] -> [ArgOcc] combineOccs xs ys = zipWithEqual "combineOccs" combineOcc xs ys +setScrutOcc :: ScEnv -> ScUsage -> OutExpr -> ArgOcc -> ScUsage +-- _Overwrite_ the occurrence info for the scrutinee, if the scrutinee +-- is a variable, and an interesting variable +setScrutOcc env usg (Cast e _) occ = setScrutOcc env usg e occ +setScrutOcc env usg (Note _ e) occ = setScrutOcc env usg e occ +setScrutOcc env usg (Var v) occ + | Just RecArg <- lookupHowBound env v = usg { scu_occs = extendVarEnv (scu_occs usg) v occ } + | otherwise = usg +setScrutOcc _env usg _other _occ -- Catch-all + = usg + conArgOccs :: ArgOcc -> AltCon -> [ArgOcc] -- Find usage of components of data con; returns [UnkOcc...] if unknown -- See Note [ScrutOcc] for the extra UnkOccs in the vanilla datacon case conArgOccs (ScrutOcc fm) (DataAlt dc) | Just pat_arg_occs <- lookupUFM fm dc - = [UnkOcc | tv <- dataConUnivTyVars dc] ++ pat_arg_occs + = [UnkOcc | _ <- dataConUnivTyVars dc] ++ pat_arg_occs -conArgOccs other con = repeat UnkOcc +conArgOccs _other _con = repeat UnkOcc \end{code} - %************************************************************************ %* * \subsection{The main recursive function} @@ -624,180 +873,358 @@ The main recursive function gathers up usage information, and creates specialised versions of functions. \begin{code} -scExpr :: ScEnv -> CoreExpr -> UniqSM (ScUsage, CoreExpr) +scExpr, scExpr' :: ScEnv -> CoreExpr -> UniqSM (ScUsage, CoreExpr) -- The unique supply is needed when we invent -- a new name for the specialised function and its args -scExpr env e@(Type t) = returnUs (nullUsage, e) -scExpr env e@(Lit l) = returnUs (nullUsage, e) -scExpr env e@(Var v) = returnUs (varUsage env v UnkOcc, e) -scExpr env (Note n e) = scExpr env e `thenUs` \ (usg,e') -> - returnUs (usg, Note n e') -scExpr env (Cast e co)= scExpr env e `thenUs` \ (usg,e') -> - returnUs (usg, Cast e' co) -scExpr env (Lam b e) = scExpr (extendBndr env b) e `thenUs` \ (usg,e') -> - returnUs (usg, Lam b e') - -scExpr env (Case scrut b ty alts) - = do { (alt_usgs, alt_occs, alts') <- mapAndUnzip3Us sc_alt alts - ; let (alt_usg, b_occ) = lookupOcc (combineUsages alt_usgs) b - scrut_occ = foldr combineOcc b_occ alt_occs - -- The combined usage of the scrutinee is given - -- by scrut_occ, which is passed to scScrut, which - -- in turn treats a bare-variable scrutinee specially - ; (scrut_usg, scrut') <- scScrut env scrut scrut_occ - ; return (alt_usg `combineUsage` scrut_usg, - Case scrut' b ty alts') } +scExpr env e = scExpr' env e + + +scExpr' env (Var v) = case scSubstId env v of + Var v' -> return (varUsage env v' UnkOcc, Var v') + e' -> scExpr (zapScSubst env) e' + +scExpr' env (Type t) = return (nullUsage, Type (scSubstTy env t)) +scExpr' _ e@(Lit {}) = return (nullUsage, e) +scExpr' env (Note n e) = do (usg,e') <- scExpr env e + return (usg, Note n e') +scExpr' env (Cast e co) = do (usg, e') <- scExpr env e + return (usg, Cast e' (scSubstTy env co)) +scExpr' env e@(App _ _) = scApp env (collectArgs e) +scExpr' env (Lam b e) = do let (env', b') = extendBndr env b + (usg, e') <- scExpr env' e + return (usg, Lam b' e') + +scExpr' env (Case scrut b ty alts) + = do { (scrut_usg, scrut') <- scExpr env scrut + ; case isValue (sc_vals env) scrut' of + Just (ConVal con args) -> sc_con_app con args scrut' + _other -> sc_vanilla scrut_usg scrut' + } where - sc_alt (con,bs,rhs) - = do { let env1 = extendCaseBndrs env b scrut con bs - ; (usg,rhs') <- scExpr env1 rhs - ; let (usg', arg_occs) = lookupOccs usg bs + sc_con_app con args scrut' -- Known constructor; simplify + = do { let (_, bs, rhs) = findAlt con alts + `orElse` (DEFAULT, [], mkImpossibleExpr (coreAltsType alts)) + alt_env' = extendScSubstList env ((b,scrut') : bs `zip` trimConArgs con args) + ; scExpr alt_env' rhs } + + sc_vanilla scrut_usg scrut' -- Normal case + = do { let (alt_env,b') = extendBndrWith RecArg env b + -- Record RecArg for the components + + ; (alt_usgs, alt_occs, alts') + <- mapAndUnzip3M (sc_alt alt_env scrut' b') alts + + ; let (alt_usg, b_occ) = lookupOcc (combineUsages alt_usgs) b' + scrut_occ = foldr combineOcc b_occ alt_occs + scrut_usg' = setScrutOcc env scrut_usg scrut' scrut_occ + -- The combined usage of the scrutinee is given + -- by scrut_occ, which is passed to scScrut, which + -- in turn treats a bare-variable scrutinee specially + + ; return (alt_usg `combineUsage` scrut_usg', + Case scrut' b' (scSubstTy env ty) alts') } + + sc_alt env _scrut' b' (con,bs,rhs) + = do { let (env1, bs1) = extendBndrsWith RecArg env bs + (env2, bs2) = extendCaseBndrs env1 b' con bs1 + ; (usg,rhs') <- scExpr env2 rhs + ; let (usg', arg_occs) = lookupOccs usg bs2 scrut_occ = case con of DataAlt dc -> ScrutOcc (unitUFM dc arg_occs) - other -> ScrutOcc emptyUFM - ; return (usg', scrut_occ, (con,bs,rhs')) } - -scExpr env (Let bind body) - = scBind env bind `thenUs` \ (env', bind_usg, bind') -> - scExpr env' body `thenUs` \ (body_usg, body') -> - returnUs (bind_usg `combineUsage` body_usg, Let bind' body') - -scExpr env e@(App _ _) - = do { let (fn, args) = collectArgs e - ; (fn_usg, fn') <- scScrut env fn (ScrutOcc emptyUFM) - -- Process the function too. It's almost always a variable, - -- but not always. In particular, if this pass follows float-in, - -- which it may, we can get - -- (let f = ...f... in f) arg1 arg2 - -- We use scScrut to record the fact that the function is called - -- Perhpas we should check that it has at least one value arg, - -- but currently we don't bother - - ; (arg_usgs, args') <- mapAndUnzipUs (scExpr env) args - ; let call_usg = case fn of - Var f | Just RecFun <- lookupScopeEnv env f - -> SCU { calls = unitVarEnv f [(cons env, args)], - occs = emptyVarEnv } - other -> nullUsage - ; return (combineUsages arg_usgs `combineUsage` fn_usg - `combineUsage` call_usg, - mkApps fn' args') } + _ -> ScrutOcc emptyUFM + ; return (usg', scrut_occ, (con, bs2, rhs')) } + +scExpr' env (Let (NonRec bndr rhs) body) + | isTyCoVar bndr -- Type-lets may be created by doBeta + = scExpr' (extendScSubst env bndr rhs) body + + | otherwise -- Note [Local let bindings] + = do { let (body_env, bndr') = extendBndr env bndr + body_env2 = extendHowBound body_env [bndr'] RecFun + ; (body_usg, body') <- scExpr body_env2 body + + ; (rhs_usg, rhs_info) <- scRecRhs env (bndr',rhs) + + -- NB: We don't use the ForceSpecConstr mechanism (see + -- Note [Forcing specialisation]) for non-recursive bindings + -- at the moment. I'm not sure if this is the right thing to do. + ; let force_spec = False + ; (spec_usg, specs) <- specialise env force_spec + (scu_calls body_usg) + rhs_info + (SI [] 0 (Just rhs_usg)) + + ; return (body_usg { scu_calls = scu_calls body_usg `delVarEnv` bndr' } + `combineUsage` spec_usg, + mkLets [NonRec b r | (b,r) <- specInfoBinds rhs_info specs] body') + } + + +-- A *local* recursive group: see Note [Local recursive groups] +scExpr' env (Let (Rec prs) body) + = do { let (bndrs,rhss) = unzip prs + (rhs_env1,bndrs') = extendRecBndrs env bndrs + rhs_env2 = extendHowBound rhs_env1 bndrs' RecFun + force_spec = any (forceSpecBndr env) bndrs' + -- Note [Forcing specialisation] + + ; (rhs_usgs, rhs_infos) <- mapAndUnzipM (scRecRhs rhs_env2) (bndrs' `zip` rhss) + ; (body_usg, body') <- scExpr rhs_env2 body + + -- NB: start specLoop from body_usg + ; (spec_usg, specs) <- specLoop rhs_env2 force_spec + (scu_calls body_usg) rhs_infos nullUsage + [SI [] 0 (Just usg) | usg <- rhs_usgs] + -- Do not unconditionally use rhs_usgs. + -- Instead use them only if we find an unspecialised call + -- See Note [Local recursive groups] + + ; let all_usg = spec_usg `combineUsage` body_usg + bind' = Rec (concat (zipWith specInfoBinds rhs_infos specs)) + + ; return (all_usg { scu_calls = scu_calls all_usg `delVarEnvList` bndrs' }, + Let bind' body') } +\end{code} +Note [Local let bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~ +It is not uncommon to find this ----------------------- -scScrut :: ScEnv -> CoreExpr -> ArgOcc -> UniqSM (ScUsage, CoreExpr) --- Used for the scrutinee of a case, --- or the function of an application. --- Remember to look through casts -scScrut env e@(Var v) occ = returnUs (varUsage env v occ, e) -scScrut env (Cast e co) occ = do { (usg, e') <- scScrut env e occ - ; returnUs (usg, Cast e' co) } -scScrut env e occ = scExpr env e + let $j = \x. in ...$j True...$j True... +Here $j is an arbitrary let-bound function, but it often comes up for +join points. We might like to specialise $j for its call patterns. +Notice the difference from a letrec, where we look for call patterns +in the *RHS* of the function. Here we look for call patterns in the +*body* of the let. ----------------------- -scBind :: ScEnv -> CoreBind -> UniqSM (ScEnv, ScUsage, CoreBind) -scBind env (Rec [(fn,rhs)]) - | notNull val_bndrs - = scExpr env_fn_body body `thenUs` \ (usg, body') -> - specialise env fn bndrs body' usg `thenUs` \ (rules, spec_prs) -> - -- Note body': the specialised copies should be based on the - -- optimised version of the body, in case there were - -- nested functions inside. - let - SCU { calls = calls, occs = occs } = usg - in - returnUs (extendBndr env fn, -- For the body of the letrec, just - -- extend the env with Other to record - -- that it's in scope; no funny RecFun business - SCU { calls = calls `delVarEnv` fn, occs = occs `delVarEnvList` val_bndrs}, - Rec ((fn `addIdSpecialisations` rules, mkLams bndrs body') : spec_prs)) +At one point I predicated this on the RHS mentioning the outer +recursive function, but that's not essential and might even be +harmful. I'm not sure. + + +\begin{code} +scApp :: ScEnv -> (InExpr, [InExpr]) -> UniqSM (ScUsage, CoreExpr) + +scApp env (Var fn, args) -- Function is a variable + = ASSERT( not (null args) ) + do { args_w_usgs <- mapM (scExpr env) args + ; let (arg_usgs, args') = unzip args_w_usgs + arg_usg = combineUsages arg_usgs + ; case scSubstId env fn of + fn'@(Lam {}) -> scExpr (zapScSubst env) (doBeta fn' args') + -- Do beta-reduction and try again + + Var fn' -> return (arg_usg `combineUsage` fn_usg, mkApps (Var fn') args') + where + fn_usg = case lookupHowBound env fn' of + Just RecFun -> SCU { scu_calls = unitVarEnv fn' [(sc_vals env, args')], + scu_occs = emptyVarEnv } + Just RecArg -> SCU { scu_calls = emptyVarEnv, + scu_occs = unitVarEnv fn' (ScrutOcc emptyUFM) } + Nothing -> nullUsage + + + other_fn' -> return (arg_usg, mkApps other_fn' args') } + -- NB: doing this ignores any usage info from the substituted + -- function, but I don't think that matters. If it does + -- we can fix it. where - (bndrs,body) = collectBinders rhs - val_bndrs = filter isId bndrs - env_fn_body = extendRecBndr env fn bndrs + doBeta :: OutExpr -> [OutExpr] -> OutExpr + -- ToDo: adjust for System IF + doBeta (Lam bndr body) (arg : args) = Let (NonRec bndr arg) (doBeta body args) + doBeta fn args = mkApps fn args + +-- The function is almost always a variable, but not always. +-- In particular, if this pass follows float-in, +-- which it may, we can get +-- (let f = ...f... in f) arg1 arg2 +scApp env (other_fn, args) + = do { (fn_usg, fn') <- scExpr env other_fn + ; (arg_usgs, args') <- mapAndUnzipM (scExpr env) args + ; return (combineUsages arg_usgs `combineUsage` fn_usg, mkApps fn' args') } -scBind env (Rec prs) - = mapAndUnzipUs do_one prs `thenUs` \ (usgs, prs') -> - returnUs (extendBndrs env (map fst prs), combineUsages usgs, Rec prs') +---------------------- +scTopBind :: ScEnv -> CoreBind -> UniqSM (ScEnv, CoreBind) +scTopBind env (Rec prs) + | Just threshold <- sc_size env + , not force_spec + , not (all (couldBeSmallEnoughToInline threshold) rhss) + -- No specialisation + = do { let (rhs_env,bndrs') = extendRecBndrs env bndrs + ; (_, rhss') <- mapAndUnzipM (scExpr rhs_env) rhss + ; return (rhs_env, Rec (bndrs' `zip` rhss')) } + | otherwise -- Do specialisation + = do { let (rhs_env1,bndrs') = extendRecBndrs env bndrs + rhs_env2 = extendHowBound rhs_env1 bndrs' RecFun + + ; (rhs_usgs, rhs_infos) <- mapAndUnzipM (scRecRhs rhs_env2) (bndrs' `zip` rhss) + ; let rhs_usg = combineUsages rhs_usgs + + ; (_, specs) <- specLoop rhs_env2 force_spec + (scu_calls rhs_usg) rhs_infos nullUsage + [SI [] 0 Nothing | _ <- bndrs] + + ; return (rhs_env1, -- For the body of the letrec, delete the RecFun business + Rec (concat (zipWith specInfoBinds rhs_infos specs))) } where - do_one (bndr,rhs) = scExpr env rhs `thenUs` \ (usg, rhs') -> - returnUs (usg, (bndr,rhs')) + (bndrs,rhss) = unzip prs + force_spec = any (forceSpecBndr env) bndrs + -- Note [Forcing specialisation] + +scTopBind env (NonRec bndr rhs) + = do { (_, rhs') <- scExpr env rhs + ; let (env1, bndr') = extendBndr env bndr + env2 = extendValEnv env1 bndr' (isValue (sc_vals env) rhs') + ; return (env2, NonRec bndr' rhs') } -scBind env (NonRec bndr rhs) - = scExpr env rhs `thenUs` \ (usg, rhs') -> - returnUs (extendBndr env bndr, usg, NonRec bndr rhs') +---------------------- +scRecRhs :: ScEnv -> (OutId, InExpr) -> UniqSM (ScUsage, RhsInfo) +scRecRhs env (bndr,rhs) + = do { let (arg_bndrs,body) = collectBinders rhs + (body_env, arg_bndrs') = extendBndrsWith RecArg env arg_bndrs + ; (body_usg, body') <- scExpr body_env body + ; let (rhs_usg, arg_occs) = lookupOccs body_usg arg_bndrs' + ; return (rhs_usg, RI bndr (mkLams arg_bndrs' body') + arg_bndrs body arg_occs) } + -- The arg_occs says how the visible, + -- lambda-bound binders of the RHS are used + -- (including the TyVar binders) + -- Two pats are the same if they match both ways ---------------------- +specInfoBinds :: RhsInfo -> SpecInfo -> [(Id,CoreExpr)] +specInfoBinds (RI fn new_rhs _ _ _) (SI specs _ _) + = [(id,rhs) | OS _ _ id rhs <- specs] ++ + [(fn `addIdSpecialisations` rules, new_rhs)] + where + rules = [r | OS _ r _ _ <- specs] + +---------------------- +varUsage :: ScEnv -> OutVar -> ArgOcc -> ScUsage varUsage env v use - | Just RecArg <- lookupScopeEnv env v = SCU { calls = emptyVarEnv, - occs = unitVarEnv v use } + | Just RecArg <- lookupHowBound env v = SCU { scu_calls = emptyVarEnv + , scu_occs = unitVarEnv v use } | otherwise = nullUsage \end{code} %************************************************************************ %* * -\subsection{The specialiser} + The specialiser itself %* * %************************************************************************ \begin{code} -specialise :: ScEnv - -> Id -- Functionn - -> [CoreBndr] -> CoreExpr -- Its RHS - -> ScUsage -- Info on usage - -> UniqSM ([CoreRule], -- Rules - [(Id,CoreExpr)]) -- Bindings - -specialise env fn bndrs body body_usg - = do { let (_, bndr_occs) = lookupOccs body_usg bndrs - - ; mb_calls <- -- pprTrace "specialise" (ppr fn <+> ppr bndrs <+> ppr bndr_occs) $ - mapM (callToPats (scope env) bndr_occs) - (lookupVarEnv (calls body_usg) fn `orElse` []) - - ; let good_calls :: [([Var], [CoreArg])] - good_calls = catMaybes mb_calls - in_scope = mkInScopeSet $ unionVarSets $ - [ exprsFreeVars pats `delVarSetList` vs - | (vs,pats) <- good_calls ] - uniq_calls = nubBy (same_call in_scope) good_calls - ; mapAndUnzipUs (spec_one env fn (mkLams bndrs body)) - (uniq_calls `zip` [1..]) } - where - -- Two calls are the same if they match both ways - same_call in_scope (vs1,as1)(vs2,as2) - = isJust (matchN in_scope vs1 as1 as2) - && isJust (matchN in_scope vs2 as2 as1) - -callToPats :: InScopeEnv -> [ArgOcc] -> Call - -> UniqSM (Maybe ([Var], [CoreExpr])) - -- The VarSet is the variables to quantify over in the rule - -- The [CoreExpr] are the argument patterns for the rule -callToPats in_scope bndr_occs (con_env, args) - | length args < length bndr_occs -- Check saturated - = return Nothing +data RhsInfo = RI OutId -- The binder + OutExpr -- The new RHS + [InVar] InExpr -- The *original* RHS (\xs.body) + -- Note [Specialise original body] + [ArgOcc] -- Info on how the xs occur in body + +data SpecInfo = SI [OneSpec] -- The specialisations we have generated + + Int -- Length of specs; used for numbering them + + (Maybe ScUsage) -- Nothing => we have generated specialisations + -- from calls in the *original* RHS + -- Just cs => we haven't, and this is the usage + -- of the original RHS + -- See Note [Local recursive groups] + + -- One specialisation: Rule plus definition +data OneSpec = OS CallPat -- Call pattern that generated this specialisation + CoreRule -- Rule connecting original id with the specialisation + OutId OutExpr -- Spec id + its rhs + + +specLoop :: ScEnv + -> Bool -- force specialisation? + -- Note [Forcing specialisation] + -> CallEnv + -> [RhsInfo] + -> ScUsage -> [SpecInfo] -- One per binder; acccumulating parameter + -> UniqSM (ScUsage, [SpecInfo]) -- ...ditto... +specLoop env force_spec all_calls rhs_infos usg_so_far specs_so_far + = do { specs_w_usg <- zipWithM (specialise env force_spec all_calls) rhs_infos specs_so_far + ; let (new_usg_s, all_specs) = unzip specs_w_usg + new_usg = combineUsages new_usg_s + new_calls = scu_calls new_usg + all_usg = usg_so_far `combineUsage` new_usg + ; if isEmptyVarEnv new_calls then + return (all_usg, all_specs) + else + specLoop env force_spec new_calls rhs_infos all_usg all_specs } + +specialise + :: ScEnv + -> Bool -- force specialisation? + -- Note [Forcing specialisation] + -> CallEnv -- Info on calls + -> RhsInfo + -> SpecInfo -- Original RHS plus patterns dealt with + -> UniqSM (ScUsage, SpecInfo) -- New specialised versions and their usage + +-- Note: the rhs here is the optimised version of the original rhs +-- So when we make a specialised copy of the RHS, we're starting +-- from an RHS whose nested functions have been optimised already. + +specialise env force_spec bind_calls (RI fn _ arg_bndrs body arg_occs) + spec_info@(SI specs spec_count mb_unspec) + | not (isBottomingId fn) -- Note [Do not specialise diverging functions] + , not (isNeverActive (idInlineActivation fn)) -- See Note [Transfer activation] + , notNull arg_bndrs -- Only specialise functions + , Just all_calls <- lookupVarEnv bind_calls fn + = do { (boring_call, pats) <- callsToPats env specs arg_occs all_calls +-- ; pprTrace "specialise" (vcat [ ppr fn <+> text "with" <+> int (length pats) <+> text "good patterns" +-- , text "arg_occs" <+> ppr arg_occs +-- , text "calls" <+> ppr all_calls +-- , text "good pats" <+> ppr pats]) $ +-- return () + + -- Bale out if too many specialisations + ; let n_pats = length pats + spec_count' = n_pats + spec_count + ; case sc_count env of + Just max | not force_spec && spec_count' > max + -> pprTrace "SpecConstr" msg $ + return (nullUsage, spec_info) + where + msg = vcat [ sep [ ptext (sLit "Function") <+> quotes (ppr fn) + , nest 2 (ptext (sLit "has") <+> + speakNOf spec_count' (ptext (sLit "call pattern")) <> comma <+> + ptext (sLit "but the limit is") <+> int max) ] + , ptext (sLit "Use -fspec-constr-count=n to set the bound") + , extra ] + extra | not opt_PprStyle_Debug = ptext (sLit "Use -dppr-debug to see specialisations") + | otherwise = ptext (sLit "Specialisations:") <+> ppr (pats ++ [p | OS p _ _ _ <- specs]) + + _normal_case -> do { + + let spec_env = decreaseSpecCount env n_pats + ; (spec_usgs, new_specs) <- mapAndUnzipM (spec_one spec_env fn arg_bndrs body) + (pats `zip` [spec_count..]) + -- See Note [Specialise original body] + + ; let spec_usg = combineUsages spec_usgs + (new_usg, mb_unspec') + = case mb_unspec of + Just rhs_usg | boring_call -> (spec_usg `combineUsage` rhs_usg, Nothing) + _ -> (spec_usg, mb_unspec) + + ; return (new_usg, SI (new_specs ++ specs) spec_count' mb_unspec') } } | otherwise - = do { prs <- argsToPats in_scope con_env (args `zip` bndr_occs) - ; let (good_pats, pats) = unzip prs - pat_fvs = varSetElems (exprsFreeVars pats) - qvars = filter (not . (`elemVarEnv` in_scope)) pat_fvs - -- Quantify over variables that are not in sccpe - -- See Note [Shadowing] at the top - - ; -- pprTrace "callToPats" (ppr args $$ ppr prs $$ ppr bndr_occs) $ - if or good_pats - then return (Just (qvars, pats)) - else return Nothing } + = return (nullUsage, spec_info) -- The boring case + --------------------- spec_one :: ScEnv - -> Id -- Function - -> CoreExpr -- Rhs of the original function - -> (([Var], [CoreArg]), Int) - -> UniqSM (CoreRule, (Id,CoreExpr)) -- Rule and binding + -> OutId -- Function + -> [InVar] -- Lambda-binders of RHS; should match patterns + -> InExpr -- Body of the original function + -> (CallPat, Int) + -> UniqSM (ScUsage, OneSpec) -- Rule and binding -- spec_one creates a specialised copy of the function, together -- with a rule for using it. I'm very proud of how short this @@ -811,7 +1238,8 @@ spec_one :: ScEnv [c::*, v::(b,c) are presumably bound by the (...) part] ==> f_spec = /\ b c \ v::(b,c) hw::[(a,(b,c))] -> - (...entire RHS of f...) (b,c) ((:) (a,(b,c)) (x,v) hw) + (...entire body of f...) [b -> (b,c), + y -> ((:) (a,(b,c)) (x,v) hw)] RULE: forall b::* c::*, -- Note, *not* forall a, x v::(b,c), @@ -820,44 +1248,109 @@ spec_one :: ScEnv f (b,c) ((:) (a,(b,c)) (x,v) hw) = f_spec b c v hw -} -spec_one env fn rhs ((vars_to_bind, pats), rule_number) - = getUniqueUs `thenUs` \ spec_uniq -> - let - fn_name = idName fn - fn_loc = nameSrcLoc fn_name - spec_occ = mkSpecOcc (nameOccName fn_name) +spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number) + = do { spec_uniq <- getUniqueUs + ; let spec_env = extendScSubstList (extendScInScope env qvars) + (arg_bndrs `zip` pats) + fn_name = idName fn + fn_loc = nameSrcSpan fn_name + spec_occ = mkSpecOcc (nameOccName fn_name) + rule_name = mkFastString ("SC:" ++ showSDoc (ppr fn <> int rule_number)) + spec_name = mkInternalName spec_uniq spec_occ fn_loc +-- ; pprTrace "{spec_one" (ppr (sc_count env) <+> ppr fn <+> ppr pats <+> text "-->" <+> ppr spec_name) $ +-- return () + + -- Specialise the body + ; (spec_usg, spec_body) <- scExpr spec_env body + +-- ; pprTrace "done spec_one}" (ppr fn) $ +-- return () + + -- And build the results + ; let spec_id = mkLocalId spec_name (mkPiTypes spec_lam_args body_ty) + `setIdStrictness` spec_str -- See Note [Transfer strictness] + `setIdArity` count isId spec_lam_args + spec_str = calcSpecStrictness fn spec_lam_args pats + (spec_lam_args, spec_call_args) = mkWorkerArgs qvars body_ty + -- Usual w/w hack to avoid generating + -- a spec_rhs of unlifted type and no args + + spec_rhs = mkLams spec_lam_args spec_body + body_ty = exprType spec_body + rule_rhs = mkVarApps (Var spec_id) spec_call_args + inline_act = idInlineActivation fn + rule = mkLocalRule rule_name inline_act fn_name qvars pats rule_rhs + -- See Note [Transfer activation] + ; return (spec_usg, OS call_pat rule spec_id spec_rhs) } + +calcSpecStrictness :: Id -- The original function + -> [Var] -> [CoreExpr] -- Call pattern + -> StrictSig -- Strictness of specialised thing +-- See Note [Transfer strictness] +calcSpecStrictness fn qvars pats + = StrictSig (mkTopDmdType spec_dmds TopRes) + where + spec_dmds = [ lookupVarEnv dmd_env qv `orElse` lazyDmd | qv <- qvars, isId qv ] + StrictSig (DmdType _ dmds _) = idStrictness fn + + dmd_env = go emptyVarEnv dmds pats + + go env ds (Type {} : pats) = go env ds pats + go env (d:ds) (pat : pats) = go (go_one env d pat) ds pats + go env _ _ = env + + go_one env d (Var v) = extendVarEnv_C both env v d + go_one env (Box d) e = go_one env d e + go_one env (Eval (Prod ds)) e + | (Var _, args) <- collectArgs e = go env ds args + go_one env _ _ = env - -- Put the type variables first; the type of a term - -- variable may mention a type variable - (tvs, ids) = partition isTyVar vars_to_bind - bndrs = tvs ++ ids - spec_body = mkApps rhs pats - body_ty = exprType spec_body - - (spec_lam_args, spec_call_args) = mkWorkerArgs bndrs body_ty - -- Usual w/w hack to avoid generating - -- a spec_rhs of unlifted type and no args - - rule_name = mkFastString ("SC:" ++ showSDoc (ppr fn <> int rule_number)) - spec_rhs = mkLams spec_lam_args spec_body - spec_id = mkUserLocal spec_occ spec_uniq (mkPiTypes spec_lam_args body_ty) fn_loc - rule_rhs = mkVarApps (Var spec_id) spec_call_args - rule = mkLocalRule rule_name specConstrActivation fn_name bndrs pats rule_rhs - in - returnUs (rule, (spec_id, spec_rhs)) - --- In which phase should the specialise-constructor rules be active? --- Originally I made them always-active, but Manuel found that --- this defeated some clever user-written rules. So Plan B --- is to make them active only in Phase 0; after all, currently, --- the specConstr transformation is only run after the simplifier --- has reached Phase 0. In general one would want it to be --- flag-controllable, but for now I'm leaving it baked in --- [SLPJ Oct 01] -specConstrActivation :: Activation -specConstrActivation = ActiveAfter 0 -- Baked in; see comments above \end{code} +Note [Specialise original body] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The RhsInfo for a binding keeps the *original* body of the binding. We +must specialise that, *not* the result of applying specExpr to the RHS +(which is also kept in RhsInfo). Otherwise we end up specialising a +specialised RHS, and that can lead directly to exponential behaviour. + +Note [Transfer activation] +~~~~~~~~~~~~~~~~~~~~~~~~~~ + This note is for SpecConstr, but exactly the same thing + happens in the overloading specialiser; see + Note [Auto-specialisation and RULES] in Specialise. + +In which phase should the specialise-constructor rules be active? +Originally I made them always-active, but Manuel found that this +defeated some clever user-written rules. Then I made them active only +in Phase 0; after all, currently, the specConstr transformation is +only run after the simplifier has reached Phase 0, but that meant +that specialisations didn't fire inside wrappers; see test +simplCore/should_compile/spec-inline. + +So now I just use the inline-activation of the parent Id, as the +activation for the specialiation RULE, just like the main specialiser; + +This in turn means there is no point in specialising NOINLINE things, +so we test for that. + +Note [Transfer strictness] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +We must transfer strictness information from the original function to +the specialised one. Suppose, for example + + f has strictness SS + and a RULE f (a:as) b = f_spec a as b + +Now we want f_spec to have strictess LLS, otherwise we'll use call-by-need +when calling f_spec instead of call-by-value. And that can result in +unbounded worsening in space (cf the classic foldl vs foldl') + +See Trac #3437 for a good example. + +The function calcSpecStrictness performs the calculation. + + %************************************************************************ %* * \subsection{Argument analysis} @@ -869,14 +1362,61 @@ they are constructor applications. \begin{code} +type CallPat = ([Var], [CoreExpr]) -- Quantified variables and arguments + + +callsToPats :: ScEnv -> [OneSpec] -> [ArgOcc] -> [Call] -> UniqSM (Bool, [CallPat]) + -- Result has no duplicate patterns, + -- nor ones mentioned in done_pats + -- Bool indicates that there was at least one boring pattern +callsToPats env done_specs bndr_occs calls + = do { mb_pats <- mapM (callToPats env bndr_occs) calls + + ; let good_pats :: [([Var], [CoreArg])] + good_pats = catMaybes mb_pats + done_pats = [p | OS p _ _ _ <- done_specs] + is_done p = any (samePat p) done_pats + + ; return (any isNothing mb_pats, + filterOut is_done (nubBy samePat good_pats)) } + +callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat) + -- The [Var] is the variables to quantify over in the rule + -- Type variables come first, since they may scope + -- over the following term variables + -- The [CoreExpr] are the argument patterns for the rule +callToPats env bndr_occs (con_env, args) + | length args < length bndr_occs -- Check saturated + = return Nothing + | otherwise + = do { let in_scope = substInScope (sc_subst env) + ; prs <- argsToPats env in_scope con_env (args `zip` bndr_occs) + ; let (interesting_s, pats) = unzip prs + pat_fvs = varSetElems (exprsFreeVars pats) + qvars = filterOut (`elemInScopeSet` in_scope) pat_fvs + -- Quantify over variables that are not in sccpe + -- at the call site + -- See Note [Shadowing] at the top + + (tvs, ids) = partition isTyCoVar qvars + qvars' = tvs ++ ids + -- Put the type variables first; the type of a term + -- variable may mention a type variable + + ; -- pprTrace "callToPats" (ppr args $$ ppr prs $$ ppr bndr_occs) $ + if or interesting_s + then return (Just (qvars', pats)) + else return Nothing } + -- argToPat takes an actual argument, and returns an abstracted -- version, consisting of just the "constructor skeleton" of the -- argument, with non-constructor sub-expression replaced by new -- placeholder variables. For example: -- C a (D (f x) (g y)) ==> C p1 (D p2 p3) -argToPat :: InScopeEnv -- What's in scope at the fn defn site - -> ConstrEnv -- ConstrEnv at the call site +argToPat :: ScEnv + -> InScopeSet -- What's in scope at the fn defn site + -> ValueEnv -- ValueEnv at the call site -> CoreArg -- A call arg (or component thereof) -> ArgOcc -> UniqSM (Bool, CoreArg) @@ -890,36 +1430,51 @@ argToPat :: InScopeEnv -- What's in scope at the fn defn site -- lvl7 --> (True, lvl7) if lvl7 is bound -- somewhere further out -argToPat in_scope con_env arg@(Type ty) arg_occ +argToPat _env _in_scope _val_env arg@(Type {}) _arg_occ = return (False, arg) -argToPat in_scope con_env (Var v) arg_occ - | not (isLocalId v) || v `elemVarEnv` in_scope - = -- The recursive call passes a variable that - -- is in scope at the function definition site - -- It's worth specialising on this if - -- (a) it's used in an interesting way in the body - -- (b) we know what its value is - if (case arg_occ of { UnkOcc -> False; other -> True }) -- (a) - && isValueUnfolding (idUnfolding v) -- (b) - then return (True, Var v) - else wildCardPat (idType v) - -argToPat in_scope con_env (Let _ arg) arg_occ - = argToPat in_scope con_env arg arg_occ +argToPat env in_scope val_env (Note _ arg) arg_occ + = argToPat env in_scope val_env arg arg_occ + -- Note [Notes in call patterns] + -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + -- Ignore Notes. In particular, we want to ignore any InlineMe notes + -- Perhaps we should not ignore profiling notes, but I'm going to + -- ride roughshod over them all for now. + --- See Note [Notes in RULE matching] in Rules + +argToPat env in_scope val_env (Let _ arg) arg_occ + = argToPat env in_scope val_env arg arg_occ + -- See Note [Matching lets] in Rule.lhs -- Look through let expressions - -- e.g. f (let v = rhs in \y -> ...v...) - -- Here we can specialise for f (\y -> ...) + -- e.g. f (let v = rhs in (v,w)) + -- Here we can specialise for f (v,w) -- because the rule-matcher will look through the let. -argToPat in_scope con_env (Cast arg co) arg_occ - = do { (interesting, arg') <- argToPat in_scope con_env arg arg_occ - ; if interesting then - return (interesting, Cast arg' co) - else - wildCardPat (snd (coercionKind co)) } +{- Disabled; see Note [Matching cases] in Rule.lhs +argToPat env in_scope val_env (Case scrut _ _ [(_, _, rhs)]) arg_occ + | exprOkForSpeculation scrut -- See Note [Matching cases] in Rule.hhs + = argToPat env in_scope val_env rhs arg_occ +-} + +argToPat env in_scope val_env (Cast arg co) arg_occ + | not (ignoreType env ty2) + = do { (interesting, arg') <- argToPat env in_scope val_env arg arg_occ + ; if not interesting then + wildCardPat ty2 + else do + { -- Make a wild-card pattern for the coercion + uniq <- getUniqueUs + ; let co_name = mkSysTvName uniq (fsLit "sg") + co_var = mkCoVar co_name (mkCoKind ty1 ty2) + ; return (interesting, Cast arg' (mkTyVarTy co_var)) } } + where + (ty1, ty2) = coercionKind co + + -argToPat in_scope con_env arg arg_occ +{- Disabling lambda specialisation for now + It's fragile, and the spec_loop can be infinite +argToPat in_scope val_env arg arg_occ | is_value_lam arg = return (True, arg) where @@ -927,75 +1482,154 @@ argToPat in_scope con_env arg arg_occ | isId v = True -- it is inside a type lambda | otherwise = is_value_lam e is_value_lam other = False +-} -argToPat in_scope con_env arg arg_occ - | Just (CV dc args) <- is_con_app_maybe con_env arg + -- Check for a constructor application + -- NB: this *precedes* the Var case, so that we catch nullary constrs +argToPat env in_scope val_env arg arg_occ + | Just (ConVal dc args) <- isValue val_env arg + , not (ignoreAltCon env dc) , case arg_occ of ScrutOcc _ -> True -- Used only by case scrutinee - BothOcc -> case arg of -- Used by case scrut - App {} -> True -- ...and elsewhere... - other -> False - other -> False -- No point; the arg is not decomposed - = do { args' <- argsToPats in_scope con_env (args `zip` conArgOccs arg_occ dc) + BothOcc -> case arg of -- Used elsewhere + App {} -> True -- see Note [Reboxing] + _other -> False + _other -> False -- No point; the arg is not decomposed + = do { args' <- argsToPats env in_scope val_env (args `zip` conArgOccs arg_occ dc) ; return (True, mk_con_app dc (map snd args')) } -argToPat in_scope con_env (Var v) arg_occ - = -- A variable bound inside the function. - -- Don't make a wild-card, because we may usefully share - -- e.g. f a = let x = ... in f (x,x) - -- NB: this case follows the lambda and con-app cases!! - return (False, Var v) - --- The default case: make a wild-card -argToPat in_scope con_env arg arg_occ = wildCardPat (exprType arg) + -- Check if the argument is a variable that + -- is in scope at the function definition site + -- It's worth specialising on this if + -- (a) it's used in an interesting way in the body + -- (b) we know what its value is +argToPat env in_scope val_env (Var v) arg_occ + | case arg_occ of { UnkOcc -> False; _other -> True }, -- (a) + is_value, -- (b) + not (ignoreType env (varType v)) + = return (True, Var v) + where + is_value + | isLocalId v = v `elemInScopeSet` in_scope + && isJust (lookupVarEnv val_env v) + -- Local variables have values in val_env + | otherwise = isValueUnfolding (idUnfolding v) + -- Imports have unfoldings + +-- I'm really not sure what this comment means +-- And by not wild-carding we tend to get forall'd +-- variables that are in soope, which in turn can +-- expose the weakness in let-matching +-- See Note [Matching lets] in Rules + + -- Check for a variable bound inside the function. + -- Don't make a wild-card, because we may usefully share + -- e.g. f a = let x = ... in f (x,x) + -- NB: this case follows the lambda and con-app cases!! +-- argToPat _in_scope _val_env (Var v) _arg_occ +-- = return (False, Var v) + -- SLPJ : disabling this to avoid proliferation of versions + -- also works badly when thinking about seeding the loop + -- from the body of the let + -- f x y = letrec g z = ... in g (x,y) + -- We don't want to specialise for that *particular* x,y + + -- The default case: make a wild-card +argToPat _env _in_scope _val_env arg _arg_occ + = wildCardPat (exprType arg) wildCardPat :: Type -> UniqSM (Bool, CoreArg) wildCardPat ty = do { uniq <- getUniqueUs - ; let id = mkSysLocal FSLIT("sc") uniq ty + ; let id = mkSysLocal (fsLit "sc") uniq ty ; return (False, Var id) } -argsToPats :: InScopeEnv -> ConstrEnv +argsToPats :: ScEnv -> InScopeSet -> ValueEnv -> [(CoreArg, ArgOcc)] -> UniqSM [(Bool, CoreArg)] -argsToPats in_scope con_env args - = mapUs do_one args +argsToPats env in_scope val_env args + = mapM do_one args where - do_one (arg,occ) = argToPat in_scope con_env arg occ + do_one (arg,occ) = argToPat env in_scope val_env arg occ \end{code} \begin{code} -is_con_app_maybe :: ConstrEnv -> CoreExpr -> Maybe ConValue -is_con_app_maybe env (Var v) - = case lookupVarEnv env v of - Just stuff -> Just stuff - -- You might think we could look in the idUnfolding here +isValue :: ValueEnv -> CoreExpr -> Maybe Value +isValue _env (Lit lit) + = Just (ConVal (LitAlt lit) []) + +isValue env (Var v) + | Just stuff <- lookupVarEnv env v + = Just stuff -- You might think we could look in the idUnfolding here -- but that doesn't take account of which branch of a -- case we are in, which is the whole point - Nothing | isCheapUnfolding unf - -> is_con_app_maybe env (unfoldingTemplate unf) - where - unf = idUnfolding v - -- However we do want to consult the unfolding - -- as well, for let-bound constructors! + | not (isLocalId v) && isCheapUnfolding unf + = isValue env (unfoldingTemplate unf) + where + unf = idUnfolding v + -- However we do want to consult the unfolding + -- as well, for let-bound constructors! + +isValue env (Lam b e) + | isTyCoVar b = case isValue env e of + Just _ -> Just LambdaVal + Nothing -> Nothing + | otherwise = Just LambdaVal - other -> Nothing +isValue _env expr -- Maybe it's a constructor application + | (Var fun, args) <- collectArgs expr + = case isDataConWorkId_maybe fun of -is_con_app_maybe env (Lit lit) - = Just (CV (LitAlt lit) []) + Just con | args `lengthAtLeast` dataConRepArity con + -- Check saturated; might be > because the + -- arity excludes type args + -> Just (ConVal (DataAlt con) args) -is_con_app_maybe env expr - = case collectArgs expr of - (Var fun, args) | Just con <- isDataConWorkId_maybe fun, - args `lengthAtLeast` dataConRepArity con - -- Might be > because the arity excludes type args - -> Just (CV (DataAlt con) args) + _other | valArgCount args < idArity fun + -- Under-applied function + -> Just LambdaVal -- Partial application - other -> Nothing + _other -> Nothing + +isValue _env _expr = Nothing mk_con_app :: AltCon -> [CoreArg] -> CoreExpr mk_con_app (LitAlt lit) [] = Lit lit mk_con_app (DataAlt con) args = mkConApp con args -mk_con_app other args = panic "SpecConstr.mk_con_app" +mk_con_app _other _args = panic "SpecConstr.mk_con_app" + +samePat :: CallPat -> CallPat -> Bool +samePat (vs1, as1) (vs2, as2) + = all2 same as1 as2 + where + same (Var v1) (Var v2) + | v1 `elem` vs1 = v2 `elem` vs2 + | v2 `elem` vs2 = False + | otherwise = v1 == v2 + + same (Lit l1) (Lit l2) = l1==l2 + same (App f1 a1) (App f2 a2) = same f1 f2 && same a1 a2 + + same (Type {}) (Type {}) = True -- Note [Ignore type differences] + same (Note _ e1) e2 = same e1 e2 -- Ignore casts and notes + same (Cast e1 _) e2 = same e1 e2 + same e1 (Note _ e2) = same e1 e2 + same e1 (Cast e2 _) = same e1 e2 + + same e1 e2 = WARN( bad e1 || bad e2, ppr e1 $$ ppr e2) + False -- Let, lambda, case should not occur + bad (Case {}) = True + bad (Let {}) = True + bad (Lam {}) = True + bad _other = False \end{code} + +Note [Ignore type differences] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We do not want to generate specialisations where the call patterns +differ only in their type arguments! Not only is it utterly useless, +but it also means that (with polymorphic recursion) we can generate +an infinite number of specialisations. Example is Data.Sequence.adjustTree, +I think. +