+ToDo [Nov 2010]
+~~~~~~~~~~~~~~~
+1. Use a library type rather than an annotation for ForceSpecConstr
+2. Nuke NoSpecConstr
+
%
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
-- for details
module SpecConstr(
- specConstrProgram
+ specConstrProgram
+#ifdef GHCI
+ , SpecConstrAnnotation(..)
+#endif
) where
#include "HsVersions.h"
import CoreSubst
import CoreUtils
import CoreUnfold ( couldBeSmallEnoughToInline )
-import CoreLint ( showPass, endPass )
import CoreFVs ( exprsFreeVars )
-import CoreTidy ( tidyRules )
-import PprCore ( pprRules )
+import CoreMonad
+import HscTypes ( ModGuts(..) )
import WwLib ( mkWorkerArgs )
-import DataCon ( dataConRepArity, dataConUnivTyVars )
+import DataCon
import Coercion
+import Rules
import Type hiding( substTy )
-import Id ( Id, idName, idType, isDataConWorkId_maybe, idArity,
- mkUserLocal, mkSysLocal, idUnfolding, isLocalId )
+import Id
+import MkCore ( mkImpossibleExpr )
import Var
import VarEnv
import VarSet
import Name
-import Rules ( addIdSpecialisations, mkLocalRule, rulesOfBinds )
-import OccName ( mkSpecOcc )
-import ErrUtils ( dumpIfSet_dyn )
-import DynFlags ( DynFlags(..), DynFlag(..) )
-import StaticFlags ( opt_SpecInlineJoinPoints )
-import BasicTypes ( Activation(..) )
-import Maybes ( orElse, catMaybes, isJust )
+import BasicTypes
+import DynFlags ( DynFlags(..) )
+import StaticFlags ( opt_PprStyle_Debug )
+import Maybes ( orElse, catMaybes, isJust, isNothing )
+import Demand
+import DmdAnal ( both )
+import Serialized ( deserializeWithData )
import Util
-import List ( nubBy, partition )
import UniqSupply
import Outputable
import FastString
import UniqFM
+import MonadUtils
+import Control.Monad ( zipWithM )
+import Data.List
+
+
+-- See Note [SpecConstrAnnotation]
+#ifndef GHCI
+type SpecConstrAnnotation = ()
+#else
+import TyCon ( TyCon )
+import GHC.Exts( SpecConstrAnnotation(..) )
+#endif
\end{code}
-----------------------------------------------------
So we want to spot the construtor application inside the cast.
That's why we have the Cast case in argToPat
+Note [Local recursive groups]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For a *local* recursive group, we can see all the calls to the
+function, so we seed the specialisation loop from the calls in the
+body, not from the calls in the RHS. Consider:
+
+ bar m n = foo n (n,n) (n,n) (n,n) (n,n)
+ where
+ foo n p q r s
+ | n == 0 = m
+ | n > 3000 = case p of { (p1,p2) -> foo (n-1) (p2,p1) q r s }
+ | n > 2000 = case q of { (q1,q2) -> foo (n-1) p (q2,q1) r s }
+ | n > 1000 = case r of { (r1,r2) -> foo (n-1) p q (r2,r1) s }
+ | otherwise = case s of { (s1,s2) -> foo (n-1) p q r (s2,s1) }
+
+If we start with the RHSs of 'foo', we get lots and lots of specialisations,
+most of which are not needed. But if we start with the (single) call
+in the rhs of 'bar' we get exactly one fully-specialised copy, and all
+the recursive calls go to this fully-specialised copy. Indeed, the original
+function is later collected as dead code. This is very important in
+specialising the loops arising from stream fusion, for example in NDP where
+we were getting literally hundreds of (mostly unused) specialisations of
+a local function.
+
+In a case like the above we end up never calling the original un-specialised
+function. (Although we still leave its code around just in case.)
+
+However, if we find any boring calls in the body, including *unsaturated*
+ones, such as
+ letrec foo x y = ....foo...
+ in map foo xs
+then we will end up calling the un-specialised function, so then we *should*
+use the calls in the un-specialised RHS as seeds. We call these "boring
+call patterns, and callsToPats reports if it finds any of these.
+
+
+Note [Do not specialise diverging functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Specialising a function that just diverges is a waste of code.
+Furthermore, it broke GHC (simpl014) thus:
+ {-# STR Sb #-}
+ f = \x. case x of (a,b) -> f x
+If we specialise f we get
+ f = \x. case x of (a,b) -> fspec a b
+But fspec doesn't have decent strictnes info. As it happened,
+(f x) :: IO t, so the state hack applied and we eta expanded fspec,
+and hence f. But now f's strictness is less than its arity, which
+breaks an invariant.
+
+Note [SpecConstrAnnotation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+SpecConstrAnnotation is defined in GHC.Exts, and is only guaranteed to
+be available in stage 2 (well, until the bootstrap compiler can be
+guaranteed to have it)
+
+So we define it to be () in stage1 (ie when GHCI is undefined), and
+'#ifdef' out the code that uses it.
+
+See also Note [Forcing specialisation]
+
+Note [Forcing specialisation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+With stream fusion and in other similar cases, we want to fully specialise
+some (but not necessarily all!) loops regardless of their size and the
+number of specialisations. We allow a library to specify this by annotating
+a type with ForceSpecConstr and then adding a parameter of that type to the
+loop. Here is a (simplified) example from the vector library:
+
+ data SPEC = SPEC | SPEC2
+ {-# ANN type SPEC ForceSpecConstr #-}
+
+ foldl :: (a -> b -> a) -> a -> Stream b -> a
+ {-# INLINE foldl #-}
+ foldl f z (Stream step s _) = foldl_loop SPEC z s
+ where
+ foldl_loop !sPEC z s = case step s of
+ Yield x s' -> foldl_loop sPEC (f z x) s'
+ Skip -> foldl_loop sPEC z s'
+ Done -> z
+
+SpecConstr will spot the SPEC parameter and always fully specialise
+foldl_loop. Note that
+
+ * We have to prevent the SPEC argument from being removed by
+ w/w which is why (a) SPEC is a sum type, and (b) we have to seq on
+ the SPEC argument.
+
+ * And lastly, the SPEC argument is ultimately eliminated by
+ SpecConstr itself so there is no runtime overhead.
+
+This is all quite ugly; we ought to come up with a better design.
+
+ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set
+sc_force to True when calling specLoop. This flag does three things:
+ * Ignore specConstrThreshold, to specialise functions of arbitrary size
+ (see scTopBind)
+ * Ignore specConstrCount, to make arbitrary numbers of specialisations
+ (see specialise)
+ * Specialise even for arguments that are not scrutinised in the loop
+ (see argToPat; Trac #4488)
+
+This flag is inherited for nested non-recursive bindings (which are likely to
+be join points and hence should be fully specialised) but reset for nested
+recursive bindings.
+
+What alternatives did I consider? Annotating the loop itself doesn't
+work because (a) it is local and (b) it will be w/w'ed and I having
+w/w propagating annotation somehow doesn't seem like a good idea. The
+types of the loop arguments really seem to be the most persistent
+thing.
+
+Annotating the types that make up the loop state doesn't work,
+either, because (a) it would prevent us from using types like Either
+or tuples here, (b) we don't want to restrict the set of types that
+can be used in Stream states and (c) some types are fixed by the user
+(e.g., the accumulator here) but we still want to specialise as much
+as possible.
+
+ForceSpecConstr is done by way of an annotation:
+ data SPEC = SPEC | SPEC2
+ {-# ANN type SPEC ForceSpecConstr #-}
+But SPEC is the *only* type so annotated, so it'd be better to
+use a particular library type.
+
+Alternatives to ForceSpecConstr
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Instead of giving the loop an extra argument of type SPEC, we
+also considered *wrapping* arguments in SPEC, thus
+ data SPEC a = SPEC a | SPEC2
+
+ loop = \arg -> case arg of
+ SPEC state ->
+ case state of (x,y) -> ... loop (SPEC (x',y')) ...
+ S2 -> error ...
+The idea is that a SPEC argument says "specialise this argument
+regardless of whether the function case-analyses it. But this
+doesn't work well:
+ * SPEC must still be a sum type, else the strictness analyser
+ eliminates it
+ * But that means that 'loop' won't be strict in its real payload
+This loss of strictness in turn screws up specialisation, because
+we may end up with calls like
+ loop (SPEC (case z of (p,q) -> (q,p)))
+Without the SPEC, if 'loop' was strict, the case would move out
+and we'd see loop applied to a pair. But if 'loop' isn' strict
+this doesn't look like a specialisable call.
+
+Note [NoSpecConstr]
+~~~~~~~~~~~~~~~~~~~
+The ignoreDataCon stuff allows you to say
+ {-# ANN type T NoSpecConstr #-}
+to mean "don't specialise on arguments of this type. It was added
+before we had ForceSpecConstr. Lacking ForceSpecConstr we specialised
+regardless of size; and then we needed a way to turn that *off*. Now
+that we have ForceSpecConstr, this NoSpecConstr is probably redundant.
+(Used only for PArray.)
-----------------------------------------------------
Stuff not yet handled
a T (I# x) really, because T is strict and Int has one constructor. (We can't
unbox the strict fields, becuase T is polymorphic!)
-
-
%************************************************************************
%* *
\subsection{Top level wrapper stuff}
%************************************************************************
\begin{code}
-specConstrProgram :: DynFlags -> UniqSupply -> [CoreBind] -> IO [CoreBind]
-specConstrProgram dflags us binds
+specConstrProgram :: ModGuts -> CoreM ModGuts
+specConstrProgram guts
= do
- showPass dflags "SpecConstr"
-
- let (binds', _) = initUs us (go (initScEnv dflags) binds)
-
- endPass dflags "SpecConstr" Opt_D_dump_spec binds'
-
- dumpIfSet_dyn dflags Opt_D_dump_rules "Top-level specialisations"
- (pprRules (tidyRules emptyTidyEnv (rulesOfBinds binds')))
-
- return binds'
+ dflags <- getDynFlags
+ us <- getUniqueSupplyM
+ annos <- getFirstAnnotations deserializeWithData guts
+ let binds' = fst $ initUs us (go (initScEnv dflags annos) (mg_binds guts))
+ return (guts { mg_binds = binds' })
where
go _ [] = return []
- go env (bind:binds) = do (env', _, bind') <- scBind env bind
+ go env (bind:binds) = do (env', bind') <- scTopBind env bind
binds' <- go env' binds
return (bind' : binds')
\end{code}
%************************************************************************
\begin{code}
-data ScEnv = SCE { sc_size :: Maybe Int, -- Size threshold
+data ScEnv = SCE { sc_size :: Maybe Int, -- Size threshold
+ sc_count :: Maybe Int, -- Max # of specialisations for any one fn
+ -- See Note [Avoiding exponential blowup]
+ sc_force :: Bool, -- Force specialisation?
+ -- See Note [Forcing specialisation]
sc_subst :: Subst, -- Current substitution
-- Maps InIds to OutExprs
-- Binds interesting non-top-level variables
-- Domain is OutVars (*after* applying the substitution)
- sc_vals :: ValueEnv
+ sc_vals :: ValueEnv,
-- Domain is OutIds (*after* applying the substitution)
-- Used even for top-level bindings (but not imported ones)
+
+ sc_annotations :: UniqFM SpecConstrAnnotation
}
---------------------
-- As we go, we apply a substitution (sc_subst) to the current term
-type InExpr = CoreExpr -- *Before* applying the subst
+type InExpr = CoreExpr -- _Before_ applying the subst
+type InVar = Var
-type OutExpr = CoreExpr -- *After* applying the subst
+type OutExpr = CoreExpr -- _After_ applying the subst
type OutId = Id
type OutVar = Var
---------------------
type ValueEnv = IdEnv Value -- Domain is OutIds
-data Value = ConVal AltCon [CoreArg] -- *Saturated* constructors
+data Value = ConVal AltCon [CoreArg] -- _Saturated_ constructors
+ -- The AltCon is never DEFAULT
| LambdaVal -- Inlinable lambdas or PAPs
instance Outputable Value where
ppr (ConVal con args) = ppr con <+> interpp'SP args
- ppr LambdaVal = ptext SLIT("<Lambda>")
+ ppr LambdaVal = ptext (sLit "<Lambda>")
---------------------
-initScEnv :: DynFlags -> ScEnv
-initScEnv dflags
+initScEnv :: DynFlags -> UniqFM SpecConstrAnnotation -> ScEnv
+initScEnv dflags anns
= SCE { sc_size = specConstrThreshold dflags,
+ sc_count = specConstrCount dflags,
+ sc_force = False,
sc_subst = emptySubst,
sc_how_bound = emptyVarEnv,
- sc_vals = emptyVarEnv }
+ sc_vals = emptyVarEnv,
+ sc_annotations = anns }
data HowBound = RecFun -- These are the recursive functions for which
-- we seek interesting call patterns
ppr RecFun = text "RecFun"
ppr RecArg = text "RecArg"
+scForce :: ScEnv -> Bool -> ScEnv
+scForce env b = env { sc_force = b }
+
lookupHowBound :: ScEnv -> Id -> Maybe HowBound
lookupHowBound env id = lookupVarEnv (sc_how_bound env) id
scSubstId :: ScEnv -> Id -> CoreExpr
-scSubstId env v = lookupIdSubst (sc_subst env) v
+scSubstId env v = lookupIdSubst (text "scSubstId") (sc_subst env) v
scSubstTy :: ScEnv -> Type -> Type
scSubstTy env ty = substTy (sc_subst env) ty
extendValEnv env _ Nothing = env
extendValEnv env id (Just cv) = env { sc_vals = extendVarEnv (sc_vals env) id cv }
-extendCaseBndrs :: ScEnv -> CoreExpr -> Id -> AltCon -> [Var] -> ScEnv
+extendCaseBndrs :: ScEnv -> OutExpr -> OutId -> AltCon -> [Var] -> (ScEnv, [Var])
-- When we encounter
-- case scrut of b
-- C x y -> ...
--- we want to bind b, and perhaps scrut too, to (C x y)
--- NB: Extends only the sc_vals part of the envt
+-- we want to bind b, to (C x y)
+-- NB1: Extends only the sc_vals part of the envt
+-- NB2: Kill the dead-ness info on the pattern binders x,y, since
+-- they are potentially made alive by the [b -> C x y] binding
extendCaseBndrs env scrut case_bndr con alt_bndrs
- = case scrut of
- Var v -> extendValEnv env1 v cval
- _other -> env1
+ = (env2, alt_bndrs')
where
- env1 = extendValEnv env case_bndr cval
+ live_case_bndr = not (isDeadBinder case_bndr)
+ env1 | Var v <- scrut = extendValEnv env v cval
+ | otherwise = env -- See Note [Add scrutinee to ValueEnv too]
+ env2 | live_case_bndr = extendValEnv env1 case_bndr cval
+ | otherwise = env1
+
+ alt_bndrs' | case scrut of { Var {} -> True; _ -> live_case_bndr }
+ = map zap alt_bndrs
+ | otherwise
+ = alt_bndrs
+
cval = case con of
DEFAULT -> Nothing
LitAlt {} -> Just (ConVal con [])
where
vanilla_args = map Type (tyConAppArgs (idType case_bndr)) ++
varsToCoreExprs alt_bndrs
+
+ zap v | isTyCoVar v = v -- See NB2 above
+ | otherwise = zapIdOccInfo v
+
+
+decreaseSpecCount :: ScEnv -> Int -> ScEnv
+-- See Note [Avoiding exponential blowup]
+decreaseSpecCount env n_specs
+ = env { sc_count = case sc_count env of
+ Nothing -> Nothing
+ Just n -> Just (n `div` (n_specs + 1)) }
+ -- The "+1" takes account of the original function;
+ -- See Note [Avoiding exponential blowup]
+
+---------------------------------------------------
+-- See Note [SpecConstrAnnotation]
+ignoreType :: ScEnv -> Type -> Bool
+ignoreDataCon :: ScEnv -> DataCon -> Bool
+forceSpecBndr :: ScEnv -> Var -> Bool
+#ifndef GHCI
+ignoreType _ _ = False
+ignoreDataCon _ _ = False
+forceSpecBndr _ _ = False
+
+#else /* GHCI */
+
+ignoreDataCon env dc = ignoreTyCon env (dataConTyCon dc)
+
+ignoreType env ty
+ = case splitTyConApp_maybe ty of
+ Just (tycon, _) -> ignoreTyCon env tycon
+ _ -> False
+
+ignoreTyCon :: ScEnv -> TyCon -> Bool
+ignoreTyCon env tycon
+ = lookupUFM (sc_annotations env) tycon == Just NoSpecConstr
+
+forceSpecBndr env var = forceSpecFunTy env . snd . splitForAllTys . varType $ var
+
+forceSpecFunTy :: ScEnv -> Type -> Bool
+forceSpecFunTy env = any (forceSpecArgTy env) . fst . splitFunTys
+
+forceSpecArgTy :: ScEnv -> Type -> Bool
+forceSpecArgTy env ty
+ | Just ty' <- coreView ty = forceSpecArgTy env ty'
+
+forceSpecArgTy env ty
+ | Just (tycon, tys) <- splitTyConApp_maybe ty
+ , tycon /= funTyCon
+ = lookupUFM (sc_annotations env) tycon == Just ForceSpecConstr
+ || any (forceSpecArgTy env) tys
+
+forceSpecArgTy _ _ = False
+#endif /* GHCI */
\end{code}
+Note [Add scrutinee to ValueEnv too]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this:
+ case x of y
+ (a,b) -> case b of c
+ I# v -> ...(f y)...
+By the time we get to the call (f y), the ValueEnv
+will have a binding for y, and for c
+ y -> (a,b)
+ c -> I# v
+BUT that's not enough! Looking at the call (f y) we
+see that y is pair (a,b), but we also need to know what 'b' is.
+So in extendCaseBndrs we must *also* add the binding
+ b -> I# v
+else we lose a useful specialisation for f. This is necessary even
+though the simplifier has systematically replaced uses of 'x' with 'y'
+and 'b' with 'c' in the code. The use of 'b' in the ValueEnv came
+from outside the case. See Trac #4908 for the live example.
+
+Note [Avoiding exponential blowup]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The sc_count field of the ScEnv says how many times we are prepared to
+duplicate a single function. But we must take care with recursive
+specialiations. Consider
+
+ let $j1 = let $j2 = let $j3 = ...
+ in
+ ...$j3...
+ in
+ ...$j2...
+ in
+ ...$j1...
+
+If we specialise $j1 then in each specialisation (as well as the original)
+we can specialise $j2, and similarly $j3. Even if we make just *one*
+specialisation of each, becuase we also have the original we'll get 2^n
+copies of $j3, which is not good.
+
+So when recursively specialising we divide the sc_count by the number of
+copies we are making at this level, including the original.
+
%************************************************************************
%* *
combineUsages [] = nullUsage
combineUsages us = foldr1 combineUsage us
-lookupOcc :: ScUsage -> OutVar -> (ScUsage, ArgOcc)
-lookupOcc (SCU { scu_calls = sc_calls, scu_occs = sc_occs }) bndr
- = (SCU {scu_calls = sc_calls, scu_occs = delVarEnv sc_occs bndr},
- lookupVarEnv sc_occs bndr `orElse` NoOcc)
-
lookupOccs :: ScUsage -> [OutVar] -> (ScUsage, [ArgOcc])
lookupOccs (SCU { scu_calls = sc_calls, scu_occs = sc_occs }) bndrs
= (SCU {scu_calls = sc_calls, scu_occs = delVarEnvList sc_occs bndrs},
data ArgOcc = NoOcc -- Doesn't occur at all; or a type argument
| UnkOcc -- Used in some unknown way
- | ScrutOcc (UniqFM [ArgOcc]) -- See Note [ScrutOcc]
-
- | BothOcc -- Definitely taken apart, *and* perhaps used in some other way
+ | ScrutOcc -- See Note [ScrutOcc]
+ (DataConEnv [ArgOcc]) -- How the sub-components are used
-{- Note [ScrutOcc]
+type DataConEnv a = UniqFM a -- Keyed by DataCon
+{- Note [ScrutOcc]
+~~~~~~~~~~~~~~~~~~~
An occurrence of ScrutOcc indicates that the thing, or a `cast` version of the thing,
is *only* taken apart or applied.
-}
instance Outputable ArgOcc where
- ppr (ScrutOcc xs) = ptext SLIT("scrut-occ") <> ppr xs
- ppr UnkOcc = ptext SLIT("unk-occ")
- ppr BothOcc = ptext SLIT("both-occ")
- ppr NoOcc = ptext SLIT("no-occ")
+ ppr (ScrutOcc xs) = ptext (sLit "scrut-occ") <> ppr xs
+ ppr UnkOcc = ptext (sLit "unk-occ")
+ ppr NoOcc = ptext (sLit "no-occ")
+
+evalScrutOcc :: ArgOcc
+evalScrutOcc = ScrutOcc emptyUFM
-- Experimentally, this vesion of combineOcc makes ScrutOcc "win", so
-- that if the thing is scrutinised anywhere then we get to see that
combineOcc NoOcc occ = occ
combineOcc occ NoOcc = occ
combineOcc (ScrutOcc xs) (ScrutOcc ys) = ScrutOcc (plusUFM_C combineOccs xs ys)
-combineOcc _occ (ScrutOcc ys) = ScrutOcc ys
-combineOcc (ScrutOcc xs) _occ = ScrutOcc xs
+combineOcc UnkOcc (ScrutOcc ys) = ScrutOcc ys
+combineOcc (ScrutOcc xs) UnkOcc = ScrutOcc xs
combineOcc UnkOcc UnkOcc = UnkOcc
-combineOcc _ _ = BothOcc
combineOccs :: [ArgOcc] -> [ArgOcc] -> [ArgOcc]
combineOccs xs ys = zipWithEqual "combineOccs" combineOcc xs ys
setScrutOcc :: ScEnv -> ScUsage -> OutExpr -> ArgOcc -> ScUsage
--- *Overwrite* the occurrence info for the scrutinee, if the scrutinee
+-- _Overwrite_ the occurrence info for the scrutinee, if the scrutinee
-- is a variable, and an interesting variable
setScrutOcc env usg (Cast e _) occ = setScrutOcc env usg e occ
setScrutOcc env usg (Note _ e) occ = setScrutOcc env usg e occ
| otherwise = usg
setScrutOcc _env usg _other _occ -- Catch-all
= usg
-
-conArgOccs :: ArgOcc -> AltCon -> [ArgOcc]
--- Find usage of components of data con; returns [UnkOcc...] if unknown
--- See Note [ScrutOcc] for the extra UnkOccs in the vanilla datacon case
-
-conArgOccs (ScrutOcc fm) (DataAlt dc)
- | Just pat_arg_occs <- lookupUFM fm dc
- = [UnkOcc | _ <- dataConUnivTyVars dc] ++ pat_arg_occs
-
-conArgOccs _other _con = repeat UnkOcc
\end{code}
%************************************************************************
scExpr' env (Var v) = case scSubstId env v of
- Var v' -> return (varUsage env v' UnkOcc, Var v')
+ Var v' -> return (mkVarUsage env v' [], Var v')
e' -> scExpr (zapScSubst env) e'
scExpr' env (Type t) = return (nullUsage, Type (scSubstTy env t))
where
sc_con_app con args scrut' -- Known constructor; simplify
= do { let (_, bs, rhs) = findAlt con alts
- alt_env' = extendScSubstList env ((b,scrut') : bs `zip` trimConArgs con args)
+ `orElse` (DEFAULT, [], mkImpossibleExpr (coreAltsType alts))
+ alt_env' = extendScSubstList env ((b,scrut') : bs `zip` trimConArgs con args)
; scExpr alt_env' rhs }
sc_vanilla scrut_usg scrut' -- Normal case
; (alt_usgs, alt_occs, alts')
<- mapAndUnzip3M (sc_alt alt_env scrut' b') alts
- ; let (alt_usg, b_occ) = lookupOcc (combineUsages alt_usgs) b'
- scrut_occ = foldr combineOcc b_occ alt_occs
- scrut_usg' = setScrutOcc env scrut_usg scrut' scrut_occ
+ ; let scrut_occ = foldr1 combineOcc alt_occs -- Never empty
+ scrut_usg' = setScrutOcc env scrut_usg scrut' scrut_occ
-- The combined usage of the scrutinee is given
-- by scrut_occ, which is passed to scScrut, which
-- in turn treats a bare-variable scrutinee specially
- ; return (alt_usg `combineUsage` scrut_usg',
+ ; return (foldr combineUsage scrut_usg' alt_usgs,
Case scrut' b' (scSubstTy env ty) alts') }
sc_alt env scrut' b' (con,bs,rhs)
- = do { let (env1, bs') = extendBndrsWith RecArg env bs
- env2 = extendCaseBndrs env1 scrut' b' con bs'
- ; (usg,rhs') <- scExpr env2 rhs
- ; let (usg', arg_occs) = lookupOccs usg bs'
+ = do { let (env1, bs1) = extendBndrsWith RecArg env bs
+ (env2, bs2) = extendCaseBndrs env1 scrut' b' con bs1
+ ; (usg, rhs') <- scExpr env2 rhs
+ ; let (usg', b_occ:arg_occs) = lookupOccs usg (b':bs2)
scrut_occ = case con of
DataAlt dc -> ScrutOcc (unitUFM dc arg_occs)
- _ofther -> ScrutOcc emptyUFM
- ; return (usg', scrut_occ, (con,bs',rhs')) }
+ _ -> ScrutOcc emptyUFM
+ ; return (usg', b_occ `combineOcc` scrut_occ, (con, bs2, rhs')) }
scExpr' env (Let (NonRec bndr rhs) body)
- | isTyVar bndr -- Type-lets may be created by doBeta
+ | isTyCoVar bndr -- Type-lets may be created by doBeta
= scExpr' (extendScSubst env bndr rhs) body
- | otherwise
+
+ | otherwise
= do { let (body_env, bndr') = extendBndr env bndr
- ; (rhs_usg, (_, args', rhs_body', _)) <- scRecRhs env (bndr',rhs)
- ; let rhs' = mkLams args' rhs_body'
-
- ; if not opt_SpecInlineJoinPoints || null args' || isEmptyVarEnv (scu_calls rhs_usg) then do
- do { -- Vanilla case
- let body_env2 = extendValEnv body_env bndr' (isValue (sc_vals env) rhs')
- -- Record if the RHS is a value
- ; (body_usg, body') <- scExpr body_env2 body
- ; return (body_usg `combineUsage` rhs_usg, Let (NonRec bndr' rhs') body') }
- else -- For now, just brutally inline the join point
- do { let body_env2 = extendScSubst env bndr rhs'
- ; scExpr body_env2 body } }
-
+ ; (rhs_usg, rhs_info) <- scRecRhs env (bndr',rhs)
+
+ ; let body_env2 = extendHowBound body_env [bndr'] RecFun
+ -- Note [Local let bindings]
+ RI _ rhs' _ _ _ = rhs_info
+ body_env3 = extendValEnv body_env2 bndr' (isValue (sc_vals env) rhs')
-{- Old code
- do { -- Join-point case
- let body_env2 = extendHowBound body_env [bndr'] RecFun
- -- If the RHS of this 'let' contains calls
- -- to recursive functions that we're trying
- -- to specialise, then treat this let too
- -- as one to specialise
- ; (body_usg, body') <- scExpr body_env2 body
+ ; (body_usg, body') <- scExpr body_env3 body
- ; (spec_usg, _, specs) <- specialise env (scu_calls body_usg) ([], rhs_info)
+ -- NB: For non-recursive bindings we inherit sc_force flag from
+ -- the parent function (see Note [Forcing specialisation])
+ ; (spec_usg, specs) <- specialise env
+ (scu_calls body_usg)
+ rhs_info
+ (SI [] 0 (Just rhs_usg))
- ; return (body_usg { scu_calls = scu_calls body_usg `delVarEnv` bndr' }
- `combineUsage` rhs_usg `combineUsage` spec_usg,
- mkLets [NonRec b r | (b,r) <- addRules rhs_info specs] body')
+ ; return (body_usg { scu_calls = scu_calls body_usg `delVarEnv` bndr' }
+ `combineUsage` rhs_usg `combineUsage` spec_usg,
+ mkLets [NonRec b r | (b,r) <- specInfoBinds rhs_info specs] body')
}
--}
+
+-- A *local* recursive group: see Note [Local recursive groups]
scExpr' env (Let (Rec prs) body)
- = do { (env', bind_usg, bind') <- scBind env (Rec prs)
- ; (body_usg, body') <- scExpr env' body
- ; return (bind_usg `combineUsage` body_usg, Let bind' body') }
+ = do { let (bndrs,rhss) = unzip prs
+ (rhs_env1,bndrs') = extendRecBndrs env bndrs
+ rhs_env2 = extendHowBound rhs_env1 bndrs' RecFun
+ force_spec = any (forceSpecBndr env) bndrs'
+ -- Note [Forcing specialisation]
+
+ ; (rhs_usgs, rhs_infos) <- mapAndUnzipM (scRecRhs rhs_env2) (bndrs' `zip` rhss)
+ ; (body_usg, body') <- scExpr rhs_env2 body
+
+ -- NB: start specLoop from body_usg
+ ; (spec_usg, specs) <- specLoop (scForce rhs_env2 force_spec)
+ (scu_calls body_usg) rhs_infos nullUsage
+ [SI [] 0 (Just usg) | usg <- rhs_usgs]
+ -- Do not unconditionally generate specialisations from rhs_usgs
+ -- Instead use them only if we find an unspecialised call
+ -- See Note [Local recursive groups]
+
+ ; let rhs_usg = combineUsages rhs_usgs
+ all_usg = spec_usg `combineUsage` rhs_usg `combineUsage` body_usg
+ bind' = Rec (concat (zipWith specInfoBinds rhs_infos specs))
+
+ ; return (all_usg { scu_calls = scu_calls all_usg `delVarEnvList` bndrs' },
+ Let bind' body') }
+\end{code}
+
+Note [Local let bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+It is not uncommon to find this
+
+ let $j = \x. <blah> in ...$j True...$j True...
+
+Here $j is an arbitrary let-bound function, but it often comes up for
+join points. We might like to specialise $j for its call patterns.
+Notice the difference from a letrec, where we look for call patterns
+in the *RHS* of the function. Here we look for call patterns in the
+*body* of the let.
+At one point I predicated this on the RHS mentioning the outer
+recursive function, but that's not essential and might even be
+harmful. I'm not sure.
------------------------------------
+
+\begin{code}
scApp :: ScEnv -> (InExpr, [InExpr]) -> UniqSM (ScUsage, CoreExpr)
scApp env (Var fn, args) -- Function is a variable
fn'@(Lam {}) -> scExpr (zapScSubst env) (doBeta fn' args')
-- Do beta-reduction and try again
- Var fn' -> return (arg_usg `combineUsage` fn_usg, mkApps (Var fn') args')
- where
- fn_usg = case lookupHowBound env fn' of
- Just RecFun -> SCU { scu_calls = unitVarEnv fn' [(sc_vals env, args')],
- scu_occs = emptyVarEnv }
- Just RecArg -> SCU { scu_calls = emptyVarEnv,
- scu_occs = unitVarEnv fn' (ScrutOcc emptyUFM) }
- Nothing -> nullUsage
-
+ Var fn' -> return (arg_usg `combineUsage` mkVarUsage env fn' args',
+ mkApps (Var fn') args')
other_fn' -> return (arg_usg, mkApps other_fn' args') }
-- NB: doing this ignores any usage info from the substituted
; return (combineUsages arg_usgs `combineUsage` fn_usg, mkApps fn' args') }
----------------------
-scBind :: ScEnv -> CoreBind -> UniqSM (ScEnv, ScUsage, CoreBind)
-scBind env (Rec prs)
+mkVarUsage :: ScEnv -> Id -> [CoreExpr] -> ScUsage
+mkVarUsage env fn args
+ = case lookupHowBound env fn of
+ Just RecFun -> SCU { scu_calls = unitVarEnv fn [(sc_vals env, args)]
+ , scu_occs = emptyVarEnv }
+ Just RecArg -> SCU { scu_calls = emptyVarEnv
+ , scu_occs = unitVarEnv fn arg_occ }
+ Nothing -> nullUsage
+ where
+ -- I rather think we could use UnkOcc all the time
+ arg_occ | null args = UnkOcc
+ | otherwise = evalScrutOcc
+
+----------------------
+scTopBind :: ScEnv -> CoreBind -> UniqSM (ScEnv, CoreBind)
+scTopBind env (Rec prs)
| Just threshold <- sc_size env
+ , not force_spec
, not (all (couldBeSmallEnoughToInline threshold) rhss)
-- No specialisation
= do { let (rhs_env,bndrs') = extendRecBndrs env bndrs
- ; (rhs_usgs, rhss') <- mapAndUnzipM (scExpr rhs_env) rhss
- ; return (rhs_env, combineUsages rhs_usgs, Rec (bndrs' `zip` rhss')) }
+ ; (_, rhss') <- mapAndUnzipM (scExpr rhs_env) rhss
+ ; return (rhs_env, Rec (bndrs' `zip` rhss')) }
| otherwise -- Do specialisation
= do { let (rhs_env1,bndrs') = extendRecBndrs env bndrs
rhs_env2 = extendHowBound rhs_env1 bndrs' RecFun
; (rhs_usgs, rhs_infos) <- mapAndUnzipM (scRecRhs rhs_env2) (bndrs' `zip` rhss)
; let rhs_usg = combineUsages rhs_usgs
- ; (spec_usg, specs) <- spec_loop rhs_env2 (scu_calls rhs_usg)
- (repeat [] `zip` rhs_infos)
-
- ; let all_usg = rhs_usg `combineUsage` spec_usg
+ ; (_, specs) <- specLoop (scForce rhs_env2 force_spec)
+ (scu_calls rhs_usg) rhs_infos nullUsage
+ [SI [] 0 Nothing | _ <- bndrs]
; return (rhs_env1, -- For the body of the letrec, delete the RecFun business
- all_usg { scu_calls = scu_calls rhs_usg `delVarEnvList` bndrs' },
- Rec (concat (zipWith addRules rhs_infos specs))) }
+ Rec (concat (zipWith specInfoBinds rhs_infos specs))) }
where
(bndrs,rhss) = unzip prs
+ force_spec = any (forceSpecBndr env) bndrs
+ -- Note [Forcing specialisation]
- spec_loop :: ScEnv
- -> CallEnv
- -> [([CallPat], RhsInfo)] -- One per binder
- -> UniqSM (ScUsage, [[SpecInfo]]) -- One list per binder
- spec_loop env all_calls rhs_stuff
- = do { (spec_usg_s, new_pats_s, specs) <- mapAndUnzip3M (specialise env all_calls) rhs_stuff
- ; let spec_usg = combineUsages spec_usg_s
- ; if all null new_pats_s then
- return (spec_usg, specs) else do
- { (spec_usg1, specs1) <- spec_loop env (scu_calls spec_usg)
- (zipWith add_pats new_pats_s rhs_stuff)
- ; return (spec_usg `combineUsage` spec_usg1, zipWith (++) specs specs1) } }
-
- add_pats :: [CallPat] -> ([CallPat], RhsInfo) -> ([CallPat], RhsInfo)
- add_pats new_pats (done_pats, rhs_info) = (done_pats ++ new_pats, rhs_info)
-
-scBind env (NonRec bndr rhs)
- = do { (usg, rhs') <- scExpr env rhs
+scTopBind env (NonRec bndr rhs)
+ = do { (_, rhs') <- scExpr env rhs
; let (env1, bndr') = extendBndr env bndr
env2 = extendValEnv env1 bndr' (isValue (sc_vals env) rhs')
- ; return (env2, usg, NonRec bndr' rhs') }
+ ; return (env2, NonRec bndr' rhs') }
----------------------
scRecRhs :: ScEnv -> (OutId, InExpr) -> UniqSM (ScUsage, RhsInfo)
(body_env, arg_bndrs') = extendBndrsWith RecArg env arg_bndrs
; (body_usg, body') <- scExpr body_env body
; let (rhs_usg, arg_occs) = lookupOccs body_usg arg_bndrs'
- ; return (rhs_usg, (bndr, arg_bndrs', body', arg_occs)) }
-
+ ; return (rhs_usg, RI bndr (mkLams arg_bndrs' body')
+ arg_bndrs body arg_occs) }
-- The arg_occs says how the visible,
-- lambda-bound binders of the RHS are used
-- (including the TyVar binders)
-- Two pats are the same if they match both ways
----------------------
-addRules :: RhsInfo -> [SpecInfo] -> [(Id,CoreExpr)]
-addRules (fn, args, body, _) specs
- = [(id,rhs) | (_,id,rhs) <- specs] ++
- [(fn `addIdSpecialisations` rules, mkLams args body)]
- where
- rules = [r | (r,_,_) <- specs]
+specInfoBinds :: RhsInfo -> SpecInfo -> [(Id,CoreExpr)]
+specInfoBinds (RI fn new_rhs _ _ _) (SI specs _ _)
+ = [(id,rhs) | OS _ _ id rhs <- specs] ++
+ -- First the specialised bindings
-----------------------
-varUsage :: ScEnv -> OutVar -> ArgOcc -> ScUsage
-varUsage env v use
- | Just RecArg <- lookupHowBound env v = SCU { scu_calls = emptyVarEnv
- , scu_occs = unitVarEnv v use }
- | otherwise = nullUsage
+ [(fn `addIdSpecialisations` rules, new_rhs)]
+ -- And now the original binding
+ where
+ rules = [r | OS _ r _ _ <- specs]
\end{code}
%************************************************************************
\begin{code}
-type RhsInfo = (OutId, [OutVar], OutExpr, [ArgOcc])
- -- Info about the *original* RHS of a binding we are specialising
- -- Original binding f = \xs.body
- -- Plus info about usage of arguments
+data RhsInfo = RI OutId -- The binder
+ OutExpr -- The new RHS
+ [InVar] InExpr -- The *original* RHS (\xs.body)
+ -- Note [Specialise original body]
+ [ArgOcc] -- Info on how the xs occur in body
-type SpecInfo = (CoreRule, OutId, OutExpr)
- -- One specialisation: Rule plus definition
+data SpecInfo = SI [OneSpec] -- The specialisations we have generated
+ Int -- Length of specs; used for numbering them
+
+ (Maybe ScUsage) -- Just cs => we have not yet used calls in the
+ -- from calls in the *original* RHS as
+ -- seeds for new specialisations;
+ -- if you decide to do so, here is the
+ -- RHS usage (which has not yet been
+ -- unleashed)
+ -- Nothing => we have
+ -- See Note [Local recursive groups]
+
+ -- One specialisation: Rule plus definition
+data OneSpec = OS CallPat -- Call pattern that generated this specialisation
+ CoreRule -- Rule connecting original id with the specialisation
+ OutId OutExpr -- Spec id + its rhs
+
+
+specLoop :: ScEnv
+ -> CallEnv
+ -> [RhsInfo]
+ -> ScUsage -> [SpecInfo] -- One per binder; acccumulating parameter
+ -> UniqSM (ScUsage, [SpecInfo]) -- ...ditto...
+
+specLoop env all_calls rhs_infos usg_so_far specs_so_far
+ = do { specs_w_usg <- zipWithM (specialise env all_calls) rhs_infos specs_so_far
+ ; let (new_usg_s, all_specs) = unzip specs_w_usg
+ new_usg = combineUsages new_usg_s
+ new_calls = scu_calls new_usg
+ all_usg = usg_so_far `combineUsage` new_usg
+ ; if isEmptyVarEnv new_calls then
+ return (all_usg, all_specs)
+ else
+ specLoop env new_calls rhs_infos all_usg all_specs }
specialise
:: ScEnv
-> CallEnv -- Info on calls
- -> ([CallPat], RhsInfo) -- Original RHS plus patterns dealt with
- -> UniqSM (ScUsage, [CallPat], [SpecInfo]) -- Specialised calls
+ -> RhsInfo
+ -> SpecInfo -- Original RHS plus patterns dealt with
+ -> UniqSM (ScUsage, SpecInfo) -- New specialised versions and their usage
+-- Note: this only generates *specialised* bindings
+-- The original binding is added by specInfoBinds
+--
-- Note: the rhs here is the optimised version of the original rhs
-- So when we make a specialised copy of the RHS, we're starting
-- from an RHS whose nested functions have been optimised already.
-specialise env bind_calls (done_pats, (fn, arg_bndrs, body, arg_occs))
- | notNull arg_bndrs, -- Only specialise functions
- Just all_calls <- lookupVarEnv bind_calls fn
- = do { pats <- callsToPats env done_pats arg_occs all_calls
--- ; pprTrace "specialise" (vcat [ppr fn <+> ppr arg_occs,
--- text "calls" <+> ppr all_calls,
--- text "good pats" <+> ppr pats]) $
+specialise env bind_calls (RI fn _ arg_bndrs body arg_occs)
+ spec_info@(SI specs spec_count mb_unspec)
+ | not (isBottomingId fn) -- Note [Do not specialise diverging functions]
+ , not (isNeverActive (idInlineActivation fn)) -- See Note [Transfer activation]
+ , notNull arg_bndrs -- Only specialise functions
+ , Just all_calls <- lookupVarEnv bind_calls fn
+ = do { (boring_call, pats) <- callsToPats env specs arg_occs all_calls
+-- ; pprTrace "specialise" (vcat [ ppr fn <+> text "with" <+> int (length pats) <+> text "good patterns"
+-- , text "arg_occs" <+> ppr arg_occs
+-- , text "calls" <+> ppr all_calls
+-- , text "good pats" <+> ppr pats]) $
-- return ()
- ; (spec_usgs, specs) <- mapAndUnzipM (spec_one env fn arg_bndrs body)
- (pats `zip` [length done_pats..])
-
- ; return (combineUsages spec_usgs, pats, specs) }
+ -- Bale out if too many specialisations
+ ; let n_pats = length pats
+ spec_count' = n_pats + spec_count
+ ; case sc_count env of
+ Just max | not (sc_force env) && spec_count' > max
+ -> pprTrace "SpecConstr" msg $
+ return (nullUsage, spec_info)
+ where
+ msg = vcat [ sep [ ptext (sLit "Function") <+> quotes (ppr fn)
+ , nest 2 (ptext (sLit "has") <+>
+ speakNOf spec_count' (ptext (sLit "call pattern")) <> comma <+>
+ ptext (sLit "but the limit is") <+> int max) ]
+ , ptext (sLit "Use -fspec-constr-count=n to set the bound")
+ , extra ]
+ extra | not opt_PprStyle_Debug = ptext (sLit "Use -dppr-debug to see specialisations")
+ | otherwise = ptext (sLit "Specialisations:") <+> ppr (pats ++ [p | OS p _ _ _ <- specs])
+
+ _normal_case -> do {
+
+ let spec_env = decreaseSpecCount env n_pats
+ ; (spec_usgs, new_specs) <- mapAndUnzipM (spec_one spec_env fn arg_bndrs body)
+ (pats `zip` [spec_count..])
+ -- See Note [Specialise original body]
+
+ ; let spec_usg = combineUsages spec_usgs
+ (new_usg, mb_unspec')
+ = case mb_unspec of
+ Just rhs_usg | boring_call -> (spec_usg `combineUsage` rhs_usg, Nothing)
+ _ -> (spec_usg, mb_unspec)
+
+ ; return (new_usg, SI (new_specs ++ specs) spec_count' mb_unspec') } }
| otherwise
- = return (nullUsage, [], []) -- The boring case
+ = return (nullUsage, spec_info) -- The boring case
---------------------
spec_one :: ScEnv
-> OutId -- Function
- -> [Var] -- Lambda-binders of RHS; should match patterns
- -> CoreExpr -- Body of the original function
- -> (([Var], [CoreArg]), Int)
- -> UniqSM (ScUsage, SpecInfo) -- Rule and binding
+ -> [InVar] -- Lambda-binders of RHS; should match patterns
+ -> InExpr -- Body of the original function
+ -> (CallPat, Int)
+ -> UniqSM (ScUsage, OneSpec) -- Rule and binding
-- spec_one creates a specialised copy of the function, together
-- with a rule for using it. I'm very proud of how short this
f (b,c) ((:) (a,(b,c)) (x,v) hw) = f_spec b c v hw
-}
-spec_one env fn arg_bndrs body ((qvars, pats), rule_number)
- = do { -- Specialise the body
- let spec_env = extendScSubstList (extendScInScope env qvars)
+spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number)
+ = do { spec_uniq <- getUniqueUs
+ ; let spec_env = extendScSubstList (extendScInScope env qvars)
(arg_bndrs `zip` pats)
+ fn_name = idName fn
+ fn_loc = nameSrcSpan fn_name
+ spec_occ = mkSpecOcc (nameOccName fn_name)
+ rule_name = mkFastString ("SC:" ++ showSDoc (ppr fn <> int rule_number))
+ spec_name = mkInternalName spec_uniq spec_occ fn_loc
+-- ; pprTrace "{spec_one" (ppr (sc_count env) <+> ppr fn <+> ppr pats <+> text "-->" <+> ppr spec_name) $
+-- return ()
+
+ -- Specialise the body
; (spec_usg, spec_body) <- scExpr spec_env body
--- ; pprTrace "spec_one" (ppr fn <+> vcat [text "pats" <+> ppr pats,
--- text "calls" <+> (ppr (scu_calls spec_usg))])
--- (return ())
+-- ; pprTrace "done spec_one}" (ppr fn) $
+-- return ()
-- And build the results
- ; spec_uniq <- getUniqueUs
- ; let (spec_lam_args, spec_call_args) = mkWorkerArgs qvars body_ty
+ ; let spec_id = mkLocalId spec_name (mkPiTypes spec_lam_args body_ty)
+ `setIdStrictness` spec_str -- See Note [Transfer strictness]
+ `setIdArity` count isId spec_lam_args
+ spec_str = calcSpecStrictness fn spec_lam_args pats
+ (spec_lam_args, spec_call_args) = mkWorkerArgs qvars body_ty
-- Usual w/w hack to avoid generating
-- a spec_rhs of unlifted type and no args
-
- fn_name = idName fn
- fn_loc = nameSrcSpan fn_name
- spec_occ = mkSpecOcc (nameOccName fn_name)
- rule_name = mkFastString ("SC:" ++ showSDoc (ppr fn <> int rule_number))
- spec_rhs = mkLams spec_lam_args spec_body
- spec_id = mkUserLocal spec_occ spec_uniq (mkPiTypes spec_lam_args body_ty) fn_loc
- body_ty = exprType spec_body
- rule_rhs = mkVarApps (Var spec_id) spec_call_args
- rule = mkLocalRule rule_name specConstrActivation fn_name qvars pats rule_rhs
- ; return (spec_usg, (rule, spec_id, spec_rhs)) }
-
--- In which phase should the specialise-constructor rules be active?
--- Originally I made them always-active, but Manuel found that
--- this defeated some clever user-written rules. So Plan B
--- is to make them active only in Phase 0; after all, currently,
--- the specConstr transformation is only run after the simplifier
--- has reached Phase 0. In general one would want it to be
--- flag-controllable, but for now I'm leaving it baked in
--- [SLPJ Oct 01]
-specConstrActivation :: Activation
-specConstrActivation = ActiveAfter 0 -- Baked in; see comments above
+
+ spec_rhs = mkLams spec_lam_args spec_body
+ body_ty = exprType spec_body
+ rule_rhs = mkVarApps (Var spec_id) spec_call_args
+ inline_act = idInlineActivation fn
+ rule = mkRule True {- Auto -} True {- Local -}
+ rule_name inline_act fn_name qvars pats rule_rhs
+ -- See Note [Transfer activation]
+ ; return (spec_usg, OS call_pat rule spec_id spec_rhs) }
+
+calcSpecStrictness :: Id -- The original function
+ -> [Var] -> [CoreExpr] -- Call pattern
+ -> StrictSig -- Strictness of specialised thing
+-- See Note [Transfer strictness]
+calcSpecStrictness fn qvars pats
+ = StrictSig (mkTopDmdType spec_dmds TopRes)
+ where
+ spec_dmds = [ lookupVarEnv dmd_env qv `orElse` lazyDmd | qv <- qvars, isId qv ]
+ StrictSig (DmdType _ dmds _) = idStrictness fn
+
+ dmd_env = go emptyVarEnv dmds pats
+
+ go env ds (Type {} : pats) = go env ds pats
+ go env (d:ds) (pat : pats) = go (go_one env d pat) ds pats
+ go env _ _ = env
+
+ go_one env d (Var v) = extendVarEnv_C both env v d
+ go_one env (Box d) e = go_one env d e
+ go_one env (Eval (Prod ds)) e
+ | (Var _, args) <- collectArgs e = go env ds args
+ go_one env _ _ = env
+
\end{code}
+Note [Specialise original body]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The RhsInfo for a binding keeps the *original* body of the binding. We
+must specialise that, *not* the result of applying specExpr to the RHS
+(which is also kept in RhsInfo). Otherwise we end up specialising a
+specialised RHS, and that can lead directly to exponential behaviour.
+
+Note [Transfer activation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+ This note is for SpecConstr, but exactly the same thing
+ happens in the overloading specialiser; see
+ Note [Auto-specialisation and RULES] in Specialise.
+
+In which phase should the specialise-constructor rules be active?
+Originally I made them always-active, but Manuel found that this
+defeated some clever user-written rules. Then I made them active only
+in Phase 0; after all, currently, the specConstr transformation is
+only run after the simplifier has reached Phase 0, but that meant
+that specialisations didn't fire inside wrappers; see test
+simplCore/should_compile/spec-inline.
+
+So now I just use the inline-activation of the parent Id, as the
+activation for the specialiation RULE, just like the main specialiser;
+
+This in turn means there is no point in specialising NOINLINE things,
+so we test for that.
+
+Note [Transfer strictness]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+We must transfer strictness information from the original function to
+the specialised one. Suppose, for example
+
+ f has strictness SS
+ and a RULE f (a:as) b = f_spec a as b
+
+Now we want f_spec to have strictess LLS, otherwise we'll use call-by-need
+when calling f_spec instead of call-by-value. And that can result in
+unbounded worsening in space (cf the classic foldl vs foldl')
+
+See Trac #3437 for a good example.
+
+The function calcSpecStrictness performs the calculation.
+
+
%************************************************************************
%* *
\subsection{Argument analysis}
\begin{code}
type CallPat = ([Var], [CoreExpr]) -- Quantified variables and arguments
-
-callsToPats :: ScEnv -> [CallPat] -> [ArgOcc] -> [Call] -> UniqSM [CallPat]
+callsToPats :: ScEnv -> [OneSpec] -> [ArgOcc] -> [Call] -> UniqSM (Bool, [CallPat])
-- Result has no duplicate patterns,
-- nor ones mentioned in done_pats
-callsToPats env done_pats bndr_occs calls
+ -- Bool indicates that there was at least one boring pattern
+callsToPats env done_specs bndr_occs calls
= do { mb_pats <- mapM (callToPats env bndr_occs) calls
- ; let good_pats :: [([Var], [CoreArg])]
+ ; let good_pats :: [CallPat]
good_pats = catMaybes mb_pats
+ done_pats = [p | OS p _ _ _ <- done_specs]
is_done p = any (samePat p) done_pats
- ; return (filterOut is_done (nubBy samePat good_pats)) }
+ ; return (any isNothing mb_pats,
+ filterOut is_done (nubBy samePat good_pats)) }
callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
-- The [Var] is the variables to quantify over in the rule
= return Nothing
| otherwise
= do { let in_scope = substInScope (sc_subst env)
- ; prs <- argsToPats in_scope con_env (args `zip` bndr_occs)
- ; let (good_pats, pats) = unzip prs
- pat_fvs = varSetElems (exprsFreeVars pats)
+ ; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs
+ ; let pat_fvs = varSetElems (exprsFreeVars pats)
qvars = filterOut (`elemInScopeSet` in_scope) pat_fvs
-- Quantify over variables that are not in sccpe
-- at the call site
-- See Note [Shadowing] at the top
- (tvs, ids) = partition isTyVar qvars
+ (tvs, ids) = partition isTyCoVar qvars
qvars' = tvs ++ ids
-- Put the type variables first; the type of a term
-- variable may mention a type variable
; -- pprTrace "callToPats" (ppr args $$ ppr prs $$ ppr bndr_occs) $
- if or good_pats
+ if interesting
then return (Just (qvars', pats))
else return Nothing }
-- placeholder variables. For example:
-- C a (D (f x) (g y)) ==> C p1 (D p2 p3)
-argToPat :: InScopeSet -- What's in scope at the fn defn site
+argToPat :: ScEnv
+ -> InScopeSet -- What's in scope at the fn defn site
-> ValueEnv -- ValueEnv at the call site
-> CoreArg -- A call arg (or component thereof)
-> ArgOcc
-> UniqSM (Bool, CoreArg)
+
-- Returns (interesting, pat),
-- where pat is the pattern derived from the argument
--- intersting=True if the pattern is non-trivial (not a variable or type)
+-- interesting=True if the pattern is non-trivial (not a variable or type)
-- E.g. x:xs --> (True, x:xs)
-- f xs --> (False, w) where w is a fresh wildcard
-- (f xs, 'c') --> (True, (w, 'c')) where w is a fresh wildcard
-- lvl7 --> (True, lvl7) if lvl7 is bound
-- somewhere further out
-argToPat _in_scope _val_env arg@(Type {}) _arg_occ
+argToPat _env _in_scope _val_env arg@(Type {}) _arg_occ
= return (False, arg)
-argToPat in_scope val_env (Note _ arg) arg_occ
- = argToPat in_scope val_env arg arg_occ
+argToPat env in_scope val_env (Note _ arg) arg_occ
+ = argToPat env in_scope val_env arg arg_occ
-- Note [Notes in call patterns]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- Ignore Notes. In particular, we want to ignore any InlineMe notes
-- ride roughshod over them all for now.
--- See Note [Notes in RULE matching] in Rules
-argToPat in_scope val_env (Let _ arg) arg_occ
- = argToPat in_scope val_env arg arg_occ
+argToPat env in_scope val_env (Let _ arg) arg_occ
+ = argToPat env in_scope val_env arg arg_occ
+ -- See Note [Matching lets] in Rule.lhs
-- Look through let expressions
- -- e.g. f (let v = rhs in \y -> ...v...)
- -- Here we can specialise for f (\y -> ...)
+ -- e.g. f (let v = rhs in (v,w))
+ -- Here we can specialise for f (v,w)
-- because the rule-matcher will look through the let.
-argToPat in_scope val_env (Cast arg co) arg_occ
- = do { (interesting, arg') <- argToPat in_scope val_env arg arg_occ
- ; let (ty1,ty2) = coercionKind co
+{- Disabled; see Note [Matching cases] in Rule.lhs
+argToPat env in_scope val_env (Case scrut _ _ [(_, _, rhs)]) arg_occ
+ | exprOkForSpeculation scrut -- See Note [Matching cases] in Rule.hhs
+ = argToPat env in_scope val_env rhs arg_occ
+-}
+
+argToPat env in_scope val_env (Cast arg co) arg_occ
+ | isIdentityCoercion co -- Substitution in the SpecConstr itself
+ -- can lead to identity coercions
+ = argToPat env in_scope val_env arg arg_occ
+ | not (ignoreType env ty2)
+ = do { (interesting, arg') <- argToPat env in_scope val_env arg arg_occ
; if not interesting then
wildCardPat ty2
else do
{ -- Make a wild-card pattern for the coercion
uniq <- getUniqueUs
- ; let co_name = mkSysTvName uniq FSLIT("sg")
+ ; let co_name = mkSysTvName uniq (fsLit "sg")
co_var = mkCoVar co_name (mkCoKind ty1 ty2)
; return (interesting, Cast arg' (mkTyVarTy co_var)) } }
+ where
+ (ty1, ty2) = coercionKind co
+
+
{- Disabling lambda specialisation for now
It's fragile, and the spec_loop can be infinite
-- Check for a constructor application
-- NB: this *precedes* the Var case, so that we catch nullary constrs
-argToPat in_scope val_env arg arg_occ
- | Just (ConVal dc args) <- isValue val_env arg
- , case arg_occ of
- ScrutOcc _ -> True -- Used only by case scrutinee
- BothOcc -> case arg of -- Used elsewhere
- App {} -> True -- see Note [Reboxing]
- _other -> False
- _other -> False -- No point; the arg is not decomposed
- = do { args' <- argsToPats in_scope val_env (args `zip` conArgOccs arg_occ dc)
- ; return (True, mk_con_app dc (map snd args')) }
+argToPat env in_scope val_env arg arg_occ
+ | Just (ConVal (DataAlt dc) args) <- isValue val_env arg
+ , not (ignoreDataCon env dc) -- See Note [NoSpecConstr]
+ , Just arg_occs <- mb_scrut dc
+ = do { let (ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) args
+ ; (_, args') <- argsToPats env in_scope val_env rest_args arg_occs
+ ; return (True,
+ mkConApp dc (ty_args ++ args')) }
+ where
+ mb_scrut dc = case arg_occ of
+ ScrutOcc bs
+ | Just occs <- lookupUFM bs dc
+ -> Just (occs) -- See Note [Reboxing]
+ _other | sc_force env -> Just (repeat UnkOcc)
+ | otherwise -> Nothing
-- Check if the argument is a variable that
- -- is in scope at the function definition site
- -- It's worth specialising on this if
- -- (a) it's used in an interesting way in the body
+ -- (a) is used in an interesting way in the body
-- (b) we know what its value is
-argToPat in_scope val_env (Var v) arg_occ
- | case arg_occ of { UnkOcc -> False; _other -> True }, -- (a)
- is_value -- (b)
+ -- In that case it counts as "interesting"
+argToPat env in_scope val_env (Var v) arg_occ
+ | sc_force env || case arg_occ of { UnkOcc -> False; _other -> True }, -- (a)
+ is_value, -- (b)
+ not (ignoreType env (varType v))
= return (True, Var v)
where
is_value
-- variables that are in soope, which in turn can
-- expose the weakness in let-matching
-- See Note [Matching lets] in Rules
+
-- Check for a variable bound inside the function.
-- Don't make a wild-card, because we may usefully share
-- e.g. f a = let x = ... in f (x,x)
-- NB: this case follows the lambda and con-app cases!!
-argToPat _in_scope _val_env (Var v) _arg_occ
- = return (False, Var v)
+-- argToPat _in_scope _val_env (Var v) _arg_occ
+-- = return (False, Var v)
+ -- SLPJ : disabling this to avoid proliferation of versions
+ -- also works badly when thinking about seeding the loop
+ -- from the body of the let
+ -- f x y = letrec g z = ... in g (x,y)
+ -- We don't want to specialise for that *particular* x,y
-- The default case: make a wild-card
-argToPat _in_scope _val_env arg _arg_occ
+argToPat _env _in_scope _val_env arg _arg_occ
= wildCardPat (exprType arg)
wildCardPat :: Type -> UniqSM (Bool, CoreArg)
-wildCardPat ty = do { uniq <- getUniqueUs
- ; let id = mkSysLocal FSLIT("sc") uniq ty
- ; return (False, Var id) }
-
-argsToPats :: InScopeSet -> ValueEnv
- -> [(CoreArg, ArgOcc)]
- -> UniqSM [(Bool, CoreArg)]
-argsToPats in_scope val_env args
- = mapM do_one args
- where
- do_one (arg,occ) = argToPat in_scope val_env arg occ
+wildCardPat ty
+ = do { uniq <- getUniqueUs
+ ; let id = mkSysLocal (fsLit "sc") uniq ty
+ ; return (False, Var id) }
+
+argsToPats :: ScEnv -> InScopeSet -> ValueEnv
+ -> [CoreArg] -> [ArgOcc] -- Should be same length
+ -> UniqSM (Bool, [CoreArg])
+argsToPats env in_scope val_env args occs
+ = do { stuff <- zipWithM (argToPat env in_scope val_env) args occs
+ ; let (interesting_s, args') = unzip stuff
+ ; return (or interesting_s, args') }
\end{code}
-- as well, for let-bound constructors!
isValue env (Lam b e)
- | isTyVar b = isValue env e
+ | isTyCoVar b = case isValue env e of
+ Just _ -> Just LambdaVal
+ Nothing -> Nothing
| otherwise = Just LambdaVal
isValue _env expr -- Maybe it's a constructor application
isValue _env _expr = Nothing
-mk_con_app :: AltCon -> [CoreArg] -> CoreExpr
-mk_con_app (LitAlt lit) [] = Lit lit
-mk_con_app (DataAlt con) args = mkConApp con args
-mk_con_app _other _args = panic "SpecConstr.mk_con_app"
-
samePat :: CallPat -> CallPat -> Bool
samePat (vs1, as1) (vs2, as2)
= all2 same as1 as2