From 94b170a053c161d1e0cc4418b37a6a4807872a5f Mon Sep 17 00:00:00 2001 From: "simonpj@microsoft.com" Date: Mon, 6 Nov 2006 15:59:01 +0000 Subject: [PATCH] Tidy up substitutions The new simplifer stuff exposed the fact that the invariants on the TvSubstEnv and IdSubstEnv were insufficiently explicit. (Resulted in a bug found by Sam Brosnon.) This patch fixes the bug, and tries to document the invariants pretty thoroughly. See Note [Extending the TvSubst] in Type Note [Extenting the Subst] in CoreSubst (Most of the new lines are comments.) --- compiler/coreSyn/CoreSubst.lhs | 76 ++++++++++++++++++++++++++------------- compiler/simplCore/SimplEnv.lhs | 5 +++ compiler/types/Type.lhs | 52 ++++++++++++++++++++++----- 3 files changed, 101 insertions(+), 32 deletions(-) diff --git a/compiler/coreSyn/CoreSubst.lhs b/compiler/coreSyn/CoreSubst.lhs index 6eecc27..321ea8f 100644 --- a/compiler/coreSyn/CoreSubst.lhs +++ b/compiler/coreSyn/CoreSubst.lhs @@ -71,8 +71,51 @@ data Subst -- - make it empty because all the free vars of the subst are fresh, -- and hence can't possibly clash.a -- - -- INVARIANT 2: The substitution is apply-once; see notes with + -- INVARIANT 2: The substitution is apply-once; see Note [Apply once] with -- Types.TvSubstEnv + -- + -- INVARIANT 3: See Note [Extending the Subst] + +{- +Note [Extending the Subst] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +For a core Subst, which binds Ids as well, we make a different choice for Ids +than we do for TyVars. + +For TyVars, see Note [Extending the TvSubst] with Type.TvSubstEnv + +For Ids, we have a different invariant + The IdSubstEnv is extended *only* when the Unique on an Id changes + Otherwise, we just extend the InScopeSet + +In consequence: + +* In substIdBndr, we extend the IdSubstEnv only when the unique changes + +* If the TvSubstEnv and IdSubstEnv are both empty, substExpr does nothing + (Note that the above rule for substIdBndr maintains this property. If + the incoming envts are both empty, then substituting the type and + IdInfo can't change anything.) + +* In lookupIdSubst, we *must* look up the Id in the in-scope set, because + it may contain non-trivial changes. Example: + (/\a. \x:a. ...x...) Int + We extend the TvSubstEnv with [a |-> Int]; but x's unique does not change + so we only extend the in-scope set. Then we must look up in the in-scope + set when we find the occurrence of x. + +Why do we make a different choice for the IdSubstEnv than the TvSubstEnv? + +* For Ids, we change the IdInfo all the time (e.g. deleting the + unfolding), and adding it back later, so using the TyVar convention + would entail extending the substitution almost all the time + +* The simplifier wants to look up in the in-scope set anyway, in case it + can see a better unfolding from an enclosing case expression + +* For TyVars, only coercion variables can possibly change, and they are + easy to spot +-} type IdSubstEnv = IdEnv CoreExpr @@ -120,28 +163,11 @@ extendTvSubstList (Subst in_scope ids tvs) prs = Subst in_scope ids (extendVarEn lookupIdSubst :: Subst -> Id -> CoreExpr lookupIdSubst (Subst in_scope ids tvs) v | not (isLocalId v) = Var v - | otherwise = case lookupVarEnv ids v of - Just e -> e - Nothing -> Var v - -{- We used to have to look up in the in-scope set, - because GADTs were implicit in the intermediate language - But with FC, the type of an Id does not change in its scope - The worst that can happen if we don't look up in the in-scope set - is that we don't propagate IdInfo as vigorously as we might. - But that'll happen (when it's useful) in SimplEnv.substId - - If you put this back in, you should worry about the - Just e -> e - case above too! - - case lookupInScope in_scope v of { - -- Watch out! Must get the Id from the in-scope set, - -- because its type there may differ - Just v -> Var v ; - Nothing -> WARN( True, ptext SLIT("CoreSubst.lookupIdSubst") <+> ppr v ) - Var v --} + | Just e <- lookupVarEnv ids v = e + | Just v' <- lookupInScope in_scope v = Var v' + -- Vital! See Note [Extending the Subst] + | otherwise = WARN( True, ptext SLIT("CoreSubst.lookupIdSubst") <+> ppr v ) + Var v lookupTvSubst :: Subst -> TyVar -> Type lookupTvSubst (Subst _ ids tvs) v = lookupVarEnv tvs v `orElse` Type.mkTyVarTy v @@ -289,7 +315,9 @@ substIdBndr rec_subst subst@(Subst in_scope env tvs) old_id new_env | no_change = delVarEnv env old_id | otherwise = extendVarEnv env old_id (Var new_id) - no_change = False -- id1 == old_id && isNothing mb_new_info && no_type_change + no_change = id1 == old_id + -- See Note [Extending the Subst] + -- *not* necessary to check mb_new_info and no_type_change \end{code} Now a variant that unconditionally allocates a new unique. diff --git a/compiler/simplCore/SimplEnv.lhs b/compiler/simplCore/SimplEnv.lhs index f9e0484..765fd00 100644 --- a/compiler/simplCore/SimplEnv.lhs +++ b/compiler/simplCore/SimplEnv.lhs @@ -130,6 +130,7 @@ data SimplEnv } type SimplIdSubst = IdEnv SimplSR -- IdId |--> OutExpr + -- See Note [Extending the Subst] in CoreSubst data SimplSR = DoneEx OutExpr -- Completed term @@ -137,6 +138,7 @@ data SimplSR | ContEx TvSubstEnv -- A suspended substitution SimplIdSubst InExpr + instance Outputable SimplSR where ppr (DoneEx e) = ptext SLIT("DoneEx") <+> ppr e ppr (DoneId v) = ptext SLIT("DoneId") <+> ppr v @@ -531,6 +533,8 @@ substIdBndr :: SimplEnv -> Id -- Substitition and Id to transform -- * The substitution extended with a DoneId if unique changed -- In this case, the var in the DoneId is the same as the -- var returned +-- +-- Exactly like CoreSubst.substIdBndr, except that the type of id_subst differs substIdBndr env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst}) old_id @@ -549,6 +553,7 @@ substIdBndr env@(SimplEnv { seInScope = in_scope, seIdSubst = id_subst}) -- Extend the substitution if the unique has changed -- See the notes with substTyVarBndr for the delSubstEnv + -- Also see Note [Extending the Subst] in CoreSubst new_subst | new_id /= old_id = extendVarEnv id_subst old_id (DoneId new_id) | otherwise diff --git a/compiler/types/Type.lhs b/compiler/types/Type.lhs index f4681e4..88a7bc7 100644 --- a/compiler/types/Type.lhs +++ b/compiler/types/Type.lhs @@ -1047,11 +1047,13 @@ instance Ord PredType where { compare = tcCmpPred } data TvSubst = TvSubst InScopeSet -- The in-scope type variables TvSubstEnv -- The substitution itself - -- See Note [Apply Once] + -- See Note [Apply Once] + -- and Note [Extending the TvSubstEnv] {- ---------------------------------------------------------- - Note [Apply Once] +Note [Apply Once] +~~~~~~~~~~~~~~~~~ We use TvSubsts to instantiate things, and we might instantiate forall a b. ty \with the types @@ -1068,6 +1070,38 @@ variations happen to; for example [a -> (a, b)]. A TvSubst is not idempotent, but, unlike the non-idempotent substitution we use during unifications, it must not be repeatedly applied. + +Note [Extending the TvSubst] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The following invariant should hold of a TvSubst + + The in-scope set is needed *only* to + guide the generation of fresh uniques + + In particular, the *kind* of the type variables in + the in-scope set is not relevant + +This invariant allows a short-cut when the TvSubstEnv is empty: +if the TvSubstEnv is empty --- i.e. (isEmptyTvSubt subst) holds --- +then (substTy subst ty) does nothing. + +For example, consider: + (/\a. /\b:(a~Int). ...b..) Int +We substitute Int for 'a'. The Unique of 'b' does not change, but +nevertheless we add 'b' to the TvSubstEnv, because b's type does change + +This invariant has several crucial consequences: + +* In substTyVarBndr, we need extend the TvSubstEnv + - if the unique has changed + - or if the kind has changed + +* In substTyVar, we do not need to consult the in-scope set; + the TvSubstEnv is enough + +* In substTy, substTheta, we can short-circuit when the TvSubstEnv is empty + + -------------------------------------------------------------- -} @@ -1096,6 +1130,7 @@ composeTvSubst in_scope env1 env2 emptyTvSubst = TvSubst emptyInScopeSet emptyVarEnv isEmptyTvSubst :: TvSubst -> Bool + -- See Note [Extending the TvSubstEnv] isEmptyTvSubst (TvSubst _ env) = isEmptyVarEnv env mkTvSubst :: InScopeSet -> TvSubstEnv -> TvSubst @@ -1254,17 +1289,19 @@ subst_ty subst ty substTyVar :: TvSubst -> TyVar -> Type substTyVar subst@(TvSubst in_scope env) tv = case lookupTyVar subst tv of { - Nothing -> TyVarTy tv; + Nothing -> TyVarTy tv; Just ty -> ty -- See Note [Apply Once] } lookupTyVar :: TvSubst -> TyVar -> Maybe Type + -- See Note [Extending the TvSubst] lookupTyVar (TvSubst in_scope env) tv = lookupVarEnv env tv substTyVarBndr :: TvSubst -> TyVar -> (TvSubst, TyVar) substTyVarBndr subst@(TvSubst in_scope env) old_var = (TvSubst (in_scope `extendInScopeSet` new_var) new_env, new_var) where + is_co_var = isCoVar old_var new_env | no_change = delVarEnv env old_var | otherwise = extendVarEnv env old_var (TyVarTy new_var) @@ -1272,6 +1309,7 @@ substTyVarBndr subst@(TvSubst in_scope env) old_var no_change = new_var == old_var && not is_co_var -- no_change means that the new_var is identical in -- all respects to the old_var (same unique, same kind) + -- See Note [Extending the TvSubst] -- -- In that case we don't need to extend the substitution -- to map old to new. But instead we must zap any @@ -1283,12 +1321,10 @@ substTyVarBndr subst@(TvSubst in_scope env) old_var -- The uniqAway part makes sure the new variable is not already in scope subst_old_var -- subst_old_var is old_var with the substitution applied to its kind - -- It's only worth doing the substitution for coercions, - -- becuase only they can have free type variables - | is_co_var = setTyVarKind old_var (substTy subst kind) + -- It's only worth doing the substitution for coercions, + -- becuase only they can have free type variables + | is_co_var = setTyVarKind old_var (substTy subst (tyVarKind old_var)) | otherwise = old_var - kind = tyVarKind old_var - is_co_var = isCoVar old_var \end{code} ---------------------------------------------------- -- 1.7.10.4