From 6791ad226806d7b4e7618c91516e9e20be882813 Mon Sep 17 00:00:00 2001 From: Manuel M T Chakravarty Date: Wed, 20 Sep 2006 18:37:17 +0000 Subject: [PATCH] Pattern matching of indexed data types Mon Sep 18 19:11:24 EDT 2006 Manuel M T Chakravarty * Pattern matching of indexed data types Thu Aug 24 14:17:44 EDT 2006 Manuel M T Chakravarty * Pattern matching of indexed data types - This patch is the last major puzzle piece to type check and desugar indexed data types (both toplevel and associated with a class). - However, it needs more testing - esp wrt to accumlating CoPats - and some static sanity checks for data instance declarations are still missing. - There are now two detailed notes in MkIds and TcPat on how the worker/wrapper and coercion story for indexed data types works. --- compiler/basicTypes/MkId.lhs | 28 +++++++++- compiler/basicTypes/OccName.lhs | 4 +- compiler/typecheck/TcPat.lhs | 116 ++++++++++++++++++++++++++++++++++----- compiler/types/Coercion.lhs | 10 ++-- 4 files changed, 135 insertions(+), 23 deletions(-) diff --git a/compiler/basicTypes/MkId.lhs b/compiler/basicTypes/MkId.lhs index d306128..0ad0bc6 100644 --- a/compiler/basicTypes/MkId.lhs +++ b/compiler/basicTypes/MkId.lhs @@ -63,7 +63,8 @@ import Literal ( nullAddrLit, mkStringLit ) import TyCon ( TyCon, isNewTyCon, tyConDataCons, FieldLabel, tyConStupidTheta, isProductTyCon, isDataTyCon, isRecursiveTyCon, isFamInstTyCon, - tyConFamInst_maybe, newTyConCo ) + tyConFamInst_maybe, tyConFamilyCoercion_maybe, + newTyConCo ) import Class ( Class, classTyCon, classSelIds ) import Var ( Id, TyVar, Var, setIdType ) import VarSet ( isEmptyVarSet, subVarSet, varSetElems ) @@ -190,9 +191,30 @@ Notice that Making an explicit case expression allows the simplifier to eliminate it in the (common) case where the constructor arg is already evaluated. +[Wrappers for data instance tycons] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In the case of data instances, the wrapper also applies the coercion turning the representation type into the family instance type to cast the result of -the wrapper. +the wrapper. For example, consider the declarations + + data family Map k :: * -> * + data instance Map (a, b) v = MapPair (Map a (Pair b v)) + +The tycon to which the datacon MapPair belongs gets a unique internal name of +the form :R123Map, and we call it the representation tycon. In contrast, Map +is the family tycon (accessible via tyConFamInst_maybe). The wrapper and work +of MapPair get the types + + $WMapPair :: forall a b v. Map a (Map a b v) -> Map (a, b) v + $wMapPair :: forall a b v. Map a (Map a b v) -> :R123Map a b v + +which implies that the wrapper code will have to apply the coercion moving +between representation and family type. It is accessible via +tyConFamilyCoercion_maybe and has kind + + Co123Map a b v :: {Map (a, b) v :=: :R123Map a b v} + +This coercion is conditionally applied by wrapFamInstBody. \begin{code} mkDataConIds :: Name -> Name -> DataCon -> DataConIds @@ -367,7 +389,7 @@ mkLocals i tys = (zipWith mkTemplateLocal [i..i+n-1] tys, i+n) -- wrapFamInstBody :: TyCon -> [Type] -> CoreExpr -> CoreExpr wrapFamInstBody tycon args result_expr - | Just (co_con, _) <- tyConFamInst_maybe tycon + | Just co_con <- tyConFamilyCoercion_maybe tycon = mkCoerce (mkSymCoercion (mkTyConApp co_con args)) result_expr | otherwise = result_expr diff --git a/compiler/basicTypes/OccName.lhs b/compiler/basicTypes/OccName.lhs index 13a7f81..3465f55 100644 --- a/compiler/basicTypes/OccName.lhs +++ b/compiler/basicTypes/OccName.lhs @@ -485,9 +485,9 @@ mkLocalOcc uniq occ -- mkInstTyTcOcc :: Unique -- Unique -> OccName -- Local name (e.g. "Map") - -> OccName -- Nice unique version (":T23Map") + -> OccName -- Nice unique version (":R23Map") mkInstTyTcOcc uniq occ - = mk_deriv varName (":T" ++ show uniq) (occNameString occ) + = mk_deriv varName (":R" ++ show uniq) (occNameString occ) -- Derive a name for the coercion of a data/newtype instance. -- diff --git a/compiler/typecheck/TcPat.lhs b/compiler/typecheck/TcPat.lhs index 2316c93..f165e2e 100644 --- a/compiler/typecheck/TcPat.lhs +++ b/compiler/typecheck/TcPat.lhs @@ -10,7 +10,8 @@ module TcPat ( tcLetPat, tcLamPat, tcLamPats, tcOverloadedLit, #include "HsVersions.h" import {-# SOURCE #-} TcExpr( tcSyntaxOp ) -import HsSyn ( Pat(..), LPat, HsConDetails(..), HsLit(..), HsOverLit(..), HsExpr(..), +import HsSyn ( Pat(..), LPat, HsConDetails(..), HsLit(..), + HsOverLit(..), HsExpr(..), OutPat, ExprCoFn(..), mkCoPat, idCoercion, LHsBinds, emptyLHsBinds, isEmptyLHsBinds, collectPatsBinders, nlHsLit ) @@ -27,7 +28,7 @@ import TcEnv ( newLocalName, tcExtendIdEnv1, tcExtendTyVarEnv2, tcLookupClass, tcLookupDataCon, refineEnvironment, tcLookupField, tcMetaTy ) import TcMType ( newFlexiTyVarTy, arityErr, tcInstSkolTyVars, -+ newCoVars, zonkTcType ) + newCoVars, zonkTcType, tcInstTyVars ) import TcType ( TcType, TcTyVar, TcSigmaType, TcRhoType, BoxyType, SkolemInfo(PatSkol), BoxySigmaType, BoxyRhoType, argTypeKind, typeKind, @@ -39,13 +40,14 @@ import VarSet ( elemVarSet ) import {- Kind parts of -} Type ( liftedTypeKind ) import TcUnify ( boxySplitTyConApp, boxySplitListTy, unBox, - zapToMonotype, boxyUnify, checkSigTyVarsWrt, - unifyType ) + zapToMonotype, boxyUnify, boxyUnifyList, + checkSigTyVarsWrt, unifyType ) import TcHsType ( UserTypeCtxt(..), tcPatSig ) import TysWiredIn ( boolTy, parrTyCon, tupleTyCon ) -import Type ( substTys, substTheta ) +import Type ( Type, mkTyConApp, substTys, substTheta ) import StaticFlags ( opt_IrrefutableTuples ) -import TyCon ( TyCon, FieldLabel ) +import TyCon ( TyCon, FieldLabel, tyConFamInst_maybe, + tyConFamilyCoercion_maybe, tyConTyVars ) import DataCon ( DataCon, dataConTyCon, dataConFullSig, dataConName, dataConFieldLabels, dataConSourceArity, dataConStupidTheta, dataConUnivTyVars ) @@ -477,6 +479,61 @@ tc_pat _ _other_pat _ _ = panic "tc_pat" -- DictPat, ConPatOut, SigPatOut, VarP %* * %************************************************************************ +[Pattern matching indexed data types] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider the following declarations: + + data family Map k :: * -> * + data instance Map (a, b) v = MapPair (Map a (Pair b v)) + +and a case expression + + case x :: Map (Int, c) w of MapPair m -> ... + +As explained by [Wrappers for data instance tycons] in MkIds.lhs, the +worker/wrapper types for MapPair are + + $WMapPair :: forall a b v. Map a (Map a b v) -> Map (a, b) v + $wMapPair :: forall a b v. Map a (Map a b v) -> :R123Map a b v + +So, the type of the scrutinee is Map (Int, c) w, but the tycon of MapPair is +:R123Map, which means the straight use of boxySplitTyConApp would give a type +error. Hence, the smart wrapper function boxySplitTyConAppWithFamily calls +boxySplitTyConApp with the family tycon Map instead, which gives us the family +type list {(Int, c), w}. To get the correct split for :R123Map, we need to +unify the family type list {(Int, c), w} with the instance types {(a, b), v} +(provided by tyConFamInst_maybe together with the family tycon). This +unification yields the substitution [a -> Int, b -> c, v -> w], which gives us +the split arguments for the representation tycon :R123Map as {Int, c, w} + +In other words, boxySplitTyConAppWithFamily implicitly takes the coercion + + Co123Map a b v :: {Map (a, b) v :=: :R123Map a b v} + +moving between representation and family type into account. To produce type +correct Core, this coercion needs to be used to case the type of the scrutinee +from the family to the representation type. This is achieved by +unwrapFamInstScrutinee using a CoPat around the result pattern. + +Now it might appear seem as if we could have used the existing GADT type +refinement infrastructure of refineAlt and friends instead of the explicit +unification and CoPat generation. However, that would be wrong. Why? The +whole point of GADT refinement is that the refinement is local to the case +alternative. In contrast, the substitution generated by the unification of +the family type list and instance types needs to be propagated to the outside. +Imagine that in the above example, the type of the scrutinee would have been +(Map x w), then we would have unified {x, w} with {(a, b), v}, yielding the +substitution [x -> (a, b), v -> w]. In contrast to GADT matching, the +instantiation of x with (a, b) must be global; ie, it must be valid in *all* +alternatives of the case expression, whereas in the GADT case it might vary +between alternatives. + +In fact, if we have a data instance declaration defining a GADT, eq_spec will +be non-empty and we will get a mixture of global instantiations and local +refinement from a single match. This neatly reflects that, as soon as we +have constrained the type of the scrutinee to the required type index, all +further type refinement is local to the alternative. + \begin{code} -- Running example: -- MkT :: forall a b c. (a:=:[b]) => b -> c -> T a @@ -493,7 +550,7 @@ tcConPat pstate con_span data_con tycon pat_ty arg_pats thing_inside origin = SigOrigin skol_info -- Instantiate the constructor type variables [a->ty] - ; ctxt_res_tys <- boxySplitTyConApp tycon pat_ty + ; ctxt_res_tys <- boxySplitTyConAppWithFamily tycon pat_ty ; ex_tvs' <- tcInstSkolTyVars skol_info ex_tvs ; let tenv = zipTopTvSubst (univ_tvs ++ ex_tvs) (ctxt_res_tys ++ mkTyVarTys ex_tvs') @@ -513,17 +570,50 @@ tcConPat pstate con_span data_con tycon pat_ty arg_pats thing_inside ; addDataConStupidTheta origin data_con ctxt_res_tys - ; return (ConPatOut { pat_con = L con_span data_con, - pat_tvs = ex_tvs' ++ co_vars, - pat_dicts = map instToId dicts, pat_binds = dict_binds, - pat_args = arg_pats', pat_ty = pat_ty }, - ex_tvs' ++ inner_tvs, res) + ; return + (unwrapFamInstScrutinee tycon ctxt_res_tys $ + ConPatOut { pat_con = L con_span data_con, + pat_tvs = ex_tvs' ++ co_vars, + pat_dicts = map instToId dicts, + pat_binds = dict_binds, + pat_args = arg_pats', pat_ty = pat_ty }, + ex_tvs' ++ inner_tvs, res) } where doc = ptext SLIT("existential context for") <+> quotes (ppr data_con) + -- Split against the family tycon if the pattern constructor belongs to a + -- representation tycon. + -- + boxySplitTyConAppWithFamily tycon pat_ty = + case tyConFamInst_maybe tycon of + Nothing -> boxySplitTyConApp tycon pat_ty + Just (fam_tycon, instTys) -> + do { scrutinee_arg_tys <- boxySplitTyConApp fam_tycon pat_ty + ; (_, freshTvs, subst) <- tcInstTyVars (tyConTyVars tycon) + ; boxyUnifyList (substTys subst instTys) scrutinee_arg_tys + ; return freshTvs + } + + -- Wraps the pattern (which must be a ConPatOut pattern) in a coercion + -- pattern if the tycon is an instance of a family. + -- + unwrapFamInstScrutinee :: TyCon -> [Type] -> Pat Id -> Pat Id + unwrapFamInstScrutinee tycon args pat + | Just co_con <- tyConFamilyCoercion_maybe tycon + -- NB: We can use CoPat directly, rather than mkCoPat, as we know the + -- coercion is not the identity; mkCoPat is inconvenient as it + -- wants a located pattern. + = CoPat (ExprCoFn $ mkTyConApp co_con args) -- co fam ty to repr ty + (pat {pat_ty = mkTyConApp tycon args}) -- representation type + pat_ty -- family inst type + | otherwise + = pat + + tcConArgs :: DataCon -> [TcSigmaType] - -> Checker (HsConDetails Name (LPat Name)) (HsConDetails Id (LPat Id)) + -> Checker (HsConDetails Name (LPat Name)) + (HsConDetails Id (LPat Id)) tcConArgs data_con arg_tys (PrefixCon arg_pats) pstate thing_inside = do { checkTc (con_arity == no_of_args) -- Check correct arity diff --git a/compiler/types/Coercion.lhs b/compiler/types/Coercion.lhs index d715016..3e1d071 100644 --- a/compiler/types/Coercion.lhs +++ b/compiler/types/Coercion.lhs @@ -38,7 +38,7 @@ import Type ( Type, Kind, PredType, substTyWith, mkAppTy, mkForAllTy, mkFunTy, splitAppTy_maybe, splitForAllTy_maybe, coreView, kindView, mkTyConApp, isCoercionKind, isEqPred, mkAppTys, coreEqType, splitAppTys, isTyVarTy, splitTyConApp_maybe, - tyVarsOfType + tyVarsOfType, mkTyVarTys ) import TyCon ( TyCon, tyConArity, mkCoercionTyCon, isNewTyCon, newTyConRhs, newTyConCo, @@ -340,10 +340,10 @@ mkDataInstCoercion name tvs family instTys rep_tycon where coArity = length tvs - rule args = (substTyWith tvs tys $ -- with sigma = [tys/tvs], - TyConApp family instTys, -- sigma (F ts) - TyConApp rep_tycon instTys, -- :=: R tys - rest) -- surplus arguments + rule args = (substTyWith tvs tys $ -- with sigma = [tys/tvs] + TyConApp family instTys, -- sigma (F ts) + TyConApp rep_tycon (mkTyVarTys tvs), -- :=: R tys + rest) -- surplus arguments where tys = take coArity args rest = drop coArity args -- 1.7.10.4