%
+% (c) The University of Glasgow 2006
% (c) The GRASP/AQUA Project, Glasgow University, 1998
%
\section[DataCon]{@DataCon@: Data Constructors}
ConTag, fIRST_TAG,
mkDataCon,
dataConRepType, dataConSig, dataConFullSig,
- dataConName, dataConTag, dataConTyCon, dataConUserType,
- dataConUnivTyVars, dataConExTyVars, dataConAllTyVars, dataConResTys,
- dataConEqSpec, dataConTheta, dataConStupidTheta,
- dataConInstArgTys, dataConOrigArgTys,
+ dataConName, dataConIdentity, dataConTag, dataConTyCon, dataConUserType,
+ dataConUnivTyVars, dataConExTyVars, dataConAllTyVars,
+ dataConEqSpec, eqSpecPreds, dataConTheta, dataConStupidTheta,
+ dataConInstArgTys, dataConOrigArgTys, dataConOrigResTy,
dataConInstOrigArgTys, dataConRepArgTys,
dataConFieldLabels, dataConFieldType,
dataConStrictMarks, dataConExStricts,
isNullarySrcDataCon, isNullaryRepDataCon, isTupleCon, isUnboxedTupleCon,
isVanillaDataCon, classDataCon,
- splitProductType_maybe, splitProductType,
+ splitProductType_maybe, splitProductType, deepSplitProductType,
+ deepSplitProductType_maybe
) where
#include "HsVersions.h"
-import Type ( Type, ThetaType,
- substTyWith, substTyVar, mkTopTvSubst,
- mkForAllTys, mkFunTys, mkTyConApp, mkTyVarTy, mkTyVarTys,
- splitTyConApp_maybe,
- mkPredTys, isStrictPred, pprType
- )
-import Coercion ( isEqPred, mkEqPred )
-import TyCon ( TyCon, FieldLabel, tyConDataCons,
- isProductTyCon, isTupleTyCon, isUnboxedTupleTyCon,
- isNewTyCon )
-import Class ( Class, classTyCon )
-import Name ( Name, NamedThing(..), nameUnique )
-import Var ( TyVar, Id )
-import BasicTypes ( Arity, StrictnessMark(..) )
+import Type
+import Coercion
+import TyCon
+import Class
+import Name
+import Var
+import BasicTypes
import Outputable
-import Unique ( Unique, Uniquable(..) )
-import ListSetOps ( assoc, minusList )
-import Util ( zipEqual, zipWithEqual )
-import List ( partition )
-import Maybes ( expectJust )
+import Unique
+import ListSetOps
+import Util
+import Maybes
+import FastString
+import PackageConfig
+import Module
+
+import Data.Char
+import Data.Word
\end{code}
The data con has one or two Ids associated with it:
- The "worker Id", is the actual data constructor.
- Its type may be different to the Haskell source constructor
- because:
- - useless dict args are dropped
- - strict args may be flattened
- The worker is very like a primop, in that it has no binding.
+The "worker Id" is the actual data constructor.
+* Every data constructor (newtype or data type) has a worker
- Newtypes have no worker Id
+* The worker is very like a primop, in that it has no binding.
+* For a *data* type, the worker *is* the data constructor;
+ it has no unfolding
- The "wrapper Id", $WC, whose type is exactly what it looks like
- in the source program. It is an ordinary function,
- and it gets a top-level binding like any other function.
+* For a *newtype*, the worker has a compulsory unfolding which
+ does a cast, e.g.
+ newtype T = MkT Int
+ The worker for MkT has unfolding
+ \(x:Int). x `cast` sym CoT
+ Here CoT is the type constructor, witnessing the FC axiom
+ axiom CoT : T = Int
- The wrapper Id isn't generated for a data type if the worker
- and wrapper are identical. It's always generated for a newtype.
+The "wrapper Id", $WC, goes as follows
+
+* Its type is exactly what it looks like in the source program.
+
+* It is an ordinary function, and it gets a top-level binding
+ like any other function.
+
+* The wrapper Id isn't generated for a data type if there is
+ nothing for the wrapper to do. That is, if its defn would be
+ $wC = C
+
+Why might the wrapper have anything to do? Two reasons:
+
+* Unboxing strict fields (with -funbox-strict-fields)
+ data T = MkT !(Int,Int)
+ $wMkT :: (Int,Int) -> T
+ $wMkT (x,y) = MkT x y
+ Notice that the worker has two fields where the wrapper has
+ just one. That is, the worker has type
+ MkT :: Int -> Int -> T
+
+* Equality constraints for GADTs
+ data T a where { MkT :: a -> T [a] }
+
+ The worker gets a type with explicit equality
+ constraints, thus:
+ MkT :: forall a b. (a=[b]) => b -> T a
+
+ The wrapper has the programmer-specified type:
+ $wMkT :: a -> T [a]
+ $wMkT a x = MkT [a] a [a] x
+ The third argument is a coercion
+ [a] :: [a]:=:[a]
-- dcEqSpec = [a:=:(x,y)]
-- dcTheta = [Ord x]
-- dcOrigArgTys = [a,List b]
- -- dcTyCon = T
+ -- dcRepTyCon = T
dcVanilla :: Bool, -- True <=> This is a vanilla Haskell 98 data constructor
-- Its type is of form
-- The declaration format is held in the TyCon (algTcGadtSyntax)
dcUnivTyVars :: [TyVar], -- Universally-quantified type vars
+ -- INVARIANT: length matches arity of the dcRepTyCon
+
dcExTyVars :: [TyVar], -- Existentially-quantified type vars
-- In general, the dcUnivTyVars are NOT NECESSARILY THE SAME AS THE TYVARS
-- FOR THE PARENT TyCon. With GADTs the data con might not even have
-- [This is a change (Oct05): previously, vanilla datacons guaranteed to
-- have the same type variables as their parent TyCon, but that seems ugly.]
+ -- INVARIANT: the UnivTyVars and ExTyVars all have distinct OccNames
+ -- Reason: less confusing, and easier to generate IfaceSyn
+
dcEqSpec :: [(TyVar,Type)], -- Equalities derived from the result type,
-- *as written by the programmer*
-- This field allows us to move conveniently between the two ways
dcOrigArgTys :: [Type], -- Original argument types
-- (before unboxing and flattening of strict fields)
-
- -- Result type of constructor is T t1..tn
- dcTyCon :: TyCon, -- Result tycon, T
+ dcOrigResTy :: Type, -- Original result type
+ -- NB: for a data instance, the original user result type may
+ -- differ from the DataCon's representation TyCon. Example
+ -- data instance T [a] where MkT :: a -> T [a]
+ -- The OrigResTy is T [a], but the dcRepTyCon might be :T123
-- Now the strictness annotations and field labels of the constructor
dcStrictMarks :: [StrictnessMark],
dcFields :: [FieldLabel],
-- Field labels for this constructor, in the
- -- same order as the argument types;
+ -- same order as the dcOrigArgTys;
-- length = 0 (if not a record) or dataConSourceArity.
-- Constructor representation
-- and *including* existential dictionaries
dcRepStrictness :: [StrictnessMark], -- One for each *representation* argument
+ -- See also Note [Data-con worker strictness] in MkId.lhs
+
+ -- Result type of constructor is T t1..tn
+ dcRepTyCon :: TyCon, -- Result tycon, T
dcRepType :: Type, -- Type of the constructor
-- forall a x y. (a:=:(x,y), Ord x) => x -> y -> MkT a
}
data DataConIds
- = NewDC Id -- Newtypes have only a wrapper, but no worker
- | AlgDC (Maybe Id) Id -- Algebraic data types always have a worker, and
+ = DCIds (Maybe Id) Id -- Algebraic data types always have a worker, and
-- may or may not have a wrapper, depending on whether
- -- the wrapper does anything.
+ -- the wrapper does anything. Newtypes just have a worker
-- _Neither_ the worker _nor_ the wrapper take the dcStupidTheta dicts as arguments
-- The worker takes dcRepArgTys as its arguments
-- If the worker is absent, dcRepArgTys is the same as dcOrigArgTys
- -- The 'Nothing' case of AlgDC is important
+ -- The 'Nothing' case of DCIds is important
-- Not only is this efficient,
-- but it also ensures that the wrapper is replaced
- -- by the worker (becuase it *is* the wroker)
+ -- by the worker (because it *is* the worker)
-- even when there are no args. E.g. in
-- f (:) x
-- the (:) *is* the worker.
eq_spec theta
orig_arg_tys tycon
stupid_theta ids
+-- Warning: mkDataCon is not a good place to check invariants.
+-- If the programmer writes the wrong result type in the decl, thus:
+-- data T a where { MkT :: S }
+-- then it's possible that the univ_tvs may hit an assertion failure
+-- if you pull on univ_tvs. This case is checked by checkValidDataCon,
+-- so the error is detected properly... it's just that assertions here
+-- are a little dodgy.
+
= ASSERT( not (any isEqPred theta) )
-- We don't currently allow any equality predicates on
-- a data constructor (apart from the GADT ones in eq_spec)
con
where
is_vanilla = null ex_tvs && null eq_spec && null theta
- con = ASSERT( is_vanilla || not (isNewTyCon tycon) )
- -- Invariant: newtypes have a vanilla data-con
- MkData {dcName = name, dcUnique = nameUnique name,
+ con = MkData {dcName = name, dcUnique = nameUnique name,
dcVanilla = is_vanilla, dcInfix = declared_infix,
dcUnivTyVars = univ_tvs, dcExTyVars = ex_tvs,
dcEqSpec = eq_spec,
dcStupidTheta = stupid_theta, dcTheta = theta,
- dcOrigArgTys = orig_arg_tys, dcTyCon = tycon,
+ dcOrigArgTys = orig_arg_tys, dcOrigResTy = orig_res_ty,
+ dcRepTyCon = tycon,
dcRepArgTys = rep_arg_tys,
- dcStrictMarks = arg_stricts, dcRepStrictness = rep_arg_stricts,
+ dcStrictMarks = arg_stricts,
+ dcRepStrictness = rep_arg_stricts,
dcFields = fields, dcTag = tag, dcRepType = ty,
dcIds = ids }
real_arg_tys = dict_tys ++ orig_arg_tys
real_stricts = map mk_dict_strict_mark theta ++ arg_stricts
+ -- Example
+ -- data instance T (b,c) where
+ -- TI :: forall e. e -> T (e,e)
+ --
+ -- The representation tycon looks like this:
+ -- data :R7T b c where
+ -- TI :: forall b1 c1. (b1 ~ c1) => b1 -> :R7T b1 c1
+ orig_res_ty = mkFamilyTyConApp tycon (substTyVars (mkTopTvSubst eq_spec) univ_tvs)
+
-- Representation arguments and demands
-- To do: eliminate duplication with MkId
(rep_arg_stricts, rep_arg_tys) = computeRep real_stricts real_arg_tys
dataConTag = dcTag
dataConTyCon :: DataCon -> TyCon
-dataConTyCon = dcTyCon
+dataConTyCon = dcRepTyCon
dataConRepType :: DataCon -> Type
dataConRepType = dcRepType
dataConWorkId :: DataCon -> Id
dataConWorkId dc = case dcIds dc of
- AlgDC _ wrk_id -> wrk_id
- NewDC _ -> pprPanic "dataConWorkId" (ppr dc)
+ DCIds _ wrk_id -> wrk_id
dataConWrapId_maybe :: DataCon -> Maybe Id
-- Returns Nothing if there is no wrapper for an algebraic data con
-- and also for a newtype (whose constructor is inlined compulsorily)
dataConWrapId_maybe dc = case dcIds dc of
- AlgDC mb_wrap _ -> mb_wrap
- NewDC wrap -> Nothing
+ DCIds mb_wrap _ -> mb_wrap
dataConWrapId :: DataCon -> Id
-- Returns an Id which looks like the Haskell-source constructor
dataConWrapId dc = case dcIds dc of
- AlgDC (Just wrap) _ -> wrap
- AlgDC Nothing wrk -> wrk -- worker=wrapper
- NewDC wrap -> wrap
+ DCIds (Just wrap) _ -> wrap
+ DCIds Nothing wrk -> wrk -- worker=wrapper
dataConImplicitIds :: DataCon -> [Id]
dataConImplicitIds dc = case dcIds dc of
- AlgDC (Just wrap) work -> [wrap,work]
- AlgDC Nothing work -> [work]
- NewDC wrap -> [wrap]
+ DCIds (Just wrap) work -> [wrap,work]
+ DCIds Nothing work -> [work]
dataConFieldLabels :: DataCon -> [FieldLabel]
dataConFieldLabels = dcFields
-- Core constructor application (Con dc args)
dataConRepStrictness dc = dcRepStrictness dc
-dataConSig :: DataCon -> ([TyVar], ThetaType, [Type])
+dataConSig :: DataCon -> ([TyVar], ThetaType, [Type], Type)
dataConSig (MkData {dcUnivTyVars = univ_tvs, dcExTyVars = ex_tvs, dcEqSpec = eq_spec,
- dcTheta = theta, dcOrigArgTys = arg_tys, dcTyCon = tycon})
- = (univ_tvs ++ ex_tvs, eqSpecPreds eq_spec ++ theta, arg_tys)
+ dcTheta = theta, dcOrigArgTys = arg_tys, dcOrigResTy = res_ty})
+ = (univ_tvs ++ ex_tvs, eqSpecPreds eq_spec ++ theta, arg_tys, res_ty)
dataConFullSig :: DataCon
- -> ([TyVar], [TyVar], [(TyVar,Type)], ThetaType, [Type])
+ -> ([TyVar], [TyVar], [(TyVar,Type)], ThetaType, [Type], Type)
dataConFullSig (MkData {dcUnivTyVars = univ_tvs, dcExTyVars = ex_tvs, dcEqSpec = eq_spec,
- dcTheta = theta, dcOrigArgTys = arg_tys, dcTyCon = tycon})
- = (univ_tvs, ex_tvs, eq_spec, theta, arg_tys)
+ dcTheta = theta, dcOrigArgTys = arg_tys, dcOrigResTy = res_ty})
+ = (univ_tvs, ex_tvs, eq_spec, theta, arg_tys, res_ty)
+
+dataConOrigResTy :: DataCon -> Type
+dataConOrigResTy dc = dcOrigResTy dc
dataConStupidTheta :: DataCon -> ThetaType
dataConStupidTheta dc = dcStupidTheta dc
-dataConResTys :: DataCon -> [Type]
-dataConResTys dc = [substTyVar env tv | tv <- dcUnivTyVars dc]
- where
- env = mkTopTvSubst (dcEqSpec dc)
-
dataConUserType :: DataCon -> Type
-- The user-declared type of the data constructor
-- in the nice-to-read form
--- T :: forall a. a -> T [a]
+-- T :: forall a b. a -> b -> T [a]
-- rather than
--- T :: forall b. forall a. (a=[b]) => a -> T b
+-- T :: forall a c. forall b. (c=[a]) => a -> b -> T c
+-- NB: If the constructor is part of a data instance, the result type
+-- mentions the family tycon, not the internal one.
dataConUserType (MkData { dcUnivTyVars = univ_tvs,
dcExTyVars = ex_tvs, dcEqSpec = eq_spec,
dcTheta = theta, dcOrigArgTys = arg_tys,
- dcTyCon = tycon })
+ dcOrigResTy = res_ty })
= mkForAllTys ((univ_tvs `minusList` map fst eq_spec) ++ ex_tvs) $
mkFunTys (mkPredTys theta) $
mkFunTys arg_tys $
- mkTyConApp tycon (map (substTyVar subst) univ_tvs)
- where
- subst = mkTopTvSubst eq_spec
+ res_ty
-dataConInstArgTys :: DataCon
+dataConInstArgTys :: DataCon -- A datacon with no existentials or equality constraints
+ -- However, it can have a dcTheta (notably it can be a
+ -- class dictionary, with superclasses)
-> [Type] -- Instantiated at these types
- -- NB: these INCLUDE the existentially quantified arg types
-> [Type] -- Needs arguments of these types
- -- NB: these INCLUDE the existentially quantified dict args
+ -- NB: these INCLUDE any dict args
-- but EXCLUDE the data-decl context which is discarded
-- It's all post-flattening etc; this is a representation type
-dataConInstArgTys (MkData {dcRepArgTys = arg_tys,
- dcUnivTyVars = univ_tvs,
- dcExTyVars = ex_tvs}) inst_tys
- = ASSERT( length tyvars == length inst_tys )
- map (substTyWith tyvars inst_tys) arg_tys
- where
- tyvars = univ_tvs ++ ex_tvs
-
--- And the same deal for the original arg tys
-dataConInstOrigArgTys :: DataCon -> [Type] -> [Type]
+dataConInstArgTys dc@(MkData {dcRepArgTys = rep_arg_tys,
+ dcUnivTyVars = univ_tvs, dcEqSpec = eq_spec,
+ dcExTyVars = ex_tvs}) inst_tys
+ = ASSERT2 ( length univ_tvs == length inst_tys
+ , ptext SLIT("dataConInstArgTys") <+> ppr dc $$ ppr univ_tvs $$ ppr inst_tys)
+ ASSERT2 ( null ex_tvs && null eq_spec, ppr dc )
+ map (substTyWith univ_tvs inst_tys) rep_arg_tys
+
+dataConInstOrigArgTys
+ :: DataCon -- Works for any DataCon
+ -> [Type] -- Includes existential tyvar args, but NOT
+ -- equality constraints or dicts
+ -> [Type] -- Returns just the instantiated *value* arguments
+-- For vanilla datacons, it's all quite straightforward
+-- But for the call in MatchCon, we really do want just the value args
dataConInstOrigArgTys dc@(MkData {dcOrigArgTys = arg_tys,
- dcUnivTyVars = univ_tvs,
- dcExTyVars = ex_tvs}) inst_tys
- = ASSERT2( length tyvars == length inst_tys, ptext SLIT("dataConInstOrigArgTys") <+> ppr dc <+> ppr inst_tys )
- map (substTyWith tyvars inst_tys) arg_tys
- where
- tyvars = univ_tvs ++ ex_tvs
+ dcUnivTyVars = univ_tvs,
+ dcExTyVars = ex_tvs}) inst_tys
+ = ASSERT2( length tyvars == length inst_tys
+ , ptext SLIT("dataConInstOrigArgTys") <+> ppr dc $$ ppr tyvars $$ ppr inst_tys )
+ map (substTyWith tyvars inst_tys) arg_tys
+ where
+ tyvars = univ_tvs ++ ex_tvs
\end{code}
These two functions get the real argument types of the constructor,
dataConRepArgTys dc = dcRepArgTys dc
\end{code}
+The string <package>:<module>.<name> identifying a constructor, which is attached
+to its info table and used by the GHCi debugger and the heap profiler. We want
+this string to be UTF-8, so we get the bytes directly from the FastStrings.
+
+\begin{code}
+dataConIdentity :: DataCon -> [Word8]
+dataConIdentity dc = bytesFS (packageIdFS (modulePackageId mod)) ++
+ fromIntegral (ord ':') : bytesFS (moduleNameFS (moduleName mod)) ++
+ fromIntegral (ord '.') : bytesFS (occNameFS (nameOccName name))
+ where name = dataConName dc
+ mod = nameModule name
+\end{code}
+
\begin{code}
isTupleCon :: DataCon -> Bool
-isTupleCon (MkData {dcTyCon = tc}) = isTupleTyCon tc
+isTupleCon (MkData {dcRepTyCon = tc}) = isTupleTyCon tc
isUnboxedTupleCon :: DataCon -> Bool
-isUnboxedTupleCon (MkData {dcTyCon = tc}) = isUnboxedTupleTyCon tc
+isUnboxedTupleCon (MkData {dcRepTyCon = tc}) = isUnboxedTupleTyCon tc
isVanillaDataCon :: DataCon -> Bool
isVanillaDataCon dc = dcVanilla dc
classDataCon :: Class -> DataCon
classDataCon clas = case tyConDataCons (classTyCon clas) of
(dict_constr:no_more) -> ASSERT( null no_more ) dict_constr
+ [] -> panic "classDataCon"
\end{code}
%************************************************************************
-- and for constructors visible
-> Just (tycon, ty_args, data_con, dataConInstArgTys data_con ty_args)
where
- data_con = head (tyConDataCons tycon)
+ data_con = ASSERT( not (null (tyConDataCons tycon)) )
+ head (tyConDataCons tycon)
other -> Nothing
splitProductType str ty
Nothing -> pprPanic (str ++ ": not a product") (pprType ty)
+deepSplitProductType_maybe ty
+ = do { (res@(tycon, tycon_args, _, _)) <- splitProductType_maybe ty
+ ; let {result
+ | Just (ty', _co) <- instNewTyCon_maybe tycon tycon_args
+ , not (isRecursiveTyCon tycon)
+ = deepSplitProductType_maybe ty' -- Ignore the coercion?
+ | isNewTyCon tycon = Nothing -- cannot unbox through recursive
+ -- newtypes nor through families
+ | otherwise = Just res}
+ ; result
+ }
+
+deepSplitProductType str ty
+ = case deepSplitProductType_maybe ty of
+ Just stuff -> stuff
+ Nothing -> pprPanic (str ++ ": not a product") (pprType ty)
+
computeRep :: [StrictnessMark] -- Original arg strictness
-> [Type] -- and types
-> ([StrictnessMark], -- Representation arg strictness
unbox NotMarkedStrict ty = [(NotMarkedStrict, ty)]
unbox MarkedStrict ty = [(MarkedStrict, ty)]
unbox MarkedUnboxed ty = zipEqual "computeRep" (dataConRepStrictness arg_dc) arg_tys
- where
- (_, _, arg_dc, arg_tys) = splitProductType "unbox_strict_arg_ty" ty
+ where
+ (_tycon, _tycon_args, arg_dc, arg_tys)
+ = deepSplitProductType "unbox_strict_arg_ty" ty
\end{code}