From e1231b2bcb1c9294c2ecdf150e9aad72a0caa253 Mon Sep 17 00:00:00 2001 From: "simonpj@microsoft.com" Date: Wed, 12 Jul 2006 15:32:04 +0000 Subject: [PATCH] Experimental flag -fdicts-cheap This experimental flag, -fdicts-cheap, makes a let-binding that bind a value of dictionary type look cheap. That in turn leads to more eta expansion. Instead of f = /\a. \(d1:Ord a). let d2:Ord [a] = dfOrd a d1 in \(x:a). which has arity 1, you get f = /\a. \(d1:Ord a). \(x:a). let d2:Ord [a] = dfOrd a d1 in Now f has arity 2. This can cretainly waste dictionary-construction work, if f is partially applied to its dictionary argument. However it has knock-on effects. Because f has arity 2, we won't float (f Int d) out of \x. h (f Int d) Floating f out of this lambda makes it impossible for an h/f fusion rule to fire; and this unexpected loss of RULE application was the immediate reason for implementing this flag. (Roman Leshchinskiy came across this when working on array fusion.) I've implemented the change only in CoreUtils.arityType, which only affects eta expansion. I thought of putting the change in exprIsCheap, which is a more systematic place (the former calls the latter) but a) I wanted this under flag control, and the flags are not readily available to all callers of exprIsCheap b) I'm not 100% convinced that this change is a good idea, so it's reasonable to do the narrowest change that solves the immediate problem. --- compiler/coreSyn/CoreUtils.lhs | 80 ++++++++++++++++++++++++------------- compiler/main/DynFlags.hs | 2 + compiler/simplCore/SimplUtils.lhs | 10 ++--- 3 files changed, 59 insertions(+), 33 deletions(-) diff --git a/compiler/coreSyn/CoreUtils.lhs b/compiler/coreSyn/CoreUtils.lhs index 6aa6583..e737348 100644 --- a/compiler/coreSyn/CoreUtils.lhs +++ b/compiler/coreSyn/CoreUtils.lhs @@ -58,7 +58,7 @@ import PrimOp ( PrimOp(..), primOpOkForSpeculation, primOpIsCheap ) import Id ( Id, idType, globalIdDetails, idNewStrictness, mkWildId, idArity, idName, idUnfolding, idInfo, isOneShotBndr, isStateHackType, isDataConWorkId_maybe, mkSysLocal, - isDataConWorkId, isBottomingId + isDataConWorkId, isBottomingId, isDictId ) import IdInfo ( GlobalIdDetails(..), megaSeqIdInfo ) import NewDemand ( appIsBottom ) @@ -74,6 +74,7 @@ import CostCentre ( CostCentre ) import BasicTypes ( Arity ) import Unique ( Unique ) import Outputable +import DynFlags ( DynFlags, DynFlag(Opt_DictsCheap), dopt ) import TysPrim ( alphaTy ) -- Debugging only import Util ( equalLength, lengthAtLeast, foldl2 ) \end{code} @@ -432,8 +433,8 @@ exprIsCheap (Case e _ _ alts) = exprIsCheap e && exprIsCheap (Let (NonRec x _) e) | isUnLiftedType (idType x) = exprIsCheap e | otherwise = False - -- strict lets always have cheap right hand sides, and - -- do no allocation. + -- strict lets always have cheap right hand sides, + -- and do no allocation. exprIsCheap other_expr = go other_expr 0 True @@ -448,7 +449,7 @@ exprIsCheap other_expr -- because it certainly doesn't need to be shared! go (App f a) n_args args_cheap - | not (isRuntimeArg a) = go f n_args args_cheap + | not (isRuntimeArg a) = go f n_args args_cheap | otherwise = go f (n_args + 1) (exprIsCheap a && args_cheap) go other n_args args_cheap = False @@ -487,7 +488,6 @@ It returns True iff soon, without raising an exception, without causing a side effect (e.g. writing a mutable variable) - E.G. let x = case y# +# 1# of { r# -> I# r# } in E @@ -706,7 +706,7 @@ exprIsConApp_maybe expr = analyse (collectArgs expr) %************************************************************************ \begin{code} -exprEtaExpandArity :: CoreExpr -> Arity +exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity {- The Arity returned is the number of value args the thing can be applied to without doing much work @@ -786,7 +786,7 @@ decopose Int to a function type. Hence the final case in eta_expand. -} -exprEtaExpandArity e = arityDepth (arityType e) +exprEtaExpandArity dflags e = arityDepth (arityType dflags e) -- A limited sort of function type data ArityType = AFun Bool ArityType -- True <=> one-shot @@ -802,17 +802,17 @@ andArityType ATop at2 = ATop andArityType (AFun t1 at1) (AFun t2 at2) = AFun (t1 && t2) (andArityType at1 at2) andArityType at1 at2 = andArityType at2 at1 -arityType :: CoreExpr -> ArityType +arityType :: DynFlags -> CoreExpr -> ArityType -- (go1 e) = [b1,..,bn] -- means expression can be rewritten \x_b1 -> ... \x_bn -> body -- where bi is True <=> the lambda is one-shot -arityType (Note n e) = arityType e +arityType dflags (Note n e) = arityType dflags e -- Not needed any more: etaExpand is cleverer --- | ok_note n = arityType e +-- | ok_note n = arityType dflags e -- | otherwise = ATop -arityType (Var v) +arityType dflags (Var v) = mk (idArity v) (arg_tys (idType v)) where mk :: Arity -> [Type] -> ArityType @@ -835,14 +835,15 @@ arityType (Var v) | otherwise = [] -- Lambdas; increase arity -arityType (Lam x e) | isId x = AFun (isOneShotBndr x) (arityType e) - | otherwise = arityType e +arityType dflags (Lam x e) + | isId x = AFun (isOneShotBndr x) (arityType dflags e) + | otherwise = arityType dflags e -- Applications; decrease arity -arityType (App f (Type _)) = arityType f -arityType (App f a) = case arityType f of - AFun one_shot xs | exprIsCheap a -> xs - other -> ATop +arityType dflags (App f (Type _)) = arityType dflags f +arityType dflags (App f a) = case arityType dflags f of + AFun one_shot xs | exprIsCheap a -> xs + other -> ATop -- Case/Let; keep arity if either the expression is cheap -- or it's a 1-shot lambda @@ -851,17 +852,40 @@ arityType (App f a) = case arityType f of -- ===> -- f x y = case x of { (a,b) -> e } -- The difference is observable using 'seq' -arityType (Case scrut _ _ alts) = case foldr1 andArityType [arityType rhs | (_,_,rhs) <- alts] of - xs | exprIsCheap scrut -> xs - xs@(AFun one_shot _) | one_shot -> AFun True ATop - other -> ATop - -arityType (Let b e) = case arityType e of - xs | all exprIsCheap (rhssOfBind b) -> xs - xs@(AFun one_shot _) | one_shot -> AFun True ATop - other -> ATop - -arityType other = ATop +arityType dflags (Case scrut _ _ alts) + = case foldr1 andArityType [arityType dflags rhs | (_,_,rhs) <- alts] of + xs | exprIsCheap scrut -> xs + xs@(AFun one_shot _) | one_shot -> AFun True ATop + other -> ATop + +arityType dflags (Let b e) + = case arityType dflags e of + xs | cheap_bind b -> xs + xs@(AFun one_shot _) | one_shot -> AFun True ATop + other -> ATop + where + cheap_bind (NonRec b e) = is_cheap (b,e) + cheap_bind (Rec prs) = all is_cheap prs + is_cheap (b,e) = (dopt Opt_DictsCheap dflags && isDictId b) + || exprIsCheap e + -- If the experimental -fdicts-cheap flag is on, we eta-expand through + -- dictionary bindings. This improves arities. Thereby, it also + -- means that full laziness is less prone to floating out the + -- application of a function to its dictionary arguments, which + -- can thereby lose opportunities for fusion. Example: + -- foo :: Ord a => a -> ... + -- foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). .... + -- -- So foo has arity 1 + -- + -- f = \x. foo dInt $ bar x + -- + -- The (foo DInt) is floated out, and makes ineffective a RULE + -- foo (bar x) = ... + -- + -- One could go further and make exprIsCheap reply True to any + -- dictionary-typed expression, but that's more work. + +arityType dflags other = ATop {- NOT NEEDED ANY MORE: etaExpand is cleverer ok_note InlineMe = False diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index cf73f42..6c5bfd6 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -168,6 +168,7 @@ data DynFlag | Opt_DoEtaReduction | Opt_CaseMerge | Opt_UnboxStrictFields + | Opt_DictsCheap -- misc opts | Opt_Cpp @@ -1007,6 +1008,7 @@ fFlags = [ ( "do-eta-reduction", Opt_DoEtaReduction ), ( "case-merge", Opt_CaseMerge ), ( "unbox-strict-fields", Opt_UnboxStrictFields ), + ( "dicts-cheap", Opt_DictsCheap ), ( "excess-precision", Opt_ExcessPrecision ), ( "asm-mangling", Opt_DoAsmMangling ) ] diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs index 1e51042..2fd3870 100644 --- a/compiler/simplCore/SimplUtils.lhs +++ b/compiler/simplCore/SimplUtils.lhs @@ -25,7 +25,7 @@ module SimplUtils ( import SimplEnv import DynFlags ( SimplifierSwitch(..), SimplifierMode(..), - DynFlag(..), dopt ) + DynFlags, DynFlag(..), dopt ) import StaticFlags ( opt_UF_UpdateInPlace, opt_SimplNoPreInlining, opt_RulesOff ) import CoreSyn @@ -818,7 +818,7 @@ mkLam env bndrs body cont | dopt Opt_DoLambdaEtaExpansion dflags, any isRuntimeVar bndrs - = tryEtaExpansion body `thenSmpl` \ body' -> + = tryEtaExpansion dflags body `thenSmpl` \ body' -> returnSmpl (emptyFloats env, mkLams bndrs body') {- Sept 01: I'm experimenting with getting the @@ -901,13 +901,13 @@ when computing arity; and etaExpand adds the coerces as necessary when actually computing the expansion. \begin{code} -tryEtaExpansion :: OutExpr -> SimplM OutExpr +tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr -- There is at least one runtime binder in the binders -tryEtaExpansion body +tryEtaExpansion dflags body = getUniquesSmpl `thenSmpl` \ us -> returnSmpl (etaExpand fun_arity us body (exprType body)) where - fun_arity = exprEtaExpandArity body + fun_arity = exprEtaExpandArity dflags body \end{code} -- 1.7.10.4