X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FsimplCore%2FSimplUtils.lhs;h=56d2795e310d9db0f200e5e3432ee0d2abde31bc;hp=1e510421e72a1c94632f5c100e6faadc5557f97f;hb=b84ba676034763b3082bbd9405794a4fde499d14;hpb=f2dcf256399e9a2de6343c625630b51f8abf4863 diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs index 1e51042..56d2795 100644 --- a/compiler/simplCore/SimplUtils.lhs +++ b/compiler/simplCore/SimplUtils.lhs @@ -5,297 +5,242 @@ \begin{code} module SimplUtils ( - mkLam, mkCase, + -- Rebuilding + mkLam, mkCase, prepareAlts, -- Inlining, - preInlineUnconditionally, postInlineUnconditionally, activeInline, activeRule, - inlineMode, + preInlineUnconditionally, postInlineUnconditionally, + activeUnfolding, activeUnfInRule, activeRule, + simplEnvForGHCi, simplEnvForRules, updModeForInlineRules, -- The continuation type - SimplCont(..), DupFlag(..), LetRhsFlag(..), - contIsDupable, contResultType, - countValArgs, countArgs, pushContArgs, - mkBoringStop, mkLazyArgStop, mkRhsStop, contIsRhs, contIsRhsOrArg, - getContArgs, interestingCallContext, interestingArgContext, - interestingArg, isStrictType + SimplCont(..), DupFlag(..), ArgInfo(..), + contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, + pushArgs, countValArgs, countArgs, addArgTo, + mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg, + interestingCallContext, + interestingArg, mkArgInfo, + + abstractFloats ) where #include "HsVersions.h" import SimplEnv -import DynFlags ( SimplifierSwitch(..), SimplifierMode(..), - DynFlag(..), dopt ) -import StaticFlags ( opt_UF_UpdateInPlace, opt_SimplNoPreInlining, - opt_RulesOff ) +import DynFlags +import StaticFlags import CoreSyn -import CoreFVs ( exprFreeVars ) -import CoreUtils ( cheapEqExpr, exprType, exprIsTrivial, - etaExpand, exprEtaExpandArity, bindNonRec, mkCoerce2, - findDefault, exprOkForSpeculation, exprIsHNF, mergeAlts - ) -import Literal ( mkStringLit ) -import CoreUnfold ( smallEnoughToInline ) -import MkId ( eRROR_ID ) -import Id ( Id, idType, isDataConWorkId, idOccInfo, isDictId, - isDeadBinder, idNewDemandInfo, isExportedId, - idUnfolding, idNewStrictness, idInlinePragma, idHasRules - ) -import NewDemand ( isStrictDmd, isBotRes, splitStrictSig ) +import qualified CoreSubst +import PprCore +import CoreFVs +import CoreUtils +import CoreArity ( etaExpand, exprEtaExpandArity ) +import CoreUnfold +import Name +import Id +import Var ( isCoVar ) +import Demand import SimplMonad -import Type ( Type, splitFunTys, dropForAlls, isStrictType, - splitTyConApp_maybe, tyConAppArgs - ) -import TyCon ( tyConDataCons_maybe ) -import DataCon ( dataConRepArity ) +import Type hiding( substTy ) +import Coercion ( coercionKind ) +import TyCon +import Unify ( dataConCannotMatch ) import VarSet -import BasicTypes ( TopLevelFlag(..), isNotTopLevel, OccInfo(..), isLoopBreaker, isOneOcc, - Activation, isAlwaysActive, isActive ) -import Util ( lengthExceeds ) +import BasicTypes +import Util +import MonadUtils import Outputable +import FastString + +import Data.List \end{code} %************************************************************************ %* * -\subsection{The continuation data type} + The SimplCont type %* * %************************************************************************ -\begin{code} -data SimplCont -- Strict contexts - = Stop OutType -- Type of the result - LetRhsFlag - Bool -- True <=> There is something interesting about - -- the context, and hence the inliner - -- should be a bit keener (see interestingCallContext) - -- Two cases: - -- (a) This is the RHS of a thunk whose type suggests - -- that update-in-place would be possible - -- (b) This is an argument of a function that has RULES - -- Inlining the call might allow the rule to fire +A SimplCont allows the simplifier to traverse the expression in a +zipper-like fashion. The SimplCont represents the rest of the expression, +"above" the point of interest. - | CoerceIt OutType -- The To-type, simplified - SimplCont +You can also think of a SimplCont as an "evaluation context", using +that term in the way it is used for operational semantics. This is the +way I usually think of it, For example you'll often see a syntax for +evaluation context looking like + C ::= [] | C e | case C of alts | C `cast` co +That's the kind of thing we are doing here, and I use that syntax in +the comments. - | ApplyTo DupFlag - InExpr SimplEnv -- The argument, as yet unsimplified, - SimplCont -- and its environment - | Select DupFlag - InId [InAlt] SimplEnv -- The case binder, alts, and subst-env - SimplCont +Key points: + * A SimplCont describes a *strict* context (just like + evaluation contexts do). E.g. Just [] is not a SimplCont - | ArgOf LetRhsFlag -- An arbitrary strict context: the argument - -- of a strict function, or a primitive-arg fn - -- or a PrimOp - -- No DupFlag, because we never duplicate it - OutType -- arg_ty: type of the argument itself - OutType -- cont_ty: the type of the expression being sought by the context - -- f (error "foo") ==> coerce t (error "foo") - -- when f is strict - -- We need to know the type t, to which to coerce. + * A SimplCont describes a context that *does not* bind + any variables. E.g. \x. [] is not a SimplCont - (SimplEnv -> OutExpr -> SimplM FloatsWithExpr) -- What to do with the result - -- The result expression in the OutExprStuff has type cont_ty +\begin{code} +data SimplCont + = Stop -- An empty context, or hole, [] + CallCtxt -- True <=> There is something interesting about + -- the context, and hence the inliner + -- should be a bit keener (see interestingCallContext) + -- Specifically: + -- This is an argument of a function that has RULES + -- Inlining the call might allow the rule to fire -data LetRhsFlag = AnArg -- It's just an argument not a let RHS - | AnRhs -- It's the RHS of a let (so please float lets out of big lambdas) + | CoerceIt -- C `cast` co + OutCoercion -- The coercion simplified + SimplCont + + | ApplyTo -- C arg + DupFlag + InExpr StaticEnv -- The argument and its static env + SimplCont + + | Select -- case C of alts + DupFlag + InId [InAlt] StaticEnv -- The case binder, alts, and subst-env + SimplCont + + -- The two strict forms have no DupFlag, because we never duplicate them + | StrictBind -- (\x* \xs. e) C + InId [InBndr] -- let x* = [] in e + InExpr StaticEnv -- is a special case + SimplCont + + | StrictArg -- f e1 ..en C + ArgInfo -- Specifies f, e1..en, Whether f has rules, etc + -- plus strictness flags for *further* args + CallCtxt -- Whether *this* argument position is interesting + SimplCont + +data ArgInfo + = ArgInfo { + ai_fun :: Id, -- The function + ai_args :: [OutExpr], -- ...applied to these args (which are in *reverse* order) + ai_rules :: [CoreRule], -- Rules for this function + + ai_encl :: Bool, -- Flag saying whether this function + -- or an enclosing one has rules (recursively) + -- True => be keener to inline in all args + + ai_strs :: [Bool], -- Strictness of remaining arguments + -- Usually infinite, but if it is finite it guarantees + -- that the function diverges after being given + -- that number of args + ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline + -- Always infinite + } -instance Outputable LetRhsFlag where - ppr AnArg = ptext SLIT("arg") - ppr AnRhs = ptext SLIT("rhs") +addArgTo :: ArgInfo -> OutExpr -> ArgInfo +addArgTo ai arg = ai { ai_args = arg : ai_args ai } instance Outputable SimplCont where - ppr (Stop ty is_rhs _) = ptext SLIT("Stop") <> brackets (ppr is_rhs) <+> ppr ty - ppr (ApplyTo dup arg se cont) = (ptext SLIT("ApplyTo") <+> ppr dup <+> ppr arg) $$ ppr cont - ppr (ArgOf _ _ _ _) = ptext SLIT("ArgOf...") - ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$ - (nest 4 (ppr alts)) $$ ppr cont - ppr (CoerceIt ty cont) = (ptext SLIT("CoerceIt") <+> ppr ty) $$ ppr cont + ppr (Stop interesting) = ptext (sLit "Stop") <> brackets (ppr interesting) + ppr (ApplyTo dup arg _ cont) = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg) + {- $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont + ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont + ppr (StrictArg ai _ cont) = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont + ppr (Select dup bndr alts _ cont) = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$ + (nest 4 (ppr alts)) $$ ppr cont + ppr (CoerceIt co cont) = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont data DupFlag = OkToDup | NoDup instance Outputable DupFlag where - ppr OkToDup = ptext SLIT("ok") - ppr NoDup = ptext SLIT("nodup") + ppr OkToDup = ptext (sLit "ok") + ppr NoDup = ptext (sLit "nodup") -------------------- -mkBoringStop :: OutType -> SimplCont -mkBoringStop ty = Stop ty AnArg False -mkLazyArgStop :: OutType -> Bool -> SimplCont -mkLazyArgStop ty has_rules = Stop ty AnArg (canUpdateInPlace ty || has_rules) +------------------- +mkBoringStop :: SimplCont +mkBoringStop = Stop BoringCtxt -mkRhsStop :: OutType -> SimplCont -mkRhsStop ty = Stop ty AnRhs (canUpdateInPlace ty) +mkRhsStop :: SimplCont -- See Note [RHS of lets] in CoreUnfold +mkRhsStop = Stop (ArgCtxt False) -contIsRhs :: SimplCont -> Bool -contIsRhs (Stop _ AnRhs _) = True -contIsRhs (ArgOf AnRhs _ _ _) = True -contIsRhs other = False +mkLazyArgStop :: CallCtxt -> SimplCont +mkLazyArgStop cci = Stop cci -contIsRhsOrArg (Stop _ _ _) = True -contIsRhsOrArg (ArgOf _ _ _ _) = True -contIsRhsOrArg other = False +------------------- +contIsRhsOrArg :: SimplCont -> Bool +contIsRhsOrArg (Stop {}) = True +contIsRhsOrArg (StrictBind {}) = True +contIsRhsOrArg (StrictArg {}) = True +contIsRhsOrArg _ = False ------------------- contIsDupable :: SimplCont -> Bool -contIsDupable (Stop _ _ _) = True +contIsDupable (Stop {}) = True contIsDupable (ApplyTo OkToDup _ _ _) = True contIsDupable (Select OkToDup _ _ _ _) = True contIsDupable (CoerceIt _ cont) = contIsDupable cont -contIsDupable other = False +contIsDupable _ = False ------------------- -discardableCont :: SimplCont -> Bool -discardableCont (Stop _ _ _) = False -discardableCont (CoerceIt _ cont) = discardableCont cont -discardableCont other = True - -discardCont :: SimplCont -- A continuation, expecting - -> SimplCont -- Replace the continuation with a suitable coerce -discardCont cont = case cont of - Stop to_ty is_rhs _ -> cont - other -> CoerceIt to_ty (mkBoringStop to_ty) - where - to_ty = contResultType cont +contIsTrivial :: SimplCont -> Bool +contIsTrivial (Stop {}) = True +contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont +contIsTrivial (CoerceIt _ cont) = contIsTrivial cont +contIsTrivial _ = False ------------------- -contResultType :: SimplCont -> OutType -contResultType (Stop to_ty _ _) = to_ty -contResultType (ArgOf _ _ to_ty _) = to_ty -contResultType (ApplyTo _ _ _ cont) = contResultType cont -contResultType (CoerceIt _ cont) = contResultType cont -contResultType (Select _ _ _ _ cont) = contResultType cont +contResultType :: SimplEnv -> OutType -> SimplCont -> OutType +contResultType env ty cont + = go cont ty + where + subst_ty se ty = substTy (se `setInScope` env) ty -------------------- -countValArgs :: SimplCont -> Int -countValArgs (ApplyTo _ (Type ty) se cont) = countValArgs cont -countValArgs (ApplyTo _ val_arg se cont) = 1 + countValArgs cont -countValArgs other = 0 + go (Stop {}) ty = ty + go (CoerceIt co cont) _ = go cont (snd (coercionKind co)) + go (StrictBind _ bs body se cont) _ = go cont (subst_ty se (exprType (mkLams bs body))) + go (StrictArg ai _ cont) _ = go cont (funResultTy (argInfoResultTy ai)) + go (Select _ _ alts se cont) _ = go cont (subst_ty se (coreAltsType alts)) + go (ApplyTo _ arg se cont) ty = go cont (apply_to_arg ty arg se) -countArgs :: SimplCont -> Int -countArgs (ApplyTo _ arg se cont) = 1 + countArgs cont -countArgs other = 0 + apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg) + apply_to_arg ty _ _ = funResultTy ty + +argInfoResultTy :: ArgInfo -> OutType +argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args }) + = foldr (\arg fn_ty -> applyTypeToArg fn_ty arg) (idType fun) args ------------------- -pushContArgs :: SimplEnv -> [OutArg] -> SimplCont -> SimplCont --- Pushes args with the specified environment -pushContArgs env [] cont = cont -pushContArgs env (arg : args) cont = ApplyTo NoDup arg env (pushContArgs env args cont) -\end{code} +countValArgs :: SimplCont -> Int +countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont +countValArgs (ApplyTo _ _ _ cont) = 1 + countValArgs cont +countValArgs _ = 0 +countArgs :: SimplCont -> Int +countArgs (ApplyTo _ _ _ cont) = 1 + countArgs cont +countArgs _ = 0 -\begin{code} -getContArgs :: SwitchChecker - -> OutId -> SimplCont - -> ([(InExpr, SimplEnv, Bool)], -- Arguments; the Bool is true for strict args - SimplCont) -- Remaining continuation --- getContArgs id k = (args, k', inl) --- args are the leading ApplyTo items in k --- (i.e. outermost comes first) --- augmented with demand info from the functionn -getContArgs chkr fun orig_cont - = let - -- Ignore strictness info if the no-case-of-case - -- flag is on. Strictness changes evaluation order - -- and that can change full laziness - stricts | switchIsOn chkr NoCaseOfCase = vanilla_stricts - | otherwise = computed_stricts - in - go [] stricts orig_cont +contArgs :: SimplCont -> ([OutExpr], SimplCont) +-- Uses substitution to turn each arg into an OutExpr +contArgs cont = go [] cont where - ---------------------------- - - -- Type argument - go acc ss (ApplyTo _ arg@(Type _) se cont) - = go ((arg,se,False) : acc) ss cont - -- NB: don't bother to instantiate the function type - - -- Value argument - go acc (s:ss) (ApplyTo _ arg se cont) - = go ((arg,se,s) : acc) ss cont - - -- We're run out of arguments, or else we've run out of demands - -- The latter only happens if the result is guaranteed bottom - -- This is the case for - -- * case (error "hello") of { ... } - -- * (error "Hello") arg - -- * f (error "Hello") where f is strict - -- etc - -- Then, especially in the first of these cases, we'd like to discard - -- the continuation, leaving just the bottoming expression. But the - -- type might not be right, so we may have to add a coerce. - go acc ss cont - | null ss && discardableCont cont = (reverse acc, discardCont cont) - | otherwise = (reverse acc, cont) - - ---------------------------- - vanilla_stricts, computed_stricts :: [Bool] - vanilla_stricts = repeat False - computed_stricts = zipWith (||) fun_stricts arg_stricts - - ---------------------------- - (val_arg_tys, _) = splitFunTys (dropForAlls (idType fun)) - arg_stricts = map isStrictType val_arg_tys ++ repeat False - -- These argument types are used as a cheap and cheerful way to find - -- unboxed arguments, which must be strict. But it's an InType - -- and so there might be a type variable where we expect a function - -- type (the substitution hasn't happened yet). And we don't bother - -- doing the type applications for a polymorphic function. - -- Hence the splitFunTys*IgnoringForAlls* - - ---------------------------- - -- If fun_stricts is finite, it means the function returns bottom - -- after that number of value args have been consumed - -- Otherwise it's infinite, extended with False - fun_stricts - = case splitStrictSig (idNewStrictness fun) of - (demands, result_info) - | not (demands `lengthExceeds` countValArgs orig_cont) - -> -- Enough args, use the strictness given. - -- For bottoming functions we used to pretend that the arg - -- is lazy, so that we don't treat the arg as an - -- interesting context. This avoids substituting - -- top-level bindings for (say) strings into - -- calls to error. But now we are more careful about - -- inlining lone variables, so its ok (see SimplUtils.analyseCont) - if isBotRes result_info then - map isStrictDmd demands -- Finite => result is bottom - else - map isStrictDmd demands ++ vanilla_stricts + go args (ApplyTo _ arg se cont) = go (substExpr se arg : args) cont + go args cont = (reverse args, cont) - other -> vanilla_stricts -- Not enough args, or no strictness +pushArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont +pushArgs _env [] cont = cont +pushArgs env (arg:args) cont = ApplyTo NoDup arg env (pushArgs env args cont) -------------------- -interestingArg :: OutExpr -> Bool - -- An argument is interesting if it has *some* structure - -- We are here trying to avoid unfolding a function that - -- is applied only to variables that have no unfolding - -- (i.e. they are probably lambda bound): f x y z - -- There is little point in inlining f here. -interestingArg (Var v) = hasSomeUnfolding (idUnfolding v) - -- Was: isValueUnfolding (idUnfolding v') - -- But that seems over-pessimistic - || isDataConWorkId v - -- This accounts for an argument like - -- () or [], which is definitely interesting -interestingArg (Type _) = False -interestingArg (App fn (Type _)) = interestingArg fn -interestingArg (Note _ a) = interestingArg a -interestingArg other = True - -- Consider let x = 3 in f x - -- The substitution will contain (x -> ContEx 3), and we want to - -- to say that x is an interesting argument. - -- But consider also (\x. f x y) y - -- The substitution will contain (x -> ContEx y), and we want to say - -- that x is not interesting (assuming y has no unfolding) +dropArgs :: Int -> SimplCont -> SimplCont +dropArgs 0 cont = cont +dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont +dropArgs n other = pprPanic "dropArgs" (ppr n <+> ppr other) \end{code} -Comment about interestingCallContext -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Note [Interesting call context] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We want to avoid inlining an expression where there can't possibly be any gain, such as in an argument position. Hence, if the continuation is interesting (eg. a case scrutinee, application etc.) then we @@ -327,61 +272,30 @@ applies when x is bound to a lambda expression. Hence contIsInteresting looks for case expressions with just a single default case. + \begin{code} -interestingCallContext :: Bool -- False <=> no args at all - -> Bool -- False <=> no value args - -> SimplCont -> Bool - -- The "lone-variable" case is important. I spent ages - -- messing about with unsatisfactory varaints, but this is nice. - -- The idea is that if a variable appear all alone - -- as an arg of lazy fn, or rhs Stop - -- as scrutinee of a case Select - -- as arg of a strict fn ArgOf - -- then we should not inline it (unless there is some other reason, - -- e.g. is is the sole occurrence). We achieve this by making - -- interestingCallContext return False for a lone variable. - -- - -- Why? At least in the case-scrutinee situation, turning - -- let x = (a,b) in case x of y -> ... - -- into - -- let x = (a,b) in case (a,b) of y -> ... - -- and thence to - -- let x = (a,b) in let y = (a,b) in ... - -- is bad if the binding for x will remain. - -- - -- Another example: I discovered that strings - -- were getting inlined straight back into applications of 'error' - -- because the latter is strict. - -- s = "foo" - -- f = \x -> ...(error s)... - - -- Fundamentally such contexts should not ecourage inlining because - -- the context can ``see'' the unfolding of the variable (e.g. case or a RULE) - -- so there's no gain. - -- - -- However, even a type application or coercion isn't a lone variable. - -- Consider - -- case $fMonadST @ RealWorld of { :DMonad a b c -> c } - -- We had better inline that sucker! The case won't see through it. - -- - -- For now, I'm treating treating a variable applied to types - -- in a *lazy* context "lone". The motivating example was - -- f = /\a. \x. BIG - -- g = /\a. \y. h (f a) - -- There's no advantage in inlining f here, and perhaps - -- a significant disadvantage. Hence some_val_args in the Stop case - -interestingCallContext some_args some_val_args cont +interestingCallContext :: SimplCont -> CallCtxt +-- See Note [Interesting call context] +interestingCallContext cont = interesting cont where - interesting (Select _ _ _ _ _) = some_args - interesting (ApplyTo _ _ _ _) = True -- Can happen if we have (coerce t (f x)) y - -- Perhaps True is a bit over-keen, but I've - -- seen (coerce f) x, where f has an INLINE prag, - -- So we have to give some motivaiton for inlining it - interesting (ArgOf _ _ _ _) = some_val_args - interesting (Stop ty _ interesting) = some_val_args && interesting - interesting (CoerceIt _ cont) = interesting cont + interesting (Select _ bndr _ _ _) + | isDeadBinder bndr = CaseCtxt + | otherwise = ArgCtxt False -- If the binder is used, this + -- is like a strict let + -- See Note [RHS of lets] in CoreUnfold + + interesting (ApplyTo _ arg _ cont) + | isTypeArg arg = interesting cont + | otherwise = ValAppCtxt -- Can happen if we have (f Int |> co) y + -- If f has an INLINE prag we need to give it some + -- motivation to inline. See Note [Cast then apply] + -- in CoreUnfold + + interesting (StrictArg _ cci _) = cci + interesting (StrictBind {}) = BoringCtxt + interesting (Stop cci) = cci + interesting (CoerceIt _ cont) = interesting cont -- If this call is the arg of a strict function, the context -- is a bit interesting. If we inline here, we may get useful -- evaluation information to avoid repeated evals: e.g. @@ -399,7 +313,82 @@ interestingCallContext some_args some_val_args cont ------------------- -interestingArgContext :: Id -> SimplCont -> Bool +mkArgInfo :: Id + -> [CoreRule] -- Rules for function + -> Int -- Number of value args + -> SimplCont -- Context of the call + -> ArgInfo + +mkArgInfo fun rules n_val_args call_cont + | n_val_args < idArity fun -- Note [Unsaturated functions] + = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules + , ai_encl = False + , ai_strs = vanilla_stricts + , ai_discs = vanilla_discounts } + | otherwise + = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules + , ai_encl = interestingArgContext rules call_cont + , ai_strs = add_type_str (idType fun) arg_stricts + , ai_discs = arg_discounts } + where + vanilla_discounts, arg_discounts :: [Int] + vanilla_discounts = repeat 0 + arg_discounts = case idUnfolding fun of + CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}} + -> discounts ++ vanilla_discounts + _ -> vanilla_discounts + + vanilla_stricts, arg_stricts :: [Bool] + vanilla_stricts = repeat False + + arg_stricts + = case splitStrictSig (idStrictness fun) of + (demands, result_info) + | not (demands `lengthExceeds` n_val_args) + -> -- Enough args, use the strictness given. + -- For bottoming functions we used to pretend that the arg + -- is lazy, so that we don't treat the arg as an + -- interesting context. This avoids substituting + -- top-level bindings for (say) strings into + -- calls to error. But now we are more careful about + -- inlining lone variables, so its ok (see SimplUtils.analyseCont) + if isBotRes result_info then + map isStrictDmd demands -- Finite => result is bottom + else + map isStrictDmd demands ++ vanilla_stricts + | otherwise + -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun) + <+> ppr n_val_args <+> ppr demands ) + vanilla_stricts -- Not enough args, or no strictness + + add_type_str :: Type -> [Bool] -> [Bool] + -- If the function arg types are strict, record that in the 'strictness bits' + -- No need to instantiate because unboxed types (which dominate the strict + -- types) can't instantiate type variables. + -- add_type_str is done repeatedly (for each call); might be better + -- once-for-all in the function + -- But beware primops/datacons with no strictness + add_type_str _ [] = [] + add_type_str fun_ty strs -- Look through foralls + | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions + = add_type_str fun_ty' strs + add_type_str fun_ty (str:strs) -- Add strict-type info + | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty + = (str || isStrictType arg_ty) : add_type_str fun_ty' strs + add_type_str _ strs + = strs + +{- Note [Unsaturated functions] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider (test eyeball/inline4) + x = a:as + y = f x +where f has arity 2. Then we do not want to inline 'x', because +it'll just be floated out again. Even if f has lots of discounts +on its first argument -- it must be saturated for these to kick in +-} + +interestingArgContext :: [CoreRule] -> SimplCont -> Bool -- If the argument has form (f x y), where x,y are boring, -- and f is marked INLINE, then we don't want to inline f. -- But if the context of the argument is @@ -407,43 +396,30 @@ interestingArgContext :: Id -> SimplCont -> Bool -- where g has rules, then we *do* want to inline f, in case it -- exposes a rule that might fire. Similarly, if the context is -- h (g (f x x)) --- where h has rules, then we do want to inline f. --- The interesting_arg_ctxt flag makes this happen; if it's +-- where h has rules, then we do want to inline f; hence the +-- call_cont argument to interestingArgContext +-- +-- The ai-rules flag makes this happen; if it's -- set, the inliner gets just enough keener to inline f -- regardless of how boring f's arguments are, if it's marked INLINE -- -- The alternative would be to *always* inline an INLINE function, -- regardless of how boring its context is; but that seems overkill -- For example, it'd mean that wrapper functions were always inlined -interestingArgContext fn cont - = idHasRules fn || go cont +interestingArgContext rules call_cont + = notNull rules || enclosing_fn_has_rules where - go (Select {}) = False - go (ApplyTo {}) = False - go (ArgOf {}) = True - go (CoerceIt _ c) = go c - go (Stop _ _ interesting) = interesting + enclosing_fn_has_rules = go call_cont -------------------- -canUpdateInPlace :: Type -> Bool --- Consider let x = in ... --- If returns an explicit constructor, we might be able --- to do update in place. So we treat even a thunk RHS context --- as interesting if update in place is possible. We approximate --- this by seeing if the type has a single constructor with a --- small arity. But arity zero isn't good -- we share the single copy --- for that case, so no point in sharing. - -canUpdateInPlace ty - | not opt_UF_UpdateInPlace = False - | otherwise - = case splitTyConApp_maybe ty of - Nothing -> False - Just (tycon, _) -> case tyConDataCons_maybe tycon of - Just [dc] -> arity == 1 || arity == 2 - where - arity = dataConRepArity dc - other -> False + go (Select {}) = False + go (ApplyTo {}) = False + go (StrictArg _ cci _) = interesting cci + go (StrictBind {}) = False -- ?? + go (CoerceIt _ c) = go c + go (Stop cci) = interesting cci + + interesting (ArgCtxt rules) = rules + interesting _ = False \end{code} @@ -454,18 +430,61 @@ canUpdateInPlace ty %* * %************************************************************************ -Inlining is controlled partly by the SimplifierMode switch. This has two -settings: +\begin{code} +simplEnvForGHCi :: SimplEnv +simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $ + SimplGently { sm_rules = False, sm_inline = False } + -- Do not do any inlining, in case we expose some unboxed + -- tuple stuff that confuses the bytecode interpreter + +simplEnvForRules :: SimplEnv +simplEnvForRules = mkSimplEnv allOffSwitchChecker $ + SimplGently { sm_rules = True, sm_inline = False } + +updModeForInlineRules :: SimplifierMode -> SimplifierMode +updModeForInlineRules mode + = case mode of + SimplGently {} -> mode -- Don't modify mode if we already gentle + SimplPhase {} -> SimplGently { sm_rules = True, sm_inline = True } + -- Simplify as much as possible, subject to the usual "gentle" rules +\end{code} +Inlining is controlled partly by the SimplifierMode switch. This has two +settings + SimplGently (a) Simplifying before specialiser/full laziness - (b) Simplifiying inside INLINE pragma + (b) Simplifiying inside InlineRules (c) Simplifying the LHS of a rule (d) Simplifying a GHCi expression or Template Haskell splice - SimplPhase n Used at all other times + SimplPhase n _ Used at all other times + +Note [Gentle mode] +~~~~~~~~~~~~~~~~~~ +Gentle mode has a separate boolean flag to control + a) inlining (sm_inline flag) + b) rules (sm_rules flag) +A key invariant about Gentle mode is that it is treated as the EARLIEST +phase. Something is inlined if the sm_inline flag is on AND the thing +is inlinable in the earliest phase. This is important. Example + + {-# INLINE [~1] g #-} + g = ... + + {-# INLINE f #-} + f x = g (g x) -The key thing about SimplGently is that it does no call-site inlining. +If we were to inline g into f's inlining, then an importing module would +never be able to do + f e --> g (g e) ---> RULE fires +because the InlineRule for f has had g inlined into it. + +On the other hand, it is bad not to do ANY inlining into an +InlineRule, because then recursive knots in instance declarations +don't get unravelled. + +However, *sometimes* SimplGently must do no call-site inlining at all. Before full laziness we must be careful not to inline wrappers, because doing so inhibits floating e.g. ...(case f x of ...)... @@ -479,17 +498,24 @@ running it, we don't want to use -O2. Indeed, we don't want to inline anything, because the byte-code interpreter might get confused about unboxed tuples and suchlike. -INLINE pragmas -~~~~~~~~~~~~~~ -SimplGently is also used as the mode to simplify inside an InlineMe note. +Note [RULEs enabled in SimplGently] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +RULES are enabled when doing "gentle" simplification. Two reasons: -\begin{code} -inlineMode :: SimplifierMode -inlineMode = SimplGently -\end{code} + * We really want the class-op cancellation to happen: + op (df d1 d2) --> $cop3 d1 d2 + because this breaks the mutual recursion between 'op' and 'df' + + * I wanted the RULE + lift String ===> ... + to work in Template Haskell when simplifying + splices, so we get simpler code for literal strings -It really is important to switch off inlinings inside such -expressions. Consider the following example +Note [Simplifying gently inside InlineRules] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We don't do much simplification inside InlineRules (which come from +INLINE pragmas). It really is important to switch off inlinings +inside such expressions. Consider the following example let f = \pq -> BIG in @@ -498,16 +524,14 @@ expressions. Consider the following example in ...g...g...g...g...g... Now, if that's the ONLY occurrence of f, it will be inlined inside g, -and thence copied multiple times when g is inlined. +and thence copied multiple times when g is inlined. - -This function may be inlinined in other modules, so we -don't want to remove (by inlining) calls to functions that have -specialisations, or that may have transformation rules in an importing -scope. +This function may be inlinined in other modules, so we don't want to +remove (by inlining) calls to functions that have specialisations, or +that may have transformation rules in an importing scope. E.g. {-# INLINE f #-} - f x = ...g... + f x = ...g... and suppose that g is strict *and* has specialisations. If we inline g's wrapper, we deny f the chance of getting the specialised version @@ -525,15 +549,14 @@ continuation. That's why the keep_inline predicate returns True for ArgOf continuations. It shouldn't do any harm not to dissolve the inline-me note under these circumstances. -Note that the result is that we do very little simplification -inside an InlineMe. +Although we do very little simplification inside an InlineRule, +the RHS is simplified as normal. For example: all xs = foldr (&&) True xs any p = all . map p {-# INLINE any #-} -Problem: any won't get deforested, and so if it's exported and the -importer doesn't use the inlining, (eg passes it as an arg) then we -won't get deforestation at all. We havn't solved this problem yet! +The RHS of 'any' will get optimised and deforested; but the InlineRule +will still mention the original RHS. preInlineUnconditionally @@ -578,7 +601,7 @@ y's occurrence info, which breaks the invariant. It matters: y might have a BIG rhs, which will now be dup'd at every occurrenc of x. -Evne RHSs labelled InlineMe aren't caught here, because there might be +Even RHSs labelled InlineMe aren't caught here, because there might be no benefit from inlining at the call site. [Sept 01] Don't unconditionally inline a top-level thing, because that @@ -600,27 +623,46 @@ seems a bit fragile. Conclusion: inline top level things gaily until Phase 0 (the last phase), at which point don't. +Note [pre/postInlineUnconditionally in gentle mode] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Even in gentle mode we want to do preInlineUnconditionally. The +reason is that too little clean-up happens if you don't inline +use-once things. Also a bit of inlining is *good* for full laziness; +it can expose constant sub-expressions. Example in +spectral/mandel/Mandel.hs, where the mandelset function gets a useful +let-float if you inline windowToViewport + +However, as usual for Gentle mode, do not inline things that are +inactive in the intial stages. See Note [Gentle mode]. + +Note [Top-level botomming Ids] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Don't inline top-level Ids that are bottoming, even if they are used just +once, because FloatOut has gone to some trouble to extract them out. +Inlining them won't make the program run faster! + \begin{code} preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool preInlineUnconditionally env top_lvl bndr rhs - | not active = False - | opt_SimplNoPreInlining = False + | not active = False + | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids] + | opt_SimplNoPreInlining = False | otherwise = case idOccInfo bndr of IAmDead -> True -- Happens in ((\x.1) v) OneOcc in_lam True int_cxt -> try_once in_lam int_cxt - other -> False + _ -> False where phase = getMode env active = case phase of - SimplGently -> isAlwaysActive prag - SimplPhase n -> isActive n prag - prag = idInlinePragma bndr - + SimplGently {} -> isEarlyActive act + -- See Note [pre/postInlineUnconditionally in gentle mode] + SimplPhase n _ -> isActive n act + act = idInlineActivation bndr try_once in_lam int_cxt -- There's one textual occurrence | not in_lam = isNotTopLevel top_lvl || early_phase | otherwise = int_cxt && canInlineInLam rhs --- Be very careful before inlining inside a lambda, becuase (a) we must not +-- Be very careful before inlining inside a lambda, because (a) we must not -- invalidate occurrence information, and (b) we want to avoid pushing a -- single allocation (here) into multiple allocations (inside lambda). -- Inlining a *function* with a single *saturated* call would be ok, mind you. @@ -641,14 +683,14 @@ preInlineUnconditionally env top_lvl bndr rhs -- canInlineInLam => free vars of rhs are (Once in_lam) or Many, -- so substituting rhs inside a lambda doesn't change the occ info. -- Sadly, not quite the same as exprIsHNF. - canInlineInLam (Lit l) = True + canInlineInLam (Lit _) = True canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e canInlineInLam (Note _ e) = canInlineInLam e canInlineInLam _ = False early_phase = case phase of - SimplPhase 0 -> False - other -> True + SimplPhase 0 _ -> False + _ -> True -- If we don't have this early_phase test, consider -- x = length [1,2,3] -- The full laziness pass carefully floats all the cons cells to @@ -695,30 +737,49 @@ our new view that inlining is like a RULE, so I'm sticking to the 'active' story for now. \begin{code} -postInlineUnconditionally :: SimplEnv -> TopLevelFlag -> OutId -> OccInfo -> OutExpr -> Unfolding -> Bool +postInlineUnconditionally + :: SimplEnv -> TopLevelFlag + -> OutId -- The binder (an InId would be fine too) + -> OccInfo -- From the InId + -> OutExpr + -> Unfolding + -> Bool postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding - | not active = False - | isLoopBreaker occ_info = False - | isExportedId bndr = False - | exprIsTrivial rhs = True + | not active = False + | isLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline + -- because it might be referred to "earlier" + | isExportedId bndr = False + | isStableUnfolding unfolding = False -- Note [InlineRule and postInlineUnconditionally] + | exprIsTrivial rhs = True + | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally] | otherwise = case occ_info of - OneOcc in_lam one_br int_cxt - -> (one_br || smallEnoughToInline unfolding) -- Small enough to dup + -- The point of examining occ_info here is that for *non-values* + -- that occur outside a lambda, the call-site inliner won't have + -- a chance (becuase it doesn't know that the thing + -- only occurs once). The pre-inliner won't have gotten + -- it either, if the thing occurs in more than one branch + -- So the main target is things like + -- let x = f y in + -- case v of + -- True -> case x of ... + -- False -> case x of ... + -- This is very important in practice; e.g. wheel-seive1 doubles + -- in allocation if you miss this out + OneOcc in_lam _one_br int_cxt -- OneOcc => no code-duplication issue + -> smallEnoughToInline unfolding -- Small enough to dup -- ToDo: consider discount on smallEnoughToInline if int_cxt is true -- - -- NB: Do we want to inline arbitrarily big things becuase - -- one_br is True? that can lead to inline cascades. But - -- preInlineUnconditionlly has dealt with all the common cases - -- so perhaps it's worth the risk. Here's an example - -- let f = if b then Left (\x.BIG) else Right (\y.BIG) - -- in \y. ....f.... - -- We can't preInlineUnconditionally because that woud invalidate - -- the occ info for b. Yet f is used just once, and duplicating - -- the case work is fine (exprIsCheap). - - && ((isNotTopLevel top_lvl && not in_lam) || - -- But outside a lambda, we want to be reasonably aggressive + -- NB: Do NOT inline arbitrarily big things, even if one_br is True + -- Reason: doing so risks exponential behaviour. We simplify a big + -- expression, inline it, and simplify it again. But if the + -- very same thing happens in the big expression, we get + -- exponential cost! + -- PRINCIPLE: when we've already simplified an expression once, + -- make sure that we only inline it if it's reasonably small. + + && (not in_lam || + -- Outside a lambda, we want to be reasonably aggressive -- about inlining into multiple branches of case -- e.g. let x = -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... } @@ -731,95 +792,202 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding -- int_cxt to prevent us inlining inside a lambda without some -- good reason. See the notes on int_cxt in preInlineUnconditionally - other -> False - -- The point here is that for *non-values* that occur - -- outside a lambda, the call-site inliner won't have - -- a chance (becuase it doesn't know that the thing - -- only occurs once). The pre-inliner won't have gotten - -- it either, if the thing occurs in more than one branch - -- So the main target is things like - -- let x = f y in - -- case v of - -- True -> case x of ... - -- False -> case x of ... - -- I'm not sure how important this is in practice + IAmDead -> True -- This happens; for example, the case_bndr during case of + -- known constructor: case (a,b) of x { (p,q) -> ... } + -- Here x isn't mentioned in the RHS, so we don't want to + -- create the (dead) let-binding let x = (a,b) in ... + + _ -> False + +-- Here's an example that we don't handle well: +-- let f = if b then Left (\x.BIG) else Right (\y.BIG) +-- in \y. ....case f of {...} .... +-- Here f is used just once, and duplicating the case work is fine (exprIsCheap). +-- But +-- - We can't preInlineUnconditionally because that woud invalidate +-- the occ info for b. +-- - We can't postInlineUnconditionally because the RHS is big, and +-- that risks exponential behaviour +-- - We can't call-site inline, because the rhs is big +-- Alas! + where active = case getMode env of - SimplGently -> isAlwaysActive prag - SimplPhase n -> isActive n prag - prag = idInlinePragma bndr + SimplGently {} -> isEarlyActive act + -- See Note [pre/postInlineUnconditionally in gentle mode] + SimplPhase n _ -> isActive n act + act = idInlineActivation bndr -activeInline :: SimplEnv -> OutId -> OccInfo -> Bool -activeInline env id occ +activeUnfolding :: SimplEnv -> IdUnfoldingFun +activeUnfolding env + = case getMode env of + SimplGently { sm_inline = False } -> active_unfolding_minimal + SimplGently { sm_inline = True } -> active_unfolding_gentle + SimplPhase n _ -> active_unfolding n + +activeUnfInRule :: SimplEnv -> IdUnfoldingFun +-- When matching in RULE, we want to "look through" an unfolding +-- if *rules* are on, even if *inlinings* are not. A notable example +-- is DFuns, which really we want to match in rules like (op dfun) +-- in gentle mode. +activeUnfInRule env = case getMode env of - SimplGently -> isOneOcc occ && isAlwaysActive prag - -- No inlining at all when doing gentle stuff, - -- except for local things that occur once - -- The reason is that too little clean-up happens if you - -- don't inline use-once things. Also a bit of inlining is *good* for - -- full laziness; it can expose constant sub-expressions. - -- Example in spectral/mandel/Mandel.hs, where the mandelset - -- function gets a useful let-float if you inline windowToViewport - - -- NB: we used to have a second exception, for data con wrappers. - -- On the grounds that we use gentle mode for rule LHSs, and - -- they match better when data con wrappers are inlined. - -- But that only really applies to the trivial wrappers (like (:)), - -- and they are now constructed as Compulsory unfoldings (in MkId) - -- so they'll happen anyway. - - SimplPhase n -> isActive n prag + SimplGently { sm_rules = False } -> active_unfolding_minimal + SimplGently { sm_rules = True } -> active_unfolding_gentle + SimplPhase n _ -> active_unfolding n + +active_unfolding_minimal :: IdUnfoldingFun +-- Compuslory unfoldings only +-- Ignore SimplGently, because we want to inline regardless; +-- the Id has no top-level binding at all +-- +-- NB: we used to have a second exception, for data con wrappers. +-- On the grounds that we use gentle mode for rule LHSs, and +-- they match better when data con wrappers are inlined. +-- But that only really applies to the trivial wrappers (like (:)), +-- and they are now constructed as Compulsory unfoldings (in MkId) +-- so they'll happen anyway. +active_unfolding_minimal id + | isCompulsoryUnfolding unf = unf + | otherwise = NoUnfolding where - prag = idInlinePragma id - -activeRule :: SimplEnv -> Maybe (Activation -> Bool) + unf = realIdUnfolding id -- Never a loop breaker + +active_unfolding_gentle :: IdUnfoldingFun +-- Anything that is early-active +-- See Note [Gentle mode] +active_unfolding_gentle id + | isEarlyActive (idInlineActivation id) = idUnfolding id + | otherwise = NoUnfolding + -- idUnfolding checks for loop-breakers + -- Things with an INLINE pragma may have + -- an unfolding *and* be a loop breaker + -- (maybe the knot is not yet untied) + +active_unfolding :: CompilerPhase -> IdUnfoldingFun +active_unfolding n id + | isActive n (idInlineActivation id) = idUnfolding id + | otherwise = NoUnfolding + +activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool) -- Nothing => No rules at all -activeRule env - | opt_RulesOff = Nothing +activeRule dflags env + | not (dopt Opt_EnableRewriteRules dflags) + = Nothing -- Rewriting is off | otherwise = case getMode env of - SimplGently -> Just isAlwaysActive - -- Used to be Nothing (no rules in gentle mode) - -- Main motivation for changing is that I wanted - -- lift String ===> ... - -- to work in Template Haskell when simplifying - -- splices, so we get simpler code for literal strings - SimplPhase n -> Just (isActive n) -\end{code} + SimplGently { sm_rules = rules_on } + | rules_on -> Just isEarlyActive -- Note [RULEs enabled in SimplGently] + | otherwise -> Nothing + SimplPhase n _ -> Just (isActive n) +\end{code} + +Note [Top level and postInlineUnconditionally] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We don't do postInlineUnconditionally for top-level things (except +ones that are trivial). There is no point, because the main goal is +to get rid of local bindings used in multiple case branches. And +doing so risks replacing a single global allocation with local allocations. + + +Note [InlineRule and postInlineUnconditionally] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise +we lose the unfolding. Example + + -- f has InlineRule with rhs (e |> co) + -- where 'e' is big + f = e |> co + +Then there's a danger we'll optimise to + + f' = e + f = f' |> co + +and now postInlineUnconditionally, losing the InlineRule on f. Now f' +won't inline because 'e' is too big. %************************************************************************ %* * -\subsection{Rebuilding a lambda} + Rebuilding a lambda %* * %************************************************************************ \begin{code} -mkLam :: SimplEnv -> [OutBinder] -> OutExpr -> SimplCont -> SimplM FloatsWithExpr +mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr +-- mkLam tries three things +-- a) eta reduction, if that gives a trivial expression +-- b) eta expansion [only if there are some value lambdas] + +mkLam _b [] body + = return body +mkLam env bndrs body + = do { dflags <- getDOptsSmpl + ; mkLam' dflags bndrs body } + where + mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr + mkLam' dflags bndrs (Cast body co) + | not (any bad bndrs) + -- Note [Casts and lambdas] + = do { lam <- mkLam' dflags bndrs body + ; return (mkCoerce (mkPiTypes bndrs co) lam) } + where + co_vars = tyVarsOfType co + bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars + + mkLam' dflags bndrs body + | dopt Opt_DoEtaReduction dflags, + Just etad_lam <- tryEtaReduce bndrs body + = do { tick (EtaReduction (head bndrs)) + ; return etad_lam } + + | dopt Opt_DoLambdaEtaExpansion dflags, + not (inGentleMode env), -- In gentle mode don't eta-expansion + any isRuntimeVar bndrs -- because it can clutter up the code + -- with casts etc that may not be removed + = do { let body' = tryEtaExpansion dflags body + ; return (mkLams bndrs body') } + + | otherwise + = return (mkLams bndrs body) \end{code} -Try three things - a) eta reduction, if that gives a trivial expression - b) eta expansion [only if there are some value lambdas] - c) floating lets out through big lambdas - [only if all tyvar lambdas, and only if this lambda - is the RHS of a let] - -\begin{code} -mkLam env bndrs body cont - = getDOptsSmpl `thenSmpl` \dflags -> - mkLam' dflags env bndrs body cont - where - mkLam' dflags env bndrs body cont - | dopt Opt_DoEtaReduction dflags, - Just etad_lam <- tryEtaReduce bndrs body - = tick (EtaReduction (head bndrs)) `thenSmpl_` - returnSmpl (emptyFloats env, etad_lam) - - | dopt Opt_DoLambdaEtaExpansion dflags, - any isRuntimeVar bndrs - = tryEtaExpansion body `thenSmpl` \ body' -> - returnSmpl (emptyFloats env, mkLams bndrs body') +Note [Casts and lambdas] +~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + (\x. (\y. e) `cast` g1) `cast` g2 +There is a danger here that the two lambdas look separated, and the +full laziness pass might float an expression to between the two. + +So this equation in mkLam' floats the g1 out, thus: + (\x. e `cast` g1) --> (\x.e) `cast` (tx -> g1) +where x:tx. + +In general, this floats casts outside lambdas, where (I hope) they +might meet and cancel with some other cast: + \x. e `cast` co ===> (\x. e) `cast` (tx -> co) + /\a. e `cast` co ===> (/\a. e) `cast` (/\a. co) + /\g. e `cast` co ===> (/\g. e) `cast` (/\g. co) + (if not (g `in` co)) + +Notice that it works regardless of 'e'. Originally it worked only +if 'e' was itself a lambda, but in some cases that resulted in +fruitless iteration in the simplifier. A good example was when +compiling Text.ParserCombinators.ReadPrec, where we had a definition +like (\x. Get `cast` g) +where Get is a constructor with nonzero arity. Then mkLam eta-expanded +the Get, and the next iteration eta-reduced it, and then eta-expanded +it again. + +Note also the side condition for the case of coercion binders. +It does not make sense to transform + /\g. e `cast` g ==> (/\g.e) `cast` (/\g.g) +because the latter is not well-kinded. + +-- c) floating lets out through big lambdas +-- [only if all tyvar lambdas, and only if this lambda +-- is the RHS of a let] {- Sept 01: I'm experimenting with getting the full laziness pass to float out past big lambdsa @@ -828,59 +996,111 @@ mkLam env bndrs body cont -- if this is indeed a right-hand side; otherwise -- we end up floating the thing out, only for float-in -- to float it right back in again! - = tryRhsTyLam env bndrs body `thenSmpl` \ (floats, body') -> - returnSmpl (floats, mkLams bndrs body') + = do (floats, body') <- tryRhsTyLam env bndrs body + return (floats, mkLams bndrs body') -} - | otherwise - = returnSmpl (emptyFloats env, mkLams bndrs body) -\end{code} - %************************************************************************ %* * -\subsection{Eta expansion and reduction} + Eta reduction %* * %************************************************************************ -We try for eta reduction here, but *only* if we get all the -way to an exprIsTrivial expression. -We don't want to remove extra lambdas unless we are going -to avoid allocating this thing altogether +Note [Eta reduction conditions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We try for eta reduction here, but *only* if we get all the way to an +trivial expression. We don't want to remove extra lambdas unless we +are going to avoid allocating this thing altogether. + +There are some particularly delicate points here: + +* Eta reduction is not valid in general: + \x. bot /= bot + This matters, partly for old-fashioned correctness reasons but, + worse, getting it wrong can yield a seg fault. Consider + f = \x.f x + h y = case (case y of { True -> f `seq` True; False -> False }) of + True -> ...; False -> ... + + If we (unsoundly) eta-reduce f to get f=f, the strictness analyser + says f=bottom, and replaces the (f `seq` True) with just + (f `cast` unsafe-co). BUT, as thing stand, 'f' got arity 1, and it + *keeps* arity 1 (perhaps also wrongly). So CorePrep eta-expands + the definition again, so that it does not termninate after all. + Result: seg-fault because the boolean case actually gets a function value. + See Trac #1947. + + So it's important to to the right thing. + +* Note [Arity care]: we need to be careful if we just look at f's + arity. Currently (Dec07), f's arity is visible in its own RHS (see + Note [Arity robustness] in SimplEnv) so we must *not* trust the + arity when checking that 'f' is a value. Otherwise we will + eta-reduce + f = \x. f x + to + f = f + Which might change a terminiating program (think (f `seq` e)) to a + non-terminating one. So we check for being a loop breaker first. + + However for GlobalIds we can look at the arity; and for primops we + must, since they have no unfolding. + +* Regardless of whether 'f' is a value, we always want to + reduce (/\a -> f a) to f + This came up in a RULE: foldr (build (/\a -> g a)) + did not match foldr (build (/\b -> ...something complex...)) + The type checker can insert these eta-expanded versions, + with both type and dictionary lambdas; hence the slightly + ad-hoc isDictId + +* Never *reduce* arity. For example + f = \xy. g x y + Then if h has arity 1 we don't want to eta-reduce because then + f's arity would decrease, and that is bad + +These delicacies are why we don't use exprIsTrivial and exprIsHNF here. +Alas. \begin{code} -tryEtaReduce :: [OutBinder] -> OutExpr -> Maybe OutExpr +tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr tryEtaReduce bndrs body - -- We don't use CoreUtils.etaReduce, because we can be more - -- efficient here: - -- (a) we already have the binders - -- (b) we can do the triviality test before computing the free vars = go (reverse bndrs) body where + incoming_arity = count isId bndrs + go (b : bs) (App fun arg) | ok_arg b arg = go bs fun -- Loop round go [] fun | ok_fun fun = Just fun -- Success! go _ _ = Nothing -- Failure! - ok_fun fun = exprIsTrivial fun - && not (any (`elemVarSet` (exprFreeVars fun)) bndrs) - && (exprIsHNF fun || all ok_lam bndrs) + -- Note [Eta reduction conditions] + ok_fun (App fun (Type ty)) + | not (any (`elemVarSet` tyVarsOfType ty) bndrs) + = ok_fun fun + ok_fun (Var fun_id) + = not (fun_id `elem` bndrs) + && (ok_fun_id fun_id || all ok_lam bndrs) + ok_fun _fun = False + + ok_fun_id fun = fun_arity fun >= incoming_arity + + fun_arity fun -- See Note [Arity care] + | isLocalId fun && isLoopBreaker (idOccInfo fun) = 0 + | otherwise = idArity fun + ok_lam v = isTyVar v || isDictId v - -- The exprIsHNF is because eta reduction is not - -- valid in general: \x. bot /= bot - -- So we need to be sure that the "fun" is a value. - -- - -- However, we always want to reduce (/\a -> f a) to f - -- This came up in a RULE: foldr (build (/\a -> g a)) - -- did not match foldr (build (/\b -> ...something complex...)) - -- The type checker can insert these eta-expanded versions, - -- with both type and dictionary lambdas; hence the slightly - -- ad-hoc isDictTy ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg \end{code} - Try eta expansion for RHSs +%************************************************************************ +%* * + Eta expansion +%* * +%************************************************************************ + We go for: f = \x1..xn -> N ==> f = \x1..xn y1..ym -> N y1..ym @@ -895,19 +1115,28 @@ where (in both cases) * N is a NORMAL FORM (i.e. no redexes anywhere) wanting a suitable number of extra args. +The biggest reason for doing this is for cases like + + f = \x -> case x of + True -> \y -> e1 + False -> \y -> e2 + +Here we want to get the lambdas together. A good exmaple is the nofib +program fibheaps, which gets 25% more allocation if you don't do this +eta-expansion. + We may have to sandwich some coerces between the lambdas to make the types work. exprEtaExpandArity looks through coerces when computing arity; and etaExpand adds the coerces as necessary when actually computing the expansion. \begin{code} -tryEtaExpansion :: OutExpr -> SimplM OutExpr +tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr -- There is at least one runtime binder in the binders -tryEtaExpansion body - = getUniquesSmpl `thenSmpl` \ us -> - returnSmpl (etaExpand fun_arity us body (exprType body)) +tryEtaExpansion dflags body + = etaExpand fun_arity body where - fun_arity = exprEtaExpandArity body + fun_arity = exprEtaExpandArity dflags body \end{code} @@ -917,8 +1146,35 @@ tryEtaExpansion body %* * %************************************************************************ -tryRhsTyLam tries this transformation, when the big lambda appears as -the RHS of a let(rec) binding: +Note [Floating and type abstraction] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this: + x = /\a. C e1 e2 +We'd like to float this to + y1 = /\a. e1 + y2 = /\a. e2 + x = /\a. C (y1 a) (y2 a) +for the usual reasons: we want to inline x rather vigorously. + +You may think that this kind of thing is rare. But in some programs it is +common. For example, if you do closure conversion you might get: + + data a :-> b = forall e. (e -> a -> b) :$ e + + f_cc :: forall a. a :-> a + f_cc = /\a. (\e. id a) :$ () + +Now we really want to inline that f_cc thing so that the +construction of the closure goes away. + +So I have elaborated simplLazyBind to understand right-hand sides that look +like + /\ a1..an. body + +and treat them specially. The real work is done in SimplUtils.abstractFloats, +but there is quite a bit of plumbing in simplLazyBind as well. + +The same transformation is good when there are lets in the body: /\abc -> let(rec) x = e in b ==> @@ -940,25 +1196,6 @@ let-floating. This optimisation is CRUCIAL in eliminating the junk introduced by desugaring mutually recursive definitions. Don't eliminate it lightly! -So far as the implementation is concerned: - - Invariant: go F e = /\tvs -> F e - - Equalities: - go F (Let x=e in b) - = Let x' = /\tvs -> F e - in - go G b - where - G = F . Let x = x' tvs - - go F (Letrec xi=ei in b) - = Letrec {xi' = /\tvs -> G ei} - in - go G b - where - G = F . Let {xi = xi' tvs} - [May 1999] If we do this transformation *regardless* then we can end up with some pretty silly stuff. For example, @@ -980,43 +1217,34 @@ and is of the form If we abstract this wrt the tyvar we then can't do the case inline as we would normally do. +That's why the whole transformation is part of the same process that +floats let-bindings and constructor arguments out of RHSs. In particular, +it is guarded by the doFloatFromRhs call in simplLazyBind. -\begin{code} -{- Trying to do this in full laziness - -tryRhsTyLam :: SimplEnv -> [OutTyVar] -> OutExpr -> SimplM FloatsWithExpr --- Call ensures that all the binders are type variables - -tryRhsTyLam env tyvars body -- Only does something if there's a let - | not (all isTyVar tyvars) - || not (worth_it body) -- inside a type lambda, - = returnSmpl (emptyFloats env, body) -- and a WHNF inside that - - | otherwise - = go env (\x -> x) body +\begin{code} +abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr) +abstractFloats main_tvs body_env body + = ASSERT( notNull body_floats ) + do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats + ; return (float_binds, CoreSubst.substExpr subst body) } where - worth_it e@(Let _ _) = whnf_in_middle e - worth_it e = False - - whnf_in_middle (Let (NonRec x rhs) e) | isUnLiftedType (idType x) = False - whnf_in_middle (Let _ e) = whnf_in_middle e - whnf_in_middle e = exprIsCheap e - - main_tyvar_set = mkVarSet tyvars - - go env fn (Let bind@(NonRec var rhs) body) - | exprIsTrivial rhs - = go env (fn . Let bind) body - - go env fn (Let (NonRec var rhs) body) - = mk_poly tyvars_here var `thenSmpl` \ (var', rhs') -> - addAuxiliaryBind env (NonRec var' (mkLams tyvars_here (fn rhs))) $ \ env -> - go env (fn . Let (mk_silly_bind var rhs')) body - + main_tv_set = mkVarSet main_tvs + body_floats = getFloats body_env + empty_subst = CoreSubst.mkEmptySubst (seInScope body_env) + + abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind) + abstract subst (NonRec id rhs) + = do { (poly_id, poly_app) <- mk_poly tvs_here id + ; let poly_rhs = mkLams tvs_here rhs' + subst' = CoreSubst.extendIdSubst subst id poly_app + ; return (subst', (NonRec poly_id poly_rhs)) } where - - tyvars_here = varSetElems (main_tyvar_set `intersectVarSet` exprSomeFreeVars isTyVar rhs) + rhs' = CoreSubst.substExpr subst rhs + tvs_here | any isCoVar main_tvs = main_tvs -- Note [Abstract over coercions] + | otherwise + = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs') + -- Abstract only over the type variables free in the rhs -- wrt which the new binding is abstracted. But the naive -- approach of abstract wrt the tyvars free in the Id's type @@ -1033,28 +1261,35 @@ tryRhsTyLam env tyvars body -- Only does something if there's a let -- abstracting wrt *all* the tyvars. We'll see if that -- gives rise to problems. SLPJ June 98 - go env fn (Let (Rec prs) body) - = mapAndUnzipSmpl (mk_poly tyvars_here) vars `thenSmpl` \ (vars', rhss') -> - let - gn body = fn (foldr Let body (zipWith mk_silly_bind vars rhss')) - pairs = vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss] - in - addAuxiliaryBind env (Rec pairs) $ \ env -> - go env gn body + abstract subst (Rec prs) + = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids + ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps) + poly_rhss = [mkLams tvs_here (CoreSubst.substExpr subst' rhs) | rhs <- rhss] + ; return (subst', Rec (poly_ids `zip` poly_rhss)) } where - (vars,rhss) = unzip prs - tyvars_here = varSetElems (main_tyvar_set `intersectVarSet` exprsSomeFreeVars isTyVar (map snd prs)) - -- See notes with tyvars_here above - - go env fn body = returnSmpl (emptyFloats env, fn body) - - mk_poly tyvars_here var - = getUniqueSmpl `thenSmpl` \ uniq -> - let - poly_name = setNameUnique (idName var) uniq -- Keep same name - poly_ty = mkForAllTys tyvars_here (idType var) -- But new type of course - poly_id = mkLocalId poly_name poly_ty - + (ids,rhss) = unzip prs + -- For a recursive group, it's a bit of a pain to work out the minimal + -- set of tyvars over which to abstract: + -- /\ a b c. let x = ...a... in + -- letrec { p = ...x...q... + -- q = .....p...b... } in + -- ... + -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted + -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'. + -- Since it's a pain, we just use the whole set, which is always safe + -- + -- If you ever want to be more selective, remember this bizarre case too: + -- x::a = x + -- Here, we must abstract 'x' over 'a'. + tvs_here = main_tvs + + mk_poly tvs_here var + = do { uniq <- getUniqueM + ; let poly_name = setNameUnique (idName var) uniq -- Keep same name + poly_ty = mkForAllTys tvs_here (idType var) -- But new type of course + poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.lhs + mkLocalId poly_name poly_ty + ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) } -- In the olden days, it was crucial to copy the occInfo of the original var, -- because we were looking at occurrence-analysed but as yet unsimplified code! -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking @@ -1067,10 +1302,17 @@ tryRhsTyLam env tyvars body -- Only does something if there's a let -- where x* has an INLINE prag on it. Now, once x* is inlined, -- the occurrences of x' will be just the occurrences originally -- pinned on x. - in - returnSmpl (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tyvars_here)) +\end{code} + +Note [Abstract over coercions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the +type variable a. Rather than sort this mess out, we simply bale out and abstract +wrt all the type variables if any of them are coercion variables. + + +Historical note: if you use let-bindings instead of a substitution, beware of this: - mk_silly_bind var rhs = NonRec var (Note InlineMe rhs) -- Suppose we start with: -- -- x = /\ a -> let g = G in E @@ -1090,48 +1332,159 @@ tryRhsTyLam env tyvars body -- Only does something if there's a let -- Solution: put an INLINE note on g's RHS, so that poly_g seems -- to appear many times. (NB: mkInlineMe eliminates -- such notes on trivial RHSs, so do it manually.) --} -\end{code} %************************************************************************ %* * -\subsection{Case absorption and identity-case elimination} + prepareAlts %* * %************************************************************************ -mkCase puts a case expression back together, trying various transformations first. +prepareAlts tries these things: + +1. Eliminate alternatives that cannot match, including the + DEFAULT alternative. + +2. If the DEFAULT alternative can match only one possible constructor, + then make that constructor explicit. + e.g. + case e of x { DEFAULT -> rhs } + ===> + case e of x { (a,b) -> rhs } + where the type is a single constructor type. This gives better code + when rhs also scrutinises x or e. + +3. Returns a list of the constructors that cannot holds in the + DEFAULT alternative (if there is one) + +Here "cannot match" includes knowledge from GADTs + +It's a good idea do do this stuff before simplifying the alternatives, to +avoid simplifying alternatives we know can't happen, and to come up with +the list of constructors that are handled, to put into the IdInfo of the +case binder, for use when simplifying the alternatives. + +Eliminating the default alternative in (1) isn't so obvious, but it can +happen: + +data Colour = Red | Green | Blue + +f x = case x of + Red -> .. + Green -> .. + DEFAULT -> h x + +h y = case y of + Blue -> .. + DEFAULT -> [ case y of ... ] + +If we inline h into f, the default case of the inlined h can't happen. +If we don't notice this, we may end up filtering out *all* the cases +of the inner case y, which give us nowhere to go! \begin{code} -mkCase :: OutExpr -> OutId -> OutType - -> [OutAlt] -- Increasing order - -> SimplM OutExpr - -mkCase scrut case_bndr ty alts - = getDOptsSmpl `thenSmpl` \dflags -> - mkAlts dflags scrut case_bndr alts `thenSmpl` \ better_alts -> - mkCase1 scrut case_bndr ty better_alts +prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt]) +prepareAlts scrut case_bndr' alts + = do { let (alts_wo_default, maybe_deflt) = findDefault alts + alt_cons = [con | (con,_,_) <- alts_wo_default] + imposs_deflt_cons = nub (imposs_cons ++ alt_cons) + -- "imposs_deflt_cons" are handled + -- EITHER by the context, + -- OR by a non-DEFAULT branch in this case expression. + + ; default_alts <- prepareDefault case_bndr' mb_tc_app + imposs_deflt_cons maybe_deflt + + ; let trimmed_alts = filterOut impossible_alt alts_wo_default + merged_alts = mergeAlts trimmed_alts default_alts + -- We need the mergeAlts in case the new default_alt + -- has turned into a constructor alternative. + -- The merge keeps the inner DEFAULT at the front, if there is one + -- and interleaves the alternatives in the right order + + ; return (imposs_deflt_cons, merged_alts) } + where + mb_tc_app = splitTyConApp_maybe (idType case_bndr') + Just (_, inst_tys) = mb_tc_app + + imposs_cons = case scrut of + Var v -> otherCons (idUnfolding v) + _ -> [] + + impossible_alt :: CoreAlt -> Bool + impossible_alt (con, _, _) | con `elem` imposs_cons = True + impossible_alt (DataAlt con, _, _) = dataConCannotMatch inst_tys con + impossible_alt _ = False + + +prepareDefault :: OutId -- Case binder; need just for its type. Note that as an + -- OutId, it has maximum information; this is important. + -- Test simpl013 is an example + -> Maybe (TyCon, [Type]) -- Type of scrutinee, decomposed + -> [AltCon] -- These cons can't happen when matching the default + -> Maybe InExpr -- Rhs + -> SimplM [InAlt] -- Still unsimplified + -- We use a list because it's what mergeAlts expects, + +--------- Fill in known constructor ----------- +prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs) + | -- This branch handles the case where we are + -- scrutinisng an algebraic data type + isAlgTyCon tycon -- It's a data type, tuple, or unboxed tuples. + , not (isNewTyCon tycon) -- We can have a newtype, if we are just doing an eval: + -- case x of { DEFAULT -> e } + -- and we don't want to fill in a default for them! + , Just all_cons <- tyConDataCons_maybe tycon + , not (null all_cons) -- This is a tricky corner case. If the data type has no constructors, + -- which GHC allows, then the case expression will have at most a default + -- alternative. We don't want to eliminate that alternative, because the + -- invariant is that there's always one alternative. It's more convenient + -- to leave + -- case x of { DEFAULT -> e } + -- as it is, rather than transform it to + -- error "case cant match" + -- which would be quite legitmate. But it's a really obscure corner, and + -- not worth wasting code on. + , let imposs_data_cons = [con | DataAlt con <- imposs_cons] -- We now know it's a data type + impossible con = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con + = case filterOut impossible all_cons of + [] -> return [] -- Eliminate the default alternative + -- altogether if it can't match + + [con] -> -- It matches exactly one constructor, so fill it in + do { tick (FillInCaseDefault case_bndr) + ; us <- getUniquesM + ; let (ex_tvs, co_tvs, arg_ids) = + dataConRepInstPat us con inst_tys + ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] } + + _ -> return [(DEFAULT, [], deflt_rhs)] + + | debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon) + -- Check for no data constructors + -- This can legitimately happen for type families, so don't report that + = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon) + $ return [(DEFAULT, [], deflt_rhs)] + +--------- Catch-all cases ----------- +prepareDefault _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs) + = return [(DEFAULT, [], deflt_rhs)] + +prepareDefault _case_bndr _bndr_ty _imposs_cons Nothing + = return [] -- No default branch \end{code} -mkAlts tries these things: -1. If several alternatives are identical, merge them into - a single DEFAULT alternative. I've occasionally seen this - making a big difference: +%************************************************************************ +%* * + mkCase +%* * +%************************************************************************ - case e of =====> case e of - C _ -> f x D v -> ....v.... - D v -> ....v.... DEFAULT -> f x - DEFAULT -> f x +mkCase tries these things - The point is that we merge common RHSs, at least for the DEFAULT case. - [One could do something more elaborate but I've never seen it needed.] - To avoid an expensive test, we just merge branches equal to the *first* - alternative; this picks up the common cases - a) all branches equal - b) some branches equal to the DEFAULT (which occurs first) +1. Merge Nested Cases -2. Case merging: case e of b { ==> case e of b { p1 -> rhs1 p1 -> rhs1 ... ... @@ -1144,12 +1497,37 @@ mkAlts tries these things: } which merges two cases in one case when -- the default alternative of - the outer case scrutises the same variable as the outer case This + the outer case scrutises the same variable as the outer case. This transformation is called Case Merging. It avoids that the same variable is scrutinised multiple times. +2. Eliminate Identity Case + + case e of ===> e + True -> True; + False -> False + + and similar friends. + +3. Merge identical alternatives. + If several alternatives are identical, merge them into + a single DEFAULT alternative. I've occasionally seen this + making a big difference: + + case e of =====> case e of + C _ -> f x D v -> ....v.... + D v -> ....v.... DEFAULT -> f x + DEFAULT -> f x -The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs): + The point is that we merge common RHSs, at least for the DEFAULT case. + [One could do something more elaborate but I've never seen it needed.] + To avoid an expensive test, we just merge branches equal to the *first* + alternative; this picks up the common cases + a) all branches equal + b) some branches equal to the DEFAULT (which occurs first) + +The case where Merge Identical Alternatives transformation showed up +was like this (base/Foreign/C/Err/Error.lhs): x | p `is` 1 -> e1 | p `is` 2 -> e2 @@ -1169,295 +1547,175 @@ This gave rise to a horrible sequence of cases and similarly in cascade for all the join points! - \begin{code} --------------------------------------------------- --- 1. Merge identical branches --------------------------------------------------- -mkAlts dflags scrut case_bndr alts@((con1,bndrs1,rhs1) : con_alts) - | all isDeadBinder bndrs1, -- Remember the default - length filtered_alts < length con_alts -- alternative comes first - = tick (AltMerge case_bndr) `thenSmpl_` - returnSmpl better_alts - where - filtered_alts = filter keep con_alts - keep (con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1) - better_alts = (DEFAULT, [], rhs1) : filtered_alts - +mkCase, mkCase1, mkCase2 + :: DynFlags + -> OutExpr -> OutId + -> [OutAlt] -- Alternatives in standard (increasing) order + -> SimplM OutExpr -------------------------------------------------- --- 2. Merge nested cases +-- 1. Merge Nested Cases -------------------------------------------------- -mkAlts dflags scrut outer_bndr outer_alts - | dopt Opt_CaseMerge dflags, - (outer_alts_without_deflt, maybe_outer_deflt) <- findDefault outer_alts, - Just (Case (Var scrut_var) inner_bndr _ inner_alts) <- maybe_outer_deflt, - scruting_same_var scrut_var - = let - munged_inner_alts = [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts] - munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs - - new_alts = mergeAlts outer_alts_without_deflt munged_inner_alts - -- The merge keeps the inner DEFAULT at the front, if there is one - -- and eliminates any inner_alts that are shadowed by the outer_alts - in - tick (CaseMerge outer_bndr) `thenSmpl_` - returnSmpl new_alts - -- Warning: don't call mkAlts recursively! +mkCase dflags scrut outer_bndr ((DEFAULT, _, deflt_rhs) : outer_alts) + | dopt Opt_CaseMerge dflags + , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs + , inner_scrut_var == outer_bndr + = do { tick (CaseMerge outer_bndr) + + ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args ) + (con, args, wrap_rhs rhs) + -- Simplifier's no-shadowing invariant should ensure + -- that outer_bndr is not shadowed by the inner patterns + wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs + -- The let is OK even for unboxed binders, + + wrapped_alts | isDeadBinder inner_bndr = inner_alts + | otherwise = map wrap_alt inner_alts + + merged_alts = mergeAlts outer_alts wrapped_alts + -- NB: mergeAlts gives priority to the left + -- case x of + -- A -> e1 + -- DEFAULT -> case x of + -- A -> e2 + -- B -> e3 + -- When we merge, we must ensure that e1 takes + -- precedence over e2 as the value for A! + + ; mkCase1 dflags scrut outer_bndr merged_alts + } + -- Warning: don't call mkCase recursively! -- Firstly, there's no point, because inner alts have already had -- mkCase applied to them, so they won't have a case in their default -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr -- in munge_rhs may put a case into the DEFAULT branch! - where - -- We are scrutinising the same variable if it's - -- the outer case-binder, or if the outer case scrutinises a variable - -- (and it's the same). Testing both allows us not to replace the - -- outer scrut-var with the outer case-binder (Simplify.simplCaseBinder). - scruting_same_var = case scrut of - Var outer_scrut -> \ v -> v == outer_bndr || v == outer_scrut - other -> \ v -> v == outer_bndr - ------------------------------------------------- --- Catch-all ------------------------------------------------- - -mkAlts dflags scrut case_bndr other_alts = returnSmpl other_alts -\end{code} - - - -================================================================================= - -mkCase1 tries these things - -1. Eliminate the case altogether if possible - -2. Case-identity: - - case e of ===> e - True -> True; - False -> False - - and similar friends. - - -Start with a simple situation: - - case x# of ===> e[x#/y#] - y# -> e - -(when x#, y# are of primitive type, of course). We can't (in general) -do this for algebraic cases, because we might turn bottom into -non-bottom! - -Actually, we generalise this idea to look for a case where we're -scrutinising a variable, and we know that only the default case can -match. For example: -\begin{verbatim} - case x of - 0# -> ... - other -> ...(case x of - 0# -> ... - other -> ...) ... -\end{code} -Here the inner case can be eliminated. This really only shows up in -eliminating error-checking code. - -We also make sure that we deal with this very common case: - - case e of - x -> ...x... - -Here we are using the case as a strict let; if x is used only once -then we want to inline it. We have to be careful that this doesn't -make the program terminate when it would have diverged before, so we -check that - - x is used strictly, or - - e is already evaluated (it may so if e is a variable) - -Lastly, we generalise the transformation to handle this: - - case e of ===> r - True -> r - False -> r - -We only do this for very cheaply compared r's (constructors, literals -and variables). If pedantic bottoms is on, we only do it when the -scrutinee is a PrimOp which can't fail. - -We do it *here*, looking at un-simplified alternatives, because we -have to check that r doesn't mention the variables bound by the -pattern in each alternative, so the binder-info is rather useful. - -So the case-elimination algorithm is: - - 1. Eliminate alternatives which can't match - - 2. Check whether all the remaining alternatives - (a) do not mention in their rhs any of the variables bound in their pattern - and (b) have equal rhss - 3. Check we can safely ditch the case: - * PedanticBottoms is off, - or * the scrutinee is an already-evaluated variable - or * the scrutinee is a primop which is ok for speculation - -- ie we want to preserve divide-by-zero errors, and - -- calls to error itself! +mkCase dflags scrut bndr alts = mkCase1 dflags scrut bndr alts - or * [Prim cases] the scrutinee is a primitive variable - - or * [Alg cases] the scrutinee is a variable and - either * the rhs is the same variable - (eg case x of C a b -> x ===> x) - or * there is only one alternative, the default alternative, - and the binder is used strictly in its scope. - [NB this is helped by the "use default binder where - possible" transformation; see below.] - - -If so, then we can replace the case with one of the rhss. - -Further notes about case elimination -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider: test :: Integer -> IO () - test = print - -Turns out that this compiles to: - Print.test - = \ eta :: Integer - eta1 :: State# RealWorld -> - case PrelNum.< eta PrelNum.zeroInteger of wild { __DEFAULT -> - case hPutStr stdout - (PrelNum.jtos eta ($w[] @ Char)) - eta1 - of wild1 { (# new_s, a4 #) -> PrelIO.lvl23 new_s }} - -Notice the strange '<' which has no effect at all. This is a funny one. -It started like this: - -f x y = if x < 0 then jtos x - else if y==0 then "" else jtos x - -At a particular call site we have (f v 1). So we inline to get - - if v < 0 then jtos x - else if 1==0 then "" else jtos x - -Now simplify the 1==0 conditional: - - if v<0 then jtos v else jtos v - -Now common-up the two branches of the case: - - case (v<0) of DEFAULT -> jtos v - -Why don't we drop the case? Because it's strict in v. It's technically -wrong to drop even unnecessary evaluations, and in practice they -may be a result of 'seq' so we *definitely* don't want to drop those. -I don't really know how to improve this situation. - - -\begin{code} -------------------------------------------------- --- 0. Check for empty alternatives +-- 2. Eliminate Identity Case -------------------------------------------------- --- This isn't strictly an error. It's possible that the simplifer might "see" --- that an inner case has no accessible alternatives before it "sees" that the --- entire branch of an outer case is inaccessible. So we simply --- put an error case here insteadd -mkCase1 scrut case_bndr ty [] - = pprTrace "mkCase1: null alts" (ppr case_bndr <+> ppr scrut) $ - return (mkApps (Var eRROR_ID) - [Type ty, Lit (mkStringLit "Impossible alternative")]) - --------------------------------------------------- --- 1. Eliminate the case altogether if poss --------------------------------------------------- - -mkCase1 scrut case_bndr ty [(con,bndrs,rhs)] - -- See if we can get rid of the case altogether - -- See the extensive notes on case-elimination above - -- mkCase made sure that if all the alternatives are equal, - -- then there is now only one (DEFAULT) rhs - | all isDeadBinder bndrs, - - -- Check that the scrutinee can be let-bound instead of case-bound - exprOkForSpeculation scrut - -- OK not to evaluate it - -- This includes things like (==# a# b#)::Bool - -- so that we simplify - -- case ==# a# b# of { True -> x; False -> x } - -- to just - -- x - -- This particular example shows up in default methods for - -- comparision operations (e.g. in (>=) for Int.Int32) - || exprIsHNF scrut -- It's already evaluated - || var_demanded_later scrut -- It'll be demanded later - --- || not opt_SimplPedanticBottoms) -- Or we don't care! --- We used to allow improving termination by discarding cases, unless -fpedantic-bottoms was on, --- but that breaks badly for the dataToTag# primop, which relies on a case to evaluate --- its argument: case x of { y -> dataToTag# y } --- Here we must *not* discard the case, because dataToTag# just fetches the tag from --- the info pointer. So we'll be pedantic all the time, and see if that gives any --- other problems --- Also we don't want to discard 'seq's - = tick (CaseElim case_bndr) `thenSmpl_` - returnSmpl (bindCaseBndr case_bndr scrut rhs) - - where - -- The case binder is going to be evaluated later, - -- and the scrutinee is a simple variable - var_demanded_later (Var v) = isStrictDmd (idNewDemandInfo case_bndr) - var_demanded_later other = False - - --------------------------------------------------- --- 2. Identity case --------------------------------------------------- - -mkCase1 scrut case_bndr ty alts -- Identity case +mkCase1 _dflags scrut case_bndr alts -- Identity case | all identity_alt alts - = tick (CaseIdentity case_bndr) `thenSmpl_` - returnSmpl (re_note scrut) + = do { tick (CaseIdentity case_bndr) + ; return (re_cast scrut) } where - identity_alt (con, args, rhs) = de_note rhs `cheapEqExpr` identity_rhs con args + identity_alt (con, args, rhs) = check_eq con args (de_cast rhs) - identity_rhs (DataAlt con) args = mkConApp con (arg_tys ++ map varToCoreExpr args) - identity_rhs (LitAlt lit) _ = Lit lit - identity_rhs DEFAULT _ = Var case_bndr + check_eq DEFAULT _ (Var v) = v == case_bndr + check_eq (LitAlt lit') _ (Lit lit) = lit == lit' + check_eq (DataAlt con) args rhs = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args) + || rhs `cheapEqExpr` Var case_bndr + check_eq _ _ _ = False arg_tys = map Type (tyConAppArgs (idType case_bndr)) -- We've seen this: - -- case coerce T e of x { _ -> coerce T' x } - -- And we definitely want to eliminate this case! - -- So we throw away notes from the RHS, and reconstruct - -- (at least an approximation) at the other end - de_note (Note _ e) = de_note e - de_note e = e + -- case e of x { _ -> x `cast` c } + -- And we definitely want to eliminate this case, to give + -- e `cast` c + -- So we throw away the cast from the RHS, and reconstruct + -- it at the other end. All the RHS casts must be the same + -- if (all identity_alt alts) holds. + -- + -- Don't worry about nested casts, because the simplifier combines them + de_cast (Cast e _) = e + de_cast e = e + + re_cast scrut = case head alts of + (_,_,Cast _ co) -> Cast scrut co + _ -> scrut - -- re_note wraps a coerce if it might be necessary - re_note scrut = case head alts of - (_,_,rhs1@(Note _ _)) -> mkCoerce2 (exprType rhs1) (idType case_bndr) scrut - other -> scrut +-------------------------------------------------- +-- 3. Merge Identical Alternatives +-------------------------------------------------- +mkCase1 dflags scrut case_bndr ((_con1,bndrs1,rhs1) : con_alts) + | all isDeadBinder bndrs1 -- Remember the default + , length filtered_alts < length con_alts -- alternative comes first + -- Also Note [Dead binders] + = do { tick (AltMerge case_bndr) + ; mkCase2 dflags scrut case_bndr alts' } + where + alts' = (DEFAULT, [], rhs1) : filtered_alts + filtered_alts = filter keep con_alts + keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1) +mkCase1 dflags scrut bndr alts = mkCase2 dflags scrut bndr alts -------------------------------------------------- -- Catch-all -------------------------------------------------- -mkCase1 scrut bndr ty alts = returnSmpl (Case scrut bndr ty alts) +mkCase2 _dflags scrut bndr alts + = return (Case scrut bndr (coreAltsType alts) alts) \end{code} - -When adding auxiliary bindings for the case binder, it's worth checking if -its dead, because it often is, and occasionally these mkCase transformations -cascade rather nicely. - -\begin{code} -bindCaseBndr bndr rhs body - | isDeadBinder bndr = body - | otherwise = bindNonRec bndr rhs body -\end{code} +Note [Dead binders] +~~~~~~~~~~~~~~~~~~~~ +Note that dead-ness is maintained by the simplifier, so that it is +accurate after simplification as well as before. + + +Note [Cascading case merge] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Case merging should cascade in one sweep, because it +happens bottom-up + + case e of a { + DEFAULT -> case a of b + DEFAULT -> case b of c { + DEFAULT -> e + A -> ea + B -> eb + C -> ec +==> + case e of a { + DEFAULT -> case a of b + DEFAULT -> let c = b in e + A -> let c = b in ea + B -> eb + C -> ec +==> + case e of a { + DEFAULT -> let b = a in let c = b in e + A -> let b = a in let c = b in ea + B -> let b = a in eb + C -> ec + + +However here's a tricky case that we still don't catch, and I don't +see how to catch it in one pass: + + case x of c1 { I# a1 -> + case a1 of c2 -> + 0 -> ... + DEFAULT -> case x of c3 { I# a2 -> + case a2 of ... + +After occurrence analysis (and its binder-swap) we get this + + case x of c1 { I# a1 -> + let x = c1 in -- Binder-swap addition + case a1 of c2 -> + 0 -> ... + DEFAULT -> case x of c3 { I# a2 -> + case a2 of ... + +When we simplify the inner case x, we'll see that +x=c1=I# a1. So we'll bind a2 to a1, and get + + case x of c1 { I# a1 -> + case a1 of c2 -> + 0 -> ... + DEFAULT -> case a1 of ... + +This is corect, but we can't do a case merge in this sweep +because c2 /= a1. Reason: the binding c1=I# a1 went inwards +without getting changed to c1=I# c2. + +I don't think this is worth fixing, even if I knew how. It'll +all come out in the next pass anyway. + + \ No newline at end of file