\begin{code}
module SimplUtils (
-- Rebuilding
- mkLam, mkCase, prepareAlts, bindCaseBndr,
+ mkLam, mkCase, prepareAlts,
-- Inlining,
preInlineUnconditionally, postInlineUnconditionally,
- activeInline, activeRule, inlineMode,
+ activeInline, activeRule,
+ simplEnvForGHCi, simplEnvForRules, updModeForInlineRules,
-- The continuation type
SimplCont(..), DupFlag(..), ArgInfo(..),
contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs,
- countValArgs, countArgs, splitInlineCont,
- mkBoringStop, mkLazyArgStop, contIsRhsOrArg,
- interestingCallContext, interestingArgContext,
+ pushArgs, countValArgs, countArgs, addArgTo,
+ mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
+ interestingCallContext,
interestingArg, mkArgInfo,
import PprCore
import CoreFVs
import CoreUtils
+import CoreArity ( etaExpand, exprEtaExpandArity )
import CoreUnfold
import Name
import Id
import Var ( isCoVar )
-import NewDemand
+import Demand
import SimplMonad
import Type hiding( substTy )
import Coercion ( coercionKind )
import Outputable
import FastString
-import List( nub )
+import Data.List
\end{code}
| ApplyTo -- C arg
DupFlag
- InExpr SimplEnv -- The argument and its static env
+ InExpr StaticEnv -- The argument and its static env
SimplCont
| Select -- case C of alts
DupFlag
- InId [InAlt] SimplEnv -- The case binder, alts, and subst-env
+ InId [InAlt] StaticEnv -- The case binder, alts, and subst-env
SimplCont
-- The two strict forms have no DupFlag, because we never duplicate them
| StrictBind -- (\x* \xs. e) C
InId [InBndr] -- let x* = [] in e
- InExpr SimplEnv -- is a special case
+ InExpr StaticEnv -- is a special case
SimplCont
- | StrictArg -- e C
- OutExpr -- e
- CallCtxt -- Whether *this* argument position is interesting
- ArgInfo -- Whether the function at the head of e has rules, etc
- SimplCont -- plus strictness flags for *further* args
+ | StrictArg -- f e1 ..en C
+ ArgInfo -- Specifies f, e1..en, Whether f has rules, etc
+ -- plus strictness flags for *further* args
+ CallCtxt -- Whether *this* argument position is interesting
+ SimplCont
data ArgInfo
= ArgInfo {
- ai_rules :: Bool, -- Function has rules (recursively)
- -- => be keener to inline in all args
- ai_strs :: [Bool], -- Strictness of arguments
+ ai_fun :: Id, -- The function
+ ai_args :: [OutExpr], -- ...applied to these args (which are in *reverse* order)
+ ai_rules :: [CoreRule], -- Rules for this function
+
+ ai_encl :: Bool, -- Flag saying whether this function
+ -- or an enclosing one has rules (recursively)
+ -- True => be keener to inline in all args
+
+ ai_strs :: [Bool], -- Strictness of remaining arguments
-- Usually infinite, but if it is finite it guarantees
-- that the function diverges after being given
-- that number of args
- ai_discs :: [Int] -- Discounts for arguments; non-zero => be keener to inline
+ ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline
-- Always infinite
}
+addArgTo :: ArgInfo -> OutExpr -> ArgInfo
+addArgTo ai arg = ai { ai_args = arg : ai_args ai }
+
instance Outputable SimplCont where
ppr (Stop interesting) = ptext (sLit "Stop") <> brackets (ppr interesting)
ppr (ApplyTo dup arg _ cont) = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg)
{- $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
- ppr (StrictArg f _ _ cont) = (ptext (sLit "StrictArg") <+> ppr f) $$ ppr cont
+ ppr (StrictArg ai _ cont) = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont
ppr (Select dup bndr alts _ cont) = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$
(nest 4 (ppr alts)) $$ ppr cont
ppr (CoerceIt co cont) = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
mkBoringStop :: SimplCont
mkBoringStop = Stop BoringCtxt
+mkRhsStop :: SimplCont -- See Note [RHS of lets] in CoreUnfold
+mkRhsStop = Stop (ArgCtxt False)
+
mkLazyArgStop :: CallCtxt -> SimplCont
mkLazyArgStop cci = Stop cci
go (Stop {}) ty = ty
go (CoerceIt co cont) _ = go cont (snd (coercionKind co))
go (StrictBind _ bs body se cont) _ = go cont (subst_ty se (exprType (mkLams bs body)))
- go (StrictArg fn _ _ cont) _ = go cont (funResultTy (exprType fn))
+ go (StrictArg ai _ cont) _ = go cont (funResultTy (argInfoResultTy ai))
go (Select _ _ alts se cont) _ = go cont (subst_ty se (coreAltsType alts))
go (ApplyTo _ arg se cont) ty = go cont (apply_to_arg ty arg se)
apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
apply_to_arg ty _ _ = funResultTy ty
+argInfoResultTy :: ArgInfo -> OutType
+argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args })
+ = foldr (\arg fn_ty -> applyTypeToArg fn_ty arg) (idType fun) args
+
-------------------
countValArgs :: SimplCont -> Int
countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
go args (ApplyTo _ arg se cont) = go (substExpr se arg : args) cont
go args cont = (reverse args, cont)
+pushArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont
+pushArgs _env [] cont = cont
+pushArgs env (arg:args) cont = ApplyTo NoDup arg env (pushArgs env args cont)
+
dropArgs :: Int -> SimplCont -> SimplCont
dropArgs 0 cont = cont
dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
dropArgs n other = pprPanic "dropArgs" (ppr n <+> ppr other)
-
---------------------
-splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont)
--- Returns Nothing if the continuation should dissolve an InlineMe Note
--- Return Just (c1,c2) otherwise,
--- where c1 is the continuation to put inside the InlineMe
--- and c2 outside
-
--- Example: (__inline_me__ (/\a. e)) ty
--- Here we want to do the beta-redex without dissolving the InlineMe
--- See test simpl017 (and Trac #1627) for a good example of why this is important
-
-splitInlineCont (ApplyTo dup (Type ty) se c)
- | Just (c1, c2) <- splitInlineCont c = Just (ApplyTo dup (Type ty) se c1, c2)
-splitInlineCont cont@(Stop {}) = Just (mkBoringStop, cont)
-splitInlineCont cont@(StrictBind {}) = Just (mkBoringStop, cont)
-splitInlineCont cont@(StrictArg {}) = Just (mkBoringStop, cont)
-splitInlineCont _ = Nothing
-\end{code}
-
-
-\begin{code}
-interestingArg :: OutExpr -> Bool
- -- An argument is interesting if it has *some* structure
- -- We are here trying to avoid unfolding a function that
- -- is applied only to variables that have no unfolding
- -- (i.e. they are probably lambda bound): f x y z
- -- There is little point in inlining f here.
-interestingArg (Var v) = hasSomeUnfolding (idUnfolding v)
- -- Was: isValueUnfolding (idUnfolding v')
- -- But that seems over-pessimistic
- || isDataConWorkId v
- -- This accounts for an argument like
- -- () or [], which is definitely interesting
-interestingArg (Type _) = False
-interestingArg (App fn (Type _)) = interestingArg fn
-interestingArg (Note _ a) = interestingArg a
-
--- Idea (from Sam B); I'm not sure if it's a good idea, so commented out for now
--- interestingArg expr | isUnLiftedType (exprType expr)
--- -- Unlifted args are only ever interesting if we know what they are
--- = case expr of
--- Lit lit -> True
--- _ -> False
-
-interestingArg _ = True
- -- Consider let x = 3 in f x
- -- The substitution will contain (x -> ContEx 3), and we want to
- -- to say that x is an interesting argument.
- -- But consider also (\x. f x y) y
- -- The substitution will contain (x -> ContEx y), and we want to say
- -- that x is not interesting (assuming y has no unfolding)
\end{code}
-Comment about interestingCallContext
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [Interesting call context]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We want to avoid inlining an expression where there can't possibly be
any gain, such as in an argument position. Hence, if the continuation
is interesting (eg. a case scrutinee, application etc.) then we
\begin{code}
interestingCallContext :: SimplCont -> CallCtxt
+-- See Note [Interesting call context]
interestingCallContext cont
= interesting cont
where
- interestingCtxt = ArgCtxt False 2 -- Give *some* incentive!
-
interesting (Select _ bndr _ _ _)
- | isDeadBinder bndr = CaseCtxt
- | otherwise = interestingCtxt
+ | isDeadBinder bndr = CaseCtxt
+ | otherwise = ArgCtxt False -- If the binder is used, this
+ -- is like a strict let
+ -- See Note [RHS of lets] in CoreUnfold
- interesting (ApplyTo {}) = interestingCtxt
- -- Can happen if we have (coerce t (f x)) y
- -- Perhaps interestingCtxt is a bit over-keen, but I've
- -- seen (coerce f) x, where f has an INLINE prag,
- -- So we have to give some motivation for inlining it
-
- interesting (StrictArg _ cci _ _) = cci
- interesting (StrictBind {}) = BoringCtxt
- interesting (Stop cci) = cci
- interesting (CoerceIt _ cont) = interesting cont
+ interesting (ApplyTo _ arg _ cont)
+ | isTypeArg arg = interesting cont
+ | otherwise = ValAppCtxt -- Can happen if we have (f Int |> co) y
+ -- If f has an INLINE prag we need to give it some
+ -- motivation to inline. See Note [Cast then apply]
+ -- in CoreUnfold
+
+ interesting (StrictArg _ cci _) = cci
+ interesting (StrictBind {}) = BoringCtxt
+ interesting (Stop cci) = cci
+ interesting (CoerceIt _ cont) = interesting cont
-- If this call is the arg of a strict function, the context
-- is a bit interesting. If we inline here, we may get useful
-- evaluation information to avoid repeated evals: e.g.
-------------------
mkArgInfo :: Id
+ -> [CoreRule] -- Rules for function
-> Int -- Number of value args
- -> SimplCont -- Context of the cal
+ -> SimplCont -- Context of the call
-> ArgInfo
-mkArgInfo fun n_val_args call_cont
+mkArgInfo fun rules n_val_args call_cont
| n_val_args < idArity fun -- Note [Unsaturated functions]
- = ArgInfo { ai_rules = False
+ = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules
+ , ai_encl = False
, ai_strs = vanilla_stricts
, ai_discs = vanilla_discounts }
| otherwise
- = ArgInfo { ai_rules = interestingArgContext fun call_cont
+ = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules
+ , ai_encl = interestingArgContext rules call_cont
, ai_strs = add_type_str (idType fun) arg_stricts
, ai_discs = arg_discounts }
where
vanilla_discounts, arg_discounts :: [Int]
vanilla_discounts = repeat 0
arg_discounts = case idUnfolding fun of
- CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _)
+ CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_args = discounts}}
-> discounts ++ vanilla_discounts
_ -> vanilla_discounts
vanilla_stricts = repeat False
arg_stricts
- = case splitStrictSig (idNewStrictness fun) of
+ = case splitStrictSig (idStrictness fun) of
(demands, result_info)
| not (demands `lengthExceeds` n_val_args)
-> -- Enough args, use the strictness given.
on its first argument -- it must be saturated for these to kick in
-}
-interestingArgContext :: Id -> SimplCont -> Bool
+interestingArgContext :: [CoreRule] -> SimplCont -> Bool
-- If the argument has form (f x y), where x,y are boring,
-- and f is marked INLINE, then we don't want to inline f.
-- But if the context of the argument is
-- where h has rules, then we do want to inline f; hence the
-- call_cont argument to interestingArgContext
--
--- The interesting_arg_ctxt flag makes this happen; if it's
+-- The ai-rules flag makes this happen; if it's
-- set, the inliner gets just enough keener to inline f
-- regardless of how boring f's arguments are, if it's marked INLINE
--
-- The alternative would be to *always* inline an INLINE function,
-- regardless of how boring its context is; but that seems overkill
-- For example, it'd mean that wrapper functions were always inlined
-interestingArgContext fn call_cont
- = idHasRules fn || go call_cont
+interestingArgContext rules call_cont
+ = notNull rules || enclosing_fn_has_rules
where
- go (Select {}) = False
- go (ApplyTo {}) = False
- go (StrictArg _ cci _ _) = interesting cci
- go (StrictBind {}) = False -- ??
- go (CoerceIt _ c) = go c
- go (Stop cci) = interesting cci
-
- interesting (ArgCtxt rules _) = rules
- interesting _ = False
+ enclosing_fn_has_rules = go call_cont
+
+ go (Select {}) = False
+ go (ApplyTo {}) = False
+ go (StrictArg _ cci _) = interesting cci
+ go (StrictBind {}) = False -- ??
+ go (CoerceIt _ c) = go c
+ go (Stop cci) = interesting cci
+
+ interesting (ArgCtxt rules) = rules
+ interesting _ = False
\end{code}
%* *
%************************************************************************
-Inlining is controlled partly by the SimplifierMode switch. This has two
-settings:
+\begin{code}
+simplEnvForGHCi :: SimplEnv
+simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $
+ SimplGently { sm_rules = False, sm_inline = False }
+ -- Do not do any inlining, in case we expose some unboxed
+ -- tuple stuff that confuses the bytecode interpreter
+
+simplEnvForRules :: SimplEnv
+simplEnvForRules = mkSimplEnv allOffSwitchChecker $
+ SimplGently { sm_rules = True, sm_inline = False }
+
+updModeForInlineRules :: SimplifierMode -> SimplifierMode
+updModeForInlineRules mode
+ = case mode of
+ SimplGently {} -> mode -- Don't modify mode if we already gentle
+ SimplPhase {} -> SimplGently { sm_rules = True, sm_inline = True }
+ -- Simplify as much as possible, subject to the usual "gentle" rules
+\end{code}
+Inlining is controlled partly by the SimplifierMode switch. This has two
+settings
+
SimplGently (a) Simplifying before specialiser/full laziness
- (b) Simplifiying inside INLINE pragma
+ (b) Simplifiying inside InlineRules
(c) Simplifying the LHS of a rule
(d) Simplifying a GHCi expression or Template
Haskell splice
SimplPhase n _ Used at all other times
-The key thing about SimplGently is that it does no call-site inlining.
+Note [Gentle mode]
+~~~~~~~~~~~~~~~~~~
+Gentle mode has a separate boolean flag to control
+ a) inlining (sm_inline flag)
+ b) rules (sm_rules flag)
+A key invariant about Gentle mode is that it is treated as the EARLIEST
+phase. Something is inlined if the sm_inline flag is on AND the thing
+is inlinable in the earliest phase. This is important. Example
+
+ {-# INLINE [~1] g #-}
+ g = ...
+
+ {-# INLINE f #-}
+ f x = g (g x)
+
+If we were to inline g into f's inlining, then an importing module would
+never be able to do
+ f e --> g (g e) ---> RULE fires
+because the InlineRule for f has had g inlined into it.
+
+On the other hand, it is bad not to do ANY inlining into an
+InlineRule, because then recursive knots in instance declarations
+don't get unravelled.
+
+However, *sometimes* SimplGently must do no call-site inlining at all.
Before full laziness we must be careful not to inline wrappers,
because doing so inhibits floating
e.g. ...(case f x of ...)...
anything, because the byte-code interpreter might get confused about
unboxed tuples and suchlike.
-INLINE pragmas
-~~~~~~~~~~~~~~
-SimplGently is also used as the mode to simplify inside an InlineMe note.
+Note [RULEs enabled in SimplGently]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+RULES are enabled when doing "gentle" simplification. Two reasons:
-\begin{code}
-inlineMode :: SimplifierMode
-inlineMode = SimplGently
-\end{code}
+ * We really want the class-op cancellation to happen:
+ op (df d1 d2) --> $cop3 d1 d2
+ because this breaks the mutual recursion between 'op' and 'df'
+
+ * I wanted the RULE
+ lift String ===> ...
+ to work in Template Haskell when simplifying
+ splices, so we get simpler code for literal strings
-It really is important to switch off inlinings inside such
-expressions. Consider the following example
+Note [Simplifying gently inside InlineRules]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do much simplification inside InlineRules (which come from
+INLINE pragmas). It really is important to switch off inlinings
+inside such expressions. Consider the following example
let f = \pq -> BIG
in
in ...g...g...g...g...g...
Now, if that's the ONLY occurrence of f, it will be inlined inside g,
-and thence copied multiple times when g is inlined.
+and thence copied multiple times when g is inlined.
-
-This function may be inlinined in other modules, so we
-don't want to remove (by inlining) calls to functions that have
-specialisations, or that may have transformation rules in an importing
-scope.
+This function may be inlinined in other modules, so we don't want to
+remove (by inlining) calls to functions that have specialisations, or
+that may have transformation rules in an importing scope.
E.g. {-# INLINE f #-}
- f x = ...g...
+ f x = ...g...
and suppose that g is strict *and* has specialisations. If we inline
g's wrapper, we deny f the chance of getting the specialised version
ArgOf continuations. It shouldn't do any harm not to dissolve the
inline-me note under these circumstances.
-Note that the result is that we do very little simplification
-inside an InlineMe.
+Although we do very little simplification inside an InlineRule,
+the RHS is simplified as normal. For example:
all xs = foldr (&&) True xs
any p = all . map p {-# INLINE any #-}
-Problem: any won't get deforested, and so if it's exported and the
-importer doesn't use the inlining, (eg passes it as an arg) then we
-won't get deforestation at all. We havn't solved this problem yet!
+The RHS of 'any' will get optimised and deforested; but the InlineRule
+will still mention the original RHS.
preInlineUnconditionally
Conclusion: inline top level things gaily until Phase 0 (the last
phase), at which point don't.
+Note [pre/postInlineUnconditionally in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in gentle mode we want to do preInlineUnconditionally. The
+reason is that too little clean-up happens if you don't inline
+use-once things. Also a bit of inlining is *good* for full laziness;
+it can expose constant sub-expressions. Example in
+spectral/mandel/Mandel.hs, where the mandelset function gets a useful
+let-float if you inline windowToViewport
+
+However, as usual for Gentle mode, do not inline things that are
+inactive in the intial stages. See Note [Gentle mode].
+
\begin{code}
preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
preInlineUnconditionally env top_lvl bndr rhs
where
phase = getMode env
active = case phase of
- SimplGently -> isAlwaysActive prag
- SimplPhase n _ -> isActive n prag
- prag = idInlinePragma bndr
+ SimplGently {} -> isEarlyActive act
+ -- See Note [pre/postInlineUnconditionally in gentle mode]
+ SimplPhase n _ -> isActive n act
+ act = idInlineActivation bndr
try_once in_lam int_cxt -- There's one textual occurrence
| not in_lam = isNotTopLevel top_lvl || early_phase
\begin{code}
postInlineUnconditionally
:: SimplEnv -> TopLevelFlag
- -> InId -- The binder (an OutId would be fine too)
+ -> OutId -- The binder (an InId would be fine too)
-> OccInfo -- From the InId
-> OutExpr
-> Unfolding
| isLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline
-- because it might be referred to "earlier"
| isExportedId bndr = False
+ | isInlineRule unfolding = False -- Note [InlineRule and postInlineUnconditionally]
| exprIsTrivial rhs = True
| otherwise
= case occ_info of
where
active = case getMode env of
- SimplGently -> isAlwaysActive prag
- SimplPhase n _ -> isActive n prag
- prag = idInlinePragma bndr
+ SimplGently {} -> isEarlyActive act
+ -- See Note [pre/postInlineUnconditionally in gentle mode]
+ SimplPhase n _ -> isActive n act
+ act = idInlineActivation bndr
activeInline :: SimplEnv -> OutId -> Bool
activeInline env id
+ | isNonRuleLoopBreaker (idOccInfo id) -- Things with an INLINE pragma may have
+ -- an unfolding *and* be a loop breaker
+ = False -- (maybe the knot is not yet untied)
+ | otherwise
= case getMode env of
- SimplGently -> False
- -- No inlining at all when doing gentle stuff,
- -- except for local things that occur once (pre/postInlineUnconditionally)
- -- The reason is that too little clean-up happens if you
- -- don't inline use-once things. Also a bit of inlining is *good* for
- -- full laziness; it can expose constant sub-expressions.
- -- Example in spectral/mandel/Mandel.hs, where the mandelset
- -- function gets a useful let-float if you inline windowToViewport
+ SimplGently { sm_inline = inlining_on }
+ -> inlining_on && isEarlyActive act
+ -- See Note [Gentle mode]
-- NB: we used to have a second exception, for data con wrappers.
-- On the grounds that we use gentle mode for rule LHSs, and
-- and they are now constructed as Compulsory unfoldings (in MkId)
-- so they'll happen anyway.
- SimplPhase n _ -> isActive n prag
+ SimplPhase n _ -> isActive n act
where
- prag = idInlinePragma id
+ act = idInlineActivation id
activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
-- Nothing => No rules at all
= Nothing -- Rewriting is off
| otherwise
= case getMode env of
- SimplGently -> Just isAlwaysActive
- -- Used to be Nothing (no rules in gentle mode)
- -- Main motivation for changing is that I wanted
- -- lift String ===> ...
- -- to work in Template Haskell when simplifying
- -- splices, so we get simpler code for literal strings
- SimplPhase n _ -> Just (isActive n)
+ SimplGently { sm_rules = rules_on }
+ | rules_on -> Just isEarlyActive -- Note [RULEs enabled in SimplGently]
+ | otherwise -> Nothing
+ SimplPhase n _ -> Just (isActive n)
\end{code}
+Note [InlineRule and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
+we lose the unfolding. Example
+
+ -- f has InlineRule with rhs (e |> co)
+ -- where 'e' is big
+ f = e |> co
+
+Then there's a danger we'll optimise to
+
+ f' = e
+ f = f' |> co
+
+and now postInlineUnconditionally, losing the InlineRule on f. Now f'
+won't inline because 'e' is too big.
+
%************************************************************************
%* *
%************************************************************************
\begin{code}
-mkLam :: [OutBndr] -> OutExpr -> SimplM OutExpr
+mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
-- mkLam tries three things
-- a) eta reduction, if that gives a trivial expression
-- b) eta expansion [only if there are some value lambdas]
-mkLam [] body
+mkLam _b [] body
= return body
-mkLam bndrs body
+mkLam env bndrs body
= do { dflags <- getDOptsSmpl
; mkLam' dflags bndrs body }
where
; return etad_lam }
| dopt Opt_DoLambdaEtaExpansion dflags,
- any isRuntimeVar bndrs
- = do { body' <- tryEtaExpansion dflags body
+ not (inGentleMode env), -- In gentle mode don't eta-expansion
+ any isRuntimeVar bndrs -- because it can clutter up the code
+ -- with casts etc that may not be removed
+ = do { let body' = tryEtaExpansion dflags body
; return (mkLams bndrs body') }
| otherwise
actually computing the expansion.
\begin{code}
-tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr
+tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr
-- There is at least one runtime binder in the binders
-tryEtaExpansion dflags body = do
- us <- getUniquesM
- return (etaExpand fun_arity us body (exprType body))
+tryEtaExpansion dflags body
+ = etaExpand fun_arity body
where
fun_arity = exprEtaExpandArity dflags body
\end{code}
= do { uniq <- getUniqueM
; let poly_name = setNameUnique (idName var) uniq -- Keep same name
poly_ty = mkForAllTys tvs_here (idType var) -- But new type of course
- poly_id = transferPolyIdInfo var $ -- Note [transferPolyIdInfo] in Id.lhs
+ poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.lhs
mkLocalId poly_name poly_ty
; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
-- In the olden days, it was crucial to copy the occInfo of the original var,
prepareAlts tries these things:
-1. If several alternatives are identical, merge them into
- a single DEFAULT alternative. I've occasionally seen this
- making a big difference:
-
- case e of =====> case e of
- C _ -> f x D v -> ....v....
- D v -> ....v.... DEFAULT -> f x
- DEFAULT -> f x
-
- The point is that we merge common RHSs, at least for the DEFAULT case.
- [One could do something more elaborate but I've never seen it needed.]
- To avoid an expensive test, we just merge branches equal to the *first*
- alternative; this picks up the common cases
- a) all branches equal
- b) some branches equal to the DEFAULT (which occurs first)
+1. Eliminate alternatives that cannot match, including the
+ DEFAULT alternative.
-2. Case merging:
- case e of b { ==> case e of b {
- p1 -> rhs1 p1 -> rhs1
- ... ...
- pm -> rhsm pm -> rhsm
- _ -> case b of b' { pn -> let b'=b in rhsn
- pn -> rhsn ...
- ... po -> let b'=b in rhso
- po -> rhso _ -> let b'=b in rhsd
- _ -> rhsd
- }
-
- which merges two cases in one case when -- the default alternative of
- the outer case scrutises the same variable as the outer case This
- transformation is called Case Merging. It avoids that the same
- variable is scrutinised multiple times.
+2. If the DEFAULT alternative can match only one possible constructor,
+ then make that constructor explicit.
+ e.g.
+ case e of x { DEFAULT -> rhs }
+ ===>
+ case e of x { (a,b) -> rhs }
+ where the type is a single constructor type. This gives better code
+ when rhs also scrutinises x or e.
+3. Returns a list of the constructors that cannot holds in the
+ DEFAULT alternative (if there is one)
-The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs):
+Here "cannot match" includes knowledge from GADTs
- x | p `is` 1 -> e1
- | p `is` 2 -> e2
- ...etc...
+It's a good idea do do this stuff before simplifying the alternatives, to
+avoid simplifying alternatives we know can't happen, and to come up with
+the list of constructors that are handled, to put into the IdInfo of the
+case binder, for use when simplifying the alternatives.
-where @is@ was something like
-
- p `is` n = p /= (-1) && p == n
+Eliminating the default alternative in (1) isn't so obvious, but it can
+happen:
-This gave rise to a horrible sequence of cases
+data Colour = Red | Green | Blue
- case p of
- (-1) -> $j p
- 1 -> e1
- DEFAULT -> $j p
+f x = case x of
+ Red -> ..
+ Green -> ..
+ DEFAULT -> h x
-and similarly in cascade for all the join points!
+h y = case y of
+ Blue -> ..
+ DEFAULT -> [ case y of ... ]
-Note [Dead binders]
-~~~~~~~~~~~~~~~~~~~~
-We do this *here*, looking at un-simplified alternatives, because we
-have to check that r doesn't mention the variables bound by the
-pattern in each alternative, so the binder-info is rather useful.
+If we inline h into f, the default case of the inlined h can't happen.
+If we don't notice this, we may end up filtering out *all* the cases
+of the inner case y, which give us nowhere to go!
\begin{code}
-prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
-prepareAlts env scrut case_bndr' alts
- = do { dflags <- getDOptsSmpl
- ; alts <- combineIdenticalAlts case_bndr' alts
-
- ; let (alts_wo_default, maybe_deflt) = findDefault alts
+prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
+prepareAlts scrut case_bndr' alts
+ = do { let (alts_wo_default, maybe_deflt) = findDefault alts
alt_cons = [con | (con,_,_) <- alts_wo_default]
imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
-- "imposs_deflt_cons" are handled
-- EITHER by the context,
-- OR by a non-DEFAULT branch in this case expression.
- ; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app
+ ; default_alts <- prepareDefault case_bndr' mb_tc_app
imposs_deflt_cons maybe_deflt
; let trimmed_alts = filterOut impossible_alt alts_wo_default
- merged_alts = mergeAlts trimmed_alts default_alts
+ merged_alts = mergeAlts trimmed_alts default_alts
-- We need the mergeAlts in case the new default_alt
-- has turned into a constructor alternative.
-- The merge keeps the inner DEFAULT at the front, if there is one
impossible_alt _ = False
---------------------------------------------------
--- 1. Merge identical branches
---------------------------------------------------
-combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
-
-combineIdenticalAlts case_bndr ((_con1,bndrs1,rhs1) : con_alts)
- | all isDeadBinder bndrs1, -- Remember the default
- length filtered_alts < length con_alts -- alternative comes first
- -- Also Note [Dead binders]
- = do { tick (AltMerge case_bndr)
- ; return ((DEFAULT, [], rhs1) : filtered_alts) }
- where
- filtered_alts = filter keep con_alts
- keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
-
-combineIdenticalAlts _ alts = return alts
-
--------------------------------------------------------------------------
--- Prepare the default alternative
--------------------------------------------------------------------------
-prepareDefault :: DynFlags
- -> SimplEnv
- -> OutId -- Case binder; need just for its type. Note that as an
+prepareDefault :: OutId -- Case binder; need just for its type. Note that as an
-- OutId, it has maximum information; this is important.
-- Test simpl013 is an example
-> Maybe (TyCon, [Type]) -- Type of scrutinee, decomposed
-> Maybe InExpr -- Rhs
-> SimplM [InAlt] -- Still unsimplified
-- We use a list because it's what mergeAlts expects,
- -- And becuase case-merging can cause many to show up
-
-------- Merge nested cases ----------
-prepareDefault dflags env outer_bndr _bndr_ty imposs_cons (Just deflt_rhs)
- | dopt Opt_CaseMerge dflags
- , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
- , DoneId inner_scrut_var' <- substId env inner_scrut_var
- -- Remember, inner_scrut_var is an InId, but outer_bndr is an OutId
- , inner_scrut_var' == outer_bndr
- -- NB: the substId means that if the outer scrutinee was a
- -- variable, and inner scrutinee is the same variable,
- -- then inner_scrut_var' will be outer_bndr
- -- via the magic of simplCaseBinder
- = do { tick (CaseMerge outer_bndr)
-
- ; let munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
- ; return [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts,
- not (con `elem` imposs_cons) ]
- -- NB: filter out any imposs_cons. Example:
- -- case x of
- -- A -> e1
- -- DEFAULT -> case x of
- -- A -> e2
- -- B -> e3
- -- When we merge, we must ensure that e1 takes
- -- precedence over e2 as the value for A!
- }
- -- Warning: don't call prepareAlts recursively!
- -- Firstly, there's no point, because inner alts have already had
- -- mkCase applied to them, so they won't have a case in their default
- -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
- -- in munge_rhs may put a case into the DEFAULT branch!
-
--------- Fill in known constructor -----------
-prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
+prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
| -- This branch handles the case where we are
-- scrutinisng an algebraic data type
isAlgTyCon tycon -- It's a data type, tuple, or unboxed tuples.
-- which would be quite legitmate. But it's a really obscure corner, and
-- not worth wasting code on.
, let imposs_data_cons = [con | DataAlt con <- imposs_cons] -- We now know it's a data type
- impossible con = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
+ impossible con = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
= case filterOut impossible all_cons of
[] -> return [] -- Eliminate the default alternative
-- altogether if it can't match
_ -> return [(DEFAULT, [], deflt_rhs)]
| debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon)
- -- This can legitimately happen for type families, so don't report that
+ -- Check for no data constructors
+ -- This can legitimately happen for type families, so don't report that
= pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
$ return [(DEFAULT, [], deflt_rhs)]
--------- Catch-all cases -----------
-prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
+prepareDefault _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
= return [(DEFAULT, [], deflt_rhs)]
-prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons Nothing
+prepareDefault _case_bndr _bndr_ty _imposs_cons Nothing
= return [] -- No default branch
\end{code}
-=================================================================================
+%************************************************************************
+%* *
+ mkCase
+%* *
+%************************************************************************
mkCase tries these things
-1. Eliminate the case altogether if possible
+1. Merge Nested Cases
+
+ case e of b { ==> case e of b {
+ p1 -> rhs1 p1 -> rhs1
+ ... ...
+ pm -> rhsm pm -> rhsm
+ _ -> case b of b' { pn -> let b'=b in rhsn
+ pn -> rhsn ...
+ ... po -> let b'=b in rhso
+ po -> rhso _ -> let b'=b in rhsd
+ _ -> rhsd
+ }
+
+ which merges two cases in one case when -- the default alternative of
+ the outer case scrutises the same variable as the outer case. This
+ transformation is called Case Merging. It avoids that the same
+ variable is scrutinised multiple times.
-2. Case-identity:
+2. Eliminate Identity Case
case e of ===> e
True -> True;
and similar friends.
+3. Merge identical alternatives.
+ If several alternatives are identical, merge them into
+ a single DEFAULT alternative. I've occasionally seen this
+ making a big difference:
+
+ case e of =====> case e of
+ C _ -> f x D v -> ....v....
+ D v -> ....v.... DEFAULT -> f x
+ DEFAULT -> f x
+
+ The point is that we merge common RHSs, at least for the DEFAULT case.
+ [One could do something more elaborate but I've never seen it needed.]
+ To avoid an expensive test, we just merge branches equal to the *first*
+ alternative; this picks up the common cases
+ a) all branches equal
+ b) some branches equal to the DEFAULT (which occurs first)
+
+The case where Merge Identical Alternatives transformation showed up
+was like this (base/Foreign/C/Err/Error.lhs):
+
+ x | p `is` 1 -> e1
+ | p `is` 2 -> e2
+ ...etc...
+
+where @is@ was something like
+
+ p `is` n = p /= (-1) && p == n
+
+This gave rise to a horrible sequence of cases
+
+ case p of
+ (-1) -> $j p
+ 1 -> e1
+ DEFAULT -> $j p
+
+and similarly in cascade for all the join points!
+
\begin{code}
-mkCase :: OutExpr -> OutId -> [OutAlt] -- Increasing order
- -> SimplM OutExpr
+mkCase, mkCase1, mkCase2
+ :: DynFlags
+ -> OutExpr -> OutId
+ -> [OutAlt] -- Alternatives in standard (increasing) order
+ -> SimplM OutExpr
+
+--------------------------------------------------
+-- 1. Merge Nested Cases
+--------------------------------------------------
+
+mkCase dflags scrut outer_bndr ((DEFAULT, _, deflt_rhs) : outer_alts)
+ | dopt Opt_CaseMerge dflags
+ , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
+ , inner_scrut_var == outer_bndr
+ = do { tick (CaseMerge outer_bndr)
+
+ ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args )
+ (con, args, wrap_rhs rhs)
+ -- Simplifier's no-shadowing invariant should ensure
+ -- that outer_bndr is not shadowed by the inner patterns
+ wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs
+ -- The let is OK even for unboxed binders,
+
+ wrapped_alts | isDeadBinder inner_bndr = inner_alts
+ | otherwise = map wrap_alt inner_alts
+
+ merged_alts = mergeAlts outer_alts wrapped_alts
+ -- NB: mergeAlts gives priority to the left
+ -- case x of
+ -- A -> e1
+ -- DEFAULT -> case x of
+ -- A -> e2
+ -- B -> e3
+ -- When we merge, we must ensure that e1 takes
+ -- precedence over e2 as the value for A!
+
+ ; mkCase1 dflags scrut outer_bndr merged_alts
+ }
+ -- Warning: don't call mkCase recursively!
+ -- Firstly, there's no point, because inner alts have already had
+ -- mkCase applied to them, so they won't have a case in their default
+ -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
+ -- in munge_rhs may put a case into the DEFAULT branch!
+
+mkCase dflags scrut bndr alts = mkCase1 dflags scrut bndr alts
--------------------------------------------------
--- 2. Identity case
+-- 2. Eliminate Identity Case
--------------------------------------------------
-mkCase scrut case_bndr alts -- Identity case
+mkCase1 _dflags scrut case_bndr alts -- Identity case
| all identity_alt alts
- = do tick (CaseIdentity case_bndr)
- return (re_cast scrut)
+ = do { tick (CaseIdentity case_bndr)
+ ; return (re_cast scrut) }
where
identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
(_,_,Cast _ co) -> Cast scrut co
_ -> scrut
+--------------------------------------------------
+-- 3. Merge Identical Alternatives
+--------------------------------------------------
+mkCase1 dflags scrut case_bndr ((_con1,bndrs1,rhs1) : con_alts)
+ | all isDeadBinder bndrs1 -- Remember the default
+ , length filtered_alts < length con_alts -- alternative comes first
+ -- Also Note [Dead binders]
+ = do { tick (AltMerge case_bndr)
+ ; mkCase2 dflags scrut case_bndr alts' }
+ where
+ alts' = (DEFAULT, [], rhs1) : filtered_alts
+ filtered_alts = filter keep con_alts
+ keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
+mkCase1 dflags scrut bndr alts = mkCase2 dflags scrut bndr alts
--------------------------------------------------
-- Catch-all
--------------------------------------------------
-mkCase scrut bndr alts = return (Case scrut bndr (coreAltsType alts) alts)
+mkCase2 _dflags scrut bndr alts
+ = return (Case scrut bndr (coreAltsType alts) alts)
\end{code}
-
-When adding auxiliary bindings for the case binder, it's worth checking if
-its dead, because it often is, and occasionally these mkCase transformations
-cascade rather nicely.
-
-\begin{code}
-bindCaseBndr :: Id -> CoreExpr -> CoreExpr -> CoreExpr
-bindCaseBndr bndr rhs body
- | isDeadBinder bndr = body
- | otherwise = bindNonRec bndr rhs body
-\end{code}
+Note [Dead binders]
+~~~~~~~~~~~~~~~~~~~~
+Note that dead-ness is maintained by the simplifier, so that it is
+accurate after simplification as well as before.
+
+
+Note [Cascading case merge]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Case merging should cascade in one sweep, because it
+happens bottom-up
+
+ case e of a {
+ DEFAULT -> case a of b
+ DEFAULT -> case b of c {
+ DEFAULT -> e
+ A -> ea
+ B -> eb
+ C -> ec
+==>
+ case e of a {
+ DEFAULT -> case a of b
+ DEFAULT -> let c = b in e
+ A -> let c = b in ea
+ B -> eb
+ C -> ec
+==>
+ case e of a {
+ DEFAULT -> let b = a in let c = b in e
+ A -> let b = a in let c = b in ea
+ B -> let b = a in eb
+ C -> ec
+
+
+However here's a tricky case that we still don't catch, and I don't
+see how to catch it in one pass:
+
+ case x of c1 { I# a1 ->
+ case a1 of c2 ->
+ 0 -> ...
+ DEFAULT -> case x of c3 { I# a2 ->
+ case a2 of ...
+
+After occurrence analysis (and its binder-swap) we get this
+
+ case x of c1 { I# a1 ->
+ let x = c1 in -- Binder-swap addition
+ case a1 of c2 ->
+ 0 -> ...
+ DEFAULT -> case x of c3 { I# a2 ->
+ case a2 of ...
+
+When we simplify the inner case x, we'll see that
+x=c1=I# a1. So we'll bind a2 to a1, and get
+
+ case x of c1 { I# a1 ->
+ case a1 of c2 ->
+ 0 -> ...
+ DEFAULT -> case a1 of ...
+
+This is corect, but we can't do a case merge in this sweep
+because c2 /= a1. Reason: the binding c1=I# a1 went inwards
+without getting changed to c1=I# c2.
+
+I don't think this is worth fixing, even if I knew how. It'll
+all come out in the next pass anyway.
+
+
\ No newline at end of file