import CoreSyn
import qualified CoreSubst
import PprCore
+import DataCon ( dataConCannotMatch )
import CoreFVs
import CoreUtils
import CoreArity
import Demand
import SimplMonad
import Type hiding( substTy )
-import Coercion ( coercionKind )
+import Coercion hiding( substCo )
import TyCon
-import Unify ( dataConCannotMatch )
import VarSet
import BasicTypes
import Util
import MonadUtils
import Outputable
import FastString
+import Pair
import Data.List
\end{code}
| CoerceIt -- C `cast` co
OutCoercion -- The coercion simplified
+ -- Invariant: never an identity coercion
SimplCont
| ApplyTo -- C arg
contIsTrivial :: SimplCont -> Bool
contIsTrivial (Stop {}) = True
contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
+contIsTrivial (ApplyTo _ (Coercion _) _ cont) = contIsTrivial cont
contIsTrivial (CoerceIt _ cont) = contIsTrivial cont
contIsTrivial _ = False
contResultType env ty cont
= go cont ty
where
- subst_ty se ty = substTy (se `setInScope` env) ty
+ subst_ty se ty = SimplEnv.substTy (se `setInScope` env) ty
+ subst_co se co = SimplEnv.substCo (se `setInScope` env) co
go (Stop {}) ty = ty
- go (CoerceIt co cont) _ = go cont (snd (coercionKind co))
+ go (CoerceIt co cont) _ = go cont (pSnd (coercionKind co))
go (StrictBind _ bs body se cont) _ = go cont (subst_ty se (exprType (mkLams bs body)))
go (StrictArg ai _ cont) _ = go cont (funResultTy (argInfoResultTy ai))
go (Select _ _ alts se cont) _ = go cont (subst_ty se (coreAltsType alts))
go (ApplyTo _ arg se cont) ty = go cont (apply_to_arg ty arg se)
- apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
- apply_to_arg ty _ _ = funResultTy ty
+ apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
+ apply_to_arg ty (Coercion co_arg) se = applyCo ty (subst_co se co_arg)
+ apply_to_arg ty _ _ = funResultTy ty
argInfoResultTy :: ArgInfo -> OutType
argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args })
-------------------
countValArgs :: SimplCont -> Int
countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
+countValArgs (ApplyTo _ (Coercion _) _ cont) = countValArgs cont
countValArgs (ApplyTo _ _ _ cont) = 1 + countValArgs cont
countValArgs _ = 0
sm_eta_expand :: Bool -- Whether eta-expansion is enabled
\begin{code}
-simplEnvForGHCi :: SimplEnv
-simplEnvForGHCi = mkSimplEnv $
- SimplMode { sm_names = ["GHCi"]
- , sm_phase = InitialPhase
- , sm_rules = True, sm_inline = False
- , sm_eta_expand = False, sm_case_case = True }
+simplEnvForGHCi :: DynFlags -> SimplEnv
+simplEnvForGHCi dflags
+ = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
+ , sm_phase = InitialPhase
+ , sm_rules = rules_on
+ , sm_inline = False
+ , sm_eta_expand = eta_expand_on
+ , sm_case_case = True }
+ where
+ rules_on = dopt Opt_EnableRewriteRules dflags
+ eta_expand_on = dopt Opt_DoLambdaEtaExpansion dflags
-- Do not do any inlining, in case we expose some unboxed
-- tuple stuff that confuses the bytecode interpreter
-- See Note [Simplifying inside InlineRules]
updModeForInlineRules inline_rule_act current_mode
= current_mode { sm_phase = phaseFromActivation inline_rule_act
- , sm_rules = True
, sm_inline = True
, sm_eta_expand = False }
+ -- For sm_rules, just inherit; sm_rules might be "off"
+ -- becuase of -fno-enable-rewrite-rules
where
phaseFromActivation (ActiveAfter n) = Phase n
phaseFromActivation _ = InitialPhase
once, because FloatOut has gone to some trouble to extract them out.
Inlining them won't make the program run faster!
+Note [Do not inline CoVars unconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Coercion variables appear inside coercions, and have a separate
+substitution, so don't inline them via the IdSubst!
+
\begin{code}
preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
preInlineUnconditionally env top_lvl bndr rhs
| isStableUnfolding (idUnfolding bndr) = False -- Note [InlineRule and preInlineUnconditionally]
| isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids]
| opt_SimplNoPreInlining = False
+ | isCoVar bndr = False -- Note [Do not inline CoVars unconditionally]
| otherwise = case idOccInfo bndr of
IAmDead -> True -- Happens in ((\x.1) v)
OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
trivial RHS. If so, we can inline and discard the binding altogether.
NB: a loop breaker has must_keep_binding = True and non-loop-breakers
-only have *forward* references Hence, it's safe to discard the binding
+only have *forward* references. Hence, it's safe to discard the binding
NOTE: This isn't our last opportunity to inline. We're at the binding
site right now, and we'll get another opportunity when we get to the
postInlineUnconditionally
:: SimplEnv -> TopLevelFlag
-> OutId -- The binder (an InId would be fine too)
+ -- (*not* a CoVar)
-> OccInfo -- From the InId
-> OutExpr
-> Unfolding
-- because it might be referred to "earlier"
| isExportedId bndr = False
| isStableUnfolding unfolding = False -- Note [InlineRule and postInlineUnconditionally]
- | exprIsTrivial rhs = True
| isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally]
+ | exprIsTrivial rhs = True
| otherwise
= case occ_info of
-- The point of examining occ_info here is that for *non-values*
Note [Top level and postInlineUnconditionally]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We don't do postInlineUnconditionally for top-level things (exept ones that
-are trivial):
- * There is no point, because the main goal is to get rid of local
- bindings used in multiple case branches.
+We don't do postInlineUnconditionally for top-level things (even for
+ones that are trivial):
+
* Doing so will inline top-level error expressions that have been
carefully floated out by FloatOut. More generally, it might
replace static allocation with dynamic.
+ * Even for trivial expressions there's a problem. Consider
+ {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
+ blah xs = reverse xs
+ ruggle = sort
+ In one simplifier pass we might fire the rule, getting
+ blah xs = ruggle xs
+ but in *that* simplifier pass we must not do postInlineUnconditionally
+ on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
+
+ If the rhs is trivial it'll be inlined by callSiteInline, and then
+ the binding will be dead and discarded by the next use of OccurAnal
+
+ * There is less point, because the main goal is to get rid of local
+ bindings used in multiple case branches.
+
+
Note [InlineRule and postInlineUnconditionally]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
| not (any bad bndrs)
-- Note [Casts and lambdas]
= do { lam <- mkLam' dflags bndrs body
- ; return (mkCoerce (mkPiTypes bndrs co) lam) }
+ ; return (mkCoerce (mkPiCos bndrs co) lam) }
where
- co_vars = tyVarsOfType co
+ co_vars = tyCoVarsOfCo co
bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars
mkLam' dflags bndrs body@(Lam {})
= do { tick (EtaReduction (head bndrs))
; return etad_lam }
- | otherwise
+ | otherwise
= return (mkLams bndrs body)
\end{code}
try_expand dflags
| sm_eta_expand (getMode env) -- Provided eta-expansion is on
, not (exprIsTrivial rhs)
- , let new_arity = exprEtaExpandArity dflags rhs
+ , let dicts_cheap = dopt Opt_DictsCheap dflags
+ new_arity = findArity dicts_cheap bndr rhs old_arity
, new_arity > rhs_arity
= do { tick (EtaExpansion bndr)
; return (new_arity, etaExpand new_arity rhs) }
rhs_arity = exprArity rhs
old_arity = idArity bndr
_dmd_arity = length $ fst $ splitStrictSig $ idStrictness bndr
+
+findArity :: Bool -> Id -> CoreExpr -> Arity -> Arity
+-- This implements the fixpoint loop for arity analysis
+-- See Note [Arity analysis]
+findArity dicts_cheap bndr rhs old_arity
+ = go (exprEtaExpandArity (mk_cheap_fn dicts_cheap init_cheap_app) rhs)
+ -- We always call exprEtaExpandArity once, but usually
+ -- that produces a result equal to old_arity, and then
+ -- we stop right away (since arities should not decrease)
+ -- Result: the common case is that there is just one iteration
+ where
+ go :: Arity -> Arity
+ go cur_arity
+ | cur_arity <= old_arity = cur_arity
+ | new_arity == cur_arity = cur_arity
+ | otherwise = ASSERT( new_arity < cur_arity )
+ pprTrace "Exciting arity"
+ (vcat [ ppr bndr <+> ppr cur_arity <+> ppr new_arity
+ , ppr rhs])
+ go new_arity
+ where
+ new_arity = exprEtaExpandArity (mk_cheap_fn dicts_cheap cheap_app) rhs
+
+ cheap_app :: CheapAppFun
+ cheap_app fn n_val_args
+ | fn == bndr = n_val_args < cur_arity
+ | otherwise = isCheapApp fn n_val_args
+
+ init_cheap_app :: CheapAppFun
+ init_cheap_app fn n_val_args
+ | fn == bndr = True
+ | otherwise = isCheapApp fn n_val_args
+
+mk_cheap_fn :: Bool -> CheapAppFun -> CheapFun
+mk_cheap_fn dicts_cheap cheap_app
+ | not dicts_cheap
+ = \e _ -> exprIsCheap' cheap_app e
+ | otherwise
+ = \e mb_ty -> exprIsCheap' cheap_app e
+ || case mb_ty of
+ Nothing -> False
+ Just ty -> isDictLikeTy ty
+ -- If the experimental -fdicts-cheap flag is on, we eta-expand through
+ -- dictionary bindings. This improves arities. Thereby, it also
+ -- means that full laziness is less prone to floating out the
+ -- application of a function to its dictionary arguments, which
+ -- can thereby lose opportunities for fusion. Example:
+ -- foo :: Ord a => a -> ...
+ -- foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
+ -- -- So foo has arity 1
+ --
+ -- f = \x. foo dInt $ bar x
+ --
+ -- The (foo DInt) is floated out, and makes ineffective a RULE
+ -- foo (bar x) = ...
+ --
+ -- One could go further and make exprIsCheap reply True to any
+ -- dictionary-typed expression, but that's more work.
+ --
+ -- See Note [Dictionary-like types] in TcType.lhs for why we use
+ -- isDictLikeTy here rather than isDictTy
\end{code}
Note [Eta-expanding at let bindings]
as far as the programmer is concerned, it's not applied to two
arguments!
+Note [Arity analysis]
+~~~~~~~~~~~~~~~~~~~~~
+The motivating example for arity analysis is this:
+
+ f = \x. let g = f (x+1)
+ in \y. ...g...
+
+What arity does f have? Really it should have arity 2, but a naive
+look at the RHS won't see that. You need a fixpoint analysis which
+says it has arity "infinity" the first time round.
+
+This example happens a lot; it first showed up in Andy Gill's thesis,
+fifteen years ago! It also shows up in the code for 'rnf' on lists
+in Trac #4138.
+
+The analysis is easy to achieve because exprEtaExpandArity takes an
+argument
+ type CheapFun = CoreExpr -> Maybe Type -> Bool
+used to decide if an expression is cheap enough to push inside a
+lambda. And exprIsCheap' in turn takes an argument
+ type CheapAppFun = Id -> Int -> Bool
+which tells when an application is cheap. This makes it easy to
+write the analysis loop.
+
+The analysis is cheap-and-cheerful because it doesn't deal with
+mutual recursion. But the self-recursive case is the important one.
+
%************************************************************************
%* *
; return (subst', (NonRec poly_id poly_rhs)) }
where
rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
- tvs_here | any isCoVar main_tvs = main_tvs -- Note [Abstract over coercions]
- | otherwise
- = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyCoVar rhs')
+ tvs_here = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
-- Abstract only over the type variables free in the rhs
-- wrt which the new binding is abstracted. But the naive
[con] -> -- It matches exactly one constructor, so fill it in
do { tick (FillInCaseDefault case_bndr)
; us <- getUniquesM
- ; let (ex_tvs, co_tvs, arg_ids) =
- dataConRepInstPat us con inst_tys
- ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
+ ; let (ex_tvs, arg_ids) = dataConRepInstPat us con inst_tys
+ ; return [(DataAlt con, ex_tvs ++ arg_ids, deflt_rhs)] }
_ -> return [(DEFAULT, [], deflt_rhs)]