externallyVisibleId,
idFreeTyVars,
isIP,
-
- -- Inline pragma stuff
- idInlinePragma, setInlinePragma, modifyInlinePragma,
-
isSpecPragmaId, isRecordSelector,
isPrimOpId, isPrimOpId_maybe,
isDataConId, isDataConId_maybe, isDataConWrapId, isDataConWrapId_maybe,
isBottomingId,
isExportedId, isUserExportedId,
- mayHaveNoBinding,
+ hasNoBinding,
+
+ -- Inline pragma stuff
+ idInlinePragma, setInlinePragma, modifyInlinePragma,
+
-- One shot lambda stuff
isOneShotLambda, setOneShotLambda, clearOneShotLambda,
SpecPragmaId -> True
other -> False
-mayHaveNoBinding id = case idFlavour id of
+hasNoBinding id = case idFlavour id of
DataConId _ -> True
PrimOpId _ -> True
other -> False
- -- mayHaveNoBinding returns True of an Id which may not have a
+ -- hasNoBinding returns True of an Id which may not have a
-- binding, even though it is defined in this module. Notably,
-- the constructors of a dictionary are in this situation.
- --
- -- mayHaveNoBinding returns True of some things that *do* have a local binding,
- -- so it's only an approximation. That's ok... it's only use for assertions.
-- Don't drop a binding for an exported Id,
-- if it otherwise looks dead.
\begin{code}
isDeadBinder :: Id -> Bool
-isDeadBinder bndr | isId bndr = case idOccInfo bndr of
- IAmDead -> True
- other -> False
+isDeadBinder bndr | isId bndr = isDeadOcc (idOccInfo bndr)
| otherwise = False -- TyVars count as not dead
isIP id = isIPOcc (getOccName id)
#include "HsVersions.h"
import CoreSyn
-import Id ( Id, idFreeTyVars, mayHaveNoBinding, idSpecialisation )
+import Id ( Id, idFreeTyVars, hasNoBinding, idSpecialisation )
import VarSet
import Var ( Var, isId )
import Name ( isLocallyDefined )
mustHaveLocalBinding :: Var -> Bool
-- True <=> the variable must have a binding in this module
mustHaveLocalBinding v
- | isId v = isLocallyDefined v && not (mayHaveNoBinding v)
+ | isId v = isLocallyDefined v && not (hasNoBinding v)
| otherwise = True -- TyVars etc must
\end{code}
size_up (Case (Var v) _ alts)
| v `elem` top_args -- We are scrutinising an argument variable
- = case alts of
+ =
+{- I'm nuking this special case; BUT see the comment with case alternatives.
+
+ (a) It's too eager. We don't want to inline a wrapper into a
+ context with no benefit.
+ E.g. \ x. f (x+x) o point in inlining (+) here!
+
+ (b) It's ineffective. Once g's wrapper is inlined, its case-expressions
+ aren't scrutinising arguments any more
+
+ case alts of
+
[alt] -> size_up_alt alt `addSize` SizeIs 0# (unitBag (v, 1)) 0#
-- We want to make wrapper-style evaluation look cheap, so that
-- when we inline a wrapper it doesn't make call site (much) bigger
-- ordering difference, we make (case a of (x,y) -> ...),
-- *where a is one of the arguments* look free.
- other -> alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the scrutinee
+ other ->
+-}
+ alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the scrutinee
(foldr1 maxSize alt_sizes)
-- Good to inline if an arg is scrutinised, because
------------
size_up_alt (con, bndrs, rhs) = size_up rhs
- -- Don't charge for args, so that wrappers look cheap
+ -- Don't charge for args, so that wrappers look cheap
+ -- (See comments about wrappers with Case)
------------
-- We want to record if we're case'ing, or applying, an argument
#ifdef DEBUG
if opt_D_dump_inlinings then
pprTrace "Considering inlining"
- (ppr id <+> vcat [text "black listed" <+> ppr black_listed,
+ (ppr id <+> vcat [text "black listed:" <+> ppr black_listed,
text "occ info:" <+> ppr occ,
text "arg infos" <+> ppr arg_infos,
text "interesting continuation" <+> ppr interesting_cont,
| from_INLINE -> has_rules -- Black list until final phase
| otherwise -> True -- Always blacklisted
- IMustNotBeINLINEd from_inline (Just threshold)
- | from_inline -> (phase < threshold && has_rules)
+ IMustNotBeINLINEd from_INLINE (Just threshold)
+ | from_INLINE -> (phase < threshold && has_rules)
| otherwise -> (phase < threshold || has_rules)
where
has_rules = v `elemVarSet` rule_vars
mkPiType,
-- Properties of expressions
- exprType, coreAltsType, exprArity,
+ exprType, coreAltsType,
exprIsBottom, exprIsDupable, exprIsTrivial, exprIsCheap,
exprIsValue,exprOkForSpeculation, exprIsBig,
exprIsConApp_maybe,
* case e of
pi -> ei
+ (where e, and all the ei are cheap)
- where e, and all the ei are cheap; and
-
- * let x = e
- in b
-
- where e and b are cheap; and
+ * let x = e in b
+ (where e and b are cheap)
* op x1 ... xn
-
- where op is a cheap primitive operator
+ (where op is a cheap primitive operator)
* error "foo"
+ (because we are happy to substitute it inside a lambda)
Notice that a variable is considered 'cheap': we can push it inside a lambda,
because sharing will make sure it is only evaluated once.
exprIsCheap (Var _) = True
exprIsCheap (Note _ e) = exprIsCheap e
exprIsCheap (Lam x e) = if isId x then True else exprIsCheap e
-exprIsCheap (Case (Var v) _ alts) = and [exprIsCheap rhs | (_,_,rhs) <- alts]
+exprIsCheap (Case e _ alts) = exprIsCheap e &&
+ and [exprIsCheap rhs | (_,_,rhs) <- alts]
-- Experimentally, treat (case x of ...) as cheap
+ -- (and case __coerce x etc.)
-- This improves arities of overloaded functions where
-- there is only dictionary selection (no construction) involved
+exprIsCheap (Let (NonRec x _) e)
+ | isUnLiftedType (idType x) = exprIsCheap e
+ | otherwise = False
+ -- strict lets always have cheap right hand sides, and
+ -- do no allocation.
+
exprIsCheap other_expr
= go other_expr 0 True
where
|| idAppIsBottom f n_args
-- Application of a function which
- -- always gives bottom; we treat this as
- -- a WHNF, because it certainly doesn't
- -- need to be shared!
+ -- always gives bottom; we treat this as cheap
+ -- because it certainly doesn't need to be shared!
go (App f a) n_args args_cheap
| isTypeArg a = go f n_args args_cheap
\end{code}
\begin{code}
-exprArity :: CoreExpr -> Int -- How many value lambdas are at the top
-exprArity (Lam b e) | isTyVar b = exprArity e
- | otherwise = 1 + exprArity e
-
-exprArity (Note note e) | ok_note note = exprArity e
- where
- ok_note (Coerce _ _) = True
- -- We *do* look through coerces when getting arities.
- -- Reason: arities are to do with *representation* and
- -- work duplication.
- ok_note InlineMe = True
- ok_note InlineCall = True
- ok_note other = False
- -- SCC and TermUsg might be over-conservative?
-
-exprArity other = 0
-\end{code}
-
-\begin{code}
exprIsConApp_maybe :: CoreExpr -> Maybe (DataCon, [CoreExpr])
exprIsConApp_maybe expr
= analyse (collectArgs expr)
import TcInstUtil ( InstInfo(..) )
import CmdLineOpts
-import Id ( Id, idType, idInfo, omitIfaceSigForId, isUserExportedId,
+import Id ( Id, idType, idInfo, omitIfaceSigForId, isUserExportedId, hasNoBinding,
idSpecialisation
)
import Var ( isId )
import Outputable
import Maybe ( isNothing )
+import List ( partition )
import Monad ( when )
\end{code}
all_decls = cls_dcls ++ ty_dcls ++ bagToList val_dcls
(inst_dcls, inst_ids) = ifaceInstances inst_info
cls_dcls = map ifaceClass local_classes
+
ty_dcls = map ifaceTyCon (filter (not . isWiredInName . getName) local_tycons)
(val_dcls, emitted_ids) = ifaceBinds (inst_ids `unionVarSet` orphan_rule_ids)
-- We can't print builtin rules in interface files
-- Since they are built in, an importing module
-- will have access to them anyway
- all (`elemVarSet` emitted) (varSetElems (ruleSomeLhsFreeVars interestingId rule))
+
+ -- Sept 00: I've disabled this test. It doesn't stop many, if any, rules
+ -- from coming out, and to make it work properly we need to add
+ all (`elemVarSet` emitted) (varSetElems (ruleSomeLhsFreeVars interestingId rule))
-- Spit out a rule only if all its lhs free vars are emitted
-- This is a good reason not to do it when we emit the Id itself
]
Nothing -> pprTrace "ifaceBinds not found:" (ppr id) $
idInfo id
+ -- The 'needed' set contains the Ids that are needed by earlier
+ -- interface file emissions. If the Id isn't in this set, and isn't
+ -- exported, there's no need to emit anything
+ need_id needed_set id = id `elemVarSet` needed_set || isUserExportedId id
+
go needed [] decls emitted
| not (isEmptyVarSet needed) = pprTrace "ifaceBinds: free vars:"
(sep (map ppr (varSetElems needed)))
| otherwise = (decls, emitted)
go needed (NonRec id rhs : binds) decls emitted
- = case ifaceId get_idinfo needed False id rhs of
- Nothing -> go needed binds decls emitted
- Just (decl, extras) -> let
- needed' = (needed `unionVarSet` extras) `delVarSet` id
- -- 'extras' can include the Id itself via a rule
- emitted' = emitted `extendVarSet` id
- in
- go needed' binds (decl `consBag` decls) emitted'
+ | need_id needed id
+ = if omitIfaceSigForId id then
+ go (needed `delVarSet` id) binds decls (emitted `extendVarSet` id)
+ else
+ go ((needed `unionVarSet` extras) `delVarSet` id)
+ binds
+ (decl `consBag` decls)
+ (emitted `extendVarSet` id)
+ | otherwise
+ = go needed binds decls emitted
+ where
+ (decl, extras) = ifaceId get_idinfo False id rhs
-- Recursive groups are a bit more of a pain. We may only need one to
-- start with, but it may call out the next one, and so on. So we
- -- have to look for a fixed point.
+ -- have to look for a fixed point. We don't want necessarily them all,
+ -- because without -O we may only need the first one (if we don't emit
+ -- its unfolding)
go needed (Rec pairs : binds) decls emitted
= go needed' binds decls' emitted'
where
go_rec :: IdSet -> [(Id,CoreExpr)] -> (Bag RdrNameHsDecl, IdSet, IdSet)
go_rec needed pairs
| null decls = (emptyBag, emptyVarSet, emptyVarSet)
- | otherwise = (more_decls `unionBags` listToBag decls,
- more_emitted `unionVarSet` mkVarSet emitted,
- more_extras `unionVarSet` extras)
+ | otherwise = (more_decls `unionBags` listToBag decls,
+ more_emitted `unionVarSet` mkVarSet (map fst needed_prs),
+ more_extras `unionVarSet` extras)
where
- maybes = map do_one pairs
- emitted = [id | ((id,_), Just _) <- pairs `zip` maybes]
- reduced_pairs = [pair | (pair, Nothing) <- pairs `zip` maybes]
- (decls, extras_s) = unzip (catMaybes maybes)
- extras = unionVarSets extras_s
- (more_decls, more_emitted, more_extras) = go_rec extras reduced_pairs
-
- do_one (id,rhs) = ifaceId get_idinfo needed True id rhs
+ (needed_prs,leftover_prs) = partition is_needed pairs
+ (decls, extras_s) = unzip [ifaceId get_idinfo True id rhs
+ | (id,rhs) <- needed_prs, not (omitIfaceSigForId id)]
+ extras = unionVarSets extras_s
+ (more_decls, more_emitted, more_extras) = go_rec extras leftover_prs
+ is_needed (id,_) = need_id needed id
\end{code}
\begin{code}
ifaceId :: (Id -> IdInfo) -- This function "knows" the extra info added
-- by the STG passes. Sigh
-
- -> IdSet -- Set of Ids that are needed by earlier interface
- -- file emissions. If the Id isn't in this set, and isn't
- -- exported, there's no need to emit anything
-> Bool -- True <=> recursive, so don't print unfolding
-> Id
-> CoreExpr -- The Id's right hand side
- -> Maybe (RdrNameHsDecl, IdSet) -- The emitted stuff, plus any *extra* needed Ids
-
-ifaceId get_idinfo needed_ids is_rec id rhs
- | not (id `elemVarSet` needed_ids || -- Needed [no id in needed_ids has omitIfaceSigForId]
- (isUserExportedId id && not (omitIfaceSigForId id))) -- or exported and not to be omitted
- = Nothing -- Well, that was easy!
+ -> (RdrNameHsDecl, IdSet) -- The emitted stuff, plus any *extra* needed Ids
-ifaceId get_idinfo needed_ids is_rec id rhs
- = ASSERT2( arity_matches_strictness, ppr id )
- Just (SigD (IfaceSig (toRdrName id) (toHsType id_type) hs_idinfo noSrcLoc),
- new_needed_ids)
+ifaceId get_idinfo is_rec id rhs
+ = (SigD (IfaceSig (toRdrName id) (toHsType id_type) hs_idinfo noSrcLoc), new_needed_ids)
where
id_type = idType id
core_idinfo = idInfo id
strict_hsinfo ++ wrkr_hsinfo ++ unfold_hsinfo
------------ Arity --------------
- arity_info = arityInfo stg_idinfo
+ arity_info = arityInfo stg_idinfo
+ stg_arity = arityLowerBound arity_info
arity_hsinfo = case arityInfo stg_idinfo of
a@(ArityExactly n) -> [HsArity a]
other -> []
------------ Worker --------------
- work_info = workerInfo core_idinfo
- has_worker = workerExists work_info
- wrkr_hsinfo = case work_info of
- HasWorker work_id _ -> [HsWorker (toRdrName work_id)]
- other -> []
+ -- We only treat a function as having a worker if
+ -- the exported arity (which is now the number of visible lambdas)
+ -- is the same as the arity at the moment of the w/w split
+ -- If so, we can safely omit the unfolding inside the wrapper, and
+ -- instead re-generate it from the type/arity/strictness info
+ -- But if the arity has changed, we just take the simple path and
+ -- put the unfolding into the interface file, forgetting the fact
+ -- that it's a wrapper.
+ --
+ -- How can this happen? Sometimes we get
+ -- f = coerce t (\x y -> $wf x y)
+ -- at the moment of w/w split; but the eta reducer turns it into
+ -- f = coerce t $wf
+ -- which is perfectly fine except that the exposed arity so far as
+ -- the code generator is concerned (zero) differs from the arity
+ -- when we did the split (2).
+ --
+ -- All this arises because we use 'arity' to mean "exactly how many
+ -- top level lambdas are there" in interface files; but during the
+ -- compilation of this module it means "how many things can I apply
+ -- this to".
+ work_info = workerInfo core_idinfo
+ HasWorker work_id _ = work_info
+
+ has_worker = case work_info of
+ HasWorker work_id wrap_arity
+ | wrap_arity == stg_arity -> True
+ | otherwise -> pprTrace "ifaceId: arity change:" (ppr id)
+ False
+
+ other -> False
+
+ wrkr_hsinfo | has_worker = [HsWorker (toRdrName work_id)]
+ | otherwise = []
------------ Unfolding --------------
inline_pragma = inlinePragInfo core_idinfo
unfold_ids `unionVarSet`
spec_ids
- worker_ids = case work_info of
- HasWorker work_id _ | interestingId work_id -> unitVarSet work_id
+ worker_ids | has_worker && interestingId work_id = unitVarSet work_id
-- Conceivably, the worker might come from
-- another module
- other -> emptyVarSet
+ | otherwise = emptyVarSet
spec_ids = filterVarSet interestingId (rulesRhsFreeVars spec_info)
HasWorker _ wrap_arity -> wrap_arity == arityLowerBound arity_info
other -> True
-interestingId id = isId id && isLocallyDefined id &&
- not (omitIfaceSigForId id)
+interestingId id = isId id && isLocallyDefined id && not (hasNoBinding id)
\end{code}
In @occAnalTop@ we do indirection-shorting. That is, if we have this:
- loc = <expression>
+ x_local = <expression>
...
- exp = loc
+ x_exported = loc
where exp is exported, and loc is not, then we replace it with this:
- loc = exp
- exp = <expression>
+ x_local = x_exported
+ x_exported = <expression>
...
-Without this we never get rid of the exp = loc thing. This save a
-gratuitous jump (from \tr{x_exported} to \tr{x_local}), and makes
-strictness information propagate better. This used to happen in the
-final phase, but it's tidier to do it here.
+Without this we never get rid of the x_exported = x_local thing. This
+save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
+makes strictness information propagate better. This used to happen in
+the final phase, but it's tidier to do it here.
If more than one exported thing is equal to a local thing (i.e., the
local thing really is shared), then we do one only:
ind_env' = extendVarEnv ind_env local_id exported_id
other -> -- Ho ho! The normal case
- (final_usage, ind_env, new_binds ++ binds')
+ (final_usage, ind_env, new_binds ++ binds')
initialTopEnv = OccEnv isLocallyDefined -- Anything local is interesting
emptyVarSet
ltMajLvl :: Level -> Level -> Bool
-- Tells if one level belongs to a difft *lambda* level to another
- -- But it returns True regardless if l1 is the top level
- -- We always like to float to the top!
-ltMajLvl (Level 0 0) _ = True
ltMajLvl (Level maj1 _) (Level maj2 _) = maj1 < maj2
isTopLvl :: Level -> Bool
instance Outputable Level where
ppr (Level maj min) = hcat [ char '<', int maj, char ',', int min, char '>' ]
+
+instance Eq Level where
+ (Level maj1 min1) == (Level maj2 min2) = maj1==maj2 && min1==min2
\end{code}
%************************************************************************
-- but we do if the function is big and hairy, like a case
lvlExpr ctxt_lvl env (_, AnnNote InlineMe expr)
- -- Don't float anything out of an InlineMe
- = lvlExpr tOP_LEVEL env expr `thenLvl` \ expr' ->
+-- Don't float anything out of an InlineMe; hence the tOP_LEVEL
+ = lvlExpr tOP_LEVEL env expr `thenLvl` \ expr' ->
returnLvl (Note InlineMe expr')
lvlExpr ctxt_lvl env (_, AnnNote note expr)
|| not good_destination
|| exprIsTrivial expr -- Is trivial
|| (strict_ctxt && exprIsBottom expr) -- Strict context and is bottom
+ -- e.g. \x -> error "foo"
+ -- No gain from floating this
= -- Don't float it out
lvlExpr ctxt_lvl env ann_expr
-- VERY IMPORTANT: we must zap the demand info
-- if the thing is going to float out past a lambda
zap_dmd info
- | float_past_lam && isStrict (demandInfo info)
- = setDemandInfo info wwLazy
- | otherwise
- = info
+ | stays_put || not (isStrict (demandInfo info)) = info
+ | otherwise = setDemandInfo info wwLazy
- float_past_lam = ctxt_lvl `ltMajLvl` dest_lvl
+ stays_put = ctxt_lvl == dest_lvl
\end{code}
getSubstEnv, extendSubst, extendSubstList,
getInScope, setInScope, modifyInScope, addNewInScopeIds,
setSubstEnv, zapSubstEnv,
- getSimplBinderStuff, setSimplBinderStuff
+ getSimplBinderStuff, setSimplBinderStuff,
+
+ -- Adding bindings
+ addLetBind, addLetBinds, addAuxiliaryBind, addAuxiliaryBinds,
+ addCaseBind, needsCaseBinding, addNonRecBind
) where
#include "HsVersions.h"
-import Id ( Id, mkSysLocal, idUnfolding, isDataConWrapId )
+import Id ( Id, mkSysLocal, idType, idUnfolding, isDataConWrapId )
import CoreSyn
import CoreUnfold ( isCompulsoryUnfolding )
+import CoreUtils ( exprOkForSpeculation )
import PprCore () -- Instances
import CostCentre ( CostCentreStack, subsumedCCS )
import Name ( isLocallyDefined )
import Subst ( Subst, mkSubst, substEnv,
InScopeSet, mkInScopeSet, substInScope, isInScope
)
-import Type ( Type )
+import Type ( Type, isUnLiftedType )
import UniqSupply ( uniqsFromSupply, uniqFromSupply, splitUniqSupply,
UniqSupply
)
-- incrementally. Comments just before simplExprB in Simplify.lhs
\end{code}
+\begin{code}
+addLetBind :: CoreBind -> SimplM (OutStuff a) -> SimplM (OutStuff a)
+addLetBind bind thing_inside
+ = thing_inside `thenSmpl` \ (binds, res) ->
+ returnSmpl (bind : binds, res)
+
+addLetBinds :: [CoreBind] -> SimplM (OutStuff a) -> SimplM (OutStuff a)
+addLetBinds binds1 thing_inside
+ = thing_inside `thenSmpl` \ (binds2, res) ->
+ returnSmpl (binds1 ++ binds2, res)
+
+addAuxiliaryBinds :: [CoreBind] -> SimplM (OutStuff a) -> SimplM (OutStuff a)
+ -- Extends the in-scope environment as well as wrapping the bindings
+addAuxiliaryBinds binds1 thing_inside
+ = addNewInScopeIds (bindersOfBinds binds1) $
+ addLetBinds binds1 thing_inside
+
+addAuxiliaryBind :: CoreBind -> SimplM (OutStuff a) -> SimplM (OutStuff a)
+ -- Extends the in-scope environment as well as wrapping the bindings
+addAuxiliaryBind bind thing_inside
+ = addNewInScopeIds (bindersOf bind) $
+ addLetBind bind thing_inside
+
+needsCaseBinding ty rhs = isUnLiftedType ty && not (exprOkForSpeculation rhs)
+ -- Make a case expression instead of a let
+ -- These can arise either from the desugarer,
+ -- or from beta reductions: (\x.e) (x +# y)
+
+addCaseBind bndr rhs thing_inside
+ = getInScope `thenSmpl` \ in_scope ->
+ thing_inside `thenSmpl` \ (floats, (_, body)) ->
+ returnSmpl ([], (in_scope, Case rhs bndr [(DEFAULT, [], mkLets floats body)]))
+
+addNonRecBind bndr rhs thing_inside
+ -- Checks for needing a case binding
+ | needsCaseBinding (idType bndr) rhs = addCaseBind bndr rhs thing_inside
+ | otherwise = addLetBind (NonRec bndr rhs) thing_inside
+\end{code}
+
%************************************************************************
%* *
-- The continuation type
SimplCont(..), DupFlag(..), contIsDupable, contResultType,
- countValArgs, countArgs,
+ countValArgs, countArgs, mkRhsStop, mkStop,
getContArgs, interestingCallContext, interestingArg, isStrictType, discardInline
) where
import VarSet
import VarEnv ( SubstEnv, SubstResult(..) )
import Util ( lengthExceeds )
+import BasicTypes ( Arity )
import Outputable
\end{code}
\begin{code}
data SimplCont -- Strict contexts
- = Stop OutType -- Type of the result
+ = Stop OutType -- Type of the result
+ Bool -- True => This is the RHS of a thunk whose type suggests
+ -- that update-in-place would be possible
+ -- (This makes the inliner a little keener.)
| CoerceIt OutType -- The To-type, simplified
SimplCont
-- The result expression in the OutExprStuff has type cont_ty
instance Outputable SimplCont where
- ppr (Stop _) = ptext SLIT("Stop")
+ ppr (Stop _ _) = ptext SLIT("Stop")
ppr (ApplyTo dup arg se cont) = (ptext SLIT("ApplyTo") <+> ppr dup <+> ppr arg) $$ ppr cont
ppr (ArgOf dup _ _) = ptext SLIT("ArgOf...") <+> ppr dup
ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$
ppr OkToDup = ptext SLIT("ok")
ppr NoDup = ptext SLIT("nodup")
+
+-------------------
+mkRhsStop, mkStop :: OutType -> SimplCont
+mkStop ty = Stop ty False
+mkRhsStop ty = Stop ty (canUpdateInPlace ty)
+
+
-------------------
contIsDupable :: SimplCont -> Bool
-contIsDupable (Stop _) = True
+contIsDupable (Stop _ _) = True
contIsDupable (ApplyTo OkToDup _ _ _) = True
contIsDupable (ArgOf OkToDup _ _) = True
contIsDupable (Select OkToDup _ _ _ _) = True
-------------------
discardableCont :: SimplCont -> Bool
-discardableCont (Stop _) = False
+discardableCont (Stop _ _) = False
discardableCont (CoerceIt _ cont) = discardableCont cont
discardableCont (InlinePlease cont) = discardableCont cont
discardableCont other = True
discardCont :: SimplCont -- A continuation, expecting
-> SimplCont -- Replace the continuation with a suitable coerce
-discardCont (Stop to_ty) = Stop to_ty
-discardCont cont = CoerceIt to_ty (Stop to_ty)
- where
- to_ty = contResultType cont
+discardCont cont = case cont of
+ Stop to_ty _ -> cont
+ other -> CoerceIt to_ty (mkStop to_ty)
+ where
+ to_ty = contResultType cont
-------------------
contResultType :: SimplCont -> OutType
-contResultType (Stop to_ty) = to_ty
+contResultType (Stop to_ty _) = to_ty
contResultType (ArgOf _ to_ty _) = to_ty
contResultType (ApplyTo _ _ _ cont) = contResultType cont
contResultType (CoerceIt _ cont) = contResultType cont
where
analyse (Var v)
= case lookupIdSubst (mkSubst in_scope subst) v of
- DoneId v' _ -> hasSomeUnfolding (idUnfolding v')
- -- was: isValueUnfolding (idUnfolding v')
- -- But that seems over-pessimistic
-
- other -> True -- was: False
- -- But that is *definitely* too pessimistic.
- -- E.g. let x = 3 in f
- -- Here, x will be unconditionally substituted, via
- -- the substitution!
+ ContEx subst arg -> interestingArg in_scope arg subst
+ DoneEx arg -> analyse arg
+ DoneId v' _ -> hasSomeUnfolding (idUnfolding v')
+ -- Was: isValueUnfolding (idUnfolding v')
+ -- But that seems over-pessimistic
+
+ -- NB: it's too pessimistic to return False for ContEx/DoneEx
+ -- Consider let x = 3 in f x
+ -- The substitution will contain (x -> ContEx 3)
+ -- It's also too optimistic to return True for the ContEx/DoneEx case
+ -- Consider (\x. f x y) y
+ -- The substitution will contain (x -> ContEx y).
+
analyse (Type _) = False
analyse (App fn (Type _)) = analyse fn
analyse (Note _ a) = analyse a
-- as scrutinee of a case Select
-- as arg of a strict fn ArgOf
-- then we should not inline it (unless there is some other reason,
- -- e.g. is is the sole occurrence).
- -- Why not? At least in the case-scrutinee situation, turning
- -- case x of y -> ...
+ -- e.g. is is the sole occurrence). We achieve this by making
+ -- interestingCallContext return False for a lone variable.
+ --
+ -- Why? At least in the case-scrutinee situation, turning
+ -- let x = (a,b) in case x of y -> ...
-- into
- -- let y = (a,b) in ...
+ -- let x = (a,b) in case (a,b) of y -> ...
+ -- and thence to
+ -- let x = (a,b) in let y = (a,b) in ...
-- is bad if the binding for x will remain.
--
-- Another example: I discovered that strings
-- the context can ``see'' the unfolding of the variable (e.g. case or a RULE)
-- so there's no gain.
--
- -- However, even a type application isn't a lone variable. Consider
+ -- However, even a type application or coercion isn't a lone variable.
+ -- Consider
-- case $fMonadST @ RealWorld of { :DMonad a b c -> c }
-- We had better inline that sucker! The case won't see through it.
--
- -- For now, I'm treating treating a variable applied to types as
- -- "lone". The motivating example was
+ -- For now, I'm treating treating a variable applied to types
+ -- in a *lazy* context "lone". The motivating example was
-- f = /\a. \x. BIG
-- g = /\a. \y. h (f a)
-- There's no advantage in inlining f here, and perhaps
interestingCallContext some_args some_val_args cont
= interesting cont
where
- interesting (InlinePlease _) = True
- interesting (ApplyTo _ _ _ _) = some_args -- Can happen if we have (coerce t (f x)) y
- interesting (Select _ _ _ _ _) = some_args
- interesting (ArgOf _ _ _) = some_val_args
- interesting (Stop ty) = some_val_args && canUpdateInPlace ty
- interesting (CoerceIt _ cont) = interesting cont
+ interesting (InlinePlease _) = True
+ interesting (Select _ _ _ _ _) = some_args
+ interesting (ApplyTo _ _ _ _) = some_args -- Can happen if we have (coerce t (f x)) y
+ interesting (ArgOf _ _ _) = some_val_args
+ interesting (Stop ty upd_in_place) = some_val_args && upd_in_place
+ interesting (CoerceIt _ cont) = interesting cont
-- If this call is the arg of a strict function, the context
-- is a bit interesting. If we inline here, we may get useful
-- evaluation information to avoid repeated evals: e.g.
(b) type-lambda swizzling
\begin{code}
-transformRhs :: InExpr -> SimplM InExpr
-transformRhs rhs
- = tryEtaExpansion body `thenSmpl` \ body' ->
- mkRhsTyLam tyvars body'
- where
- (tyvars, body) = collectTyBinders rhs
+transformRhs :: OutExpr
+ -> (Arity -> OutExpr -> SimplM (OutStuff a))
+ -> SimplM (OutStuff a)
+
+transformRhs rhs thing_inside
+ = tryRhsTyLam rhs $ \ rhs1 ->
+ tryEtaExpansion rhs1 thing_inside
\end{code}
This optimisation is CRUCIAL in eliminating the junk introduced by
desugaring mutually recursive definitions. Don't eliminate it lightly!
-So far as the implemtation is concerned:
+So far as the implementation is concerned:
Invariant: go F e = /\tvs -> F e
\begin{code}
-mkRhsTyLam tyvars body -- Only does something if there's a let
+tryRhsTyLam rhs thing_inside -- Only does something if there's a let
| null tyvars || not (worth_it body) -- inside a type lambda, and a WHNF inside that
- = returnSmpl (mkLams tyvars body)
+ = thing_inside rhs
| otherwise
- = go (\x -> x) body
+ = go (\x -> x) body $ \ body' ->
+ thing_inside (mkLams tyvars body')
+
where
+ (tyvars, body) = collectTyBinders rhs
+
worth_it (Let _ e) = whnf_in_middle e
worth_it other = False
whnf_in_middle (Let _ e) = whnf_in_middle e
whnf_in_middle e = exprIsCheap e
- go fn (Let bind@(NonRec var rhs) body) | exprIsTrivial rhs
- = go (fn . Let bind) body
+ go fn (Let bind@(NonRec var rhs) body) thing_inside
+ | exprIsTrivial rhs
+ = go (fn . Let bind) body thing_inside
+
+ go fn (Let bind@(NonRec var rhs) body) thing_inside
+ = mk_poly tyvars_here var `thenSmpl` \ (var', rhs') ->
+ addAuxiliaryBind (NonRec var' (mkLams tyvars_here (fn rhs))) $
+ go (fn . Let (mk_silly_bind var rhs')) body thing_inside
- go fn (Let bind@(NonRec var rhs) body)
- = mk_poly tyvars_here var `thenSmpl` \ (var', rhs') ->
- go (fn . Let (mk_silly_bind var rhs')) body `thenSmpl` \ body' ->
- returnSmpl (Let (NonRec var' (mkLams tyvars_here (fn rhs))) body')
where
tyvars_here = tyvars
-- main_tyvar_set = mkVarSet tyvars
-- abstracting wrt *all* the tyvars. We'll see if that
-- gives rise to problems. SLPJ June 98
- go fn (Let (Rec prs) body)
+ go fn (Let (Rec prs) body) thing_inside
= mapAndUnzipSmpl (mk_poly tyvars_here) vars `thenSmpl` \ (vars', rhss') ->
let
- gn body = fn $ foldr Let body (zipWith mk_silly_bind vars rhss')
+ gn body = fn (foldr Let body (zipWith mk_silly_bind vars rhss'))
in
- go gn body `thenSmpl` \ body' ->
- returnSmpl (Let (Rec (vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss])) body')
+ addAuxiliaryBind (Rec (vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss])) $
+ go gn body thing_inside
where
(vars,rhss) = unzip prs
tyvars_here = tyvars
-- See notes with tyvars_here above
- go fn body = returnSmpl (mkLams tyvars (fn body))
+ go fn body thing_inside = thing_inside (fn body)
mk_poly tyvars_here var
= getUniqueSmpl `thenSmpl` \ uniq ->
let
poly_name = setNameUnique (idName var) uniq -- Keep same name
poly_ty = mkForAllTys tyvars_here (idType var) -- But new type of course
+ poly_id = mkId poly_name poly_ty vanillaIdInfo
- -- It's crucial to copy the occInfo of the original var, because
- -- we're looking at occurrence-analysed but as yet unsimplified code!
- -- In particular, we mustn't lose the loop breakers.
+ -- In the olden days, it was crucial to copy the occInfo of the original var,
+ -- because we were looking at occurrence-analysed but as yet unsimplified code!
+ -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking
+ -- at already simplified code, so it doesn't matter
--
-- It's even right to retain single-occurrence or dead-var info:
-- Suppose we started with /\a -> let x = E in B
-- where x* has an INLINE prag on it. Now, once x* is inlined,
-- the occurrences of x' will be just the occurrences originally
-- pinned on x.
- poly_info = vanillaIdInfo `setOccInfo` idOccInfo var
-
- poly_id = mkId poly_name poly_ty poly_info
+ -- poly_info = vanillaIdInfo `setOccInfo` idOccInfo var
in
returnSmpl (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tyvars_here))
mk_silly_bind var rhs = NonRec var rhs
- -- We need to be careful about inlining.
-- Suppose we start with:
--
-- x = let g = /\a -> \x -> f x x
-- * so we're back to square one
-- We rely on the simplifier not to inline g into the RHS of g*,
-- because it's a "lone" occurrence, and there is no benefit in
- -- inlining. But it's a slightly delicate property, and there's
- -- a danger of making the simplifier loop here.
+ -- inlining. But it's a slightly delicate property; hence this comment
\end{code}
Try eta expansion for RHSs
We go for:
- \x1..xn -> N ==> \x1..xn y1..ym -> N y1..ym
- AND
- N E1..En ==> let z1=E1 .. zn=En in \y1..ym -> N z1..zn y1..ym
+ Case 1 f = \x1..xn -> N ==> f = \x1..xn y1..ym -> N y1..ym
+ (n >= 0)
+ OR
+ Case 2 f = N E1..En ==> z1=E1
+ (n > 0) ..
+ zn=En
+ f = \y1..ym -> N z1..zn y1..ym
+
+where (in both cases)
-where (in both cases) N is a NORMAL FORM (i.e. no redexes anywhere)
-wanting a suitable number of extra args.
+ * The xi can include type variables
-NB: the Ei may have unlifted type, but the simplifier (which is applied
-to the result) deals OK with this.
+ * The yi are all value variables
-There is no point in looking for a combination of the two,
-because that would leave use with some lets sandwiched between lambdas;
-that's what the final test in the first equation is for.
+ * N is a NORMAL FORM (i.e. no redexes anywhere)
+ wanting a suitable number of extra args.
+
+ * the Ei must not have unlifted type
+
+There is no point in looking for a combination of the two, because
+that would leave use with some lets sandwiched between lambdas; that's
+what the final test in the first equation is for.
\begin{code}
-tryEtaExpansion :: InExpr -> SimplM InExpr
-tryEtaExpansion rhs
+tryEtaExpansion :: OutExpr
+ -> (Arity -> OutExpr -> SimplM (OutStuff a))
+ -> SimplM (OutStuff a)
+tryEtaExpansion rhs thing_inside
| not opt_SimplDoLambdaEtaExpansion
- || exprIsTrivial rhs -- Don't eta-expand a trival RHS
- || null y_tys -- No useful expansion
- || not (null x_bndrs || and trivial_args) -- Not (no x-binders or no z-binds)
- = returnSmpl rhs
-
- | otherwise -- Consider eta expansion
- = newIds SLIT("y") y_tys $ ( \ y_bndrs ->
- tick (EtaExpansion (head y_bndrs)) `thenSmpl_`
- mapAndUnzipSmpl bind_z_arg (args `zip` trivial_args) `thenSmpl` (\ (maybe_z_binds, z_args) ->
- returnSmpl (mkLams x_bndrs $
- mkLets (catMaybes maybe_z_binds) $
- mkLams y_bndrs $
- mkApps (mkApps fun z_args) (map Var y_bndrs))))
+ || null y_tys -- No useful expansion
+ || not (is_case1 || is_case2) -- Neither case matches
+ = thing_inside final_arity rhs -- So, no eta expansion, but
+ -- return a good arity
+
+ | is_case1
+ = make_y_bndrs $ \ y_bndrs ->
+ thing_inside final_arity
+ (mkLams x_bndrs $ mkLams y_bndrs $
+ mkApps body (map Var y_bndrs))
+
+ | otherwise -- Must be case 2
+ = mapAndUnzipSmpl bind_z_arg arg_infos `thenSmpl` \ (maybe_z_binds, z_args) ->
+ addAuxiliaryBinds (catMaybes maybe_z_binds) $
+ make_y_bndrs $ \ y_bndrs ->
+ thing_inside final_arity
+ (mkLams y_bndrs $
+ mkApps (mkApps fun z_args) (map Var y_bndrs))
where
- (x_bndrs, body) = collectValBinders rhs
- (fun, args) = collectArgs body
- trivial_args = map exprIsTrivial args
- fun_arity = exprEtaExpandArity fun
+ all_trivial_args = all is_trivial arg_infos
+ is_case1 = all_trivial_args
+ is_case2 = null x_bndrs && not (any unlifted_non_trivial arg_infos)
+
+ (x_bndrs, body) = collectBinders rhs -- NB: x_bndrs can include type variables
+ x_arity = valBndrCount x_bndrs
- bind_z_arg (arg, trivial_arg)
+ (fun, args) = collectArgs body
+ arg_infos = [(arg, exprType arg, exprIsTrivial arg) | arg <- args]
+
+ is_trivial (_, _, triv) = triv
+ unlifted_non_trivial (_, ty, triv) = not triv && isUnLiftedType ty
+
+ fun_arity = exprEtaExpandArity fun
+
+ final_arity | all_trivial_args = x_arity + extra_args_wanted
+ | otherwise = x_arity
+ -- Arity can be more than the number of lambdas
+ -- because of coerces. E.g. \x -> coerce t (\y -> e)
+ -- will have arity at least 2
+ -- The worker/wrapper pass will bring the coerce out to the top
+
+ bind_z_arg (arg, arg_ty, trivial_arg)
| trivial_arg = returnSmpl (Nothing, arg)
- | otherwise = newId SLIT("z") (exprType arg) $ \ z ->
+ | otherwise = newId SLIT("z") arg_ty $ \ z ->
returnSmpl (Just (NonRec z arg), Var z)
- -- Note: I used to try to avoid the exprType call by using
- -- the type of the binder. But this type doesn't necessarily
- -- belong to the same substitution environment as this rhs;
- -- and we are going to make extra term binders (y_bndrs) from the type
- -- which will be processed with the rhs substitution environment.
- -- This only went wrong in a mind bendingly complicated case.
+ make_y_bndrs thing_inside
+ = ASSERT( not (exprIsTrivial rhs) )
+ newIds SLIT("y") y_tys $ \ y_bndrs ->
+ tick (EtaExpansion (head y_bndrs)) `thenSmpl_`
+ thing_inside y_bndrs
+
(potential_extra_arg_tys, _) = splitFunTys (exprType body)
y_tys :: [InType]
- y_tys = take no_extras_wanted potential_extra_arg_tys
+ y_tys = take extra_args_wanted potential_extra_arg_tys
- no_extras_wanted :: Int
- no_extras_wanted = 0 `max`
+ extra_args_wanted :: Int -- Number of extra args we want
+ extra_args_wanted = 0 `max` (fun_arity - valArgCount args)
-- We used to expand the arity to the previous arity fo the
-- function; but this is pretty dangerous. Consdier
-- f = \xy -> let z = BIG in e
--
-- (bndr_arity - no_of_xs) `max`
-
- -- See if the body could obviously do with more args
- (fun_arity - valArgCount args)
-
--- This case is now deal with by exprEtaExpandArity
- -- Finally, see if it's a state transformer, and xs is non-null
- -- (so it's also a function not a thunk) in which
- -- case we eta-expand on principle! This can waste work,
- -- but usually doesn't.
- -- I originally checked for a singleton type [ty] in this case
- -- but then I found a situation in which I had
- -- \ x -> let {..} in \ s -> f (...) s
- -- AND f RETURNED A FUNCTION. That is, 's' wasn't the only
- -- potential extra arg.
--- case (x_bndrs, potential_extra_arg_tys) of
--- (_:_, ty:_) -> case splitTyConApp_maybe ty of
--- Just (tycon,_) | tycon == statePrimTyCon -> 1
--- other -> 0
--- other -> 0
\end{code}
import SimplMonad
import SimplUtils ( mkCase, transformRhs, findAlt,
simplBinder, simplBinders, simplIds, findDefault,
- SimplCont(..), DupFlag(..),
+ SimplCont(..), DupFlag(..), mkStop, mkRhsStop,
contResultType, discardInline, countArgs, contIsDupable,
getContArgs, interestingCallContext, interestingArg, isStrictType
)
import Var ( mkSysTyVar, tyVarKind )
import VarEnv
+import VarSet ( elemVarSet )
import Id ( Id, idType, idInfo, isDataConId,
idUnfolding, setIdUnfolding, isExportedId, isDeadBinder,
idDemandInfo, setIdInfo,
idOccInfo, setIdOccInfo,
zapLamIdInfo, setOneShotLambda,
)
-import IdInfo ( OccInfo(..), ArityInfo(..),
- setArityInfo, setUnfoldingInfo,
+import IdInfo ( OccInfo(..), isDeadOcc, isLoopBreaker,
+ ArityInfo, setArityInfo, atLeastArity,
+ setUnfoldingInfo,
occInfo
)
import Demand ( Demand, isStrict )
dataConSig, dataConArgTys
)
import CoreSyn
-import CoreFVs ( mustHaveLocalBinding )
+import CoreFVs ( mustHaveLocalBinding, exprFreeVars )
import CoreUnfold ( mkOtherCon, mkUnfolding, otherCons,
callSiteInline
)
import CoreUtils ( cheapEqExpr, exprIsDupable, exprIsTrivial, exprIsConApp_maybe,
- exprType, coreAltsType, exprArity, exprIsValue, idAppIsCheap,
+ exprType, coreAltsType, exprIsValue, idAppIsCheap,
exprOkForSpeculation, etaReduceExpr,
mkCoerce, mkSCC, mkInlineMe, mkAltExpr
)
import TyCon ( isDataTyCon, tyConDataConsIfAvailable )
import TysPrim ( realWorldStatePrimTy )
import PrelInfo ( realWorldPrimId )
-import BasicTypes ( isLoopBreaker )
import Maybes ( maybeToBool )
import Util ( zipWithEqual )
import Outputable
%* *
%************************************************************************
-\begin{code}
-addLetBind :: OutId -> OutExpr -> SimplM (OutStuff a) -> SimplM (OutStuff a)
-addLetBind bndr rhs thing_inside
- = thing_inside `thenSmpl` \ (binds, res) ->
- returnSmpl (NonRec bndr rhs : binds, res)
-
-addLetBinds :: [CoreBind] -> SimplM (OutStuff a) -> SimplM (OutStuff a)
-addLetBinds binds1 thing_inside
- = thing_inside `thenSmpl` \ (binds2, res) ->
- returnSmpl (binds1 ++ binds2, res)
-
-needsCaseBinding ty rhs = isUnLiftedType ty && not (exprOkForSpeculation rhs)
- -- Make a case expression instead of a let
- -- These can arise either from the desugarer,
- -- or from beta reductions: (\x.e) (x +# y)
-
-addCaseBind bndr rhs thing_inside
- = getInScope `thenSmpl` \ in_scope ->
- thing_inside `thenSmpl` \ (floats, (_, body)) ->
- returnSmpl ([], (in_scope, Case rhs bndr [(DEFAULT, [], mkLets floats body)]))
-
-addNonRecBind bndr rhs thing_inside
- -- Checks for needing a case binding
- | needsCaseBinding (idType bndr) rhs = addCaseBind bndr rhs thing_inside
- | otherwise = addLetBind bndr rhs thing_inside
-\end{code}
-
The reason for this OutExprStuff stuff is that we want to float *after*
simplifying a RHS, not before. If we do so naively we get quadratic
behaviour as things float out.
\begin{code}
simplExpr :: CoreExpr -> SimplM CoreExpr
simplExpr expr = getSubst `thenSmpl` \ subst ->
- simplExprC expr (Stop (substTy subst (exprType expr)))
+ simplExprC expr (mkStop (substTy subst (exprType expr)))
-- The type in the Stop continuation is usually not used
-- It's only needed when discarding continuations after finding
-- a function that returns bottom.
-- If case-of-case is off, simply simplify the case expression
-- in a vanilla Stop context, and rebuild the result around it
simplExprC scrut (Select NoDup bndr alts subst_env
- (Stop (contResultType cont))) `thenSmpl` \ case_expr' ->
+ (mkStop (contResultType cont))) `thenSmpl` \ case_expr' ->
rebuild case_expr' cont
simplExprF expr@(Lam _ _) cont = simplLam expr cont
simplExprF (Type ty) cont
- = ASSERT( case cont of { Stop _ -> True; ArgOf _ _ _ -> True; other -> False } )
+ = ASSERT( case cont of { Stop _ _ -> True; ArgOf _ _ _ -> True; other -> False } )
simplType ty `thenSmpl` \ ty' ->
rebuild (Type ty') cont
simplExprF (Note InlineMe e) cont
= case cont of
- Stop _ -> -- Totally boring continuation
+ Stop _ _ -> -- Totally boring continuation
-- Don't inline inside an INLINE expression
setBlackList noInlineBlackList (simplExpr e) `thenSmpl` \ e' ->
rebuild (mkInlineMe e') cont
go expr cont = simplExprF expr cont
-- completeLam deals with the case where a lambda doesn't have an ApplyTo
--- continuation.
--- We used to try for eta reduction here, but I found that this was
--- eta reducing things like
--- f = \x -> (coerce (\x -> e))
--- This made f's arity reduce, which is a bad thing, so I removed the
--- eta reduction at this point, and now do it only when binding
--- (at the call to postInlineUnconditionally)
-
-completeLam acc (Lam bndr body) cont
+-- continuation, so there are real lambdas left to put in the result
+
+-- We try for eta reduction here, but *only* if we get all the
+-- way to an exprIsTrivial expression.
+-- We don't want to remove extra lambdas unless we are going
+-- to avoid allocating this thing altogether
+
+completeLam rev_bndrs (Lam bndr body) cont
= simplBinder bndr $ \ bndr' ->
- completeLam (bndr':acc) body cont
+ completeLam (bndr':rev_bndrs) body cont
-completeLam acc body cont
+completeLam rev_bndrs body cont
= simplExpr body `thenSmpl` \ body' ->
- rebuild (foldl (flip Lam) body' acc) cont
- -- Remember, acc is the *reversed* binders
+ case try_eta body' of
+ Just etad_lam -> tick (EtaReduction (head rev_bndrs)) `thenSmpl_`
+ rebuild etad_lam cont
+
+ Nothing -> rebuild (foldl (flip Lam) body' rev_bndrs) cont
+ where
+ -- We don't use CoreUtils.etaReduceExpr, because we can be more
+ -- efficient here: (a) we already have the binders, (b) we can do
+ -- the triviality test before computing the free vars
+ try_eta body | not opt_SimplDoEtaReduction = Nothing
+ | otherwise = go rev_bndrs body
+
+ go (b : bs) (App fun arg) | ok_arg b arg = go bs fun -- Loop round
+ go [] body | ok_body body = Just body -- Success!
+ go _ _ = Nothing -- Failure!
+
+ ok_body body = exprIsTrivial body && not (any (`elemVarSet` exprFreeVars body) rev_bndrs)
+ ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
mkLamBndrZapper :: CoreExpr -- Function
-> SimplCont -- The context
simplValArg arg_ty is_strict arg arg_se cont_ty thing_inside
| is_strict
- = transformRhs arg `thenSmpl` \ t_arg ->
- getEnv `thenSmpl` \ env ->
+ = getEnv `thenSmpl` \ env ->
setSubstEnv arg_se $
- simplExprF t_arg (ArgOf NoDup cont_ty $ \ rhs' ->
+ simplExprF arg (ArgOf NoDup cont_ty $ \ rhs' ->
setAllExceptInScope env $
- thing_inside (etaFirst rhs'))
+ thing_inside rhs')
| otherwise
= simplRhs False {- Not top level -}
True {- OK to float unboxed -}
arg_ty arg arg_se
thing_inside
-
--- Do eta-reduction on the simplified RHS, if eta reduction is on
--- But *only* if we get all the way to an exprIsTrivial expression.
--- We don't want to remove extra lambdas unless we are going
--- to avoid allocating this thing altogether
-etaFirst rhs
- | opt_SimplDoEtaReduction && exprIsTrivial rhs' = rhs'
- | otherwise = rhs
- where
- rhs' = etaReduceExpr rhs
\end{code}
-> SimplM (OutStuff a)
completeBinding old_bndr new_bndr top_lvl black_listed new_rhs thing_inside
- | (case occ_info of -- This happens; for example, the case_bndr during case of
- IAmDead -> True -- known constructor: case (a,b) of x { (p,q) -> ... }
- other -> False) -- Here x isn't mentioned in the RHS, so we don't want to
+ | isDeadOcc occ_info -- This happens; for example, the case_bndr during case of
+ -- known constructor: case (a,b) of x { (p,q) -> ... }
+ -- Here x isn't mentioned in the RHS, so we don't want to
-- create the (dead) let-binding let x = (a,b) in ...
= thing_inside
- | postInlineUnconditionally black_listed occ_info old_bndr new_rhs
- -- Maybe we don't need a let-binding! Maybe we can just
- -- inline it right away. Unlike the preInlineUnconditionally case
- -- we are allowed to look at the RHS.
+ | exprIsTrivial new_rhs
+ = completeTrivialBinding old_bndr new_bndr
+ black_listed loop_breaker new_rhs
+ thing_inside
+
+ | Note coercion@(Coerce _ inner_ty) inner_rhs <- new_rhs
+ -- x = coerce t e ==> c = e; x = inline_me (coerce t c)
+ -- Now x can get inlined, which moves the coercion
+ -- to the usage site. This is a bit like worker/wrapper stuff,
+ -- but it's useful to do it very promptly, so that
+ -- x = coerce T (I# 3)
+ -- get's w/wd to
+ -- c = I# 3
+ -- x = coerce T $wx
+ -- This in turn means that
+ -- case (coerce Int x) of ...
+ -- will inline x.
+ -- Also the full-blown w/w thing isn't set up for non-functions
--
- -- NB: a loop breaker never has postInlineUnconditionally True
- -- and non-loop-breakers only have *forward* references
- -- Hence, it's safe to discard the binding
- --
- -- NB: You might think that postInlineUnconditionally is an optimisation,
- -- but if we have
- -- let x = f Bool in (x, y)
- -- then because of the constructor, x will not be *inlined* in the pair,
- -- so the trivial binding will stay. But in this postInlineUnconditionally
- -- gag we use the *substitution* to substitute (f Bool) for x, and that *will*
- -- happen.
- = tick (PostInlineUnconditionally old_bndr) `thenSmpl_`
- extendSubst old_bndr (DoneEx new_rhs)
- thing_inside
+ -- The inline_me note is so that the simplifier doesn't
+ -- just substitute c back inside x's rhs! (Typically, x will
+ -- get substituted away, but not if it's exported.)
+ = newId SLIT("c") inner_ty $ \ c_id ->
+ completeBinding c_id c_id top_lvl False inner_rhs $
+ completeTrivialBinding old_bndr new_bndr black_listed loop_breaker
+ (Note InlineMe (Note coercion (Var c_id))) $
+ thing_inside
+
| otherwise
- = getSubst `thenSmpl` \ subst ->
+ = transformRhs new_rhs $ \ arity new_rhs' ->
+ getSubst `thenSmpl` \ subst ->
let
-- We make new IdInfo for the new binder by starting from the old binder,
-- doing appropriate substitutions.
-- Then we add arity and unfolding info to get the new binder
- old_info = idInfo old_bndr
new_bndr_info = substIdInfo subst old_info (idInfo new_bndr)
- `setArityInfo` ArityAtLeast (exprArity new_rhs)
+ `setArityInfo` atLeastArity arity
-- Add the unfolding *only* for non-loop-breakers
-- Making loop breakers not have an unfolding at all
-- means that we can avoid tests in exprIsConApp, for example.
-- This is important: if exprIsConApp says 'yes' for a recursive
-- thing, then we can get into an infinite loop
- info_w_unf | isLoopBreaker (occInfo old_info) = new_bndr_info
- | otherwise = new_bndr_info `setUnfoldingInfo` mkUnfolding top_lvl new_rhs
+ info_w_unf | loop_breaker = new_bndr_info
+ | otherwise = new_bndr_info `setUnfoldingInfo` mkUnfolding top_lvl new_rhs'
final_id = new_bndr `setIdInfo` info_w_unf
in
-- These seqs forces the Id, and hence its IdInfo,
-- and hence any inner substitutions
final_id `seq`
- addLetBind final_id new_rhs $
+ addLetBind (NonRec final_id new_rhs') $
modifyInScope new_bndr final_id thing_inside
where
- occ_info = idOccInfo old_bndr
+ old_info = idInfo old_bndr
+ occ_info = occInfo old_info
+ loop_breaker = isLoopBreaker occ_info
\end{code}
+\begin{code}
+completeTrivialBinding old_bndr new_bndr black_listed loop_breaker new_rhs thing_inside
+ -- We're looking at a binding with a trivial RHS, so
+ -- perhaps we can discard it altogether!
+ --
+ -- NB: a loop breaker never has postInlineUnconditionally True
+ -- and non-loop-breakers only have *forward* references
+ -- Hence, it's safe to discard the binding
+ --
+ -- NB: You might think that postInlineUnconditionally is an optimisation,
+ -- but if we have
+ -- let x = f Bool in (x, y)
+ -- then because of the constructor, x will not be *inlined* in the pair,
+ -- so the trivial binding will stay. But in this postInlineUnconditionally
+ -- gag we use the *substitution* to substitute (f Bool) for x, and that *will*
+ -- happen.
+
+ -- NOTE: This isn't our last opportunity to inline.
+ -- We're at the binding site right now, and
+ -- we'll get another opportunity when we get to the ocurrence(s)
+
+ -- Note that we do this unconditional inlining only for trival RHSs.
+ -- Don't inline even WHNFs inside lambdas; doing so may
+ -- simply increase allocation when the function is called
+ -- This isn't the last chance; see NOTE above.
+ --
+ -- NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here
+ -- Why? Because we don't even want to inline them into the
+ -- RHS of constructor arguments. See NOTE above
+ --
+ -- NB: Even NOINLINEis ignored here: if the rhs is trivial
+ -- it's best to inline it anyway. We often get a=E; b=a
+ -- from desugaring, with both a and b marked NOINLINE.
+
+ | not keep_binding -- Can discard binding, inlining everywhere
+ = extendSubst old_bndr (DoneEx new_rhs) $
+ tick (PostInlineUnconditionally old_bndr) `thenSmpl_`
+ thing_inside
+
+ | otherwise -- We must keep the binding, but we may still inline
+ = getSubst `thenSmpl` \ subst ->
+ let
+ new_bndr_info = substIdInfo subst (idInfo old_bndr) (idInfo new_bndr)
+ final_id = new_bndr `setIdInfo` new_bndr_info
+ in
+ addLetBind (NonRec final_id new_rhs) $
+ if dont_inline then
+ modifyInScope new_bndr final_id thing_inside
+ else
+ extendSubst old_bndr (DoneEx new_rhs) thing_inside
+ where
+ dont_inline = black_listed || loop_breaker
+ keep_binding = dont_inline || isExportedId old_bndr
+\end{code}
+
+
%************************************************************************
%* *
\subsection{simplLazyBind}
\begin{code}
simplRhs :: Bool -- True <=> Top level
-> Bool -- True <=> OK to float unboxed (speculative) bindings
+ -- False for (a) recursive and (b) top-level bindings
-> OutType -- Type of RHS; used only occasionally
-> InExpr -> SubstEnv
-> (OutExpr -> SimplM (OutStuff a))
-> SimplM (OutStuff a)
simplRhs top_lvl float_ubx rhs_ty rhs rhs_se thing_inside
- = -- Swizzle the inner lets past the big lambda (if any)
- -- and try eta expansion
- transformRhs rhs `thenSmpl` \ t_rhs ->
-
- -- Simplify it
- setSubstEnv rhs_se (simplExprF t_rhs (Stop rhs_ty)) `thenSmpl` \ (floats, (in_scope', rhs')) ->
+ = -- Simplify it
+ setSubstEnv rhs_se (simplExprF rhs (mkRhsStop rhs_ty)) `thenSmpl` \ (floats, (in_scope', rhs')) ->
-- Float lets out of RHS
let
WARN( any demanded_float floats_out, ppr floats_out )
addLetBinds floats_out $
setInScope in_scope' $
- thing_inside (etaFirst rhs'')
+ thing_inside rhs''
-- in_scope' may be excessive, but that's OK;
-- it's a superset of what's in scope
else
-- Don't do the float
- thing_inside (etaFirst (mkLets floats rhs'))
+ thing_inside (mkLets floats rhs')
-- In a let-from-let float, we just tick once, arbitrarily
-- choosing the first floated binder to identify it
-- v = E
-- z = \w -> g v w
-- Which is what we want; chances are z will be inlined now.
---
--- This defn isn't quite like
--- exprIsCheap (it ignores non-cheap args)
--- exprIsValue (may not say True for a lone variable)
--- which is slightly weird
+
wantToExpose n (Var v) = idAppIsCheap v n
wantToExpose n (Lit l) = True
wantToExpose n (Lam _ e) = True
OneOcc in_lam once -> not in_lam && once
-- Not inside a lambda, one occurrence ==> safe!
other -> False
-
-
-postInlineUnconditionally :: Bool -- Black listed
- -> OccInfo
- -> InId -> OutExpr -> Bool
- -- Examines a (bndr = rhs) binding, AFTER the rhs has been simplified
- -- It returns True if it's ok to discard the binding and inline the
- -- RHS at every use site.
-
- -- NOTE: This isn't our last opportunity to inline.
- -- We're at the binding site right now, and
- -- we'll get another opportunity when we get to the ocurrence(s)
-
-postInlineUnconditionally black_listed occ_info bndr rhs
- | isExportedId bndr = False -- Don't inline these, ever
- | black_listed = False
- | isLoopBreaker occ_info = False
- | otherwise = exprIsTrivial rhs -- Duplicating is free
- -- Don't inline even WHNFs inside lambdas; doing so may
- -- simply increase allocation when the function is called
- -- This isn't the last chance; see NOTE above.
- --
- -- NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here
- -- Why? Because we don't even want to inline them into the
- -- RHS of constructor arguments. See NOTE above
- --
- -- NB: Even NOINLINEis ignored here: if the rhs is trivial
- -- it's best to inline it anyway. We often get a=E; b=a
- -- from desugaring, with both a and b marked NOINLINE.
\end{code}
rebuild :: OutExpr -> SimplCont -> SimplM OutExprStuff
-- Stop continuation
-rebuild expr (Stop _) = rebuild_done expr
+rebuild expr (Stop _ _) = rebuild_done expr
-- ArgOf continuation
rebuild expr (ArgOf _ _ cont_fn) = cont_fn expr
-- Want to tick here so that we go round again,
-- and maybe copy or inline the code;
-- not strictly CaseOf Case
- addLetBind join_id join_rhs (thing_inside new_cont)
+ addLetBind (NonRec join_id join_rhs) $
+ thing_inside new_cont
mkDupableCont ty (ApplyTo _ arg se cont) thing_inside
= mkDupableCont (funResultTy ty) cont $ \ cont' ->
-- and maybe copy or inline the code;
-- not strictly CaseOf Case
- addLetBind bndr arg' $
+ addLetBind (NonRec bndr arg') $
-- But what if the arg should be case-bound? We can't use
-- addNonRecBind here because its type is too specific.
-- This has been this way for a long time, so I'll leave it,
returnSmpl (concat alt_binds_s, alts')
) `thenSmpl` \ (alt_binds, alts') ->
- addNewInScopeIds [b | NonRec b _ <- alt_binds] $
+ addAuxiliaryBinds alt_binds $
-- NB that the new alternatives, alts', are still InAlts, using the original
-- binders. That means we can keep the case_bndr intact. This is important
-- This is VITAL when the type of case_bndr is an unboxed pair (often the
-- case in I/O rich code. We aren't allowed a lambda bound
-- arg of unboxed tuple type, and indeed such a case_bndr is always dead
- addLetBinds alt_binds $
- thing_inside (Select OkToDup case_bndr alts' se (Stop (contResultType cont)))
+ thing_inside (Select OkToDup case_bndr alts' se (mkStop (contResultType cont)))
mkDupableAlt :: InId -> OutId -> SimplCont -> InAlt -> SimplM (OutStuff InAlt)
mkDupableAlt case_bndr case_bndr' cont alt@(con, bndrs, rhs)
= simplBinders bndrs $ \ bndrs' ->
simplExprC rhs cont `thenSmpl` \ rhs' ->
- if (case cont of { Stop _ -> exprIsDupable rhs'; other -> False}) then
+ if (case cont of { Stop _ _ -> exprIsDupable rhs'; other -> False}) then
-- It is worth checking for a small RHS because otherwise we
-- get extra let bindings that may cause an extra iteration of the simplifier to
-- inline back in place. Quite often the rhs is just a variable or constructor.
opt_D_dump_worker_wrapper
)
import CoreLint ( beginPass, endPass )
-import CoreUtils ( exprType, exprArity, exprEtaExpandArity )
+import CoreUtils ( exprType, exprEtaExpandArity )
import MkId ( mkWorkerId )
-import Id ( Id, idType, idStrictness, setIdArityInfo, isOneShotLambda,
+import Id ( Id, idType, idStrictness, idArity, isOneShotLambda,
setIdStrictness, idInlinePragma,
setIdWorkerInfo, idCprInfo, setInlinePragma )
import Type ( Type, isNewType, splitForAllTys, splitFunTys )
-- if two, then a worker and a
-- wrapper.
tryWW non_rec fn_id rhs
- | isNeverInlinePrag inline_prag
+ | isNeverInlinePrag inline_prag || arity == 0
= -- Don't split things that will never be inlined
returnUs [ (fn_id, rhs) ]
- | non_rec && certainlyWillInline fn_id
- -- No point in worker/wrappering something that is going to be
+ | non_rec && not do_coerce_ww && certainlyWillInline fn_id
+ -- No point in worker/wrappering a function that is going to be
-- INLINEd wholesale anyway. If the strictness analyser is run
-- twice, this test also prevents wrappers (which are INLINEd)
-- from being re-done.
--
+ -- The do_coerce_ww test is so that
+ -- a function with a coerce should w/w to get rid
+ -- of the coerces, which can significantly improve its arity.
+ -- Example: f [] = return [] :: IO [Int]
+ -- f (x:xs) = return (x:xs)
+ -- If we aren't careful we end up with
+ -- f = \ x -> case x of {
+ -- x:xs -> __coerce (IO [Int]) (\ s -> (# s, x:xs #)
+ -- [] -> lvl_sJ8
+ --
+ --
-- OUT OF DATE NOTE, kept for info:
+ -- It's out of date because now wrappers look very cheap
+ -- even when they are inlined.
-- In this case we add an INLINE pragma to the RHS. Why?
-- Because consider
-- f = \x -> g x x
-- Then f is small, so we don't w/w it. But g is big, and we do, so
-- g's wrapper will get inlined in f's RHS, which makes f look big now.
-- So f doesn't get inlined, but it is strict and we have failed to w/w it.
- -- It's out of date because now wrappers look very cheap
- -- even when they are inlined.
= returnUs [ (fn_id, rhs) ]
| not (do_strict_ww || do_cpr_ww || do_coerce_ww)
work_id | has_strictness = proto_work_id `setIdStrictness` mkStrictnessInfo (work_demands, result_bot)
| otherwise = proto_work_id
- wrap_arity = exprArity wrap_rhs -- Might be greater than the current visible arity
- -- if the function returns bottom
-
wrap_rhs = wrap_fn work_id
wrap_id = fn_id `setIdStrictness` wrapper_strictness
- `setIdWorkerInfo` HasWorker work_id wrap_arity
- `setIdArityInfo` exactArity wrap_arity
+ `setIdWorkerInfo` HasWorker work_id arity
`setInlinePragma` NoInlinePragInfo -- Put it on the worker instead
-- Add info to the wrapper:
-- (a) we want to set its arity
in
returnUs ([(work_id, work_rhs), (wrap_id, wrap_rhs)])
-- Worker first, because wrapper mentions it
- -- Arrange to inline the wrapper unconditionally
+ -- mkWwBodies has already built a wrap_rhs with an INLINE pragma wrapped around it
where
fun_ty = idType fn_id
- arity = exprEtaExpandArity rhs
+ arity = idArity fn_id -- The arity is set by the simplifier using exprEtaExpandArity
+ -- So it may be more than the number of top-level-visible lambdas
- -- Don't split something which is marked unconditionally NOINLINE
inline_prag = idInlinePragma fn_id
strictness_info = idStrictness fn_id