\begin{code}
module WorkWrap ( wwTopBinds, mkWrapper ) where
-#include "HsVersions.h"
-
import CoreSyn
-import CoreUnfold ( certainlyWillInline )
-import CoreLint ( showPass, endPass )
+import CoreUnfold ( certainlyWillInline, mkInlineRule, mkWwInlineRule )
import CoreUtils ( exprType, exprIsHNF )
-import Id ( Id, idType, isOneShotLambda,
- setIdNewStrictness, mkWorkerId,
- setIdWorkerInfo, setInlinePragma,
- idInfo )
-import MkId ( lazyIdKey, lazyIdUnfolding )
+import CoreArity ( exprArity )
+import Var
+import Id ( idType, isOneShotLambda, idUnfolding,
+ setIdStrictness, mkWorkerId,
+ setInlineActivation, setIdUnfolding,
+ setIdArity )
import Type ( Type )
-import IdInfo ( WorkerInfo(..), arityInfo,
- newDemandInfo, newStrictnessInfo, unfoldingInfo, inlinePragInfo
- )
-import NewDemand ( Demand(..), StrictSig(..), DmdType(..), DmdResult(..),
+import IdInfo
+import Demand ( Demand(..), StrictSig(..), DmdType(..), DmdResult(..),
Demands(..), mkTopDmdType, isBotRes, returnsCPR, topSig, isAbsent
)
-import UniqSupply ( UniqSupply, initUs_, returnUs, thenUs, mapUs, getUniqueUs, UniqSM )
-import Unique ( hasKey )
-import BasicTypes ( RecFlag(..), isNonRec, Activation(..) )
+import UniqSupply
+import BasicTypes ( RecFlag(..), isNonRec, isNeverActive,
+ Activation, inlinePragmaActivation )
import VarEnv ( isEmptyVarEnv )
import Maybes ( orElse )
-import DynFlags
import WwLib
import Util ( lengthIs, notNull )
import Outputable
+import MonadUtils
+
+#include "HsVersions.h"
\end{code}
We take Core bindings whose binders have:
\end{enumerate}
\begin{code}
+wwTopBinds :: UniqSupply -> [CoreBind] -> [CoreBind]
-wwTopBinds :: DynFlags
- -> UniqSupply
- -> [CoreBind]
- -> IO [CoreBind]
-
-wwTopBinds dflags us binds
- = do {
- showPass dflags "Worker Wrapper binds";
-
- -- Create worker/wrappers, and mark binders with their
- -- "strictness info" [which encodes their worker/wrapper-ness]
- let { binds' = workersAndWrappers us binds };
-
- endPass dflags "Worker Wrapper binds"
- Opt_D_dump_worker_wrapper binds'
- }
-\end{code}
-
-
-\begin{code}
-workersAndWrappers :: UniqSupply -> [CoreBind] -> [CoreBind]
-
-workersAndWrappers us top_binds
- = initUs_ us $
- mapUs wwBind top_binds `thenUs` \ top_binds' ->
- returnUs (concat top_binds')
+wwTopBinds us top_binds
+ = initUs_ us $ do
+ top_binds' <- mapM wwBind top_binds
+ return (concat top_binds')
\end{code}
%************************************************************************
-- the caller will convert to Expr/Binding,
-- as appropriate.
-wwBind (NonRec binder rhs)
- = wwExpr rhs `thenUs` \ new_rhs ->
- tryWW NonRecursive binder new_rhs `thenUs` \ new_pairs ->
- returnUs [NonRec b e | (b,e) <- new_pairs]
+wwBind (NonRec binder rhs) = do
+ new_rhs <- wwExpr rhs
+ new_pairs <- tryWW NonRecursive binder new_rhs
+ return [NonRec b e | (b,e) <- new_pairs]
-- Generated bindings must be non-recursive
-- because the original binding was.
wwBind (Rec pairs)
- = mapUs do_one pairs `thenUs` \ new_pairs ->
- returnUs [Rec (concat new_pairs)]
+ = return . Rec <$> concatMapM do_one pairs
where
- do_one (binder, rhs) = wwExpr rhs `thenUs` \ new_rhs ->
- tryWW Recursive binder new_rhs
+ do_one (binder, rhs) = do new_rhs <- wwExpr rhs
+ tryWW Recursive binder new_rhs
\end{code}
@wwExpr@ basically just walks the tree, looking for appropriate
\begin{code}
wwExpr :: CoreExpr -> UniqSM CoreExpr
-wwExpr e@(Type _) = returnUs e
-wwExpr e@(Lit _) = returnUs e
-wwExpr e@(Note InlineMe expr) = returnUs e
- -- Don't w/w inside InlineMe's
-
-wwExpr e@(Var v)
- | v `hasKey` lazyIdKey = returnUs lazyIdUnfolding
- | otherwise = returnUs e
- -- Inline 'lazy' after strictness analysis
- -- (but not inside InlineMe's)
+wwExpr e@(Type {}) = return e
+wwExpr e@(Lit {}) = return e
+wwExpr e@(Var {}) = return e
wwExpr (Lam binder expr)
- = wwExpr expr `thenUs` \ new_expr ->
- returnUs (Lam binder new_expr)
+ = Lam binder <$> wwExpr expr
wwExpr (App f a)
- = wwExpr f `thenUs` \ new_f ->
- wwExpr a `thenUs` \ new_a ->
- returnUs (App new_f new_a)
+ = App <$> wwExpr f <*> wwExpr a
wwExpr (Note note expr)
- = wwExpr expr `thenUs` \ new_expr ->
- returnUs (Note note new_expr)
+ = Note note <$> wwExpr expr
+
+wwExpr (Cast expr co) = do
+ new_expr <- wwExpr expr
+ return (Cast new_expr co)
wwExpr (Let bind expr)
- = wwBind bind `thenUs` \ intermediate_bind ->
- wwExpr expr `thenUs` \ new_expr ->
- returnUs (mkLets intermediate_bind new_expr)
-
-wwExpr (Case expr binder ty alts)
- = wwExpr expr `thenUs` \ new_expr ->
- mapUs ww_alt alts `thenUs` \ new_alts ->
- returnUs (Case new_expr binder ty new_alts)
+ = mkLets <$> wwBind bind <*> wwExpr expr
+
+wwExpr (Case expr binder ty alts) = do
+ new_expr <- wwExpr expr
+ new_alts <- mapM ww_alt alts
+ return (Case new_expr binder ty new_alts)
where
- ww_alt (con, binders, rhs)
- = wwExpr rhs `thenUs` \ new_rhs ->
- returnUs (con, binders, new_rhs)
+ ww_alt (con, binders, rhs) = do
+ new_rhs <- wwExpr rhs
+ return (con, binders, new_rhs)
\end{code}
%************************************************************************
The only reason this is monadised is for the unique supply.
+Note [Don't w/w inline things (a)]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It's very important to refrain from w/w-ing an INLINE function
+because the wrapepr will then overwrite the InlineRule unfolding.
+
+It was wrong with the old InlineMe Note too: if we do so by mistake
+we transform
+ f = __inline (\x -> E)
+into
+ f = __inline (\x -> case x of (a,b) -> fw E)
+ fw = \ab -> (__inline (\x -> E)) (a,b)
+and the original __inline now vanishes, so E is no longer
+inside its __inline wrapper. Death! Disaster!
+
+Furthermore, if the programmer has marked something as INLINE,
+we may lose by w/w'ing it.
+
+If the strictness analyser is run twice, this test also prevents
+wrappers (which are INLINEd) from being re-done.
+
+Notice that we refrain from w/w'ing an INLINE function even if it is
+in a recursive group. It might not be the loop breaker. (We could
+test for loop-breaker-hood, but I'm not sure that ever matters.)
+
+Note [Don't w/w inline things (b)]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In general, therefore, we refrain from w/w-ing *small* functions,
+because they'll inline anyway. But we must take care: it may look
+small now, but get to be big later after other inling has happened.
+So we take the precaution of adding an INLINE pragma to any such
+functions.
+
+I made this change when I observed a big function at the end of
+compilation with a useful strictness signature but no w-w. When
+I measured it on nofib, it didn't make much difference; just a few
+percent improved allocation on one benchmark (bspt/Euclid.space).
+But nothing got worse.
+
+
\begin{code}
tryWW :: RecFlag
-> Id -- The fn binder
-- if two, then a worker and a
-- wrapper.
tryWW is_rec fn_id rhs
- | isNonRec is_rec && certainlyWillInline unfolding
- -- No point in worker/wrappering a function that is going to be
- -- INLINEd wholesale anyway. If the strictness analyser is run
- -- twice, this test also prevents wrappers (which are INLINEd)
- -- from being re-done.
- --
- -- It's very important to refrain from w/w-ing an INLINE function
- -- If we do so by mistake we transform
- -- f = __inline (\x -> E)
- -- into
- -- f = __inline (\x -> case x of (a,b) -> fw E)
- -- fw = \ab -> (__inline (\x -> E)) (a,b)
- -- and the original __inline now vanishes, so E is no longer
- -- inside its __inline wrapper. Death! Disaster!
- = returnUs [ (new_fn_id, rhs) ]
+ | isNeverActive inline_act
+ -- No point in worker/wrappering if the thing is never inlined!
+ -- Because the no-inline prag will prevent the wrapper ever
+ -- being inlined at a call site.
+ --
+ -- Furthermore, don't even expose strictness info
+ = return [ (fn_id, rhs) ]
| is_thunk && worthSplittingThunk maybe_fn_dmd res_info
= ASSERT2( isNonRec is_rec, ppr new_fn_id ) -- The thunk must be non-recursive
+ checkSize new_fn_id rhs $
splitThunk new_fn_id rhs
| is_fun && worthSplittingFun wrap_dmds res_info
- = splitFun new_fn_id fn_info wrap_dmds res_info inline_prag rhs
+ = checkSize new_fn_id rhs $
+ splitFun new_fn_id fn_info wrap_dmds res_info inline_act rhs
| otherwise
- = returnUs [ (new_fn_id, rhs) ]
+ = return [ (new_fn_id, rhs) ]
where
fn_info = idInfo fn_id
- maybe_fn_dmd = newDemandInfo fn_info
- unfolding = unfoldingInfo fn_info
- inline_prag = inlinePragInfo fn_info
+ maybe_fn_dmd = demandInfo fn_info
+ inline_act = inlinePragmaActivation (inlinePragInfo fn_info)
-- In practice it always will have a strictness
-- signature, even if it's a uninformative one
- strict_sig = newStrictnessInfo fn_info `orElse` topSig
+ strict_sig = strictnessInfo fn_info `orElse` topSig
StrictSig (DmdType env wrap_dmds res_info) = strict_sig
-- new_fn_id has the DmdEnv zapped.
-- (c) it becomes incorrect as things are cloned, because
-- we don't push the substitution into it
new_fn_id | isEmptyVarEnv env = fn_id
- | otherwise = fn_id `setIdNewStrictness`
+ | otherwise = fn_id `setIdStrictness`
StrictSig (mkTopDmdType wrap_dmds res_info)
is_fun = notNull wrap_dmds
is_thunk = not is_fun && not (exprIsHNF rhs)
---------------------
-splitFun fn_id fn_info wrap_dmds res_info inline_prag rhs
- = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) )
+checkSize :: Id -> CoreExpr
+ -> UniqSM [(Id,CoreExpr)] -> UniqSM [(Id,CoreExpr)]
+ -- See Note [Don't w/w inline things (a) and (b)]
+checkSize fn_id rhs thing_inside
+ | isStableUnfolding unfolding -- For DFuns and INLINE things, leave their
+ = return [ (fn_id, rhs) ] -- unfolding unchanged; but still attach
+ -- strictness info to the Id
+
+ | certainlyWillInline unfolding
+ = return [ (fn_id `setIdUnfolding` inline_rule, rhs) ]
+ -- Note [Don't w/w inline things (b)]
+
+ | otherwise = thing_inside
+ where
+ unfolding = idUnfolding fn_id
+ inline_rule = mkInlineRule InlUnSat rhs (unfoldingArity unfolding)
+
+---------------------
+splitFun :: Id -> IdInfo -> [Demand] -> DmdResult -> Activation -> Expr Var
+ -> UniqSM [(Id, CoreExpr)]
+splitFun fn_id fn_info wrap_dmds res_info inline_act rhs
+ = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) )
+ (do {
-- The arity should match the signature
- mkWwBodies fun_ty wrap_dmds res_info one_shots `thenUs` \ (work_demands, wrap_fn, work_fn) ->
- getUniqueUs `thenUs` \ work_uniq ->
- let
+ (work_demands, wrap_fn, work_fn) <- mkWwBodies fun_ty wrap_dmds res_info one_shots
+ ; work_uniq <- getUniqueM
+ ; let
work_rhs = work_fn rhs
work_id = mkWorkerId work_uniq fn_id (exprType work_rhs)
- `setInlinePragma` inline_prag
- `setIdNewStrictness` StrictSig (mkTopDmdType work_demands work_res_info)
+ `setInlineActivation` inline_act
+ -- Any inline activation (which sets when inlining is active)
+ -- on the original function is duplicated on the worker and wrapper
+ -- It *matters* that the pragma stays on the wrapper
+ -- It seems sensible to have it on the worker too, although we
+ -- can't think of a compelling reason. (In ptic, INLINE things are
+ -- not w/wd). However, the RuleMatchInfo is not transferred since
+ -- it does not make sense for workers to be constructorlike.
+ `setIdStrictness` StrictSig (mkTopDmdType work_demands work_res_info)
-- Even though we may not be at top level,
-- it's ok to give it an empty DmdEnv
+ `setIdArity` (exprArity work_rhs)
+ -- Set the arity so that the Core Lint check that the
+ -- arity is consistent with the demand type goes through
wrap_rhs = wrap_fn work_id
- wrap_id = fn_id `setIdWorkerInfo` HasWorker work_id arity
- `setInlinePragma` AlwaysActive -- Zap any inline pragma;
- -- Put it on the worker instead
- in
- returnUs ([(work_id, work_rhs), (wrap_id, wrap_rhs)])
+ wrap_id = fn_id `setIdUnfolding` mkWwInlineRule work_id wrap_rhs arity
+
+ ; return ([(work_id, work_rhs), (wrap_id, wrap_rhs)]) })
-- Worker first, because wrapper mentions it
-- mkWwBodies has already built a wrap_rhs with an INLINE pragma wrapped around it
where
-- make the wrapper and worker have corresponding one-shot arguments too.
-- Otherwise we spuriously float stuff out of case-expression join points,
-- which is very annoying.
+get_one_shots :: Expr Var -> [Bool]
get_one_shots (Lam b e)
| isId b = isOneShotLambda b : get_one_shots e
| otherwise = get_one_shots e
get_one_shots (Note _ e) = get_one_shots e
-get_one_shots other = noOneShotInfo
+get_one_shots _ = noOneShotInfo
\end{code}
Thunk splitting
Now simplifier will transform to
case x-rhs of
- I# a -> let x* = I# b
+ I# a -> let x* = I# a
in body
which is what we want. Now suppose x-rhs is itself a case:
-- I# y -> let x = I# y in x }
-- See comments above. Is it not beautifully short?
-splitThunk fn_id rhs
- = mkWWstr [fn_id] `thenUs` \ (_, wrap_fn, work_fn) ->
- returnUs [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
+splitThunk :: Var -> Expr Var -> UniqSM [(Var, Expr Var)]
+splitThunk fn_id rhs = do
+ (_, wrap_fn, work_fn) <- mkWWstr [fn_id]
+ return [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
\end{code}
= any worth_it ds || returnsCPR res
-- worthSplitting returns False for an empty list of demands,
-- and hence do_strict_ww is False if arity is zero and there is no CPR
-
- -- We used not to split if the result is bottom.
- -- [Justification: there's no efficiency to be gained.]
- -- But it's sometimes bad not to make a wrapper. Consider
- -- fw = \x# -> let x = I# x# in case e of
- -- p1 -> error_fn x
- -- p2 -> error_fn x
- -- p3 -> the real stuff
- -- The re-boxing code won't go away unless error_fn gets a wrapper too.
- -- [We don't do reboxing now, but in general it's better to pass
- -- an unboxed thing to f, and have it reboxed in the error cases....]
+ -- See Note [Worker-wrapper for bottoming functions]
where
worth_it Abs = True -- Absent arg
- worth_it (Eval (Prod ds)) = True -- Product arg to evaluate
- worth_it other = False
+ worth_it (Eval (Prod _)) = True -- Product arg to evaluate
+ worth_it _ = False
worthSplittingThunk :: Maybe Demand -- Demand on the thunk
-> DmdResult -- CPR info for the thunk
where
-- Split if the thing is unpacked
worth_it (Just (Eval (Prod ds))) = not (all isAbsent ds)
- worth_it other = False
+ worth_it _ = False
\end{code}
+Note [Worker-wrapper for bottoming functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We used not to split if the result is bottom.
+[Justification: there's no efficiency to be gained.]
+
+But it's sometimes bad not to make a wrapper. Consider
+ fw = \x# -> let x = I# x# in case e of
+ p1 -> error_fn x
+ p2 -> error_fn x
+ p3 -> the real stuff
+The re-boxing code won't go away unless error_fn gets a wrapper too.
+[We don't do reboxing now, but in general it's better to pass an
+unboxed thing to f, and have it reboxed in the error cases....]
%************************************************************************
-> StrictSig -- Wrapper strictness info
-> UniqSM (Id -> CoreExpr) -- Wrapper body, missing worker Id
-mkWrapper fun_ty (StrictSig (DmdType _ demands res_info))
- = mkWwBodies fun_ty demands res_info noOneShotInfo `thenUs` \ (_, wrap_fn, _) ->
- returnUs wrap_fn
+mkWrapper fun_ty (StrictSig (DmdType _ demands res_info)) = do
+ (_, wrap_fn, _) <- mkWwBodies fun_ty demands res_info noOneShotInfo
+ return wrap_fn
+noOneShotInfo :: [Bool]
noOneShotInfo = repeat False
\end{code}