From a66541af84d102f32b73fb7f89f48008c01092a6 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Fri, 8 Oct 2010 09:27:09 +0000 Subject: [PATCH] Float out partial applications This fixes at least one case of performance regression in 7.0, and is nice win on nofib: Program Size Allocs Runtime Elapsed Min +0.3% -63.0% -38.5% -38.7% Max +1.2% +0.2% +0.9% +0.9% Geometric Mean +0.6% -3.0% -6.4% -6.6% --- compiler/coreSyn/CoreSyn.lhs | 14 +++++++++++ compiler/simplCore/CoreMonad.lhs | 41 +++++++++++++++++++++---------- compiler/simplCore/SetLevels.lhs | 49 +++++++++++++++++++++++++++++++------- 3 files changed, 82 insertions(+), 22 deletions(-) diff --git a/compiler/coreSyn/CoreSyn.lhs b/compiler/coreSyn/CoreSyn.lhs index c74de06..5e03e4d 100644 --- a/compiler/coreSyn/CoreSyn.lhs +++ b/compiler/coreSyn/CoreSyn.lhs @@ -59,6 +59,9 @@ module CoreSyn ( -- * Annotated expression data types AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt, + -- ** Operations on annotated expressions + collectAnnArgs, + -- ** Operations on annotations deAnnotate, deAnnotate', deAnnAlt, collectAnnBndrs, @@ -1142,6 +1145,17 @@ data AnnBind bndr annot \end{code} \begin{code} +-- | Takes a nested application expression and returns the the function +-- being applied and the arguments to which it is applied +collectAnnArgs :: AnnExpr b a -> (AnnExpr b a, [AnnExpr b a]) +collectAnnArgs expr + = go expr [] + where + go (_, AnnApp f a) as = go f (a:as) + go e as = (e, as) +\end{code} + +\begin{code} deAnnotate :: AnnExpr bndr annot -> Expr bndr deAnnotate (_, e) = deAnnotate' e diff --git a/compiler/simplCore/CoreMonad.lhs b/compiler/simplCore/CoreMonad.lhs index 00dedff..e3dbf3a 100644 --- a/compiler/simplCore/CoreMonad.lhs +++ b/compiler/simplCore/CoreMonad.lhs @@ -304,8 +304,10 @@ data SimplifierSwitch \begin{code} data FloatOutSwitches = FloatOutSwitches { floatOutLambdas :: Bool, -- ^ True <=> float lambdas to top level - floatOutConstants :: Bool -- ^ True <=> float constants to top level, + floatOutConstants :: Bool, -- ^ True <=> float constants to top level, -- even if they do not escape a lambda + floatOutPartialApplications :: Bool -- ^ True <=> float out partial applications + -- based on arity information. } instance Outputable FloatOutSwitches where ppr = pprFloatOutSwitches @@ -320,10 +322,6 @@ pprFloatOutSwitches sw = pp_not (floatOutLambdas sw) <+> text "lambdas" <> comma -- | Switches that specify the minimum amount of floating out -- gentleFloatOutSwitches :: FloatOutSwitches -- gentleFloatOutSwitches = FloatOutSwitches False False - --- | Switches that do not specify floating out of lambdas, just of constants -constantsOnlyFloatOutSwitches :: FloatOutSwitches -constantsOnlyFloatOutSwitches = FloatOutSwitches False True \end{code} @@ -420,14 +418,28 @@ getCoreToDo dflags -- so that overloaded functions have all their dictionary lambdas manifest runWhen do_specialise CoreDoSpecialising, - runWhen full_laziness (CoreDoFloatOutwards constantsOnlyFloatOutSwitches), + runWhen full_laziness $ + CoreDoFloatOutwards FloatOutSwitches { + floatOutLambdas = False, + floatOutConstants = True, + floatOutPartialApplications = False }, -- Was: gentleFloatOutSwitches - -- I have no idea why, but not floating constants to top level is - -- very bad in some cases. + -- + -- I have no idea why, but not floating constants to + -- top level is very bad in some cases. + -- -- Notably: p_ident in spectral/rewrite - -- Changing from "gentle" to "constantsOnly" improved - -- rewrite's allocation by 19%, and made 0.0% difference - -- to any other nofib benchmark + -- Changing from "gentle" to "constantsOnly" + -- improved rewrite's allocation by 19%, and + -- made 0.0% difference to any other nofib + -- benchmark + -- + -- Not doing floatOutPartialApplications yet, we'll do + -- that later on when we've had a chance to get more + -- accurate arity information. In fact it makes no + -- difference at all to performance if we do it here, + -- but maybe we save some unnecessary to-and-fro in + -- the simplifier. runWhen do_float_in CoreDoFloatInwards, @@ -452,8 +464,11 @@ getCoreToDo dflags simpl_phase 0 ["post-worker-wrapper"] max_iter ]), - runWhen full_laziness - (CoreDoFloatOutwards constantsOnlyFloatOutSwitches), + runWhen full_laziness $ + CoreDoFloatOutwards FloatOutSwitches { + floatOutLambdas = False, + floatOutConstants = True, + floatOutPartialApplications = True }, -- nofib/spectral/hartel/wang doubles in speed if you -- do full laziness late in the day. It only happens -- after fusion and other stuff, so the early pass doesn't diff --git a/compiler/simplCore/SetLevels.lhs b/compiler/simplCore/SetLevels.lhs index 23874bf..ebfc27e 100644 --- a/compiler/simplCore/SetLevels.lhs +++ b/compiler/simplCore/SetLevels.lhs @@ -60,11 +60,7 @@ import CoreArity ( exprBotStrictness_maybe ) import CoreFVs -- all of it import CoreSubst ( Subst, emptySubst, extendInScope, extendInScopeList, extendIdSubst, cloneIdBndr, cloneRecIdBndrs ) -import Id ( idType, mkLocalIdWithInfo, mkSysLocal, isOneShotLambda, - zapDemandIdInfo, transferPolyIdInfo, - idSpecialisation, idUnfolding, setIdInfo, - setIdStrictness, setIdArity - ) +import Id import IdInfo import Var import VarSet @@ -250,10 +246,42 @@ lvlExpr _ _ ( _, AnnType ty) = return (Type ty) lvlExpr _ env (_, AnnVar v) = return (lookupVar env v) lvlExpr _ _ (_, AnnLit lit) = return (Lit lit) -lvlExpr ctxt_lvl env (_, AnnApp fun arg) = do - fun' <- lvlExpr ctxt_lvl env fun -- We don't do MFE on partial applications - arg' <- lvlMFE False ctxt_lvl env arg - return (App fun' arg') +lvlExpr ctxt_lvl env expr@(_, AnnApp _ _) = do + let + (fun, args) = collectAnnArgs expr + -- + case fun of + -- float out partial applications. This is very beneficial + -- in some cases (-7% runtime -4% alloc over nofib -O2). + -- In order to float a PAP, there must be a function at the + -- head of the application, and the application must be + -- over-saturated with respect to the function's arity. + (_, AnnVar f) | floatPAPs env && + arity > 0 && arity < n_val_args -> + do + let (lapp, rargs) = left (n_val_args - arity) expr [] + rargs' <- mapM (lvlMFE False ctxt_lvl env) rargs + lapp' <- lvlMFE False ctxt_lvl env lapp + return (foldl App lapp' rargs') + where + n_val_args = count (isValArg . deAnnotate) args + arity = idArity f + + -- separate out the PAP that we are floating from the extra + -- arguments, by traversing the spine until we have collected + -- (n_val_args - arity) value arguments. + left 0 e rargs = (e, rargs) + left n (_, AnnApp f a) rargs + | isValArg (deAnnotate a) = left (n-1) f (a:rargs) + | otherwise = left n f (a:rargs) + left _ _ _ = panic "SetLevels.lvlExpr.left" + + -- No PAPs that we can float: just carry on with the + -- arguments and the function. + _otherwise -> do + args' <- mapM (lvlMFE False ctxt_lvl env) args + fun' <- lvlExpr ctxt_lvl env fun + return (foldl App fun' args') lvlExpr ctxt_lvl env (_, AnnNote note expr) = do expr' <- lvlExpr ctxt_lvl env expr @@ -741,6 +769,9 @@ floatLams (fos, _, _, _) = floatOutLambdas fos floatConsts :: LevelEnv -> Bool floatConsts (fos, _, _, _) = floatOutConstants fos +floatPAPs :: LevelEnv -> Bool +floatPAPs (fos, _, _, _) = floatOutPartialApplications fos + extendLvlEnv :: LevelEnv -> [TaggedBndr Level] -> LevelEnv -- Used when *not* cloning extendLvlEnv (float_lams, lvl_env, subst, id_env) prs -- 1.7.10.4