From 5f087cf4add4e140e7df05d896ee6b271133f822 Mon Sep 17 00:00:00 2001 From: simonpj Date: Fri, 14 Dec 2001 17:24:05 +0000 Subject: [PATCH] [project @ 2001-12-14 17:24:03 by simonpj] ------------------------- Performance tuning things ------------------------- I did some nofib tests, and fixed a number of performance problems. 1. Things were getting floated to top level, and that prevented some useful fusion happening. y = build g x = foldr k z y Fixed by arranging that we only get really keen on floating to top level in the second run of the let-float-out pass. 2. Some fettling up on the let-floater itself. It had some parameters that weren't even being used! And it was stupidly floating things out of a one-shot lambda, and the float-in pass didn't float them back in. I think I fixed both of these problems. 3. The eta-reducer was not eta-reducing (/\a -> g a) to g. In general it has to be a bit careful because "seq" means that (\x -> g x) is not in general the same as g ---- but it *is* the same for a type lambda. This turned out to be important in rule matching, where the foldr/build rule was not firing because the LHS of the rule looked like foldr k z (/\ a -> g a) = ... which never matched! Result, no fusion to speak of! 4. The simplifier was a bit too gung ho about inlining used-once things bound to constructor args. The comment is with Simplify.simplNonRecX. --- ghc/compiler/basicTypes/BasicTypes.lhs | 6 +- ghc/compiler/coreSyn/CoreSyn.lhs | 10 +- ghc/compiler/main/CmdLineOpts.lhs | 9 +- ghc/compiler/main/DriverState.hs | 16 +-- ghc/compiler/simplCore/FloatIn.lhs | 35 +++-- ghc/compiler/simplCore/FloatOut.lhs | 235 +++++++++++++++++--------------- ghc/compiler/simplCore/OccurAnal.lhs | 9 +- ghc/compiler/simplCore/SetLevels.lhs | 26 ++-- ghc/compiler/simplCore/SimplMonad.lhs | 34 +++-- ghc/compiler/simplCore/SimplUtils.lhs | 61 ++++++++- ghc/compiler/simplCore/Simplify.lhs | 27 ++-- ghc/compiler/stranal/DmdAnal.lhs | 2 +- 12 files changed, 286 insertions(+), 184 deletions(-) diff --git a/ghc/compiler/basicTypes/BasicTypes.lhs b/ghc/compiler/basicTypes/BasicTypes.lhs index 696a4c1..76185e7 100644 --- a/ghc/compiler/basicTypes/BasicTypes.lhs +++ b/ghc/compiler/basicTypes/BasicTypes.lhs @@ -33,7 +33,8 @@ module BasicTypes( Boxity(..), isBoxed, tupleParens, - OccInfo(..), seqOccInfo, isFragileOcc, isDeadOcc, isLoopBreaker, + OccInfo(..), seqOccInfo, isFragileOcc, isOneOcc, + isDeadOcc, isLoopBreaker, InsideLam, insideLam, notInsideLam, OneBranch, oneBranch, notOneBranch, @@ -320,6 +321,9 @@ isDeadOcc :: OccInfo -> Bool isDeadOcc IAmDead = True isDeadOcc other = False +isOneOcc (OneOcc _ _) = True +isOneOcc other = False + isFragileOcc :: OccInfo -> Bool isFragileOcc (OneOcc _ _) = True isFragileOcc other = False diff --git a/ghc/compiler/coreSyn/CoreSyn.lhs b/ghc/compiler/coreSyn/CoreSyn.lhs index a352829..f941deb 100644 --- a/ghc/compiler/coreSyn/CoreSyn.lhs +++ b/ghc/compiler/coreSyn/CoreSyn.lhs @@ -36,7 +36,7 @@ module CoreSyn ( -- Annotated expressions AnnExpr, AnnExpr'(..), AnnBind(..), AnnAlt, - deAnnotate, deAnnotate', deAnnAlt, + deAnnotate, deAnnotate', deAnnAlt, collectAnnBndrs, -- Core rules CoreRules(..), -- Representation needed by friends @@ -618,3 +618,11 @@ deAnnAlt :: AnnAlt bndr annot -> Alt bndr deAnnAlt (con,args,rhs) = (con,args,deAnnotate rhs) \end{code} +\begin{code} +collectAnnBndrs :: AnnExpr bndr annot -> ([bndr], AnnExpr bndr annot) +collectAnnBndrs e + = collect [] e + where + collect bs (_, AnnLam b body) = collect (b:bs) body + collect bs body = (reverse bs, body) +\end{code} diff --git a/ghc/compiler/main/CmdLineOpts.lhs b/ghc/compiler/main/CmdLineOpts.lhs index e71eff6..5cf3ce3 100644 --- a/ghc/compiler/main/CmdLineOpts.lhs +++ b/ghc/compiler/main/CmdLineOpts.lhs @@ -8,7 +8,7 @@ module CmdLineOpts ( CoreToDo(..), StgToDo(..), SimplifierSwitch(..), - SimplifierMode(..), + SimplifierMode(..), FloatOutSwitches(..), HscLang(..), DynFlag(..), -- needed non-abstractly by DriverFlags @@ -172,7 +172,7 @@ data CoreToDo -- These are diff core-to-core passes, -- Each run of the simplifier can take a different -- set of simplifier-specific flags. | CoreDoFloatInwards - | CoreDoFloatOutwards Bool -- True <=> float lambdas to top level + | CoreDoFloatOutwards FloatOutSwitches | CoreLiberateCase | CoreDoPrintCore | CoreDoStaticArgs @@ -206,6 +206,11 @@ data SimplifierMode -- See comments in SimplMonad data SimplifierSwitch = MaxSimplifierIterations Int | NoCaseOfCase + +data FloatOutSwitches + = FloatOutSw Bool -- True <=> float lambdas to top level + Bool -- True <=> float constants to top level, + -- even if they do not escape a lambda \end{code} %************************************************************************ diff --git a/ghc/compiler/main/DriverState.hs b/ghc/compiler/main/DriverState.hs index 7676434..ce67ed3 100644 --- a/ghc/compiler/main/DriverState.hs +++ b/ghc/compiler/main/DriverState.hs @@ -1,5 +1,5 @@ ----------------------------------------------------------------------------- --- $Id: DriverState.hs,v 1.63 2001/12/10 14:08:14 simonmar Exp $ +-- $Id: DriverState.hs,v 1.64 2001/12/14 17:24:04 simonpj Exp $ -- -- Settings for the driver -- @@ -238,7 +238,7 @@ buildCoreToDo = do -- so that overloaded functions have all their dictionary lambdas manifest CoreDoSpecialising, - CoreDoFloatOutwards False{-not full-}, + CoreDoFloatOutwards (FloatOutSw False False), CoreDoFloatInwards, CoreDoSimplify (SimplPhase 2) [ @@ -279,9 +279,7 @@ buildCoreToDo = do ], case rule_check of { Just pat -> CoreDoRuleCheck 0 pat; Nothing -> CoreDoNothing }, -#ifdef DEBUG if cpr then CoreDoCPResult else CoreDoNothing, -#endif if strictness then CoreDoStrictness else CoreDoNothing, CoreDoWorkerWrapper, CoreDoGlomBinds, @@ -290,7 +288,8 @@ buildCoreToDo = do MaxSimplifierIterations max_iter ], - CoreDoFloatOutwards False{-not full-}, + CoreDoFloatOutwards (FloatOutSw False -- Not lambdas + True), -- Float constants -- nofib/spectral/hartel/wang doubles in speed if you -- do full laziness late in the day. It only happens -- after fusion and other stuff, so the early pass doesn't @@ -298,13 +297,6 @@ buildCoreToDo = do -- f_el22 (f_el21 r_midblock) --- Leave out lambda lifting for now --- "-fsimplify", -- Tidy up results of full laziness --- "[", --- "-fmax-simplifier-iterations2", --- "]", --- "-ffloat-outwards-full", - -- We want CSE to follow the final full-laziness pass, because it may -- succeed in commoning up things floated out by full laziness. -- CSE used to rely on the no-shadowing invariant, but it doesn't any more diff --git a/ghc/compiler/simplCore/FloatIn.lhs b/ghc/compiler/simplCore/FloatIn.lhs index 2957520..6a05a98 100644 --- a/ghc/compiler/simplCore/FloatIn.lhs +++ b/ghc/compiler/simplCore/FloatIn.lhs @@ -179,20 +179,23 @@ So we treat lambda in groups, using the following rule: Otherwise drop all the bindings outside the group. \begin{code} -fiExpr to_drop (_, AnnLam b body) - = case collect [b] body of - (bndrs, real_body) --- | all is_ok bndrs -> mkLams bndrs (fiExpr to_drop real_body) --- [July 01: I'm experiment with getting the full laziness --- pass to floats bindings out past big lambdas (instead of the simplifier) --- so I don't want the float-in pass to just push them right back in. --- I'm going to try just dumping all bindings outside lambdas.] - | otherwise -> mkCoLets' to_drop (mkLams bndrs (fiExpr [] real_body)) - where - collect bs (_, AnnLam b body) = collect (b:bs) body - collect bs body = (reverse bs, body) + -- Hack alert! We only float in through one-shot lambdas, + -- not (as you might guess) through big lambdas. + -- Reason: we float *out* past big lambdas (see the test in the Lam + -- case of FloatOut.floatExpr) and we don't want to float straight + -- back in again. + -- + -- It *is* important to float into one-shot lambdas, however; + -- see the remarks with noFloatIntoRhs. +fiExpr to_drop lam@(_, AnnLam _ _) + | all is_one_shot bndrs -- Float in + = mkLams bndrs (fiExpr to_drop body) + + | otherwise -- Dump it all here + = mkCoLets' to_drop (mkLams bndrs (fiExpr [] body)) --- is_ok bndr = isTyVar bndr || isOneShotLambda bndr + where + (bndrs, body) = collectAnnBndrs lam \end{code} We don't float lets inwards past an SCC. @@ -339,7 +342,7 @@ fiExpr to_drop (_, AnnCase scrut case_bndr alts) fi_alt to_drop (con, args, rhs) = (con, args, fiExpr to_drop rhs) noFloatIntoRhs (AnnNote InlineMe _) = True -noFloatIntoRhs (AnnLam b _) = not (isId b && isOneShotLambda b) +noFloatIntoRhs (AnnLam b _) = not (is_one_shot b) -- IMPORTANT: don't say 'True' for a RHS with a one-shot lambda at the top. -- This makes a big difference for things like -- f x# = let x = I# x# @@ -349,7 +352,9 @@ noFloatIntoRhs (AnnLam b _) = not (isId b && isOneShotLambda b) -- boxing constructor into it, else we box it every time which is very bad -- news indeed. -noFloatIntoRhs rhs = exprIsValue (deAnnotate' rhs) -- We'd just float rigt back out again... +noFloatIntoRhs rhs = exprIsValue (deAnnotate' rhs) -- We'd just float right back out again... + +is_one_shot b = isId b && isOneShotLambda b \end{code} diff --git a/ghc/compiler/simplCore/FloatOut.lhs b/ghc/compiler/simplCore/FloatOut.lhs index d81c3b9..683f71b 100644 --- a/ghc/compiler/simplCore/FloatOut.lhs +++ b/ghc/compiler/simplCore/FloatOut.lhs @@ -13,11 +13,10 @@ module FloatOut ( floatOutwards ) where import CoreSyn import CoreUtils ( mkSCC ) -import CmdLineOpts ( DynFlags, DynFlag(..) ) +import CmdLineOpts ( DynFlags, DynFlag(..), FloatOutSwitches(..) ) import ErrUtils ( dumpIfSet_dyn ) import CostCentre ( dupifyCC, CostCentre ) import Id ( Id ) -import VarEnv import CoreLint ( showPass, endPass ) import SetLevels ( setLevels, isInlineCtxt, Level(..), tOP_LEVEL, ltMajLvl, ltLvl, isTopLvl @@ -27,6 +26,46 @@ import List ( partition ) import Outputable \end{code} + ----------------- + Overall game plan + ----------------- + +The Big Main Idea is: + + To float out sub-expressions that can thereby get outside + a non-one-shot value lambda, and hence may be shared. + + +To achieve this we may need to do two thing: + + a) Let-bind the sub-expression: + + f (g x) ==> let lvl = f (g x) in lvl + + Now we can float the binding for 'lvl'. + + b) More than that, we may need to abstract wrt a type variable + + \x -> ... /\a -> let v = ...a... in .... + + Here the binding for v mentions 'a' but not 'x'. So we + abstract wrt 'a', to give this binding for 'v': + + vp = /\a -> ...a... + v = vp a + + Now the binding for vp can float out unimpeded. + I can't remember why this case seemed important enough to + deal with, but I certainly found cases where important floats + didn't happen if we did not abstract wrt tyvars. + +With this in mind we can also achieve another goal: lambda lifting. +We can make an arbitrary (function) binding float to top level by +abstracting wrt *all* local variables, not just type variables, leaving +a binding that can be floated right to top level. Whether or not this +happens is controlled by a flag. + + Random comments ~~~~~~~~~~~~~~~ @@ -74,15 +113,15 @@ type FloatBinds = [FloatBind] \begin{code} floatOutwards :: DynFlags - -> Bool -- True <=> float lambdas to top level + -> FloatOutSwitches -> UniqSupply -> [CoreBind] -> IO [CoreBind] -floatOutwards dflags float_lams us pgm +floatOutwards dflags float_sws us pgm = do { showPass dflags float_msg ; - let { annotated_w_levels = setLevels float_lams pgm us ; + let { annotated_w_levels = setLevels float_sws pgm us ; (fss, binds_s') = unzip (map floatTopBind annotated_w_levels) } ; @@ -100,20 +139,21 @@ floatOutwards dflags float_lams us pgm {- no specific flag for dumping float-out -} } where - float_msg | float_lams = "Float out (floating lambdas too)" - | otherwise = "Float out (not floating lambdas)" + float_msg = showSDoc (text "Float out" <+> parens (sws float_sws)) + sws (FloatOutSw lam const) = pp_not lam <+> text "lambdas" <> comma <+> + pp_not const <+> text "constants" + pp_not True = empty + pp_not False = text "not" floatTopBind bind@(NonRec _ _) - = case (floatBind emptyVarEnv tOP_LEVEL bind) of { (fs, floats, bind', _) -> + = case (floatBind bind) of { (fs, floats, bind') -> (fs, floatsToBinds floats ++ [bind']) } floatTopBind bind@(Rec _) - = case (floatBind emptyVarEnv tOP_LEVEL bind) of { (fs, floats, Rec pairs', _) -> - -- Actually floats will be empty - --false:ASSERT(null floats) - (fs, [Rec (floatsToBindPairs floats ++ pairs')]) - } + = case (floatBind bind) of { (fs, floats, Rec pairs') -> + WARN( not (null floats), ppr bind $$ ppr floats ) + (fs, [Rec (floatsToBindPairs floats ++ pairs')]) } \end{code} %************************************************************************ @@ -124,53 +164,44 @@ floatTopBind bind@(Rec _) \begin{code} -floatBind :: IdEnv Level - -> Level - -> LevelledBind - -> (FloatStats, FloatBinds, CoreBind, IdEnv Level) - -floatBind env lvl (NonRec (name,level) rhs) - = case (floatRhs env level rhs) of { (fs, rhs_floats, rhs') -> - (fs, rhs_floats, - NonRec name rhs', - extendVarEnv env name level) - } +floatBind :: LevelledBind + -> (FloatStats, FloatBinds, CoreBind) -floatBind env lvl bind@(Rec pairs) +floatBind (NonRec (name,level) rhs) + = case (floatRhs level rhs) of { (fs, rhs_floats, rhs') -> + (fs, rhs_floats, NonRec name rhs') } + +floatBind bind@(Rec pairs) = case (unzip3 (map do_pair pairs)) of { (fss, rhss_floats, new_pairs) -> if not (isTopLvl bind_level) then -- Standard case - (sum_stats fss, concat rhss_floats, Rec new_pairs, new_env) + (sum_stats fss, concat rhss_floats, Rec new_pairs) else - {- In a recursive binding, destined for the top level (only), - the rhs floats may contain - references to the bound things. For example - - f = ...(let v = ...f... in b) ... - - might get floated to - - v = ...f... - f = ... b ... - - and hence we must (pessimistically) make all the floats recursive - with the top binding. Later dependency analysis will unravel it. - -} - - (sum_stats fss, - [], - Rec (new_pairs ++ floatsToBindPairs (concat rhss_floats)), - new_env) - + -- In a recursive binding, *destined for* the top level + -- (only), the rhs floats may contain references to the + -- bound things. For example + -- + -- f = ...(let v = ...f... in b) ... + -- + -- might get floated to + -- + -- v = ...f... + -- f = ... b ... + -- + -- and hence we must (pessimistically) make all the floats recursive + -- with the top binding. Later dependency analysis will unravel it. + -- + -- Can't happen on nested bindings because floatRhs will dump + -- the bindings in the RHS (partitionByMajorLevel treats top specially) + (sum_stats fss, [], + Rec (new_pairs ++ floatsToBindPairs (concat rhss_floats))) } where - new_env = extendVarEnvList env (map fst pairs) - bind_level = getBindLevel bind do_pair ((name, level), rhs) - = case (floatRhs new_env level rhs) of { (fs, rhs_floats, rhs') -> + = case (floatRhs level rhs) of { (fs, rhs_floats, rhs') -> (fs, rhs_floats, (name, rhs')) } \end{code} @@ -183,13 +214,12 @@ floatBind env lvl bind@(Rec pairs) \begin{code} floatExpr, floatRhs - :: IdEnv Level - -> Level + :: Level -> LevelledExpr -> (FloatStats, FloatBinds, CoreExpr) -floatRhs env lvl arg - = case (floatExpr env lvl arg) of { (fsa, floats, arg') -> +floatRhs lvl arg + = case (floatExpr lvl arg) of { (fsa, floats, arg') -> case (partitionByMajorLevel lvl floats) of { (floats', heres) -> -- Dump bindings that aren't going to escape from a lambda -- This is to avoid floating the x binding out of @@ -197,44 +227,43 @@ floatRhs env lvl arg -- unnecessarily. It even causes a bug to do so if we have -- y = writeArr# a n (let x = e in b) -- because the y binding is an expr-ok-for-speculation one. + -- [SLPJ Dec 01: I don't understand this last comment; + -- writeArr# is not ok-for-spec because of its side effect] (fsa, floats', install heres arg') }} -floatExpr env _ (Var v) = (zeroStats, [], Var v) -floatExpr env _ (Type ty) = (zeroStats, [], Type ty) -floatExpr env _ (Lit lit) = (zeroStats, [], Lit lit) +floatExpr _ (Var v) = (zeroStats, [], Var v) +floatExpr _ (Type ty) = (zeroStats, [], Type ty) +floatExpr _ (Lit lit) = (zeroStats, [], Lit lit) -floatExpr env lvl (App e a) - = case (floatExpr env lvl e) of { (fse, floats_e, e') -> - case (floatRhs env lvl a) of { (fsa, floats_a, a') -> +floatExpr lvl (App e a) + = case (floatExpr lvl e) of { (fse, floats_e, e') -> + case (floatRhs lvl a) of { (fsa, floats_a, a') -> (fse `add_stats` fsa, floats_e ++ floats_a, App e' a') }} -floatExpr env lvl (Lam (tv,incd_lvl) e) - | isTyVar tv - = case (floatExpr env incd_lvl e) of { (fs, floats, e') -> - - -- Dump any bindings which absolutely cannot go any further - case (partitionByLevel incd_lvl floats) of { (floats', heres) -> - - (fs, floats', Lam tv (install heres e')) - }} - -floatExpr env lvl (Lam (arg,incd_lvl) rhs) - = ASSERT( isId arg ) - let - new_env = extendVarEnv env arg incd_lvl +floatExpr lvl lam@(Lam _ _) + = let + (bndrs_w_lvls, body) = collectBinders lam + (bndrs, lvls) = unzip bndrs_w_lvls + + -- For the all-tyvar case we are prepared to pull + -- the lets out, to implement the float-out-of-big-lambda + -- transform; but otherwise we only float bindings that are + -- going to escape a value lambda. + -- In particular, for one-shot lambdas we don't float things + -- out; we get no saving by so doing. + partition_fn | all isTyVar bndrs = partitionByLevel + | otherwise = partitionByMajorLevel in - case (floatExpr new_env incd_lvl rhs) of { (fs, floats, rhs') -> + case (floatExpr (last lvls) body) of { (fs, floats, body') -> -- Dump any bindings which absolutely cannot go any further - case (partitionByLevel incd_lvl floats) of { (floats', heres) -> + case (partition_fn (head lvls) floats) of { (floats', heres) -> - (add_to_stats fs floats', - floats', - Lam arg (install heres rhs')) + (add_to_stats fs floats', floats', mkLams bndrs (install heres body')) }} -floatExpr env lvl (Note note@(SCC cc) expr) - = case (floatExpr env lvl expr) of { (fs, floating_defns, expr') -> +floatExpr lvl (Note note@(SCC cc) expr) + = case (floatExpr lvl expr) of { (fs, floating_defns, expr') -> let -- Annotate bindings floated outwards past an scc expression -- with the cc. We mark that cc as "duplicated", though. @@ -254,23 +283,23 @@ floatExpr env lvl (Note note@(SCC cc) expr) ann_bind (Rec pairs) = Rec [(binder, mkSCC dupd_cc rhs) | (binder, rhs) <- pairs] -floatExpr env lvl (Note InlineMe expr) -- Other than SCCs - = case floatExpr env InlineCtxt expr of { (fs, floating_defns, expr') -> +floatExpr lvl (Note InlineMe expr) -- Other than SCCs + = case floatExpr InlineCtxt expr of { (fs, floating_defns, expr') -> WARN( not (null floating_defns), ppr expr $$ ppr floating_defns ) -- We do no floating out of Inlines (fs, [], Note InlineMe expr') } -- See notes in SetLevels -floatExpr env lvl (Note note expr) -- Other than SCCs - = case (floatExpr env lvl expr) of { (fs, floating_defns, expr') -> +floatExpr lvl (Note note expr) -- Other than SCCs + = case (floatExpr lvl expr) of { (fs, floating_defns, expr') -> (fs, floating_defns, Note note expr') } -floatExpr env lvl (Let bind body) - = case (floatBind env lvl bind) of { (fsb, rhs_floats, bind', new_env) -> - case (floatExpr new_env lvl body) of { (fse, body_floats, body') -> - if isInlineCtxt lvl then -- No floating inside an InlineMe - ASSERT( null rhs_floats && null body_floats ) - (add_stats fsb fse, [], Let bind' body') - else +floatExpr lvl (Let bind body) + = case (floatBind bind) of { (fsb, rhs_floats, bind') -> + case (floatExpr lvl body) of { (fse, body_floats, body') -> +-- if isInlineCtxt lvl then -- No floating inside an InlineMe +-- ASSERT( null rhs_floats && null body_floats ) +-- (add_stats fsb fse, [], Let bind' body') +-- else (add_stats fsb fse, rhs_floats ++ [(bind_lvl, bind')] ++ body_floats, body') @@ -278,24 +307,17 @@ floatExpr env lvl (Let bind body) where bind_lvl = getBindLevel bind -floatExpr env lvl (Case scrut (case_bndr, case_lvl) alts) - = case floatExpr env lvl scrut of { (fse, fde, scrut') -> +floatExpr lvl (Case scrut (case_bndr, case_lvl) alts) + = case floatExpr lvl scrut of { (fse, fde, scrut') -> case floatList float_alt alts of { (fsa, fda, alts') -> (add_stats fse fsa, fda ++ fde, Case scrut' case_bndr alts') }} where - alts_env = extendVarEnv env case_bndr case_lvl - - partition_fn = partitionByMajorLevel - - float_alt (con, bs, rhs) - = let - bs' = map fst bs - new_env = extendVarEnvList alts_env bs - in - case (floatExpr new_env case_lvl rhs) of { (fs, rhs_floats, rhs') -> - case (partition_fn case_lvl rhs_floats) of { (rhs_floats', heres) -> - (fs, rhs_floats', (con, bs', install heres rhs')) }} + -- Use floatRhs for the alternatives, so that we + -- don't gratuitiously float bindings out of the RHSs + float_alt (con, bs, rhs) + = case (floatRhs case_lvl rhs) of { (fs, rhs_floats, rhs') -> + (fs, rhs_floats, (con, map fst bs, rhs')) } floatList :: (a -> (FloatStats, FloatBinds, b)) -> [a] -> (FloatStats, FloatBinds, [b]) @@ -362,8 +384,7 @@ partitionByMajorLevel, partitionByLevel partitionByMajorLevel ctxt_lvl defns = partition float_further defns where - -- Float it if we escape a value lambda, - -- or if we get to the top level + -- Float it if we escape a value lambda, or if we get to the top level float_further (my_lvl, bind) = my_lvl `ltMajLvl` ctxt_lvl || isTopLvl my_lvl -- The isTopLvl part says that if we can get to the top level, say "yes" anyway -- This means that diff --git a/ghc/compiler/simplCore/OccurAnal.lhs b/ghc/compiler/simplCore/OccurAnal.lhs index 895d743..e76d267 100644 --- a/ghc/compiler/simplCore/OccurAnal.lhs +++ b/ghc/compiler/simplCore/OccurAnal.lhs @@ -26,7 +26,8 @@ import Id ( isDataConId, isOneShotLambda, setOneShotLambda, idSpecialisation, isLocalId, idType, idUnique, Id ) -import IdInfo ( OccInfo(..), shortableIdInfo, copyIdInfo ) +import IdInfo ( shortableIdInfo, copyIdInfo ) +import BasicTypes ( OccInfo(..), isOneOcc ) import VarSet import VarEnv @@ -468,9 +469,7 @@ reOrderRec env (CyclicSCC (bind : binds)) inlineCandidate :: Id -> CoreExpr -> Bool inlineCandidate id (Note InlineMe _) = True - inlineCandidate id rhs = case idOccInfo id of - OneOcc _ _ -> True - other -> False + inlineCandidate id rhs = isOneOcc (idOccInfo id) -- Real example (the Enum Ordering instance from PrelBase): -- rec f = \ x -> case d of (p,q,r) -> p x @@ -636,7 +635,7 @@ occAnal env expr@(Lam _ _) (binders, body) = collectBinders expr (linear, env1, _) = oneShotGroup env binders env2 = env1 `addNewCands` binders -- Add in-scope binders - env_body = vanillaCtxt env2 -- Body is (no longer) an RhsContext + env_body = vanillaCtxt env2 -- Body is (no longer) an RhsContext occAnal env (Case scrut bndr alts) = case mapAndUnzip (occAnalAlt alt_env bndr) alts of { (alts_usage_s, alts') -> diff --git a/ghc/compiler/simplCore/SetLevels.lhs b/ghc/compiler/simplCore/SetLevels.lhs index 451240a..1eacf4d 100644 --- a/ghc/compiler/simplCore/SetLevels.lhs +++ b/ghc/compiler/simplCore/SetLevels.lhs @@ -43,7 +43,7 @@ \begin{code} module SetLevels ( - setLevels, + setLevels, Level(..), tOP_LEVEL, @@ -54,6 +54,7 @@ module SetLevels ( import CoreSyn +import CmdLineOpts ( FloatOutSwitches(..) ) import CoreUtils ( exprType, exprIsTrivial, exprIsBottom, mkPiTypes ) import CoreFVs -- all of it import Subst @@ -203,7 +204,7 @@ instance Eq Level where %************************************************************************ \begin{code} -setLevels :: Bool -- True <=> float lambdas to top level +setLevels :: FloatOutSwitches -> [CoreBind] -> UniqSupply -> [LevelledBind] @@ -365,6 +366,7 @@ lvlMFE strict_ctxt ctxt_lvl env ann_expr@(fvs, _) good_destination = dest_lvl `ltMajLvl` ctxt_lvl -- Escapes a value lambda || (isTopLvl dest_lvl -- Goes to the top + && floatConsts env && not strict_ctxt) -- or from a strict context -- A decision to float entails let-binding this thing, and we only do -- that if we'll escape a value lambda, or will go to the top level. @@ -375,6 +377,12 @@ lvlMFE strict_ctxt ctxt_lvl env ann_expr@(fvs, _) -- concat = /\ a -> lvl a -- lvl = /\ a -> foldr ..a.. (++) [] -- which is pretty stupid. Hence the strict_ctxt test + -- + -- We are keen to float something to the top level, even if it does not + -- escape a lambda, because then it needs no allocation. But it's controlled + -- by a flag, because doing this too early loses opportunities for RULES + -- which (needless to say) are important in some nofib programs + -- (gcd is an example). \end{code} @@ -500,11 +508,6 @@ lvlFloatRhs abs_vars dest_lvl env rhs %************************************************************************ \begin{code} -collectAnnBndrs :: CoreExprWithFVs -> ([CoreBndr], CoreExprWithFVs) -collectAnnBndrs (_, AnnLam b e) = case collectAnnBndrs e of - (bs,e') -> (b:bs, e') -collectAnnBndrs e = ([], e) - lvlLamBndrs :: Level -> [CoreBndr] -> (Level, [(CoreBndr, Level)]) -- Compute the levels for the binders of a lambda group -- The binders returned are exactly the same as the ones passed, @@ -574,7 +577,7 @@ isFunction other = False %************************************************************************ \begin{code} -type LevelEnv = (Bool, -- True <=> Float lambdas too +type LevelEnv = (FloatOutSwitches, VarEnv Level, -- Domain is *post-cloned* TyVars and Ids Subst, -- Domain is pre-cloned Ids; tracks the in-scope set -- so that subtitution is capture-avoiding @@ -600,11 +603,14 @@ type LevelEnv = (Bool, -- True <=> Float lambdas too -- -- The domain of the VarEnv Level is the *post-cloned* Ids -initialEnv :: Bool -> LevelEnv +initialEnv :: FloatOutSwitches -> LevelEnv initialEnv float_lams = (float_lams, emptyVarEnv, emptySubst, emptyVarEnv) floatLams :: LevelEnv -> Bool -floatLams (float_lams, _, _, _) = float_lams +floatLams (FloatOutSw float_lams _, _, _, _) = float_lams + +floatConsts :: LevelEnv -> Bool +floatConsts (FloatOutSw _ float_consts, _, _, _) = float_consts extendLvlEnv :: LevelEnv -> [(Var,Level)] -> LevelEnv -- Used when *not* cloning diff --git a/ghc/compiler/simplCore/SimplMonad.lhs b/ghc/compiler/simplCore/SimplMonad.lhs index deae477..27c9eec 100644 --- a/ghc/compiler/simplCore/SimplMonad.lhs +++ b/ghc/compiler/simplCore/SimplMonad.lhs @@ -73,7 +73,7 @@ import UniqSupply ( uniqsFromSupply, uniqFromSupply, splitUniqSupply, import FiniteMap import BasicTypes ( TopLevelFlag, isTopLevel, Activation, isActive, isAlwaysActive, - OccInfo(..) + OccInfo(..), isOneOcc ) import CmdLineOpts ( SimplifierSwitch(..), SimplifierMode(..), DynFlags, DynFlag(..), dopt, @@ -788,7 +788,7 @@ seems a bit fragile. \begin{code} preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> Bool preInlineUnconditionally env top_lvl bndr - | isTopLevel top_lvl = False + | isTopLevel top_lvl, SimplPhase 0 <- phase = False -- If we don't have this test, consider -- x = length [1,2,3] -- The full laziness pass carefully floats all the cons cells to @@ -799,7 +799,10 @@ preInlineUnconditionally env top_lvl bndr -- -- On the other hand, I have seen cases where top-level fusion is -- lost if we don't inline top level thing (e.g. string constants) --- We'll have to see +-- Hence the test for phase zero (which is the phase for all the final +-- simplifications). Until phase zero we take no special notice of +-- top level things, but then we become more leery about inlining +-- them. | not active = False | opt_SimplNoPreInlining = False @@ -809,7 +812,8 @@ preInlineUnconditionally env top_lvl bndr -- Not inside a lambda, one occurrence ==> safe! other -> False where - active = case getMode env of + phase = getMode env + active = case phase of SimplGently -> isAlwaysActive prag SimplPhase n -> isActive n prag prag = idInlinePragma bndr @@ -844,12 +848,18 @@ our new view that inlining is like a RULE, so I'm sticking to the 'active' story for now. \begin{code} -postInlineUnconditionally :: SimplEnv -> OutId -> Bool -> OutExpr -> Bool -postInlineUnconditionally env bndr loop_breaker rhs - = exprIsTrivial rhs - && active - && not loop_breaker - && not (isExportedId bndr) +postInlineUnconditionally :: SimplEnv -> OutId -> OccInfo -> OutExpr -> Bool +postInlineUnconditionally env bndr occ_info rhs + = exprIsTrivial rhs && active && isOneOcc occ_info + -- We used to have (not loop_breaker && not (isExportedId bndr)) + -- instead of (isOneOcc occ_info). Indeed, you might suppose that + -- there is nothing wrong with substituting for a trivial RHS, even + -- if it occurs many times. But consider + -- x = y + -- h = _inline_me_ (...x...) + -- Here we do *not* want to have x inlined, even though the RHS is + -- trivial, becuase the contract for an INLINE pragma is "no inlining". + -- This is important in the rules for the Prelude (e.g. PrelEnum.eftInt). where active = case getMode env of SimplGently -> isAlwaysActive prag @@ -888,10 +898,6 @@ activeInline env id occ where prag = idInlinePragma id --- Belongs in BasicTypes; this frag occurs in OccurAnal too -isOneOcc (OneOcc _ _) = True -isOneOcc other = False - activeRule :: SimplEnv -> Maybe (Activation -> Bool) -- Nothing => No rules at all activeRule env diff --git a/ghc/compiler/simplCore/SimplUtils.lhs b/ghc/compiler/simplCore/SimplUtils.lhs index e894bc0..817ae8f 100644 --- a/ghc/compiler/simplCore/SimplUtils.lhs +++ b/ghc/compiler/simplCore/SimplUtils.lhs @@ -36,10 +36,10 @@ import Id ( Id, idType, idInfo, ) import NewDemand ( isStrictDmd, isBotRes, splitStrictSig ) import SimplMonad -import Type ( Type, seqType, - splitTyConApp_maybe, tyConAppArgs, mkTyVarTys, - splitRepFunTys, isStrictType +import Type ( Type, seqType, splitRepFunTys, isStrictType, + splitTyConApp_maybe, tyConAppArgs, mkTyVarTys ) +import TcType ( isDictTy ) import OccName ( UserFS ) import TyCon ( tyConDataConsIfAvailable, isAlgTyCon, isNewTyCon ) import DataCon ( dataConRepArity, dataConSig, dataConArgTys ) @@ -547,11 +547,20 @@ tryEtaReduce bndrs body go [] (Var fun) | ok_fun fun = Just (Var fun) -- Success! go _ _ = Nothing -- Failure! - ok_fun fun = not (fun `elem` bndrs) && - isEvaldUnfolding (idUnfolding fun) - -- The exprIsValue is because eta reduction is not + ok_fun fun = not (fun `elem` bndrs) && + (isEvaldUnfolding (idUnfolding fun) || all ok_lam bndrs) + ok_lam v = isTyVar v || isDictTy (idType v) + -- The isEvaldUnfolding is because eta reduction is not -- valid in general: \x. bot /= bot -- So we need to be sure that the "fun" is a value. + -- + -- However, we always want to reduce (/\a -> f a) to f + -- This came up in a RULE: foldr (build (/\a -> g a)) + -- did not match foldr (build (/\b -> ...something complex...)) + -- The type checker can insert these eta-expanded versions, + -- with both type and dictionary lambdas; hence the slightly + -- ad-hoc isDictTy + ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg \end{code} @@ -1045,6 +1054,45 @@ So the case-elimination algorithm is: If so, then we can replace the case with one of the rhss. +Further notes about case elimination +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider: test :: Integer -> IO () + test = print + +Turns out that this compiles to: + Print.test + = \ eta :: Integer + eta1 :: State# RealWorld -> + case PrelNum.< eta PrelNum.zeroInteger of wild { __DEFAULT -> + case hPutStr stdout + (PrelNum.jtos eta ($w[] @ Char)) + eta1 + of wild1 { (# new_s, a4 #) -> PrelIO.lvl23 new_s }} + +Notice the strange '<' which has no effect at all. This is a funny one. +It started like this: + +f x y = if x < 0 then jtos x + else if y==0 then "" else jtos x + +At a particular call site we have (f v 1). So we inline to get + + if v < 0 then jtos x + else if 1==0 then "" else jtos x + +Now simplify the 1==0 conditional: + + if v<0 then jtos v else jtos v + +Now common-up the two branches of the case: + + case (v<0) of DEFAULT -> jtos v + +Why don't we drop the case? Because it's strict in v. It's technically +wrong to drop even unnecessary evaluations, and in practice they +may be a result of 'seq' so we *definitely* don't want to drop those. +I don't really know how to improve this situation. + \begin{code} -------------------------------------------------- @@ -1078,6 +1126,7 @@ mkCase1 scrut case_bndr [(con,bndrs,rhs)] -- Here we must *not* discard the case, because dataToTag# just fetches the tag from -- the info pointer. So we'll be pedantic all the time, and see if that gives any -- other problems +-- Also we don't want to discard 'seq's = tick (CaseElim case_bndr) `thenSmpl_` returnSmpl (bindCaseBndr case_bndr scrut rhs) diff --git a/ghc/compiler/simplCore/Simplify.lhs b/ghc/compiler/simplCore/Simplify.lhs index af9ac73..ca69dab 100644 --- a/ghc/compiler/simplCore/Simplify.lhs +++ b/ghc/compiler/simplCore/Simplify.lhs @@ -327,6 +327,18 @@ simplNonRecX :: SimplEnv -> SimplM FloatsWithExpr simplNonRecX env bndr new_rhs thing_inside + | needsCaseBinding (idType bndr) new_rhs + -- Make this test *before* the preInlineUnconditionally + -- Consider case I# (quotInt# x y) of + -- I# v -> let w = J# v in ... + -- If we gaily inline (quotInt# x y) for v, we end up building an + -- extra thunk: + -- let w = J# (quotInt# x y) in ... + -- because quotInt# can fail. + = simplBinder env bndr `thenSmpl` \ (env, bndr') -> + thing_inside env `thenSmpl` \ (floats, body) -> + returnSmpl (emptyFloats env, Case new_rhs bndr' [(DEFAULT, [], wrapFloats floats body)]) + | preInlineUnconditionally env NotTopLevel bndr -- This happens; for example, the case_bndr during case of -- known constructor: case (a,b) of x { (p,q) -> ... } @@ -344,11 +356,6 @@ simplNonRecX env bndr new_rhs thing_inside bndr bndr' new_rhs thing_inside completeNonRecX env is_strict old_bndr new_bndr new_rhs thing_inside - | needsCaseBinding (idType new_bndr) new_rhs - = thing_inside env `thenSmpl` \ (floats, body) -> - returnSmpl (emptyFloats env, Case new_rhs new_bndr [(DEFAULT, [], wrapFloats floats body)]) - - | otherwise = mkAtomicArgs is_strict True {- OK to float unlifted -} new_rhs `thenSmpl` \ (aux_binds, rhs2) -> @@ -550,7 +557,7 @@ completeLazyBind :: SimplEnv -- (as usual) use the in-scope-env from the floats completeLazyBind env top_lvl old_bndr new_bndr new_rhs - | postInlineUnconditionally env new_bndr loop_breaker new_rhs + | postInlineUnconditionally env new_bndr occ_info new_rhs = -- Drop the binding tick (PostInlineUnconditionally old_bndr) `thenSmpl_` returnSmpl (emptyFloats env, extendSubst env old_bndr (DoneEx new_rhs)) @@ -1290,10 +1297,10 @@ We'll perform the binder-swap for the outer case, giving case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 } ...other cases .... } -But there is no point in doing it for the inner case, -because w1 can't be inlined anyway. Furthermore, doing the case-swapping -involves zapping w2's occurrence info (see paragraphs that follow), -and that forces us to bind w2 when doing case merging. So we get +But there is no point in doing it for the inner case, because w1 can't +be inlined anyway. Furthermore, doing the case-swapping involves +zapping w2's occurrence info (see paragraphs that follow), and that +forces us to bind w2 when doing case merging. So we get case x of w1 { A -> let w2 = w1 in e1 B -> let w2 = w1 in e2 diff --git a/ghc/compiler/stranal/DmdAnal.lhs b/ghc/compiler/stranal/DmdAnal.lhs index 8bfd8f8..3759fe7 100644 --- a/ghc/compiler/stranal/DmdAnal.lhs +++ b/ghc/compiler/stranal/DmdAnal.lhs @@ -894,7 +894,7 @@ lubs = zipWithDmds lub box (Call d) = Call d -- The odd man out. Why? box (Box d) = Box d box (Defer _) = lazyDmd -box Top = lazyDmd -- Box Abs and Box Top +box Top = lazyDmd -- Box Abs and Box Top box Abs = lazyDmd -- are the same box d = Box d -- Bot, Eval -- 1.7.10.4