From 4e6d579860228f1264558d1cb03f27f239333039 Mon Sep 17 00:00:00 2001 From: simonpj Date: Thu, 7 Sep 2000 16:32:24 +0000 Subject: [PATCH] [project @ 2000-09-07 16:32:23 by simonpj] A list of simplifier-related stuff, triggered by looking at GHC's performance. I don't guarantee that this lot will lead to a uniform improvement over 4.08, but it it should be a bit better. More work probably required. * Make the simplifier's Stop continuation record whether the expression being simplified is the RHS of a thunk, or (say) the body of a lambda or case RHS. In the thunk case we want to be a bit keener about inlining if the type of the thunk is amenable to update in place. * Fix interestingArg, which was being too liberal, and hence doing too much inlining. * Extended CoreUtils.exprIsCheap to make two more things cheap: - case (coerce x) of ... - let x = y +# z This makes a bit more eta expansion happen. It was provoked by a program of Marcin's. * MkIface.ifaceBinds. Make sure that we emit rules for things (like class operations) that don't get a top-level binding in the interface file. Previously such rules were silently forgotten. * Move transformRhs to *after* simplification, which makes it a little easier to do, and means that the arity it computes is readily available to completeBinding. This gets much better arities. * Do coerce splitting in completeBinding. This gets good code for newtype CInt = CInt Int test:: CInt -> Int test x = case x of 1 -> 2 2 -> 4 3 -> 8 4 -> 16 _ -> 0 * Modify the meaning of "arity" so that during compilation it means "if you apply this function to fewer args, it will do virtually no work". So, for example f = coerce t (\x -> e) has arity at least 1. When a function is exported, it's arity becomes the number of exposed, top-level lambdas, which is subtly different. But that's ok. I removed CoreUtils.exprArity altogether: it looked only at the exposed lambdas. Instead, we use exprEtaExpandArity exclusively. All of this makes I/O programs work much better. --- ghc/compiler/basicTypes/Id.lhs | 21 +-- ghc/compiler/coreSyn/CoreFVs.lhs | 4 +- ghc/compiler/coreSyn/CoreUnfold.lhs | 26 ++- ghc/compiler/coreSyn/CoreUtils.lhs | 49 ++---- ghc/compiler/main/MkIface.lhs | 125 ++++++++------ ghc/compiler/simplCore/OccurAnal.lhs | 18 +- ghc/compiler/simplCore/SetLevels.lhs | 20 +-- ghc/compiler/simplCore/SimplMonad.lhs | 50 +++++- ghc/compiler/simplCore/SimplUtils.lhs | 282 ++++++++++++++++++-------------- ghc/compiler/simplCore/Simplify.lhs | 291 +++++++++++++++++---------------- ghc/compiler/stranal/WorkWrap.lhs | 37 +++-- 11 files changed, 524 insertions(+), 399 deletions(-) diff --git a/ghc/compiler/basicTypes/Id.lhs b/ghc/compiler/basicTypes/Id.lhs index 0076c36..4901db0 100644 --- a/ghc/compiler/basicTypes/Id.lhs +++ b/ghc/compiler/basicTypes/Id.lhs @@ -27,16 +27,16 @@ module Id ( externallyVisibleId, idFreeTyVars, isIP, - - -- Inline pragma stuff - idInlinePragma, setInlinePragma, modifyInlinePragma, - isSpecPragmaId, isRecordSelector, isPrimOpId, isPrimOpId_maybe, isDataConId, isDataConId_maybe, isDataConWrapId, isDataConWrapId_maybe, isBottomingId, isExportedId, isUserExportedId, - mayHaveNoBinding, + hasNoBinding, + + -- Inline pragma stuff + idInlinePragma, setInlinePragma, modifyInlinePragma, + -- One shot lambda stuff isOneShotLambda, setOneShotLambda, clearOneShotLambda, @@ -237,16 +237,13 @@ isSpecPragmaId id = case idFlavour id of SpecPragmaId -> True other -> False -mayHaveNoBinding id = case idFlavour id of +hasNoBinding id = case idFlavour id of DataConId _ -> True PrimOpId _ -> True other -> False - -- mayHaveNoBinding returns True of an Id which may not have a + -- hasNoBinding returns True of an Id which may not have a -- binding, even though it is defined in this module. Notably, -- the constructors of a dictionary are in this situation. - -- - -- mayHaveNoBinding returns True of some things that *do* have a local binding, - -- so it's only an approximation. That's ok... it's only use for assertions. -- Don't drop a binding for an exported Id, -- if it otherwise looks dead. @@ -294,9 +291,7 @@ exportWithOrigOccName id = omitIfaceSigForId id || isUserExportedId id \begin{code} isDeadBinder :: Id -> Bool -isDeadBinder bndr | isId bndr = case idOccInfo bndr of - IAmDead -> True - other -> False +isDeadBinder bndr | isId bndr = isDeadOcc (idOccInfo bndr) | otherwise = False -- TyVars count as not dead isIP id = isIPOcc (getOccName id) diff --git a/ghc/compiler/coreSyn/CoreFVs.lhs b/ghc/compiler/coreSyn/CoreFVs.lhs index 5784439..42dcee8 100644 --- a/ghc/compiler/coreSyn/CoreFVs.lhs +++ b/ghc/compiler/coreSyn/CoreFVs.lhs @@ -18,7 +18,7 @@ module CoreFVs ( #include "HsVersions.h" import CoreSyn -import Id ( Id, idFreeTyVars, mayHaveNoBinding, idSpecialisation ) +import Id ( Id, idFreeTyVars, hasNoBinding, idSpecialisation ) import VarSet import Var ( Var, isId ) import Name ( isLocallyDefined ) @@ -37,7 +37,7 @@ import Outputable mustHaveLocalBinding :: Var -> Bool -- True <=> the variable must have a binding in this module mustHaveLocalBinding v - | isId v = isLocallyDefined v && not (mayHaveNoBinding v) + | isId v = isLocallyDefined v && not (hasNoBinding v) | otherwise = True -- TyVars etc must \end{code} diff --git a/ghc/compiler/coreSyn/CoreUnfold.lhs b/ghc/compiler/coreSyn/CoreUnfold.lhs index c170c47..ae9fbb6 100644 --- a/ghc/compiler/coreSyn/CoreUnfold.lhs +++ b/ghc/compiler/coreSyn/CoreUnfold.lhs @@ -214,7 +214,18 @@ sizeExpr (I# bOMB_OUT_SIZE) top_args expr size_up (Case (Var v) _ alts) | v `elem` top_args -- We are scrutinising an argument variable - = case alts of + = +{- I'm nuking this special case; BUT see the comment with case alternatives. + + (a) It's too eager. We don't want to inline a wrapper into a + context with no benefit. + E.g. \ x. f (x+x) o point in inlining (+) here! + + (b) It's ineffective. Once g's wrapper is inlined, its case-expressions + aren't scrutinising arguments any more + + case alts of + [alt] -> size_up_alt alt `addSize` SizeIs 0# (unitBag (v, 1)) 0# -- We want to make wrapper-style evaluation look cheap, so that -- when we inline a wrapper it doesn't make call site (much) bigger @@ -227,7 +238,9 @@ sizeExpr (I# bOMB_OUT_SIZE) top_args expr -- ordering difference, we make (case a of (x,y) -> ...), -- *where a is one of the arguments* look free. - other -> alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the scrutinee + other -> +-} + alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the scrutinee (foldr1 maxSize alt_sizes) -- Good to inline if an arg is scrutinised, because @@ -301,7 +314,8 @@ sizeExpr (I# bOMB_OUT_SIZE) top_args expr ------------ size_up_alt (con, bndrs, rhs) = size_up rhs - -- Don't charge for args, so that wrappers look cheap + -- Don't charge for args, so that wrappers look cheap + -- (See comments about wrappers with Case) ------------ -- We want to record if we're case'ing, or applying, an argument @@ -602,7 +616,7 @@ callSiteInline black_listed inline_call occ id arg_infos interesting_cont #ifdef DEBUG if opt_D_dump_inlinings then pprTrace "Considering inlining" - (ppr id <+> vcat [text "black listed" <+> ppr black_listed, + (ppr id <+> vcat [text "black listed:" <+> ppr black_listed, text "occ info:" <+> ppr occ, text "arg infos" <+> ppr arg_infos, text "interesting continuation" <+> ppr interesting_cont, @@ -700,8 +714,8 @@ normal_case rule_vars phase v | from_INLINE -> has_rules -- Black list until final phase | otherwise -> True -- Always blacklisted - IMustNotBeINLINEd from_inline (Just threshold) - | from_inline -> (phase < threshold && has_rules) + IMustNotBeINLINEd from_INLINE (Just threshold) + | from_INLINE -> (phase < threshold && has_rules) | otherwise -> (phase < threshold || has_rules) where has_rules = v `elemVarSet` rule_vars diff --git a/ghc/compiler/coreSyn/CoreUtils.lhs b/ghc/compiler/coreSyn/CoreUtils.lhs index 5e9736b..05a2520 100644 --- a/ghc/compiler/coreSyn/CoreUtils.lhs +++ b/ghc/compiler/coreSyn/CoreUtils.lhs @@ -11,7 +11,7 @@ module CoreUtils ( mkPiType, -- Properties of expressions - exprType, coreAltsType, exprArity, + exprType, coreAltsType, exprIsBottom, exprIsDupable, exprIsTrivial, exprIsCheap, exprIsValue,exprOkForSpeculation, exprIsBig, exprIsConApp_maybe, @@ -300,19 +300,16 @@ shared. The main examples of things which aren't WHNF but are * case e of pi -> ei + (where e, and all the ei are cheap) - where e, and all the ei are cheap; and - - * let x = e - in b - - where e and b are cheap; and + * let x = e in b + (where e and b are cheap) * op x1 ... xn - - where op is a cheap primitive operator + (where op is a cheap primitive operator) * error "foo" + (because we are happy to substitute it inside a lambda) Notice that a variable is considered 'cheap': we can push it inside a lambda, because sharing will make sure it is only evaluated once. @@ -324,10 +321,18 @@ exprIsCheap (Type _) = True exprIsCheap (Var _) = True exprIsCheap (Note _ e) = exprIsCheap e exprIsCheap (Lam x e) = if isId x then True else exprIsCheap e -exprIsCheap (Case (Var v) _ alts) = and [exprIsCheap rhs | (_,_,rhs) <- alts] +exprIsCheap (Case e _ alts) = exprIsCheap e && + and [exprIsCheap rhs | (_,_,rhs) <- alts] -- Experimentally, treat (case x of ...) as cheap + -- (and case __coerce x etc.) -- This improves arities of overloaded functions where -- there is only dictionary selection (no construction) involved +exprIsCheap (Let (NonRec x _) e) + | isUnLiftedType (idType x) = exprIsCheap e + | otherwise = False + -- strict lets always have cheap right hand sides, and + -- do no allocation. + exprIsCheap other_expr = go other_expr 0 True where @@ -337,9 +342,8 @@ exprIsCheap other_expr || idAppIsBottom f n_args -- Application of a function which - -- always gives bottom; we treat this as - -- a WHNF, because it certainly doesn't - -- need to be shared! + -- always gives bottom; we treat this as cheap + -- because it certainly doesn't need to be shared! go (App f a) n_args args_cheap | isTypeArg a = go f n_args args_cheap @@ -476,25 +480,6 @@ idAppIsValue id n_val_args \end{code} \begin{code} -exprArity :: CoreExpr -> Int -- How many value lambdas are at the top -exprArity (Lam b e) | isTyVar b = exprArity e - | otherwise = 1 + exprArity e - -exprArity (Note note e) | ok_note note = exprArity e - where - ok_note (Coerce _ _) = True - -- We *do* look through coerces when getting arities. - -- Reason: arities are to do with *representation* and - -- work duplication. - ok_note InlineMe = True - ok_note InlineCall = True - ok_note other = False - -- SCC and TermUsg might be over-conservative? - -exprArity other = 0 -\end{code} - -\begin{code} exprIsConApp_maybe :: CoreExpr -> Maybe (DataCon, [CoreExpr]) exprIsConApp_maybe expr = analyse (collectArgs expr) diff --git a/ghc/compiler/main/MkIface.lhs b/ghc/compiler/main/MkIface.lhs index 3321130..da7b866 100644 --- a/ghc/compiler/main/MkIface.lhs +++ b/ghc/compiler/main/MkIface.lhs @@ -24,7 +24,7 @@ import RnMonad import TcInstUtil ( InstInfo(..) ) import CmdLineOpts -import Id ( Id, idType, idInfo, omitIfaceSigForId, isUserExportedId, +import Id ( Id, idType, idInfo, omitIfaceSigForId, isUserExportedId, hasNoBinding, idSpecialisation ) import Var ( isId ) @@ -68,6 +68,7 @@ import Bag import Outputable import Maybe ( isNothing ) +import List ( partition ) import Monad ( when ) \end{code} @@ -322,6 +323,7 @@ completeIface new_iface local_tycons local_classes all_decls = cls_dcls ++ ty_dcls ++ bagToList val_dcls (inst_dcls, inst_ids) = ifaceInstances inst_info cls_dcls = map ifaceClass local_classes + ty_dcls = map ifaceTyCon (filter (not . isWiredInName . getName) local_tycons) (val_dcls, emitted_ids) = ifaceBinds (inst_ids `unionVarSet` orphan_rule_ids) @@ -358,7 +360,10 @@ ifaceRules rules emitted -- We can't print builtin rules in interface files -- Since they are built in, an importing module -- will have access to them anyway - all (`elemVarSet` emitted) (varSetElems (ruleSomeLhsFreeVars interestingId rule)) + + -- Sept 00: I've disabled this test. It doesn't stop many, if any, rules + -- from coming out, and to make it work properly we need to add + all (`elemVarSet` emitted) (varSetElems (ruleSomeLhsFreeVars interestingId rule)) -- Spit out a rule only if all its lhs free vars are emitted -- This is a good reason not to do it when we emit the Id itself ] @@ -489,6 +494,11 @@ ifaceBinds needed_ids final_ids binds Nothing -> pprTrace "ifaceBinds not found:" (ppr id) $ idInfo id + -- The 'needed' set contains the Ids that are needed by earlier + -- interface file emissions. If the Id isn't in this set, and isn't + -- exported, there's no need to emit anything + need_id needed_set id = id `elemVarSet` needed_set || isUserExportedId id + go needed [] decls emitted | not (isEmptyVarSet needed) = pprTrace "ifaceBinds: free vars:" (sep (map ppr (varSetElems needed))) @@ -496,18 +506,24 @@ ifaceBinds needed_ids final_ids binds | otherwise = (decls, emitted) go needed (NonRec id rhs : binds) decls emitted - = case ifaceId get_idinfo needed False id rhs of - Nothing -> go needed binds decls emitted - Just (decl, extras) -> let - needed' = (needed `unionVarSet` extras) `delVarSet` id - -- 'extras' can include the Id itself via a rule - emitted' = emitted `extendVarSet` id - in - go needed' binds (decl `consBag` decls) emitted' + | need_id needed id + = if omitIfaceSigForId id then + go (needed `delVarSet` id) binds decls (emitted `extendVarSet` id) + else + go ((needed `unionVarSet` extras) `delVarSet` id) + binds + (decl `consBag` decls) + (emitted `extendVarSet` id) + | otherwise + = go needed binds decls emitted + where + (decl, extras) = ifaceId get_idinfo False id rhs -- Recursive groups are a bit more of a pain. We may only need one to -- start with, but it may call out the next one, and so on. So we - -- have to look for a fixed point. + -- have to look for a fixed point. We don't want necessarily them all, + -- because without -O we may only need the first one (if we don't emit + -- its unfolding) go needed (Rec pairs : binds) decls emitted = go needed' binds decls' emitted' where @@ -519,42 +535,29 @@ ifaceBinds needed_ids final_ids binds go_rec :: IdSet -> [(Id,CoreExpr)] -> (Bag RdrNameHsDecl, IdSet, IdSet) go_rec needed pairs | null decls = (emptyBag, emptyVarSet, emptyVarSet) - | otherwise = (more_decls `unionBags` listToBag decls, - more_emitted `unionVarSet` mkVarSet emitted, - more_extras `unionVarSet` extras) + | otherwise = (more_decls `unionBags` listToBag decls, + more_emitted `unionVarSet` mkVarSet (map fst needed_prs), + more_extras `unionVarSet` extras) where - maybes = map do_one pairs - emitted = [id | ((id,_), Just _) <- pairs `zip` maybes] - reduced_pairs = [pair | (pair, Nothing) <- pairs `zip` maybes] - (decls, extras_s) = unzip (catMaybes maybes) - extras = unionVarSets extras_s - (more_decls, more_emitted, more_extras) = go_rec extras reduced_pairs - - do_one (id,rhs) = ifaceId get_idinfo needed True id rhs + (needed_prs,leftover_prs) = partition is_needed pairs + (decls, extras_s) = unzip [ifaceId get_idinfo True id rhs + | (id,rhs) <- needed_prs, not (omitIfaceSigForId id)] + extras = unionVarSets extras_s + (more_decls, more_emitted, more_extras) = go_rec extras leftover_prs + is_needed (id,_) = need_id needed id \end{code} \begin{code} ifaceId :: (Id -> IdInfo) -- This function "knows" the extra info added -- by the STG passes. Sigh - - -> IdSet -- Set of Ids that are needed by earlier interface - -- file emissions. If the Id isn't in this set, and isn't - -- exported, there's no need to emit anything -> Bool -- True <=> recursive, so don't print unfolding -> Id -> CoreExpr -- The Id's right hand side - -> Maybe (RdrNameHsDecl, IdSet) -- The emitted stuff, plus any *extra* needed Ids - -ifaceId get_idinfo needed_ids is_rec id rhs - | not (id `elemVarSet` needed_ids || -- Needed [no id in needed_ids has omitIfaceSigForId] - (isUserExportedId id && not (omitIfaceSigForId id))) -- or exported and not to be omitted - = Nothing -- Well, that was easy! + -> (RdrNameHsDecl, IdSet) -- The emitted stuff, plus any *extra* needed Ids -ifaceId get_idinfo needed_ids is_rec id rhs - = ASSERT2( arity_matches_strictness, ppr id ) - Just (SigD (IfaceSig (toRdrName id) (toHsType id_type) hs_idinfo noSrcLoc), - new_needed_ids) +ifaceId get_idinfo is_rec id rhs + = (SigD (IfaceSig (toRdrName id) (toHsType id_type) hs_idinfo noSrcLoc), new_needed_ids) where id_type = idType id core_idinfo = idInfo id @@ -565,7 +568,8 @@ ifaceId get_idinfo needed_ids is_rec id rhs strict_hsinfo ++ wrkr_hsinfo ++ unfold_hsinfo ------------ Arity -------------- - arity_info = arityInfo stg_idinfo + arity_info = arityInfo stg_idinfo + stg_arity = arityLowerBound arity_info arity_hsinfo = case arityInfo stg_idinfo of a@(ArityExactly n) -> [HsArity a] other -> [] @@ -589,11 +593,40 @@ ifaceId get_idinfo needed_ids is_rec id rhs ------------ Worker -------------- - work_info = workerInfo core_idinfo - has_worker = workerExists work_info - wrkr_hsinfo = case work_info of - HasWorker work_id _ -> [HsWorker (toRdrName work_id)] - other -> [] + -- We only treat a function as having a worker if + -- the exported arity (which is now the number of visible lambdas) + -- is the same as the arity at the moment of the w/w split + -- If so, we can safely omit the unfolding inside the wrapper, and + -- instead re-generate it from the type/arity/strictness info + -- But if the arity has changed, we just take the simple path and + -- put the unfolding into the interface file, forgetting the fact + -- that it's a wrapper. + -- + -- How can this happen? Sometimes we get + -- f = coerce t (\x y -> $wf x y) + -- at the moment of w/w split; but the eta reducer turns it into + -- f = coerce t $wf + -- which is perfectly fine except that the exposed arity so far as + -- the code generator is concerned (zero) differs from the arity + -- when we did the split (2). + -- + -- All this arises because we use 'arity' to mean "exactly how many + -- top level lambdas are there" in interface files; but during the + -- compilation of this module it means "how many things can I apply + -- this to". + work_info = workerInfo core_idinfo + HasWorker work_id _ = work_info + + has_worker = case work_info of + HasWorker work_id wrap_arity + | wrap_arity == stg_arity -> True + | otherwise -> pprTrace "ifaceId: arity change:" (ppr id) + False + + other -> False + + wrkr_hsinfo | has_worker = [HsWorker (toRdrName work_id)] + | otherwise = [] ------------ Unfolding -------------- inline_pragma = inlinePragInfo core_idinfo @@ -623,11 +656,10 @@ ifaceId get_idinfo needed_ids is_rec id rhs unfold_ids `unionVarSet` spec_ids - worker_ids = case work_info of - HasWorker work_id _ | interestingId work_id -> unitVarSet work_id + worker_ids | has_worker && interestingId work_id = unitVarSet work_id -- Conceivably, the worker might come from -- another module - other -> emptyVarSet + | otherwise = emptyVarSet spec_ids = filterVarSet interestingId (rulesRhsFreeVars spec_info) @@ -644,7 +676,6 @@ ifaceId get_idinfo needed_ids is_rec id rhs HasWorker _ wrap_arity -> wrap_arity == arityLowerBound arity_info other -> True -interestingId id = isId id && isLocallyDefined id && - not (omitIfaceSigForId id) +interestingId id = isId id && isLocallyDefined id && not (hasNoBinding id) \end{code} diff --git a/ghc/compiler/simplCore/OccurAnal.lhs b/ghc/compiler/simplCore/OccurAnal.lhs index b7d7c22..ad9b70f 100644 --- a/ghc/compiler/simplCore/OccurAnal.lhs +++ b/ghc/compiler/simplCore/OccurAnal.lhs @@ -87,20 +87,20 @@ occurAnalyseRule (Rule str tpl_vars tpl_args rhs) In @occAnalTop@ we do indirection-shorting. That is, if we have this: - loc = + x_local = ... - exp = loc + x_exported = loc where exp is exported, and loc is not, then we replace it with this: - loc = exp - exp = + x_local = x_exported + x_exported = ... -Without this we never get rid of the exp = loc thing. This save a -gratuitous jump (from \tr{x_exported} to \tr{x_local}), and makes -strictness information propagate better. This used to happen in the -final phase, but it's tidier to do it here. +Without this we never get rid of the x_exported = x_local thing. This +save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and +makes strictness information propagate better. This used to happen in +the final phase, but it's tidier to do it here. If more than one exported thing is equal to a local thing (i.e., the local thing really is shared), then we do one only: @@ -171,7 +171,7 @@ occurAnalyseBinds binds ind_env' = extendVarEnv ind_env local_id exported_id other -> -- Ho ho! The normal case - (final_usage, ind_env, new_binds ++ binds') + (final_usage, ind_env, new_binds ++ binds') initialTopEnv = OccEnv isLocallyDefined -- Anything local is interesting emptyVarSet diff --git a/ghc/compiler/simplCore/SetLevels.lhs b/ghc/compiler/simplCore/SetLevels.lhs index 517d2d9..515185f 100644 --- a/ghc/compiler/simplCore/SetLevels.lhs +++ b/ghc/compiler/simplCore/SetLevels.lhs @@ -133,9 +133,6 @@ ltLvl (Level maj1 min1) (Level maj2 min2) ltMajLvl :: Level -> Level -> Bool -- Tells if one level belongs to a difft *lambda* level to another - -- But it returns True regardless if l1 is the top level - -- We always like to float to the top! -ltMajLvl (Level 0 0) _ = True ltMajLvl (Level maj1 _) (Level maj2 _) = maj1 < maj2 isTopLvl :: Level -> Bool @@ -144,6 +141,9 @@ isTopLvl other = False instance Outputable Level where ppr (Level maj min) = hcat [ char '<', int maj, char ',', int min, char '>' ] + +instance Eq Level where + (Level maj1 min1) == (Level maj2 min2) = maj1==maj2 && min1==min2 \end{code} %************************************************************************ @@ -226,8 +226,8 @@ lvlExpr ctxt_lvl env (_, AnnApp fun arg) -- but we do if the function is big and hairy, like a case lvlExpr ctxt_lvl env (_, AnnNote InlineMe expr) - -- Don't float anything out of an InlineMe - = lvlExpr tOP_LEVEL env expr `thenLvl` \ expr' -> +-- Don't float anything out of an InlineMe; hence the tOP_LEVEL + = lvlExpr tOP_LEVEL env expr `thenLvl` \ expr' -> returnLvl (Note InlineMe expr') lvlExpr ctxt_lvl env (_, AnnNote note expr) @@ -305,6 +305,8 @@ lvlMFE strict_ctxt ctxt_lvl env ann_expr@(fvs, _) || not good_destination || exprIsTrivial expr -- Is trivial || (strict_ctxt && exprIsBottom expr) -- Strict context and is bottom + -- e.g. \x -> error "foo" + -- No gain from floating this = -- Don't float it out lvlExpr ctxt_lvl env ann_expr @@ -734,11 +736,9 @@ subst_id_info (_, _, subst, _) ctxt_lvl dest_lvl v -- VERY IMPORTANT: we must zap the demand info -- if the thing is going to float out past a lambda zap_dmd info - | float_past_lam && isStrict (demandInfo info) - = setDemandInfo info wwLazy - | otherwise - = info + | stays_put || not (isStrict (demandInfo info)) = info + | otherwise = setDemandInfo info wwLazy - float_past_lam = ctxt_lvl `ltMajLvl` dest_lvl + stays_put = ctxt_lvl == dest_lvl \end{code} diff --git a/ghc/compiler/simplCore/SimplMonad.lhs b/ghc/compiler/simplCore/SimplMonad.lhs index fac41a7..322f0f5 100644 --- a/ghc/compiler/simplCore/SimplMonad.lhs +++ b/ghc/compiler/simplCore/SimplMonad.lhs @@ -39,14 +39,19 @@ module SimplMonad ( getSubstEnv, extendSubst, extendSubstList, getInScope, setInScope, modifyInScope, addNewInScopeIds, setSubstEnv, zapSubstEnv, - getSimplBinderStuff, setSimplBinderStuff + getSimplBinderStuff, setSimplBinderStuff, + + -- Adding bindings + addLetBind, addLetBinds, addAuxiliaryBind, addAuxiliaryBinds, + addCaseBind, needsCaseBinding, addNonRecBind ) where #include "HsVersions.h" -import Id ( Id, mkSysLocal, idUnfolding, isDataConWrapId ) +import Id ( Id, mkSysLocal, idType, idUnfolding, isDataConWrapId ) import CoreSyn import CoreUnfold ( isCompulsoryUnfolding ) +import CoreUtils ( exprOkForSpeculation ) import PprCore () -- Instances import CostCentre ( CostCentreStack, subsumedCCS ) import Name ( isLocallyDefined ) @@ -57,7 +62,7 @@ import qualified Subst import Subst ( Subst, mkSubst, substEnv, InScopeSet, mkInScopeSet, substInScope, isInScope ) -import Type ( Type ) +import Type ( Type, isUnLiftedType ) import UniqSupply ( uniqsFromSupply, uniqFromSupply, splitUniqSupply, UniqSupply ) @@ -106,6 +111,45 @@ type OutStuff a = ([OutBind], a) -- incrementally. Comments just before simplExprB in Simplify.lhs \end{code} +\begin{code} +addLetBind :: CoreBind -> SimplM (OutStuff a) -> SimplM (OutStuff a) +addLetBind bind thing_inside + = thing_inside `thenSmpl` \ (binds, res) -> + returnSmpl (bind : binds, res) + +addLetBinds :: [CoreBind] -> SimplM (OutStuff a) -> SimplM (OutStuff a) +addLetBinds binds1 thing_inside + = thing_inside `thenSmpl` \ (binds2, res) -> + returnSmpl (binds1 ++ binds2, res) + +addAuxiliaryBinds :: [CoreBind] -> SimplM (OutStuff a) -> SimplM (OutStuff a) + -- Extends the in-scope environment as well as wrapping the bindings +addAuxiliaryBinds binds1 thing_inside + = addNewInScopeIds (bindersOfBinds binds1) $ + addLetBinds binds1 thing_inside + +addAuxiliaryBind :: CoreBind -> SimplM (OutStuff a) -> SimplM (OutStuff a) + -- Extends the in-scope environment as well as wrapping the bindings +addAuxiliaryBind bind thing_inside + = addNewInScopeIds (bindersOf bind) $ + addLetBind bind thing_inside + +needsCaseBinding ty rhs = isUnLiftedType ty && not (exprOkForSpeculation rhs) + -- Make a case expression instead of a let + -- These can arise either from the desugarer, + -- or from beta reductions: (\x.e) (x +# y) + +addCaseBind bndr rhs thing_inside + = getInScope `thenSmpl` \ in_scope -> + thing_inside `thenSmpl` \ (floats, (_, body)) -> + returnSmpl ([], (in_scope, Case rhs bndr [(DEFAULT, [], mkLets floats body)])) + +addNonRecBind bndr rhs thing_inside + -- Checks for needing a case binding + | needsCaseBinding (idType bndr) rhs = addCaseBind bndr rhs thing_inside + | otherwise = addLetBind (NonRec bndr rhs) thing_inside +\end{code} + %************************************************************************ %* * diff --git a/ghc/compiler/simplCore/SimplUtils.lhs b/ghc/compiler/simplCore/SimplUtils.lhs index 90a759d..235593c 100644 --- a/ghc/compiler/simplCore/SimplUtils.lhs +++ b/ghc/compiler/simplCore/SimplUtils.lhs @@ -11,7 +11,7 @@ module SimplUtils ( -- The continuation type SimplCont(..), DupFlag(..), contIsDupable, contResultType, - countValArgs, countArgs, + countValArgs, countArgs, mkRhsStop, mkStop, getContArgs, interestingCallContext, interestingArg, isStrictType, discardInline ) where @@ -44,6 +44,7 @@ import DataCon ( dataConRepArity ) import VarSet import VarEnv ( SubstEnv, SubstResult(..) ) import Util ( lengthExceeds ) +import BasicTypes ( Arity ) import Outputable \end{code} @@ -56,7 +57,10 @@ import Outputable \begin{code} data SimplCont -- Strict contexts - = Stop OutType -- Type of the result + = Stop OutType -- Type of the result + Bool -- True => This is the RHS of a thunk whose type suggests + -- that update-in-place would be possible + -- (This makes the inliner a little keener.) | CoerceIt OutType -- The To-type, simplified SimplCont @@ -83,7 +87,7 @@ data SimplCont -- Strict contexts -- The result expression in the OutExprStuff has type cont_ty instance Outputable SimplCont where - ppr (Stop _) = ptext SLIT("Stop") + ppr (Stop _ _) = ptext SLIT("Stop") ppr (ApplyTo dup arg se cont) = (ptext SLIT("ApplyTo") <+> ppr dup <+> ppr arg) $$ ppr cont ppr (ArgOf dup _ _) = ptext SLIT("ArgOf...") <+> ppr dup ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$ @@ -97,9 +101,16 @@ instance Outputable DupFlag where ppr OkToDup = ptext SLIT("ok") ppr NoDup = ptext SLIT("nodup") + +------------------- +mkRhsStop, mkStop :: OutType -> SimplCont +mkStop ty = Stop ty False +mkRhsStop ty = Stop ty (canUpdateInPlace ty) + + ------------------- contIsDupable :: SimplCont -> Bool -contIsDupable (Stop _) = True +contIsDupable (Stop _ _) = True contIsDupable (ApplyTo OkToDup _ _ _) = True contIsDupable (ArgOf OkToDup _ _) = True contIsDupable (Select OkToDup _ _ _ _) = True @@ -115,21 +126,22 @@ discardInline cont = cont ------------------- discardableCont :: SimplCont -> Bool -discardableCont (Stop _) = False +discardableCont (Stop _ _) = False discardableCont (CoerceIt _ cont) = discardableCont cont discardableCont (InlinePlease cont) = discardableCont cont discardableCont other = True discardCont :: SimplCont -- A continuation, expecting -> SimplCont -- Replace the continuation with a suitable coerce -discardCont (Stop to_ty) = Stop to_ty -discardCont cont = CoerceIt to_ty (Stop to_ty) - where - to_ty = contResultType cont +discardCont cont = case cont of + Stop to_ty _ -> cont + other -> CoerceIt to_ty (mkStop to_ty) + where + to_ty = contResultType cont ------------------- contResultType :: SimplCont -> OutType -contResultType (Stop to_ty) = to_ty +contResultType (Stop to_ty _) = to_ty contResultType (ArgOf _ to_ty _) = to_ty contResultType (ApplyTo _ _ _ cont) = contResultType cont contResultType (CoerceIt _ cont) = contResultType cont @@ -257,15 +269,19 @@ interestingArg in_scope arg subst where analyse (Var v) = case lookupIdSubst (mkSubst in_scope subst) v of - DoneId v' _ -> hasSomeUnfolding (idUnfolding v') - -- was: isValueUnfolding (idUnfolding v') - -- But that seems over-pessimistic - - other -> True -- was: False - -- But that is *definitely* too pessimistic. - -- E.g. let x = 3 in f - -- Here, x will be unconditionally substituted, via - -- the substitution! + ContEx subst arg -> interestingArg in_scope arg subst + DoneEx arg -> analyse arg + DoneId v' _ -> hasSomeUnfolding (idUnfolding v') + -- Was: isValueUnfolding (idUnfolding v') + -- But that seems over-pessimistic + + -- NB: it's too pessimistic to return False for ContEx/DoneEx + -- Consider let x = 3 in f x + -- The substitution will contain (x -> ContEx 3) + -- It's also too optimistic to return True for the ContEx/DoneEx case + -- Consider (\x. f x y) y + -- The substitution will contain (x -> ContEx y). + analyse (Type _) = False analyse (App fn (Type _)) = analyse fn analyse (Note _ a) = analyse a @@ -316,11 +332,15 @@ interestingCallContext :: Bool -- False <=> no args at all -- as scrutinee of a case Select -- as arg of a strict fn ArgOf -- then we should not inline it (unless there is some other reason, - -- e.g. is is the sole occurrence). - -- Why not? At least in the case-scrutinee situation, turning - -- case x of y -> ... + -- e.g. is is the sole occurrence). We achieve this by making + -- interestingCallContext return False for a lone variable. + -- + -- Why? At least in the case-scrutinee situation, turning + -- let x = (a,b) in case x of y -> ... -- into - -- let y = (a,b) in ... + -- let x = (a,b) in case (a,b) of y -> ... + -- and thence to + -- let x = (a,b) in let y = (a,b) in ... -- is bad if the binding for x will remain. -- -- Another example: I discovered that strings @@ -333,12 +353,13 @@ interestingCallContext :: Bool -- False <=> no args at all -- the context can ``see'' the unfolding of the variable (e.g. case or a RULE) -- so there's no gain. -- - -- However, even a type application isn't a lone variable. Consider + -- However, even a type application or coercion isn't a lone variable. + -- Consider -- case $fMonadST @ RealWorld of { :DMonad a b c -> c } -- We had better inline that sucker! The case won't see through it. -- - -- For now, I'm treating treating a variable applied to types as - -- "lone". The motivating example was + -- For now, I'm treating treating a variable applied to types + -- in a *lazy* context "lone". The motivating example was -- f = /\a. \x. BIG -- g = /\a. \y. h (f a) -- There's no advantage in inlining f here, and perhaps @@ -347,12 +368,12 @@ interestingCallContext :: Bool -- False <=> no args at all interestingCallContext some_args some_val_args cont = interesting cont where - interesting (InlinePlease _) = True - interesting (ApplyTo _ _ _ _) = some_args -- Can happen if we have (coerce t (f x)) y - interesting (Select _ _ _ _ _) = some_args - interesting (ArgOf _ _ _) = some_val_args - interesting (Stop ty) = some_val_args && canUpdateInPlace ty - interesting (CoerceIt _ cont) = interesting cont + interesting (InlinePlease _) = True + interesting (Select _ _ _ _ _) = some_args + interesting (ApplyTo _ _ _ _) = some_args -- Can happen if we have (coerce t (f x)) y + interesting (ArgOf _ _ _) = some_val_args + interesting (Stop ty upd_in_place) = some_val_args && upd_in_place + interesting (CoerceIt _ cont) = interesting cont -- If this call is the arg of a strict function, the context -- is a bit interesting. If we inline here, we may get useful -- evaluation information to avoid repeated evals: e.g. @@ -453,12 +474,13 @@ Try (a) eta expansion (b) type-lambda swizzling \begin{code} -transformRhs :: InExpr -> SimplM InExpr -transformRhs rhs - = tryEtaExpansion body `thenSmpl` \ body' -> - mkRhsTyLam tyvars body' - where - (tyvars, body) = collectTyBinders rhs +transformRhs :: OutExpr + -> (Arity -> OutExpr -> SimplM (OutStuff a)) + -> SimplM (OutStuff a) + +transformRhs rhs thing_inside + = tryRhsTyLam rhs $ \ rhs1 -> + tryEtaExpansion rhs1 thing_inside \end{code} @@ -491,7 +513,7 @@ let-floating. This optimisation is CRUCIAL in eliminating the junk introduced by desugaring mutually recursive definitions. Don't eliminate it lightly! -So far as the implemtation is concerned: +So far as the implementation is concerned: Invariant: go F e = /\tvs -> F e @@ -533,25 +555,31 @@ as we would normally do. \begin{code} -mkRhsTyLam tyvars body -- Only does something if there's a let +tryRhsTyLam rhs thing_inside -- Only does something if there's a let | null tyvars || not (worth_it body) -- inside a type lambda, and a WHNF inside that - = returnSmpl (mkLams tyvars body) + = thing_inside rhs | otherwise - = go (\x -> x) body + = go (\x -> x) body $ \ body' -> + thing_inside (mkLams tyvars body') + where + (tyvars, body) = collectTyBinders rhs + worth_it (Let _ e) = whnf_in_middle e worth_it other = False whnf_in_middle (Let _ e) = whnf_in_middle e whnf_in_middle e = exprIsCheap e - go fn (Let bind@(NonRec var rhs) body) | exprIsTrivial rhs - = go (fn . Let bind) body + go fn (Let bind@(NonRec var rhs) body) thing_inside + | exprIsTrivial rhs + = go (fn . Let bind) body thing_inside + + go fn (Let bind@(NonRec var rhs) body) thing_inside + = mk_poly tyvars_here var `thenSmpl` \ (var', rhs') -> + addAuxiliaryBind (NonRec var' (mkLams tyvars_here (fn rhs))) $ + go (fn . Let (mk_silly_bind var rhs')) body thing_inside - go fn (Let bind@(NonRec var rhs) body) - = mk_poly tyvars_here var `thenSmpl` \ (var', rhs') -> - go (fn . Let (mk_silly_bind var rhs')) body `thenSmpl` \ body' -> - returnSmpl (Let (NonRec var' (mkLams tyvars_here (fn rhs))) body') where tyvars_here = tyvars -- main_tyvar_set = mkVarSet tyvars @@ -573,13 +601,13 @@ mkRhsTyLam tyvars body -- Only does something if there's a let -- abstracting wrt *all* the tyvars. We'll see if that -- gives rise to problems. SLPJ June 98 - go fn (Let (Rec prs) body) + go fn (Let (Rec prs) body) thing_inside = mapAndUnzipSmpl (mk_poly tyvars_here) vars `thenSmpl` \ (vars', rhss') -> let - gn body = fn $ foldr Let body (zipWith mk_silly_bind vars rhss') + gn body = fn (foldr Let body (zipWith mk_silly_bind vars rhss')) in - go gn body `thenSmpl` \ body' -> - returnSmpl (Let (Rec (vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss])) body') + addAuxiliaryBind (Rec (vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss])) $ + go gn body thing_inside where (vars,rhss) = unzip prs tyvars_here = tyvars @@ -588,17 +616,19 @@ mkRhsTyLam tyvars body -- Only does something if there's a let -- See notes with tyvars_here above - go fn body = returnSmpl (mkLams tyvars (fn body)) + go fn body thing_inside = thing_inside (fn body) mk_poly tyvars_here var = getUniqueSmpl `thenSmpl` \ uniq -> let poly_name = setNameUnique (idName var) uniq -- Keep same name poly_ty = mkForAllTys tyvars_here (idType var) -- But new type of course + poly_id = mkId poly_name poly_ty vanillaIdInfo - -- It's crucial to copy the occInfo of the original var, because - -- we're looking at occurrence-analysed but as yet unsimplified code! - -- In particular, we mustn't lose the loop breakers. + -- In the olden days, it was crucial to copy the occInfo of the original var, + -- because we were looking at occurrence-analysed but as yet unsimplified code! + -- In particular, we mustn't lose the loop breakers. BUT NOW we are looking + -- at already simplified code, so it doesn't matter -- -- It's even right to retain single-occurrence or dead-var info: -- Suppose we started with /\a -> let x = E in B @@ -607,14 +637,11 @@ mkRhsTyLam tyvars body -- Only does something if there's a let -- where x* has an INLINE prag on it. Now, once x* is inlined, -- the occurrences of x' will be just the occurrences originally -- pinned on x. - poly_info = vanillaIdInfo `setOccInfo` idOccInfo var - - poly_id = mkId poly_name poly_ty poly_info + -- poly_info = vanillaIdInfo `setOccInfo` idOccInfo var in returnSmpl (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tyvars_here)) mk_silly_bind var rhs = NonRec var rhs - -- We need to be careful about inlining. -- Suppose we start with: -- -- x = let g = /\a -> \x -> f x x @@ -627,8 +654,7 @@ mkRhsTyLam tyvars body -- Only does something if there's a let -- * so we're back to square one -- We rely on the simplifier not to inline g into the RHS of g*, -- because it's a "lone" occurrence, and there is no benefit in - -- inlining. But it's a slightly delicate property, and there's - -- a danger of making the simplifier loop here. + -- inlining. But it's a slightly delicate property; hence this comment \end{code} @@ -641,61 +667,94 @@ mkRhsTyLam tyvars body -- Only does something if there's a let Try eta expansion for RHSs We go for: - \x1..xn -> N ==> \x1..xn y1..ym -> N y1..ym - AND - N E1..En ==> let z1=E1 .. zn=En in \y1..ym -> N z1..zn y1..ym + Case 1 f = \x1..xn -> N ==> f = \x1..xn y1..ym -> N y1..ym + (n >= 0) + OR + Case 2 f = N E1..En ==> z1=E1 + (n > 0) .. + zn=En + f = \y1..ym -> N z1..zn y1..ym + +where (in both cases) -where (in both cases) N is a NORMAL FORM (i.e. no redexes anywhere) -wanting a suitable number of extra args. + * The xi can include type variables -NB: the Ei may have unlifted type, but the simplifier (which is applied -to the result) deals OK with this. + * The yi are all value variables -There is no point in looking for a combination of the two, -because that would leave use with some lets sandwiched between lambdas; -that's what the final test in the first equation is for. + * N is a NORMAL FORM (i.e. no redexes anywhere) + wanting a suitable number of extra args. + + * the Ei must not have unlifted type + +There is no point in looking for a combination of the two, because +that would leave use with some lets sandwiched between lambdas; that's +what the final test in the first equation is for. \begin{code} -tryEtaExpansion :: InExpr -> SimplM InExpr -tryEtaExpansion rhs +tryEtaExpansion :: OutExpr + -> (Arity -> OutExpr -> SimplM (OutStuff a)) + -> SimplM (OutStuff a) +tryEtaExpansion rhs thing_inside | not opt_SimplDoLambdaEtaExpansion - || exprIsTrivial rhs -- Don't eta-expand a trival RHS - || null y_tys -- No useful expansion - || not (null x_bndrs || and trivial_args) -- Not (no x-binders or no z-binds) - = returnSmpl rhs - - | otherwise -- Consider eta expansion - = newIds SLIT("y") y_tys $ ( \ y_bndrs -> - tick (EtaExpansion (head y_bndrs)) `thenSmpl_` - mapAndUnzipSmpl bind_z_arg (args `zip` trivial_args) `thenSmpl` (\ (maybe_z_binds, z_args) -> - returnSmpl (mkLams x_bndrs $ - mkLets (catMaybes maybe_z_binds) $ - mkLams y_bndrs $ - mkApps (mkApps fun z_args) (map Var y_bndrs)))) + || null y_tys -- No useful expansion + || not (is_case1 || is_case2) -- Neither case matches + = thing_inside final_arity rhs -- So, no eta expansion, but + -- return a good arity + + | is_case1 + = make_y_bndrs $ \ y_bndrs -> + thing_inside final_arity + (mkLams x_bndrs $ mkLams y_bndrs $ + mkApps body (map Var y_bndrs)) + + | otherwise -- Must be case 2 + = mapAndUnzipSmpl bind_z_arg arg_infos `thenSmpl` \ (maybe_z_binds, z_args) -> + addAuxiliaryBinds (catMaybes maybe_z_binds) $ + make_y_bndrs $ \ y_bndrs -> + thing_inside final_arity + (mkLams y_bndrs $ + mkApps (mkApps fun z_args) (map Var y_bndrs)) where - (x_bndrs, body) = collectValBinders rhs - (fun, args) = collectArgs body - trivial_args = map exprIsTrivial args - fun_arity = exprEtaExpandArity fun + all_trivial_args = all is_trivial arg_infos + is_case1 = all_trivial_args + is_case2 = null x_bndrs && not (any unlifted_non_trivial arg_infos) + + (x_bndrs, body) = collectBinders rhs -- NB: x_bndrs can include type variables + x_arity = valBndrCount x_bndrs - bind_z_arg (arg, trivial_arg) + (fun, args) = collectArgs body + arg_infos = [(arg, exprType arg, exprIsTrivial arg) | arg <- args] + + is_trivial (_, _, triv) = triv + unlifted_non_trivial (_, ty, triv) = not triv && isUnLiftedType ty + + fun_arity = exprEtaExpandArity fun + + final_arity | all_trivial_args = x_arity + extra_args_wanted + | otherwise = x_arity + -- Arity can be more than the number of lambdas + -- because of coerces. E.g. \x -> coerce t (\y -> e) + -- will have arity at least 2 + -- The worker/wrapper pass will bring the coerce out to the top + + bind_z_arg (arg, arg_ty, trivial_arg) | trivial_arg = returnSmpl (Nothing, arg) - | otherwise = newId SLIT("z") (exprType arg) $ \ z -> + | otherwise = newId SLIT("z") arg_ty $ \ z -> returnSmpl (Just (NonRec z arg), Var z) - -- Note: I used to try to avoid the exprType call by using - -- the type of the binder. But this type doesn't necessarily - -- belong to the same substitution environment as this rhs; - -- and we are going to make extra term binders (y_bndrs) from the type - -- which will be processed with the rhs substitution environment. - -- This only went wrong in a mind bendingly complicated case. + make_y_bndrs thing_inside + = ASSERT( not (exprIsTrivial rhs) ) + newIds SLIT("y") y_tys $ \ y_bndrs -> + tick (EtaExpansion (head y_bndrs)) `thenSmpl_` + thing_inside y_bndrs + (potential_extra_arg_tys, _) = splitFunTys (exprType body) y_tys :: [InType] - y_tys = take no_extras_wanted potential_extra_arg_tys + y_tys = take extra_args_wanted potential_extra_arg_tys - no_extras_wanted :: Int - no_extras_wanted = 0 `max` + extra_args_wanted :: Int -- Number of extra args we want + extra_args_wanted = 0 `max` (fun_arity - valArgCount args) -- We used to expand the arity to the previous arity fo the -- function; but this is pretty dangerous. Consdier @@ -707,25 +766,6 @@ tryEtaExpansion rhs -- f = \xy -> let z = BIG in e -- -- (bndr_arity - no_of_xs) `max` - - -- See if the body could obviously do with more args - (fun_arity - valArgCount args) - --- This case is now deal with by exprEtaExpandArity - -- Finally, see if it's a state transformer, and xs is non-null - -- (so it's also a function not a thunk) in which - -- case we eta-expand on principle! This can waste work, - -- but usually doesn't. - -- I originally checked for a singleton type [ty] in this case - -- but then I found a situation in which I had - -- \ x -> let {..} in \ s -> f (...) s - -- AND f RETURNED A FUNCTION. That is, 's' wasn't the only - -- potential extra arg. --- case (x_bndrs, potential_extra_arg_tys) of --- (_:_, ty:_) -> case splitTyConApp_maybe ty of --- Just (tycon,_) | tycon == statePrimTyCon -> 1 --- other -> 0 --- other -> 0 \end{code} diff --git a/ghc/compiler/simplCore/Simplify.lhs b/ghc/compiler/simplCore/Simplify.lhs index 68f8c22..5c09ebc 100644 --- a/ghc/compiler/simplCore/Simplify.lhs +++ b/ghc/compiler/simplCore/Simplify.lhs @@ -15,20 +15,22 @@ import CmdLineOpts ( switchIsOn, opt_SimplDoEtaReduction, import SimplMonad import SimplUtils ( mkCase, transformRhs, findAlt, simplBinder, simplBinders, simplIds, findDefault, - SimplCont(..), DupFlag(..), + SimplCont(..), DupFlag(..), mkStop, mkRhsStop, contResultType, discardInline, countArgs, contIsDupable, getContArgs, interestingCallContext, interestingArg, isStrictType ) import Var ( mkSysTyVar, tyVarKind ) import VarEnv +import VarSet ( elemVarSet ) import Id ( Id, idType, idInfo, isDataConId, idUnfolding, setIdUnfolding, isExportedId, isDeadBinder, idDemandInfo, setIdInfo, idOccInfo, setIdOccInfo, zapLamIdInfo, setOneShotLambda, ) -import IdInfo ( OccInfo(..), ArityInfo(..), - setArityInfo, setUnfoldingInfo, +import IdInfo ( OccInfo(..), isDeadOcc, isLoopBreaker, + ArityInfo, setArityInfo, atLeastArity, + setUnfoldingInfo, occInfo ) import Demand ( Demand, isStrict ) @@ -36,12 +38,12 @@ import DataCon ( dataConNumInstArgs, dataConRepStrictness, dataConSig, dataConArgTys ) import CoreSyn -import CoreFVs ( mustHaveLocalBinding ) +import CoreFVs ( mustHaveLocalBinding, exprFreeVars ) import CoreUnfold ( mkOtherCon, mkUnfolding, otherCons, callSiteInline ) import CoreUtils ( cheapEqExpr, exprIsDupable, exprIsTrivial, exprIsConApp_maybe, - exprType, coreAltsType, exprArity, exprIsValue, idAppIsCheap, + exprType, coreAltsType, exprIsValue, idAppIsCheap, exprOkForSpeculation, etaReduceExpr, mkCoerce, mkSCC, mkInlineMe, mkAltExpr ) @@ -57,7 +59,6 @@ import Subst ( mkSubst, substTy, substExpr, import TyCon ( isDataTyCon, tyConDataConsIfAvailable ) import TysPrim ( realWorldStatePrimTy ) import PrelInfo ( realWorldPrimId ) -import BasicTypes ( isLoopBreaker ) import Maybes ( maybeToBool ) import Util ( zipWithEqual ) import Outputable @@ -128,33 +129,6 @@ simplRecBind top_lvl pairs bndrs' thing_inside %* * %************************************************************************ -\begin{code} -addLetBind :: OutId -> OutExpr -> SimplM (OutStuff a) -> SimplM (OutStuff a) -addLetBind bndr rhs thing_inside - = thing_inside `thenSmpl` \ (binds, res) -> - returnSmpl (NonRec bndr rhs : binds, res) - -addLetBinds :: [CoreBind] -> SimplM (OutStuff a) -> SimplM (OutStuff a) -addLetBinds binds1 thing_inside - = thing_inside `thenSmpl` \ (binds2, res) -> - returnSmpl (binds1 ++ binds2, res) - -needsCaseBinding ty rhs = isUnLiftedType ty && not (exprOkForSpeculation rhs) - -- Make a case expression instead of a let - -- These can arise either from the desugarer, - -- or from beta reductions: (\x.e) (x +# y) - -addCaseBind bndr rhs thing_inside - = getInScope `thenSmpl` \ in_scope -> - thing_inside `thenSmpl` \ (floats, (_, body)) -> - returnSmpl ([], (in_scope, Case rhs bndr [(DEFAULT, [], mkLets floats body)])) - -addNonRecBind bndr rhs thing_inside - -- Checks for needing a case binding - | needsCaseBinding (idType bndr) rhs = addCaseBind bndr rhs thing_inside - | otherwise = addLetBind bndr rhs thing_inside -\end{code} - The reason for this OutExprStuff stuff is that we want to float *after* simplifying a RHS, not before. If we do so naively we get quadratic behaviour as things float out. @@ -196,7 +170,7 @@ might do the same again. \begin{code} simplExpr :: CoreExpr -> SimplM CoreExpr simplExpr expr = getSubst `thenSmpl` \ subst -> - simplExprC expr (Stop (substTy subst (exprType expr))) + simplExprC expr (mkStop (substTy subst (exprType expr))) -- The type in the Stop continuation is usually not used -- It's only needed when discarding continuations after finding -- a function that returns bottom. @@ -235,7 +209,7 @@ simplExprF (Case scrut bndr alts) cont -- If case-of-case is off, simply simplify the case expression -- in a vanilla Stop context, and rebuild the result around it simplExprC scrut (Select NoDup bndr alts subst_env - (Stop (contResultType cont))) `thenSmpl` \ case_expr' -> + (mkStop (contResultType cont))) `thenSmpl` \ case_expr' -> rebuild case_expr' cont @@ -249,7 +223,7 @@ simplExprF (Let (Rec pairs) body) cont simplExprF expr@(Lam _ _) cont = simplLam expr cont simplExprF (Type ty) cont - = ASSERT( case cont of { Stop _ -> True; ArgOf _ _ _ -> True; other -> False } ) + = ASSERT( case cont of { Stop _ _ -> True; ArgOf _ _ _ -> True; other -> False } ) simplType ty `thenSmpl` \ ty' -> rebuild (Type ty') cont @@ -302,7 +276,7 @@ simplExprF (Note InlineCall e) cont simplExprF (Note InlineMe e) cont = case cont of - Stop _ -> -- Totally boring continuation + Stop _ _ -> -- Totally boring continuation -- Don't inline inside an INLINE expression setBlackList noInlineBlackList (simplExpr e) `thenSmpl` \ e' -> rebuild (mkInlineMe e') cont @@ -352,22 +326,37 @@ simplLam fun cont go expr cont = simplExprF expr cont -- completeLam deals with the case where a lambda doesn't have an ApplyTo --- continuation. --- We used to try for eta reduction here, but I found that this was --- eta reducing things like --- f = \x -> (coerce (\x -> e)) --- This made f's arity reduce, which is a bad thing, so I removed the --- eta reduction at this point, and now do it only when binding --- (at the call to postInlineUnconditionally) - -completeLam acc (Lam bndr body) cont +-- continuation, so there are real lambdas left to put in the result + +-- We try for eta reduction here, but *only* if we get all the +-- way to an exprIsTrivial expression. +-- We don't want to remove extra lambdas unless we are going +-- to avoid allocating this thing altogether + +completeLam rev_bndrs (Lam bndr body) cont = simplBinder bndr $ \ bndr' -> - completeLam (bndr':acc) body cont + completeLam (bndr':rev_bndrs) body cont -completeLam acc body cont +completeLam rev_bndrs body cont = simplExpr body `thenSmpl` \ body' -> - rebuild (foldl (flip Lam) body' acc) cont - -- Remember, acc is the *reversed* binders + case try_eta body' of + Just etad_lam -> tick (EtaReduction (head rev_bndrs)) `thenSmpl_` + rebuild etad_lam cont + + Nothing -> rebuild (foldl (flip Lam) body' rev_bndrs) cont + where + -- We don't use CoreUtils.etaReduceExpr, because we can be more + -- efficient here: (a) we already have the binders, (b) we can do + -- the triviality test before computing the free vars + try_eta body | not opt_SimplDoEtaReduction = Nothing + | otherwise = go rev_bndrs body + + go (b : bs) (App fun arg) | ok_arg b arg = go bs fun -- Loop round + go [] body | ok_body body = Just body -- Success! + go _ _ = Nothing -- Failure! + + ok_body body = exprIsTrivial body && not (any (`elemVarSet` exprFreeVars body) rev_bndrs) + ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg mkLamBndrZapper :: CoreExpr -- Function -> SimplCont -- The context @@ -465,28 +454,17 @@ simplValArg :: OutType -- rhs_ty: Type of arg; used only occasionally simplValArg arg_ty is_strict arg arg_se cont_ty thing_inside | is_strict - = transformRhs arg `thenSmpl` \ t_arg -> - getEnv `thenSmpl` \ env -> + = getEnv `thenSmpl` \ env -> setSubstEnv arg_se $ - simplExprF t_arg (ArgOf NoDup cont_ty $ \ rhs' -> + simplExprF arg (ArgOf NoDup cont_ty $ \ rhs' -> setAllExceptInScope env $ - thing_inside (etaFirst rhs')) + thing_inside rhs') | otherwise = simplRhs False {- Not top level -} True {- OK to float unboxed -} arg_ty arg arg_se thing_inside - --- Do eta-reduction on the simplified RHS, if eta reduction is on --- But *only* if we get all the way to an exprIsTrivial expression. --- We don't want to remove extra lambdas unless we are going --- to avoid allocating this thing altogether -etaFirst rhs - | opt_SimplDoEtaReduction && exprIsTrivial rhs' = rhs' - | otherwise = rhs - where - rhs' = etaReduceExpr rhs \end{code} @@ -512,63 +490,130 @@ completeBinding :: InId -- Binder -> SimplM (OutStuff a) completeBinding old_bndr new_bndr top_lvl black_listed new_rhs thing_inside - | (case occ_info of -- This happens; for example, the case_bndr during case of - IAmDead -> True -- known constructor: case (a,b) of x { (p,q) -> ... } - other -> False) -- Here x isn't mentioned in the RHS, so we don't want to + | isDeadOcc occ_info -- This happens; for example, the case_bndr during case of + -- known constructor: case (a,b) of x { (p,q) -> ... } + -- Here x isn't mentioned in the RHS, so we don't want to -- create the (dead) let-binding let x = (a,b) in ... = thing_inside - | postInlineUnconditionally black_listed occ_info old_bndr new_rhs - -- Maybe we don't need a let-binding! Maybe we can just - -- inline it right away. Unlike the preInlineUnconditionally case - -- we are allowed to look at the RHS. + | exprIsTrivial new_rhs + = completeTrivialBinding old_bndr new_bndr + black_listed loop_breaker new_rhs + thing_inside + + | Note coercion@(Coerce _ inner_ty) inner_rhs <- new_rhs + -- x = coerce t e ==> c = e; x = inline_me (coerce t c) + -- Now x can get inlined, which moves the coercion + -- to the usage site. This is a bit like worker/wrapper stuff, + -- but it's useful to do it very promptly, so that + -- x = coerce T (I# 3) + -- get's w/wd to + -- c = I# 3 + -- x = coerce T $wx + -- This in turn means that + -- case (coerce Int x) of ... + -- will inline x. + -- Also the full-blown w/w thing isn't set up for non-functions -- - -- NB: a loop breaker never has postInlineUnconditionally True - -- and non-loop-breakers only have *forward* references - -- Hence, it's safe to discard the binding - -- - -- NB: You might think that postInlineUnconditionally is an optimisation, - -- but if we have - -- let x = f Bool in (x, y) - -- then because of the constructor, x will not be *inlined* in the pair, - -- so the trivial binding will stay. But in this postInlineUnconditionally - -- gag we use the *substitution* to substitute (f Bool) for x, and that *will* - -- happen. - = tick (PostInlineUnconditionally old_bndr) `thenSmpl_` - extendSubst old_bndr (DoneEx new_rhs) - thing_inside + -- The inline_me note is so that the simplifier doesn't + -- just substitute c back inside x's rhs! (Typically, x will + -- get substituted away, but not if it's exported.) + = newId SLIT("c") inner_ty $ \ c_id -> + completeBinding c_id c_id top_lvl False inner_rhs $ + completeTrivialBinding old_bndr new_bndr black_listed loop_breaker + (Note InlineMe (Note coercion (Var c_id))) $ + thing_inside + | otherwise - = getSubst `thenSmpl` \ subst -> + = transformRhs new_rhs $ \ arity new_rhs' -> + getSubst `thenSmpl` \ subst -> let -- We make new IdInfo for the new binder by starting from the old binder, -- doing appropriate substitutions. -- Then we add arity and unfolding info to get the new binder - old_info = idInfo old_bndr new_bndr_info = substIdInfo subst old_info (idInfo new_bndr) - `setArityInfo` ArityAtLeast (exprArity new_rhs) + `setArityInfo` atLeastArity arity -- Add the unfolding *only* for non-loop-breakers -- Making loop breakers not have an unfolding at all -- means that we can avoid tests in exprIsConApp, for example. -- This is important: if exprIsConApp says 'yes' for a recursive -- thing, then we can get into an infinite loop - info_w_unf | isLoopBreaker (occInfo old_info) = new_bndr_info - | otherwise = new_bndr_info `setUnfoldingInfo` mkUnfolding top_lvl new_rhs + info_w_unf | loop_breaker = new_bndr_info + | otherwise = new_bndr_info `setUnfoldingInfo` mkUnfolding top_lvl new_rhs' final_id = new_bndr `setIdInfo` info_w_unf in -- These seqs forces the Id, and hence its IdInfo, -- and hence any inner substitutions final_id `seq` - addLetBind final_id new_rhs $ + addLetBind (NonRec final_id new_rhs') $ modifyInScope new_bndr final_id thing_inside where - occ_info = idOccInfo old_bndr + old_info = idInfo old_bndr + occ_info = occInfo old_info + loop_breaker = isLoopBreaker occ_info \end{code} +\begin{code} +completeTrivialBinding old_bndr new_bndr black_listed loop_breaker new_rhs thing_inside + -- We're looking at a binding with a trivial RHS, so + -- perhaps we can discard it altogether! + -- + -- NB: a loop breaker never has postInlineUnconditionally True + -- and non-loop-breakers only have *forward* references + -- Hence, it's safe to discard the binding + -- + -- NB: You might think that postInlineUnconditionally is an optimisation, + -- but if we have + -- let x = f Bool in (x, y) + -- then because of the constructor, x will not be *inlined* in the pair, + -- so the trivial binding will stay. But in this postInlineUnconditionally + -- gag we use the *substitution* to substitute (f Bool) for x, and that *will* + -- happen. + + -- NOTE: This isn't our last opportunity to inline. + -- We're at the binding site right now, and + -- we'll get another opportunity when we get to the ocurrence(s) + + -- Note that we do this unconditional inlining only for trival RHSs. + -- Don't inline even WHNFs inside lambdas; doing so may + -- simply increase allocation when the function is called + -- This isn't the last chance; see NOTE above. + -- + -- NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here + -- Why? Because we don't even want to inline them into the + -- RHS of constructor arguments. See NOTE above + -- + -- NB: Even NOINLINEis ignored here: if the rhs is trivial + -- it's best to inline it anyway. We often get a=E; b=a + -- from desugaring, with both a and b marked NOINLINE. + + | not keep_binding -- Can discard binding, inlining everywhere + = extendSubst old_bndr (DoneEx new_rhs) $ + tick (PostInlineUnconditionally old_bndr) `thenSmpl_` + thing_inside + + | otherwise -- We must keep the binding, but we may still inline + = getSubst `thenSmpl` \ subst -> + let + new_bndr_info = substIdInfo subst (idInfo old_bndr) (idInfo new_bndr) + final_id = new_bndr `setIdInfo` new_bndr_info + in + addLetBind (NonRec final_id new_rhs) $ + if dont_inline then + modifyInScope new_bndr final_id thing_inside + else + extendSubst old_bndr (DoneEx new_rhs) thing_inside + where + dont_inline = black_listed || loop_breaker + keep_binding = dont_inline || isExportedId old_bndr +\end{code} + + %************************************************************************ %* * \subsection{simplLazyBind} @@ -621,17 +666,14 @@ simplLazyBind top_lvl bndr bndr' rhs thing_inside \begin{code} simplRhs :: Bool -- True <=> Top level -> Bool -- True <=> OK to float unboxed (speculative) bindings + -- False for (a) recursive and (b) top-level bindings -> OutType -- Type of RHS; used only occasionally -> InExpr -> SubstEnv -> (OutExpr -> SimplM (OutStuff a)) -> SimplM (OutStuff a) simplRhs top_lvl float_ubx rhs_ty rhs rhs_se thing_inside - = -- Swizzle the inner lets past the big lambda (if any) - -- and try eta expansion - transformRhs rhs `thenSmpl` \ t_rhs -> - - -- Simplify it - setSubstEnv rhs_se (simplExprF t_rhs (Stop rhs_ty)) `thenSmpl` \ (floats, (in_scope', rhs')) -> + = -- Simplify it + setSubstEnv rhs_se (simplExprF rhs (mkRhsStop rhs_ty)) `thenSmpl` \ (floats, (in_scope', rhs')) -> -- Float lets out of RHS let @@ -652,12 +694,12 @@ simplRhs top_lvl float_ubx rhs_ty rhs rhs_se thing_inside WARN( any demanded_float floats_out, ppr floats_out ) addLetBinds floats_out $ setInScope in_scope' $ - thing_inside (etaFirst rhs'') + thing_inside rhs'' -- in_scope' may be excessive, but that's OK; -- it's a superset of what's in scope else -- Don't do the float - thing_inside (etaFirst (mkLets floats rhs')) + thing_inside (mkLets floats rhs') -- In a let-from-let float, we just tick once, arbitrarily -- choosing the first floated binder to identify it @@ -706,11 +748,7 @@ wantToExpose :: Int -> CoreExpr -> Bool -- v = E -- z = \w -> g v w -- Which is what we want; chances are z will be inlined now. --- --- This defn isn't quite like --- exprIsCheap (it ignores non-cheap args) --- exprIsValue (may not say True for a lone variable) --- which is slightly weird + wantToExpose n (Var v) = idAppIsCheap v n wantToExpose n (Lit l) = True wantToExpose n (Lam _ e) = True @@ -952,35 +990,6 @@ preInlineUnconditionally black_listed bndr OneOcc in_lam once -> not in_lam && once -- Not inside a lambda, one occurrence ==> safe! other -> False - - -postInlineUnconditionally :: Bool -- Black listed - -> OccInfo - -> InId -> OutExpr -> Bool - -- Examines a (bndr = rhs) binding, AFTER the rhs has been simplified - -- It returns True if it's ok to discard the binding and inline the - -- RHS at every use site. - - -- NOTE: This isn't our last opportunity to inline. - -- We're at the binding site right now, and - -- we'll get another opportunity when we get to the ocurrence(s) - -postInlineUnconditionally black_listed occ_info bndr rhs - | isExportedId bndr = False -- Don't inline these, ever - | black_listed = False - | isLoopBreaker occ_info = False - | otherwise = exprIsTrivial rhs -- Duplicating is free - -- Don't inline even WHNFs inside lambdas; doing so may - -- simply increase allocation when the function is called - -- This isn't the last chance; see NOTE above. - -- - -- NB: Even inline pragmas (e.g. IMustBeINLINEd) are ignored here - -- Why? Because we don't even want to inline them into the - -- RHS of constructor arguments. See NOTE above - -- - -- NB: Even NOINLINEis ignored here: if the rhs is trivial - -- it's best to inline it anyway. We often get a=E; b=a - -- from desugaring, with both a and b marked NOINLINE. \end{code} @@ -1002,7 +1011,7 @@ rebuild_done expr rebuild :: OutExpr -> SimplCont -> SimplM OutExprStuff -- Stop continuation -rebuild expr (Stop _) = rebuild_done expr +rebuild expr (Stop _ _) = rebuild_done expr -- ArgOf continuation rebuild expr (ArgOf _ _ cont_fn) = cont_fn expr @@ -1453,7 +1462,8 @@ mkDupableCont join_arg_ty (ArgOf _ cont_ty cont_fn) thing_inside -- Want to tick here so that we go round again, -- and maybe copy or inline the code; -- not strictly CaseOf Case - addLetBind join_id join_rhs (thing_inside new_cont) + addLetBind (NonRec join_id join_rhs) $ + thing_inside new_cont mkDupableCont ty (ApplyTo _ arg se cont) thing_inside = mkDupableCont (funResultTy ty) cont $ \ cont' -> @@ -1468,7 +1478,7 @@ mkDupableCont ty (ApplyTo _ arg se cont) thing_inside -- and maybe copy or inline the code; -- not strictly CaseOf Case - addLetBind bndr arg' $ + addLetBind (NonRec bndr arg') $ -- But what if the arg should be case-bound? We can't use -- addNonRecBind here because its type is too specific. -- This has been this way for a long time, so I'll leave it, @@ -1486,7 +1496,7 @@ mkDupableCont ty (Select _ case_bndr alts se cont) thing_inside returnSmpl (concat alt_binds_s, alts') ) `thenSmpl` \ (alt_binds, alts') -> - addNewInScopeIds [b | NonRec b _ <- alt_binds] $ + addAuxiliaryBinds alt_binds $ -- NB that the new alternatives, alts', are still InAlts, using the original -- binders. That means we can keep the case_bndr intact. This is important @@ -1495,15 +1505,14 @@ mkDupableCont ty (Select _ case_bndr alts se cont) thing_inside -- This is VITAL when the type of case_bndr is an unboxed pair (often the -- case in I/O rich code. We aren't allowed a lambda bound -- arg of unboxed tuple type, and indeed such a case_bndr is always dead - addLetBinds alt_binds $ - thing_inside (Select OkToDup case_bndr alts' se (Stop (contResultType cont))) + thing_inside (Select OkToDup case_bndr alts' se (mkStop (contResultType cont))) mkDupableAlt :: InId -> OutId -> SimplCont -> InAlt -> SimplM (OutStuff InAlt) mkDupableAlt case_bndr case_bndr' cont alt@(con, bndrs, rhs) = simplBinders bndrs $ \ bndrs' -> simplExprC rhs cont `thenSmpl` \ rhs' -> - if (case cont of { Stop _ -> exprIsDupable rhs'; other -> False}) then + if (case cont of { Stop _ _ -> exprIsDupable rhs'; other -> False}) then -- It is worth checking for a small RHS because otherwise we -- get extra let bindings that may cause an extra iteration of the simplifier to -- inline back in place. Quite often the rhs is just a variable or constructor. diff --git a/ghc/compiler/stranal/WorkWrap.lhs b/ghc/compiler/stranal/WorkWrap.lhs index 0ad7546..b05737d 100644 --- a/ghc/compiler/stranal/WorkWrap.lhs +++ b/ghc/compiler/stranal/WorkWrap.lhs @@ -14,9 +14,9 @@ import CmdLineOpts ( opt_UF_CreationThreshold , opt_D_verbose_core2core, opt_D_dump_worker_wrapper ) import CoreLint ( beginPass, endPass ) -import CoreUtils ( exprType, exprArity, exprEtaExpandArity ) +import CoreUtils ( exprType, exprEtaExpandArity ) import MkId ( mkWorkerId ) -import Id ( Id, idType, idStrictness, setIdArityInfo, isOneShotLambda, +import Id ( Id, idType, idStrictness, idArity, isOneShotLambda, setIdStrictness, idInlinePragma, setIdWorkerInfo, idCprInfo, setInlinePragma ) import Type ( Type, isNewType, splitForAllTys, splitFunTys ) @@ -186,17 +186,30 @@ tryWW :: Bool -- True <=> a non-recursive binding -- if two, then a worker and a -- wrapper. tryWW non_rec fn_id rhs - | isNeverInlinePrag inline_prag + | isNeverInlinePrag inline_prag || arity == 0 = -- Don't split things that will never be inlined returnUs [ (fn_id, rhs) ] - | non_rec && certainlyWillInline fn_id - -- No point in worker/wrappering something that is going to be + | non_rec && not do_coerce_ww && certainlyWillInline fn_id + -- No point in worker/wrappering a function that is going to be -- INLINEd wholesale anyway. If the strictness analyser is run -- twice, this test also prevents wrappers (which are INLINEd) -- from being re-done. -- + -- The do_coerce_ww test is so that + -- a function with a coerce should w/w to get rid + -- of the coerces, which can significantly improve its arity. + -- Example: f []   = return [] :: IO [Int] + -- f (x:xs) = return (x:xs) + -- If we aren't careful we end up with + -- f = \ x -> case x of { + -- x:xs -> __coerce (IO [Int]) (\ s -> (# s, x:xs #) + -- [] -> lvl_sJ8 + -- + -- -- OUT OF DATE NOTE, kept for info: + -- It's out of date because now wrappers look very cheap + -- even when they are inlined. -- In this case we add an INLINE pragma to the RHS. Why? -- Because consider -- f = \x -> g x x @@ -204,8 +217,6 @@ tryWW non_rec fn_id rhs -- Then f is small, so we don't w/w it. But g is big, and we do, so -- g's wrapper will get inlined in f's RHS, which makes f look big now. -- So f doesn't get inlined, but it is strict and we have failed to w/w it. - -- It's out of date because now wrappers look very cheap - -- even when they are inlined. = returnUs [ (fn_id, rhs) ] | not (do_strict_ww || do_cpr_ww || do_coerce_ww) @@ -222,13 +233,9 @@ tryWW non_rec fn_id rhs work_id | has_strictness = proto_work_id `setIdStrictness` mkStrictnessInfo (work_demands, result_bot) | otherwise = proto_work_id - wrap_arity = exprArity wrap_rhs -- Might be greater than the current visible arity - -- if the function returns bottom - wrap_rhs = wrap_fn work_id wrap_id = fn_id `setIdStrictness` wrapper_strictness - `setIdWorkerInfo` HasWorker work_id wrap_arity - `setIdArityInfo` exactArity wrap_arity + `setIdWorkerInfo` HasWorker work_id arity `setInlinePragma` NoInlinePragInfo -- Put it on the worker instead -- Add info to the wrapper: -- (a) we want to set its arity @@ -237,12 +244,12 @@ tryWW non_rec fn_id rhs in returnUs ([(work_id, work_rhs), (wrap_id, wrap_rhs)]) -- Worker first, because wrapper mentions it - -- Arrange to inline the wrapper unconditionally + -- mkWwBodies has already built a wrap_rhs with an INLINE pragma wrapped around it where fun_ty = idType fn_id - arity = exprEtaExpandArity rhs + arity = idArity fn_id -- The arity is set by the simplifier using exprEtaExpandArity + -- So it may be more than the number of top-level-visible lambdas - -- Don't split something which is marked unconditionally NOINLINE inline_prag = idInlinePragma fn_id strictness_info = idStrictness fn_id -- 1.7.10.4