From d95ce839533391e7118257537044f01cbb1d6694 Mon Sep 17 00:00:00 2001 From: "simonpj@microsoft.com" Date: Fri, 5 Dec 2008 16:54:00 +0000 Subject: [PATCH] Completely new treatment of INLINE pragmas (big patch) This is a major patch, which changes the way INLINE pragmas work. Although lots of files are touched, the net is only +21 lines of code -- and I bet that most of those are comments! HEADS UP: interface file format has changed, so you'll need to recompile everything. There is not much effect on overall performance for nofib, probably because those programs don't make heavy use of INLINE pragmas. Program Size Allocs Runtime Elapsed Min -11.3% -6.9% -9.2% -8.2% Max -0.1% +4.6% +7.5% +8.9% Geometric Mean -2.2% -0.2% -1.0% -0.8% (The +4.6% for on allocs is cichelli; see other patch relating to -fpass-case-bndr-to-join-points.) The old INLINE system ~~~~~~~~~~~~~~~~~~~~~ The old system worked like this. A function with an INLINE pragam got a right-hand side which looked like f = __inline_me__ (\xy. e) The __inline_me__ part was an InlineNote, and was treated specially in various ways. Notably, the simplifier didn't inline inside an __inline_me__ note. As a result, the code for f itself was pretty crappy. That matters if you say (map f xs), because then you execute the code for f, rather than inlining a copy at the call site. The new story: InlineRules ~~~~~~~~~~~~~~~~~~~~~~~~~~ The new system removes the InlineMe Note altogether. Instead there is a new constructor InlineRule in CoreSyn.Unfolding. This is a bit like a RULE, in that it remembers the template to be inlined inside the InlineRule. No simplification or inlining is done on an InlineRule, just like RULEs. An Id can have an InlineRule *or* a CoreUnfolding (since these are two constructors from Unfolding). The simplifier treats them differently: - An InlineRule is has the substitution applied (like RULES) but is otherwise left undisturbed. - A CoreUnfolding is updated with the new RHS of the definition, on each iteration of the simplifier. An InlineRule fires regardless of size, but *only* when the function is applied to enough arguments. The "arity" of the rule is specified (by the programmer) as the number of args on the LHS of the "=". So it makes a difference whether you say {-# INLINE f #-} f x = \y -> e or f x y = e This is one of the big new features that InlineRule gives us, and it is one that Roman really wanted. In contrast, a CoreUnfolding can fire when it is applied to fewer args than than the function has lambdas, provided the result is small enough. Consequential stuff ~~~~~~~~~~~~~~~~~~~ * A 'wrapper' no longer has a WrapperInfo in the IdInfo. Instead, the InlineRule has a field identifying wrappers. * Of course, IfaceSyn and interface serialisation changes appropriately. * Making implication constraints inline nicely was a bit fiddly. In the end I added a var_inline field to HsBInd.VarBind, which is why this patch affects the type checker slightly * I made some changes to the way in which eta expansion happens in CorePrep, mainly to ensure that *arguments* that become let-bound are also eta-expanded. I'm still not too happy with the clarity and robustness fo the result. * We now complain if the programmer gives an INLINE pragma for a recursive function (prevsiously we just ignored it). Reason for change: we don't want an InlineRule on a LoopBreaker, because then we'd have to check for loop-breaker-hood at occurrence sites (which isn't currenlty done). Some tests need changing as a result. This patch has been in my tree for quite a while, so there are probably some other minor changes. --- compiler/basicTypes/Id.lhs | 11 - compiler/basicTypes/IdInfo.lhs | 82 -------- compiler/basicTypes/MkId.lhs | 4 +- compiler/coreSyn/CoreFVs.lhs | 27 ++- compiler/coreSyn/CoreLint.lhs | 6 +- compiler/coreSyn/CorePrep.lhs | 112 +++++------ compiler/coreSyn/CoreSubst.lhs | 55 ++--- compiler/coreSyn/CoreSyn.lhs | 158 +++++++++------ compiler/coreSyn/CoreUnfold.lhs | 224 ++++++++++----------- compiler/coreSyn/CoreUtils.lhs | 379 +++++++++++++++++------------------ compiler/coreSyn/MkExternalCore.lhs | 1 - compiler/coreSyn/PprCore.lhs | 44 +++- compiler/deSugar/DsBinds.lhs | 188 ++++++++++------- compiler/deSugar/DsForeign.lhs | 6 +- compiler/deSugar/DsMeta.hs | 7 +- compiler/hsSyn/HsBinds.lhs | 6 +- compiler/hsSyn/HsUtils.lhs | 9 +- compiler/iface/BinIface.hs | 36 +++- compiler/iface/IfaceSyn.lhs | 34 ++-- compiler/iface/MkIface.lhs | 43 ++-- compiler/iface/TcIface.lhs | 52 +++-- compiler/main/TidyPgm.lhs | 106 +++++----- compiler/parser/ParserCore.y | 11 +- compiler/simplCore/CSE.lhs | 3 +- compiler/simplCore/FloatIn.lhs | 7 +- compiler/simplCore/FloatOut.lhs | 23 --- compiler/simplCore/OccurAnal.lhs | 41 +--- compiler/simplCore/SetLevels.lhs | 113 +++++------ compiler/simplCore/SimplCore.lhs | 106 +++++----- compiler/simplCore/SimplEnv.lhs | 48 ++--- compiler/simplCore/SimplUtils.lhs | 32 +-- compiler/simplCore/Simplify.lhs | 208 ++++++++++--------- compiler/specialise/Specialise.lhs | 33 +-- compiler/stranal/WorkWrap.lhs | 25 +-- compiler/stranal/WwLib.lhs | 4 +- compiler/typecheck/Inst.lhs | 4 +- compiler/typecheck/TcBinds.lhs | 44 ++-- compiler/typecheck/TcClassDcl.lhs | 3 +- compiler/typecheck/TcExpr.lhs | 10 +- compiler/typecheck/TcForeign.lhs | 2 +- compiler/typecheck/TcGenDeriv.lhs | 27 ++- compiler/typecheck/TcHsSyn.lhs | 5 +- compiler/typecheck/TcInstDcls.lhs | 9 +- compiler/typecheck/TcRnDriver.lhs | 13 +- compiler/typecheck/TcSimplify.lhs | 27 ++- compiler/vectorise/VectType.hs | 7 + 46 files changed, 1204 insertions(+), 1191 deletions(-) diff --git a/compiler/basicTypes/Id.lhs b/compiler/basicTypes/Id.lhs index d87e45b..012e42b 100644 --- a/compiler/basicTypes/Id.lhs +++ b/compiler/basicTypes/Id.lhs @@ -68,7 +68,6 @@ module Id ( idArity, idNewDemandInfo, idNewDemandInfo_maybe, idNewStrictness, idNewStrictness_maybe, - idWorkerInfo, idUnfolding, idSpecialisation, idCoreRules, idHasRules, idCafInfo, @@ -86,7 +85,6 @@ module Id ( setIdArity, setIdNewDemandInfo, setIdNewStrictness, zapIdNewStrictness, - setIdWorkerInfo, setIdSpecialisation, setIdCafInfo, setIdOccInfo, zapIdOccInfo, @@ -134,7 +132,6 @@ infixl 1 `setIdUnfolding`, `setIdArity`, `setIdNewDemandInfo`, `setIdNewStrictness`, - `setIdWorkerInfo`, `setIdSpecialisation`, `setInlinePragma`, `idCafInfo` @@ -534,14 +531,6 @@ isStrictId id (isStrictType (idType id)) --------------------------------- - -- WORKER ID -idWorkerInfo :: Id -> WorkerInfo -idWorkerInfo id = workerInfo (idInfo id) - -setIdWorkerInfo :: Id -> WorkerInfo -> Id -setIdWorkerInfo id work_info = modifyIdInfo (`setWorkerInfo` work_info) id - - --------------------------------- -- UNFOLDING idUnfolding :: Id -> Unfolding idUnfolding id = unfoldingInfo (idInfo id) diff --git a/compiler/basicTypes/IdInfo.lhs b/compiler/basicTypes/IdInfo.lhs index 26fe453..fca1abd 100644 --- a/compiler/basicTypes/IdInfo.lhs +++ b/compiler/basicTypes/IdInfo.lhs @@ -49,11 +49,6 @@ module IdInfo ( cprInfoFromNewStrictness, #endif - -- ** The WorkerInfo type - WorkerInfo(..), - workerExists, wrapperArity, workerId, - workerInfo, setWorkerInfo, ppWorkerInfo, - -- ** Unfolding Info unfoldingInfo, setUnfoldingInfo, setUnfoldingInfoLazily, @@ -94,7 +89,6 @@ import CoreSyn ( CoreRule, setRuleIdName, seqRules, Unfolding, noUnfolding ) import Class import PrimOp import Name -import Var import VarSet import BasicTypes import DataCon @@ -119,7 +113,6 @@ infixl 1 `setSpecInfo`, `setArityInfo`, `setInlinePragInfo`, `setUnfoldingInfo`, - `setWorkerInfo`, `setLBVarInfo`, `setOccInfo`, `setCafInfo`, @@ -321,15 +314,6 @@ data IdInfo demandInfo :: Demand.Demand, -- ^ Whether or not the 'Id' is definitely demanded strictnessInfo :: StrictnessInfo, -- ^ 'Id' strictness properties #endif - workerInfo :: WorkerInfo, -- ^ Pointer to worker function. - -- Within one module this is irrelevant; the - -- inlining of a worker is handled via the 'Unfolding'. - -- However, when the module is imported by others, the - -- 'WorkerInfo' is used /only/ to indicate the form of - -- the RHS, so that interface files don't actually - -- need to contain the RHS; it can be derived from - -- the strictness info - unfoldingInfo :: Unfolding, -- ^ The 'Id's unfolding cafInfo :: CafInfo, -- ^ 'Id' CAF info lbvarInfo :: LBVarInfo, -- ^ Info about a lambda-bound variable, if the 'Id' is one @@ -355,7 +339,6 @@ seqIdInfo (IdInfo {}) = () megaSeqIdInfo :: IdInfo -> () megaSeqIdInfo info = seqSpecInfo (specInfo info) `seq` - seqWorker (workerInfo info) `seq` -- Omitting this improves runtimes a little, presumably because -- some unfoldings are not calculated at all @@ -378,8 +361,6 @@ megaSeqIdInfo info Setters \begin{code} -setWorkerInfo :: IdInfo -> WorkerInfo -> IdInfo -setWorkerInfo info wk = wk `seq` info { workerInfo = wk } setSpecInfo :: IdInfo -> SpecInfo -> IdInfo setSpecInfo info sp = sp `seq` info { specInfo = sp } setInlinePragInfo :: IdInfo -> InlinePragInfo -> IdInfo @@ -435,7 +416,6 @@ vanillaIdInfo strictnessInfo = NoStrictnessInfo, #endif specInfo = emptySpecInfo, - workerInfo = NoWorker, unfoldingInfo = noUnfolding, lbvarInfo = NoLBVarInfo, inlinePragInfo = AlwaysActive, @@ -544,67 +524,6 @@ seqSpecInfo (SpecInfo rules fvs) = seqRules rules `seq` seqVarSet fvs %************************************************************************ %* * -\subsection[worker-IdInfo]{Worker info about an @Id@} -%* * -%************************************************************************ - -There might not be a worker, even for a strict function, because: -(a) the function might be small enough to inline, so no need - for w/w split -(b) the strictness info might be "SSS" or something, so no w/w split. - -Sometimes the arity of a wrapper changes from the original arity from -which it was generated, so we always emit the "original" arity into -the interface file, as part of the worker info. - -How can this happen? Sometimes we get - f = coerce t (\x y -> $wf x y) -at the moment of w/w split; but the eta reducer turns it into - f = coerce t $wf -which is perfectly fine except that the exposed arity so far as -the code generator is concerned (zero) differs from the arity -when we did the split (2). - -All this arises because we use 'arity' to mean "exactly how many -top level lambdas are there" in interface files; but during the -compilation of this module it means "how many things can I apply -this to". - -\begin{code} - --- | If this Id has a worker then we store a reference to it. Worker --- functions are generated by the worker\/wrapper pass, using information --- information from strictness analysis. -data WorkerInfo = NoWorker -- ^ No known worker function - | HasWorker Id Arity -- ^ The 'Arity' is the arity of the /wrapper/ at the moment of the - -- worker\/wrapper split, which may be different from the current 'Id' 'Aritiy' - -seqWorker :: WorkerInfo -> () -seqWorker (HasWorker id a) = id `seq` a `seq` () -seqWorker NoWorker = () - -ppWorkerInfo :: WorkerInfo -> SDoc -ppWorkerInfo NoWorker = empty -ppWorkerInfo (HasWorker wk_id _) = ptext (sLit "Worker") <+> ppr wk_id - -workerExists :: WorkerInfo -> Bool -workerExists NoWorker = False -workerExists (HasWorker _ _) = True - --- | The 'Id' of the worker function if it exists, or a panic otherwise -workerId :: WorkerInfo -> Id -workerId (HasWorker id _) = id -workerId NoWorker = panic "workerId: NoWorker" - --- | The 'Arity' of the worker function at the time of the split if it exists, or a panic otherwise -wrapperArity :: WorkerInfo -> Arity -wrapperArity (HasWorker _ a) = a -wrapperArity NoWorker = panic "wrapperArity: NoWorker" -\end{code} - - -%************************************************************************ -%* * \subsection[CG-IdInfo]{Code generator-related information} %* * %************************************************************************ @@ -779,7 +698,6 @@ zapFragileInfo :: IdInfo -> Maybe IdInfo -- ^ Zap info that depends on free variables zapFragileInfo info = Just (info `setSpecInfo` emptySpecInfo - `setWorkerInfo` NoWorker `setUnfoldingInfo` noUnfolding `setOccInfo` if isFragileOcc occ then NoOccInfo else occ) where diff --git a/compiler/basicTypes/MkId.lhs b/compiler/basicTypes/MkId.lhs index be83835..1dd990e 100644 --- a/compiler/basicTypes/MkId.lhs +++ b/compiler/basicTypes/MkId.lhs @@ -335,8 +335,8 @@ mkDataConIds wrap_name wkr_name data_con -- ...(let w = C x in ...(w p q)...)... -- we want to see that w is strict in its two arguments - wrap_unf = mkImplicitUnfolding $ Note InlineMe $ - mkLams wrap_tvs $ + wrap_unf = mkInlineRule wrap_rhs (length dict_args + length id_args) + wrap_rhs = mkLams wrap_tvs $ mkLams eq_args $ mkLams dict_args $ mkLams id_args $ foldr mk_case con_app diff --git a/compiler/coreSyn/CoreFVs.lhs b/compiler/coreSyn/CoreFVs.lhs index d2d1383..a15362a 100644 --- a/compiler/coreSyn/CoreFVs.lhs +++ b/compiler/coreSyn/CoreFVs.lhs @@ -16,6 +16,7 @@ Taken quite directly from the Peyton Jones/Lester paper. module CoreFVs ( -- * Free variables of expressions and binding groups exprFreeVars, -- CoreExpr -> VarSet -- Find all locally-defined free Ids or tyvars + exprFreeIds, -- CoreExpr -> IdSet -- Find all locally-defined free Ids exprsFreeVars, -- [CoreExpr] -> VarSet bindFreeVars, -- CoreBind -> VarSet @@ -25,7 +26,8 @@ module CoreFVs ( exprFreeNames, exprsFreeNames, -- * Free variables of Rules, Vars and Ids - idRuleVars, idFreeVars, varTypeTyVars, + idRuleVars, idRuleRhsVars, idFreeVars, idInlineFreeVars, + varTypeTyVars, ruleRhsFreeVars, rulesFreeVars, ruleLhsFreeNames, ruleLhsFreeIds, @@ -71,6 +73,10 @@ but not those that are free in the type of variable occurrence. exprFreeVars :: CoreExpr -> VarSet exprFreeVars = exprSomeFreeVars isLocalVar +-- | Find all locally-defined free Ids in an expression +exprFreeIds :: CoreExpr -> IdSet -- Find all locally-defined free Ids +exprFreeIds = exprSomeFreeVars isLocalId + -- | Find all locally-defined free Ids or type variables in several expressions exprsFreeVars :: [CoreExpr] -> VarSet exprsFreeVars = foldr (unionVarSet . exprFreeVars) emptyVarSet @@ -378,7 +384,24 @@ bndrRuleVars v | isTyVar v = emptyVarSet | otherwise = idRuleVars v idRuleVars ::Id -> VarSet -idRuleVars id = ASSERT( isId id) specInfoFreeVars (idSpecialisation id) +idRuleVars id = ASSERT( isId id) + specInfoFreeVars (idSpecialisation id) `unionVarSet` + idInlineFreeVars id -- And the variables in an INLINE rule + +idRuleRhsVars :: Id -> VarSet +-- Just the variables free on the *rhs* of a rule +-- See Note [Choosing loop breakers] in Simplify.lhs +idRuleRhsVars id = foldr (unionVarSet . ruleRhsFreeVars) + (idInlineFreeVars id) + (idCoreRules id) + +idInlineFreeVars :: Id -> VarSet +-- Produce free vars for an InlineRule, BUT NOT for an ordinary unfolding +-- An InlineRule behaves *very like* a RULE, and that is what we are after here +idInlineFreeVars id + = case idUnfolding id of + InlineRule { uf_tmpl = tmpl } -> exprFreeVars tmpl + _ -> emptyVarSet \end{code} diff --git a/compiler/coreSyn/CoreLint.lhs b/compiler/coreSyn/CoreLint.lhs index 2d45eb3..8d0304a 100644 --- a/compiler/coreSyn/CoreLint.lhs +++ b/compiler/coreSyn/CoreLint.lhs @@ -28,7 +28,6 @@ import VarEnv import VarSet import Name import Id -import IdInfo import PprCore import ErrUtils import SrcLoc @@ -228,10 +227,7 @@ lintSingleBinding top_lvl_flag rec_flag (binder,rhs) where binder_ty = idType binder maybeDmdTy = idNewStrictness_maybe binder - bndr_vars = varSetElems (idFreeVars binder `unionVarSet` wkr_vars) - wkr_vars | workerExists wkr_info = unitVarSet (workerId wkr_info) - | otherwise = emptyVarSet - wkr_info = idWorkerInfo binder + bndr_vars = varSetElems (idFreeVars binder) lintBinder var | isId var = lintIdBndr var $ \_ -> (return ()) | otherwise = return () \end{code} diff --git a/compiler/coreSyn/CorePrep.lhs b/compiler/coreSyn/CorePrep.lhs index 5fa5002..facffdf 100644 --- a/compiler/coreSyn/CorePrep.lhs +++ b/compiler/coreSyn/CorePrep.lhs @@ -276,8 +276,7 @@ corePrepTopBind env (Rec pairs) = corePrepRecPairs TopLevel env pairs corePrepBind :: CorePrepEnv -> CoreBind -> UniqSM (CorePrepEnv, Floats) -- This one is used for *local* bindings corePrepBind env (NonRec bndr rhs) = do - rhs1 <- etaExpandRhs bndr rhs - (floats, rhs2) <- corePrepExprFloat env rhs1 + (floats, rhs2) <- corePrepExprFloat env rhs (_, bndr') <- cloneBndr env bndr (floats', bndr'') <- mkLocalNonRec bndr' (bdrDem bndr) floats rhs2 -- We want bndr'' in the envt, because it records @@ -310,8 +309,7 @@ corePrepRhs :: TopLevelFlag -> RecFlag -> UniqSM (Floats, CoreExpr) -- Used for top-level bindings, and local recursive bindings corePrepRhs top_lvl is_rec env (bndr, rhs) = do - rhs' <- etaExpandRhs bndr rhs - floats_w_rhs <- corePrepExprFloat env rhs' + floats_w_rhs <- corePrepExprFloat env rhs floatRhs top_lvl is_rec bndr floats_w_rhs @@ -322,14 +320,15 @@ corePrepRhs top_lvl is_rec env (bndr, rhs) = do -- This is where we arrange that a non-trivial argument is let-bound corePrepArg :: CorePrepEnv -> CoreArg -> RhsDemand -> UniqSM (Floats, CoreArg) -corePrepArg env arg dem = do - (floats, arg') <- corePrepExprFloat env arg - if exprIsTrivial arg' && allLazy NotTopLevel NonRecursive floats - -- Note [Floating unlifted arguments] - then return (floats, arg') - else do v <- newVar (exprType arg') - (floats', v') <- mkLocalNonRec v dem floats arg' - return (floats', Var v') +corePrepArg env arg dem + = do { (floats, arg') <- corePrepExprFloat env arg + ; if exprIsTrivial arg' && allLazy NotTopLevel NonRecursive floats + -- Note [Floating unlifted arguments] + then return (floats, arg') + else do { v <- newVar (exprType arg') + -- Note [Eta expand arguments] + ; (floats', v') <- mkLocalNonRec v dem floats arg' + ; return (floats', Var v') } } -- version that doesn't consider an scc annotation to be trivial. exprIsTrivial :: CoreExpr -> Bool @@ -519,7 +518,6 @@ corePrepExprFloat env expr@(App _ _) = do ty = exprType fun ignore_note (CoreNote _) = True - ignore_note InlineMe = True ignore_note _other = False -- We don't ignore SCCs, since they require some code generation @@ -589,20 +587,60 @@ floatRhs :: TopLevelFlag -> RecFlag -> UniqSM (Floats, -- Floats out of this bind CoreExpr) -- Final Rhs -floatRhs top_lvl is_rec _bndr (floats, rhs) +floatRhs top_lvl is_rec bndr (floats, rhs) | isTopLevel top_lvl || exprIsHNF rhs, -- Float to expose value or allLazy top_lvl is_rec floats -- at top level = -- Why the test for allLazy? -- v = f (x `divInt#` y) -- we don't want to float the case, even if f has arity 2, -- because floating the case would make it evaluated too early - return (floats, rhs) + do { us <- getUniquesM + ; let eta_rhs = etaExpand arity us rhs (idType bndr) + -- For a GlobalId, take the Arity from the Id. + -- It was set in CoreTidy and must not change + -- For all others, just expand at will + -- See Note [Eta expansion] + arity | isGlobalId bndr = idArity bndr + | otherwise = exprArity rhs + ; return (floats, eta_rhs) } | otherwise = do -- Don't float; the RHS isn't a value rhs' <- mkBinds floats rhs return (emptyFloats, rhs') +\end{code} + +Note [Eta expansion] +~~~~~~~~~~~~~~~~~~~~~ +Eta expand to match the arity claimed by the binder Remember, +CorePrep must not change arity + +Eta expansion might not have happened already, because it is done by +the simplifier only when there at least one lambda already. + +NB1:we could refrain when the RHS is trivial (which can happen + for exported things). This would reduce the amount of code + generated (a little) and make things a little words for + code compiled without -O. The case in point is data constructor + wrappers. + +NB2: we have to be careful that the result of etaExpand doesn't + invalidate any of the assumptions that CorePrep is attempting + to establish. One possible cause is eta expanding inside of + an SCC note - we're now careful in etaExpand to make sure the + SCC is pushed inside any new lambdas that are generated. + +NB3: It's important to do eta expansion, and *then* ANF-ising + f = /\a -> g (h 3) -- h has arity 2 +If we ANF first we get + f = /\a -> let s = h 3 in g s +and now eta expansion gives + f = /\a -> \ y -> (let s = h 3 in g s) y +which is horrible. +Eta expanding first gives + f = /\a -> \y -> let s = h 3 in g s y +\begin{code} -- mkLocalNonRec is used only for *nested*, *non-recursive* bindings mkLocalNonRec :: Id -> RhsDemand -- Lhs: id with demand -> Floats -> CoreExpr -- Rhs: let binds in body @@ -648,50 +686,6 @@ mkBinds (Floats _ binds) body mk_bind (FloatCase bndr rhs _) body = Case rhs bndr (exprType body) [(DEFAULT, [], body)] mk_bind (FloatLet bind) body = Let bind body -etaExpandRhs :: CoreBndr -> CoreExpr -> UniqSM CoreExpr -etaExpandRhs bndr rhs = do - -- Eta expand to match the arity claimed by the binder - -- Remember, CorePrep must not change arity - -- - -- Eta expansion might not have happened already, - -- because it is done by the simplifier only when - -- there at least one lambda already. - -- - -- NB1:we could refrain when the RHS is trivial (which can happen - -- for exported things). This would reduce the amount of code - -- generated (a little) and make things a little words for - -- code compiled without -O. The case in point is data constructor - -- wrappers. - -- - -- NB2: we have to be careful that the result of etaExpand doesn't - -- invalidate any of the assumptions that CorePrep is attempting - -- to establish. One possible cause is eta expanding inside of - -- an SCC note - we're now careful in etaExpand to make sure the - -- SCC is pushed inside any new lambdas that are generated. - -- - -- NB3: It's important to do eta expansion, and *then* ANF-ising - -- f = /\a -> g (h 3) -- h has arity 2 - -- If we ANF first we get - -- f = /\a -> let s = h 3 in g s - -- and now eta expansion gives - -- f = /\a -> \ y -> (let s = h 3 in g s) y - -- which is horrible. - -- Eta expanding first gives - -- f = /\a -> \y -> let s = h 3 in g s y - -- - us <- getUniquesM - let eta_rhs = etaExpand arity us rhs (idType bndr) - - ASSERT2( manifestArity eta_rhs == arity, (ppr bndr <+> ppr arity <+> ppr (exprArity rhs)) - $$ ppr rhs $$ ppr eta_rhs ) - -- Assertion checks that eta expansion was successful - return eta_rhs - where - -- For a GlobalId, take the Arity from the Id. - -- It was set in CoreTidy and must not change - -- For all others, just expand at will - arity | isGlobalId bndr = idArity bndr - | otherwise = exprArity rhs -- --------------------------------------------------------------------------- -- Eliminate Lam as a non-rhs (STG doesn't have such a thing) diff --git a/compiler/coreSyn/CoreSubst.lhs b/compiler/coreSyn/CoreSubst.lhs index e08cdb8..cf086c8 100644 --- a/compiler/coreSyn/CoreSubst.lhs +++ b/compiler/coreSyn/CoreSubst.lhs @@ -12,7 +12,7 @@ module CoreSubst ( -- ** Substituting into expressions and related types deShadowBinds, - substTy, substExpr, substSpec, substWorker, + substTy, substExpr, substSpec, substUnfolding, lookupIdSubst, lookupTvSubst, -- ** Operations on substitutions @@ -211,7 +211,7 @@ lookupIdSubst (Subst in_scope ids _) v | Just e <- lookupVarEnv ids v = e | Just v' <- lookupInScope in_scope v = Var v' -- Vital! See Note [Extending the Subst] - | otherwise = WARN( True, ptext (sLit "CoreSubst.lookupIdSubst") <+> ppr v ) + | otherwise = WARN( True, ptext (sLit "CoreSubst.lookupIdSubst") <+> ppr v $$ ppr in_scope ) Var v -- | Find the substitution for a 'TyVar' in the 'Subst' @@ -474,31 +474,40 @@ substIdInfo :: Subst -> Id -> IdInfo -> Maybe IdInfo substIdInfo subst new_id info | nothing_to_do = Nothing | otherwise = Just (info `setSpecInfo` substSpec subst new_id old_rules - `setWorkerInfo` substWorker subst old_wrkr - `setUnfoldingInfo` noUnfolding) + `setUnfoldingInfo` substUnfolding subst old_unf) where old_rules = specInfo info - old_wrkr = workerInfo info - nothing_to_do = isEmptySpecInfo old_rules && - not (workerExists old_wrkr) && - not (hasUnfolding (unfoldingInfo info)) + old_unf = unfoldingInfo info + nothing_to_do = isEmptySpecInfo old_rules && isClosedUnfolding old_unf ------------------ --- | Substitutes for the 'Id's within the 'WorkerInfo' -substWorker :: Subst -> WorkerInfo -> WorkerInfo - -- Seq'ing on the returned WorkerInfo is enough to cause all the - -- substitutions to happen completely - -substWorker _ NoWorker - = NoWorker -substWorker subst (HasWorker w a) - = case lookupIdSubst subst w of - Var w1 -> HasWorker w1 a - other -> WARN( not (exprIsTrivial other), text "CoreSubst.substWorker:" <+> ppr w ) - NoWorker -- Worker has got substituted away altogether - -- (This can happen if it's trivial, - -- via postInlineUnconditionally, hence warning) +-- | Substitutes for the 'Id's within an unfolding +substUnfolding :: Subst -> Unfolding -> Unfolding + -- Seq'ing on the returned Unfolding is enough to cause + -- all the substitutions to happen completely +substUnfolding subst unf@(InlineRule { uf_tmpl = tmpl, uf_worker = mb_wkr }) + -- Retain an InlineRule! + = seqExpr new_tmpl `seq` + new_mb_wkr `seq` + unf { uf_tmpl = new_tmpl, uf_worker = new_mb_wkr } + where + new_tmpl = substExpr subst tmpl + new_mb_wkr = case mb_wkr of + Nothing -> Nothing + Just w -> subst_wkr w + + subst_wkr w = case lookupIdSubst subst w of + Var w1 -> Just w1 + other -> WARN( not (exprIsTrivial other), text "CoreSubst.substWorker:" <+> ppr w ) + Nothing -- Worker has got substituted away altogether + -- (This can happen if it's trivial, + -- via postInlineUnconditionally, hence warning) + +substUnfolding _ (CoreUnfolding {}) = NoUnfolding -- Discard + -- Always zap a CoreUnfolding, to save substitution work + +substUnfolding _ unf = unf -- Otherwise no substitution to do ------------------ -- | Substitutes for the 'Id's within the 'WorkerInfo' given the new function 'Id' @@ -512,7 +521,7 @@ substSpec subst new_fn (SpecInfo rules rhs_fvs) do_subst rule@(BuiltinRule {}) = rule do_subst rule@(Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs }) = rule { ru_bndrs = bndrs', - ru_fn = new_name, -- Important: the function may have changed its name! + ru_fn = new_name, -- Important: the function may have changed its name! ru_args = map (substExpr subst') args, ru_rhs = substExpr subst' rhs } where diff --git a/compiler/coreSyn/CoreSyn.lhs b/compiler/coreSyn/CoreSyn.lhs index 79e25a2..25d2cdb 100644 --- a/compiler/coreSyn/CoreSyn.lhs +++ b/compiler/coreSyn/CoreSyn.lhs @@ -41,9 +41,10 @@ module CoreSyn ( noUnfolding, evaldUnfolding, mkOtherCon, -- ** Predicates and deconstruction on 'Unfolding' - unfoldingTemplate, maybeUnfoldingTemplate, otherCons, + unfoldingTemplate, setUnfoldingTemplate, + maybeUnfoldingTemplate, otherCons, isValueUnfolding, isEvaldUnfolding, isCheapUnfolding, isCompulsoryUnfolding, - hasUnfolding, hasSomeUnfolding, neverUnfold, + isInlineRule, isClosedUnfolding, hasSomeUnfolding, canUnfold, neverUnfoldGuidance, -- * Strictness seqExpr, seqExprs, seqUnfolding, @@ -271,21 +272,7 @@ See #type_let# -- | Allows attaching extra information to points in expressions rather than e.g. identifiers. data Note = SCC CostCentre -- ^ A cost centre annotation for profiling - - | InlineMe -- ^ Instructs the core simplifer to treat the enclosed expression - -- as very small, and inline it at its call sites - | CoreNote String -- ^ A generic core annotation, propagated but not used by GHC - --- NOTE: we also treat expressions wrapped in InlineMe as --- 'cheap' and 'dupable' (in the sense of exprIsCheap, exprIsDupable) --- What this means is that we obediently inline even things that don't --- look like valuse. This is sometimes important: --- {-# INLINE f #-} --- f = g . h --- Here, f looks like a redex, and we aren't going to inline (.) because it's --- inside an INLINE, so it'll stay looking like a redex. Nevertheless, we --- should inline f even inside lambdas. In effect, we should trust the programmer. \end{code} @@ -404,45 +391,73 @@ data Unfolding -- -- Here, @f@ gets an @OtherCon []@ unfolding. - | CompulsoryUnfolding CoreExpr -- ^ There is /no original definition/, - -- so you'd better unfold. + | CompulsoryUnfolding { -- There is /no original definition/, so you'd better unfold. + uf_tmpl :: CoreExpr -- The unfolding is guaranteed to have no free variables + } -- so no need to think about it during dependency analysis + + | InlineRule { -- The function has an INLINE pragma, with the specified (original) RHS + -- (The inline phase, if any, is in the InlinePragInfo for this Id.) + -- Inline when (a) applied to at least this number of args + -- (b) if there is something interesting about args or context + uf_tmpl :: CoreExpr, -- The *original* RHS; occurrence info is correct + -- (The actual RHS of the function may be different by now, + -- but what we inline is still the original RHS (kept in the InlineRule).) + uf_is_top :: Bool, + + uf_arity :: Arity, -- Don't inline unless applied to this number of *value* args + uf_is_value :: Bool, -- True <=> exprIsHNF is true; save to discard a `seq` + uf_worker :: Maybe Id -- Just wrk_id <=> this unfolding is a the wrapper in a worker/wrapper + -- split from the strictness analyser + -- Used to abbreviate the uf_tmpl in interface files + -- In the Just case, interface files don't actually + -- need to contain the RHS; it can be derived from + -- the strictness info + -- Also used in CoreUnfold to guide inlining decisions + } - | CoreUnfolding - CoreExpr - Bool - Bool - Bool - UnfoldingGuidance + | CoreUnfolding { -- An unfolding for an Id with no pragma, or perhaps a NOINLINE pragma + -- (For NOINLINE, the phase, if any, is in the InlinePragInfo for this Id.) + uf_tmpl :: CoreExpr, -- Template; binder-info is correct + uf_is_top :: Bool, -- True <=> top level binding + uf_is_value :: Bool, -- exprIsHNF template (cached); it is ok to discard a `seq` on + -- this variable + uf_is_cheap :: Bool, -- True <=> doesn't waste (much) work to expand inside an inlining + -- Basically it's exprIsCheap + uf_guidance :: UnfoldingGuidance -- Tells about the *size* of the template. + } -- ^ An unfolding with redundant cached information. Parameters: -- - -- 1) Template used to perform unfolding; binder-info is correct + -- uf_tmpl: Template used to perform unfolding; binder-info is correct -- - -- 2) Is this a top level binding? + -- uf_is_top: Is this a top level binding? -- - -- 3) 'exprIsHNF' template (cached); it is ok to discard a 'seq' on + -- uf_is_valiue: 'exprIsHNF' template (cached); it is ok to discard a 'seq' on -- this variable -- - -- 4) Does this waste only a little work if we expand it inside an inlining? + -- uf_is_cheap: Does this waste only a little work if we expand it inside an inlining? -- Basically this is a cached version of 'exprIsCheap' -- - -- 5) Tells us about the /size/ of the unfolding template + -- uf_guidance: Tells us about the /size/ of the unfolding template --- | When unfolding should take place +------------------------------------------------ +-- | 'UnfoldingGuidance' says when unfolding should take place data UnfoldingGuidance = UnfoldNever - | UnfoldIfGoodArgs Int -- and "n" value args + | UnfoldIfGoodArgs { + ug_arity :: Arity, -- "n" value args - [Int] -- Discount if the argument is evaluated. - -- (i.e., a simplification will definitely - -- be possible). One elt of the list per *value* arg. + ug_args :: [Int], -- Discount if the argument is evaluated. + -- (i.e., a simplification will definitely + -- be possible). One elt of the list per *value* arg. - Int -- The "size" of the unfolding; to be elaborated - -- later. ToDo + ug_size :: Int, -- The "size" of the unfolding; to be elaborated + -- later. ToDo - Int -- Scrutinee discount: the discount to substract if the thing is in - -- a context (case (thing args) of ...), - -- (where there are the right number of arguments.) + ug_res :: Int -- Scrutinee discount: the discount to substract if the thing is in + } -- a context (case (thing args) of ...), + -- (where there are the right number of arguments.) +------------------------------------------------ noUnfolding :: Unfolding -- ^ There is no known 'Unfolding' evaldUnfolding :: Unfolding @@ -455,7 +470,8 @@ mkOtherCon :: [AltCon] -> Unfolding mkOtherCon = OtherCon seqUnfolding :: Unfolding -> () -seqUnfolding (CoreUnfolding e top b1 b2 g) +seqUnfolding (CoreUnfolding { uf_tmpl = e, uf_is_top = top, + uf_is_value = b1, uf_is_cheap = b2, uf_guidance = g}) = seqExpr e `seq` top `seq` b1 `seq` b2 `seq` seqGuidance g seqUnfolding _ = () @@ -467,15 +483,17 @@ seqGuidance _ = () \begin{code} -- | Retrieves the template of an unfolding: panics if none is known unfoldingTemplate :: Unfolding -> CoreExpr -unfoldingTemplate (CoreUnfolding expr _ _ _ _) = expr -unfoldingTemplate (CompulsoryUnfolding expr) = expr -unfoldingTemplate _ = panic "getUnfoldingTemplate" +unfoldingTemplate = uf_tmpl + +setUnfoldingTemplate :: Unfolding -> CoreExpr -> Unfolding +setUnfoldingTemplate unf rhs = unf { uf_tmpl = rhs } -- | Retrieves the template of an unfolding if possible maybeUnfoldingTemplate :: Unfolding -> Maybe CoreExpr -maybeUnfoldingTemplate (CoreUnfolding expr _ _ _ _) = Just expr -maybeUnfoldingTemplate (CompulsoryUnfolding expr) = Just expr -maybeUnfoldingTemplate _ = Nothing +maybeUnfoldingTemplate (CoreUnfolding { uf_tmpl = expr }) = Just expr +maybeUnfoldingTemplate (CompulsoryUnfolding { uf_tmpl = expr }) = Just expr +maybeUnfoldingTemplate (InlineRule { uf_tmpl = expr }) = Just expr +maybeUnfoldingTemplate _ = Nothing -- | The constructors that the unfolding could never be: -- returns @[]@ if no information is available @@ -486,45 +504,53 @@ otherCons _ = [] -- | Determines if it is certainly the case that the unfolding will -- yield a value (something in HNF): returns @False@ if unsure isValueUnfolding :: Unfolding -> Bool -isValueUnfolding (CoreUnfolding _ _ is_evald _ _) = is_evald -isValueUnfolding _ = False + -- Returns False for OtherCon +isValueUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald +isValueUnfolding (InlineRule { uf_is_value = is_evald }) = is_evald +isValueUnfolding _ = False -- | Determines if it possibly the case that the unfolding will -- yield a value. Unlike 'isValueUnfolding' it returns @True@ -- for 'OtherCon' isEvaldUnfolding :: Unfolding -> Bool -isEvaldUnfolding (OtherCon _) = True -isEvaldUnfolding (CoreUnfolding _ _ is_evald _ _) = is_evald -isEvaldUnfolding _ = False + -- Returns True for OtherCon +isEvaldUnfolding (OtherCon _) = True +isEvaldUnfolding (CoreUnfolding { uf_is_value = is_evald }) = is_evald +isEvaldUnfolding (InlineRule { uf_is_value = is_evald }) = is_evald +isEvaldUnfolding _ = False -- | Is the thing we will unfold into certainly cheap? isCheapUnfolding :: Unfolding -> Bool -isCheapUnfolding (CoreUnfolding _ _ _ is_cheap _) = is_cheap -isCheapUnfolding _ = False +isCheapUnfolding (CoreUnfolding { uf_is_cheap = is_cheap }) = is_cheap +isCheapUnfolding _ = False + +isInlineRule :: Unfolding -> Bool +isInlineRule (InlineRule {}) = True +isInlineRule _ = False -- | Must this unfolding happen for the code to be executable? isCompulsoryUnfolding :: Unfolding -> Bool -isCompulsoryUnfolding (CompulsoryUnfolding _) = True +isCompulsoryUnfolding (CompulsoryUnfolding {}) = True isCompulsoryUnfolding _ = False --- | Do we have an available or compulsory unfolding? -hasUnfolding :: Unfolding -> Bool -hasUnfolding (CoreUnfolding _ _ _ _ _) = True -hasUnfolding (CompulsoryUnfolding _) = True -hasUnfolding _ = False +isClosedUnfolding :: Unfolding -> Bool -- No free variables +isClosedUnfolding (CoreUnfolding {}) = False +isClosedUnfolding (InlineRule {}) = False +isClosedUnfolding _ = True -- | Only returns False if there is no unfolding information available at all hasSomeUnfolding :: Unfolding -> Bool hasSomeUnfolding NoUnfolding = False hasSomeUnfolding _ = True --- | Similar to @not . hasUnfolding@, but also returns @True@ --- if it has an unfolding that says it should never occur -neverUnfold :: Unfolding -> Bool -neverUnfold NoUnfolding = True -neverUnfold (OtherCon _) = True -neverUnfold (CoreUnfolding _ _ _ _ UnfoldNever) = True -neverUnfold _ = False +neverUnfoldGuidance :: UnfoldingGuidance -> Bool +neverUnfoldGuidance UnfoldNever = True +neverUnfoldGuidance _ = False + +canUnfold :: Unfolding -> Bool +canUnfold (InlineRule {}) = True +canUnfold (CoreUnfolding { uf_guidance = g }) = not (neverUnfoldGuidance g) +canUnfold _ = False \end{code} diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index d7ec4c7..258cd46 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -18,12 +18,10 @@ find, unsurprisingly, a Core expression. module CoreUnfold ( Unfolding, UnfoldingGuidance, -- Abstract types - noUnfolding, mkTopUnfolding, mkImplicitUnfolding, mkUnfolding, - mkCompulsoryUnfolding, seqUnfolding, - evaldUnfolding, mkOtherCon, otherCons, - unfoldingTemplate, maybeUnfoldingTemplate, - isEvaldUnfolding, isValueUnfolding, isCheapUnfolding, isCompulsoryUnfolding, - hasUnfolding, hasSomeUnfolding, neverUnfold, + noUnfolding, mkImplicitUnfolding, + mkTopUnfolding, mkUnfolding, + mkInlineRule, mkWwInlineRule, + mkCompulsoryUnfolding, couldBeSmallEnoughToInline, certainlyWillInline, smallEnoughToInline, @@ -37,7 +35,7 @@ import DynFlags import CoreSyn import PprCore () -- Instances import OccurAnal -import CoreSubst ( Subst, emptySubst, substTy, extendIdSubst, extendTvSubst +import CoreSubst ( emptySubst, substTy, extendIdSubst, extendTvSubst , lookupIdSubst, substBndr, substBndrs, substRecBndrs ) import CoreUtils import Id @@ -45,7 +43,9 @@ import DataCon import Literal import PrimOp import IdInfo +import BasicTypes ( Arity ) import Type hiding( substTy, extendTvSubst ) +import Maybes import PrelNames import Bag import FastTypes @@ -68,24 +68,37 @@ mkTopUnfolding expr = mkUnfolding True {- Top level -} expr mkImplicitUnfolding :: CoreExpr -> Unfolding -- For implicit Ids, do a tiny bit of optimising first mkImplicitUnfolding expr - = CoreUnfolding (simpleOptExpr emptySubst expr) + = CoreUnfolding (simpleOptExpr expr) True (exprIsHNF expr) (exprIsCheap expr) (calcUnfoldingGuidance opt_UF_CreationThreshold expr) +mkInlineRule :: CoreExpr -> Arity -> Unfolding +mkInlineRule expr arity + = InlineRule { uf_tmpl = simpleOptExpr expr, + uf_is_top = True, -- Conservative; this gets set more + -- accuately by the simplifier (slight hack) + -- in SimplEnv.substUnfolding + uf_arity = arity, + uf_is_value = exprIsHNF expr, + uf_worker = Nothing } + +mkWwInlineRule :: CoreExpr -> Arity -> Id -> Unfolding +mkWwInlineRule expr arity wkr + = InlineRule { uf_tmpl = simpleOptExpr expr, + uf_is_top = True, -- Conservative; see mkInlineRule + uf_arity = arity, + uf_is_value = exprIsHNF expr, + uf_worker = Just wkr } + mkUnfolding :: Bool -> CoreExpr -> Unfolding mkUnfolding top_lvl expr - = CoreUnfolding (occurAnalyseExpr expr) - top_lvl - - (exprIsHNF expr) - -- Already evaluated - - (exprIsCheap expr) - -- OK to inline inside a lambda - - (calcUnfoldingGuidance opt_UF_CreationThreshold expr) + = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr, + uf_is_top = top_lvl, + uf_is_value = exprIsHNF expr, + uf_is_cheap = exprIsCheap expr, + uf_guidance = calcUnfoldingGuidance opt_UF_CreationThreshold expr } -- Sometimes during simplification, there's a large let-bound thing -- which has been substituted, and so is now dead; so 'expr' contains -- two copies of the thing while the occurrence-analysed expression doesn't @@ -95,14 +108,6 @@ mkUnfolding top_lvl expr -- This can occasionally mean that the guidance is very pessimistic; -- it gets fixed up next round -instance Outputable Unfolding where - ppr NoUnfolding = ptext (sLit "No unfolding") - ppr (OtherCon cs) = ptext (sLit "OtherCon") <+> ppr cs - ppr (CompulsoryUnfolding e) = ptext (sLit "Compulsory") <+> ppr e - ppr (CoreUnfolding e top hnf cheap g) - = ptext (sLit "Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr g, - ppr e] - mkCompulsoryUnfolding :: CoreExpr -> Unfolding mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded = CompulsoryUnfolding (occurAnalyseExpr expr) @@ -116,75 +121,27 @@ mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded %************************************************************************ \begin{code} -instance Outputable UnfoldingGuidance where - ppr UnfoldNever = ptext (sLit "NEVER") - ppr (UnfoldIfGoodArgs v cs size discount) - = hsep [ ptext (sLit "IF_ARGS"), int v, - brackets (hsep (map int cs)), - int size, - int discount ] -\end{code} - - -\begin{code} calcUnfoldingGuidance :: Int -- bomb out if size gets bigger than this -> CoreExpr -- expression to look at -> UnfoldingGuidance calcUnfoldingGuidance bOMB_OUT_SIZE expr - = case collect_val_bndrs expr of { (inline, val_binders, body) -> + = case collectBinders expr of { (binders, body) -> let + val_binders = filter isId binders n_val_binders = length val_binders - - max_inline_size = n_val_binders+2 - -- The idea is that if there is an INLINE pragma (inline is True) - -- and there's a big body, we give a size of n_val_binders+2. This - -- This is just enough to fail the no-size-increase test in callSiteInline, - -- so that INLINE things don't get inlined into entirely boring contexts, - -- but no more. - in case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of - - TooBig - | not inline -> UnfoldNever - -- A big function with an INLINE pragma must - -- have an UnfoldIfGoodArgs guidance - | otherwise -> UnfoldIfGoodArgs n_val_binders - (map (const 0) val_binders) - max_inline_size 0 - + TooBig -> UnfoldNever SizeIs size cased_args scrut_discount - -> UnfoldIfGoodArgs - n_val_binders - (map discount_for val_binders) - final_size - (iBox scrut_discount) + -> UnfoldIfGoodArgs { ug_arity = n_val_binders + , ug_args = map discount_for val_binders + , ug_size = iBox size + , ug_res = iBox scrut_discount } where - boxed_size = iBox size - - final_size | inline = boxed_size `min` max_inline_size - | otherwise = boxed_size - - -- Sometimes an INLINE thing is smaller than n_val_binders+2. - -- A particular case in point is a constructor, which has size 1. - -- We want to inline this regardless, hence the `min` - discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc) 0 cased_args } - where - collect_val_bndrs e = go False [] e - -- We need to be a bit careful about how we collect the - -- value binders. In ptic, if we see - -- __inline_me (\x y -> e) - -- We want to say "2 value binders". Why? So that - -- we take account of information given for the arguments - - go _ rev_vbs (Note InlineMe e) = go True rev_vbs e - go inline rev_vbs (Lam b e) | isId b = go inline (b:rev_vbs) e - | otherwise = go inline rev_vbs e - go inline rev_vbs e = (inline, reverse rev_vbs, e) \end{code} \begin{code} @@ -197,21 +154,10 @@ sizeExpr :: FastInt -- Bomb out if it gets bigger than this sizeExpr bOMB_OUT_SIZE top_args expr = size_up expr where - size_up (Type _) = sizeZero -- Types cost nothing + size_up (Type _) = sizeZero -- Types cost nothing size_up (Var _) = sizeOne - - size_up (Note InlineMe _) = sizeOne -- Inline notes make it look very small - -- This can be important. If you have an instance decl like this: - -- instance Foo a => Foo [a] where - -- {-# INLINE op1, op2 #-} - -- op1 = ... - -- op2 = ... - -- then we'll get a dfun which is a pair of two INLINE lambdas - - size_up (Note _ body) = size_up body -- Other notes cost nothing - + size_up (Note _ body) = size_up body -- Notes cost nothing size_up (Cast e _) = size_up e - size_up (App fun (Type _)) = size_up fun size_up (App fun arg) = size_up_app fun [arg] @@ -484,13 +430,17 @@ couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold certainlyWillInline :: Unfolding -> Bool -- Sees if the unfolding is pretty certain to inline -certainlyWillInline (CoreUnfolding _ _ _ is_cheap (UnfoldIfGoodArgs n_vals _ size _)) +certainlyWillInline (CompulsoryUnfolding {}) = True +certainlyWillInline (InlineRule {}) = True +certainlyWillInline (CoreUnfolding + { uf_is_cheap = is_cheap + , uf_guidance = UnfoldIfGoodArgs {ug_arity = n_vals, ug_size = size}}) = is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold certainlyWillInline _ = False smallEnoughToInline :: Unfolding -> Bool -smallEnoughToInline (CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ _ size _)) +smallEnoughToInline (CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_size = size}}) = size <= opt_UF_UseThreshold smallEnoughToInline _ = False @@ -550,7 +500,10 @@ instance Outputable CallCtxt where ppr ValAppCtxt = ptext (sLit "ValAppCtxt") callSiteInline dflags active_inline id lone_variable arg_infos cont_info - = case idUnfolding id of { + = let + n_val_args = length arg_infos + in + case idUnfolding id of { NoUnfolding -> Nothing ; OtherCon _ -> Nothing ; @@ -561,14 +514,45 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info -- compulsory unfoldings (see MkId.lhs). -- We don't allow them to be inactive - CoreUnfolding unf_template is_top is_value is_cheap guidance -> + InlineRule { uf_tmpl = unf_template, uf_arity = arity, uf_is_top = is_top + , uf_is_value = is_value, uf_worker = mb_worker } + -> let yes_or_no | not active_inline = False + | n_val_args < arity = yes_unsat -- Not enough value args + | n_val_args == arity = yes_exact -- Exactly saturated + | otherwise = True -- Over-saturated + result | yes_or_no = Just unf_template + | otherwise = Nothing + + -- See Note [Inlining an InlineRule] + is_wrapper = isJust mb_worker + yes_unsat | is_wrapper = or arg_infos + | otherwise = False + + yes_exact = or arg_infos || interesting_saturated_call + interesting_saturated_call + = case cont_info of + BoringCtxt -> not is_top -- Note [Nested functions] + CaseCtxt -> not lone_variable || not is_value -- Note [Lone variables] + ArgCtxt {} -> arity > 0 -- Note [Inlining in ArgCtxt] + ValAppCtxt -> True -- Note [Cast then apply] + in + if dopt Opt_D_dump_inlinings dflags then + pprTrace ("Considering InlineRule for: " ++ showSDoc (ppr id)) + (vcat [text "active:" <+> ppr active_inline, + text "arg infos" <+> ppr arg_infos, + text "interesting call" <+> ppr interesting_saturated_call, + text "is value:" <+> ppr is_value, + text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) + result + else result ; + + CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value, + uf_is_cheap = is_cheap, uf_guidance = guidance } -> let result | yes_or_no = Just unf_template | otherwise = Nothing - n_val_args = length arg_infos - yes_or_no = active_inline && is_cheap && consider_safe -- We consider even the once-in-one-branch -- occurrences, because they won't all have been @@ -584,7 +568,8 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info -- work-duplication issue (the caller checks that). = case guidance of UnfoldNever -> False - UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount + UnfoldIfGoodArgs { ug_arity = n_vals_wanted, ug_args = arg_discounts + , ug_res = res_discount, ug_size = size } | enough_args && size <= (n_vals_wanted + 1) -- Inline unconditionally if there no size increase -- Size of call is n_vals_wanted (+1 for the function) @@ -634,20 +619,35 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info in if dopt Opt_D_dump_inlinings dflags then - pprTrace "Considering inlining" - (ppr id <+> vcat [text "active:" <+> ppr active_inline, - text "arg infos" <+> ppr arg_infos, - text "interesting continuation" <+> ppr cont_info, - text "is value:" <+> ppr is_value, - text "is cheap:" <+> ppr is_cheap, - text "guidance" <+> ppr guidance, - text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) + pprTrace ("Considering inlining: " ++ showSDoc (ppr id)) + (vcat [text "active:" <+> ppr active_inline, + text "arg infos" <+> ppr arg_infos, + text "interesting continuation" <+> ppr cont_info, + text "is value:" <+> ppr is_value, + text "is cheap:" <+> ppr is_cheap, + text "guidance" <+> ppr guidance, + text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) result else result } \end{code} +Note [Inlining an InlineRule] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An InlineRules is used for + (a) pogrammer INLINE pragmas + (b) inlinings from worker/wrapper + +For (a) the RHS may be large, and our contract is that we *only* inline +when the function is applied to all the arguments on the LHS of the +source-code defn. (The uf_arity in the rule.) + +However for worker/wrapper it may be worth inlining even if the +arity is not satisfied (as we do in the CoreUnfolding case) so we don't +require saturation. + + Note [Nested functions] ~~~~~~~~~~~~~~~~~~~~~~~ If a function has a nested defn we also record some-benefit, on the @@ -772,14 +772,14 @@ computeDiscount n_vals_wanted arg_discounts result_discount arg_infos \begin{code} -simpleOptExpr :: Subst -> CoreExpr -> CoreExpr +simpleOptExpr :: CoreExpr -> CoreExpr -- Return an occur-analysed and slightly optimised expression -- The optimisation is very straightforward: just -- inline non-recursive bindings that are used only once, -- or wheere the RHS is trivial -simpleOptExpr subst expr - = go subst (occurAnalyseExpr expr) +simpleOptExpr expr + = go emptySubst (occurAnalyseExpr expr) where go subst (Var v) = lookupIdSubst subst v go subst (App e1 e2) = App (go subst e1) (go subst e2) diff --git a/compiler/coreSyn/CoreUtils.lhs b/compiler/coreSyn/CoreUtils.lhs index 44ca27a..415f5f7 100644 --- a/compiler/coreSyn/CoreUtils.lhs +++ b/compiler/coreSyn/CoreUtils.lhs @@ -16,7 +16,7 @@ Utility functions on @Core@ syntax -- | Commonly useful utilites for manipulating the Core language module CoreUtils ( -- * Constructing expressions - mkInlineMe, mkSCC, mkCoerce, mkCoerceI, + mkSCC, mkCoerce, mkCoerceI, bindNonRec, needsCaseBinding, mkAltExpr, mkPiType, mkPiTypes, @@ -27,10 +27,12 @@ module CoreUtils ( exprType, coreAltType, coreAltsType, exprIsDupable, exprIsTrivial, exprIsCheap, exprIsHNF,exprOkForSpeculation, exprIsBig, - exprIsConApp_maybe, exprIsBottom, + exprIsConApp_maybe, + exprBotStrictness_maybe, rhsIsStatic, -- * Arity and eta expansion + -- exprIsBottom, Not used manifestArity, exprArity, exprEtaExpandArity, etaExpand, @@ -50,6 +52,7 @@ module CoreUtils ( #include "HsVersions.h" +import StaticFlags ( opt_NoStateHack ) import CoreSyn import CoreFVs import PprCore @@ -172,46 +175,6 @@ panic_msg e op_ty = pprCoreExpr e $$ ppr op_ty %* * %************************************************************************ -mkNote removes redundant coercions, and SCCs where possible - -\begin{code} -#ifdef UNUSED -mkNote :: Note -> CoreExpr -> CoreExpr -mkNote (SCC cc) expr = mkSCC cc expr -mkNote InlineMe expr = mkInlineMe expr -mkNote note expr = Note note expr -#endif -\end{code} - -Drop trivial InlineMe's. This is somewhat important, because if we have an unfolding -that looks like (Note InlineMe (Var v)), the InlineMe doesn't go away because it may -not be *applied* to anything. - -We don't use exprIsTrivial here, though, because we sometimes generate worker/wrapper -bindings like - fw = ... - f = inline_me (coerce t fw) -As usual, the inline_me prevents the worker from getting inlined back into the wrapper. -We want the split, so that the coerces can cancel at the call site. - -However, we can get left with tiresome type applications. Notably, consider - f = /\ a -> let t = e in (t, w) -Then lifting the let out of the big lambda gives - t' = /\a -> e - f = /\ a -> let t = inline_me (t' a) in (t, w) -The inline_me is to stop the simplifier inlining t' right back -into t's RHS. In the next phase we'll substitute for t (since -its rhs is trivial) and *then* we could get rid of the inline_me. -But it hardly seems worth it, so I don't bother. - -\begin{code} --- | Wraps the given expression in an inlining hint unless the expression --- is trivial in some sense, so that doing so would usually hurt us -mkInlineMe :: CoreExpr -> CoreExpr -mkInlineMe (Var v) = Var v -mkInlineMe e = Note InlineMe e -\end{code} - \begin{code} -- | Wrap the given expression in the coercion, dropping identity coercions and coalescing nested coercions mkCoerceI :: CoercionI -> CoreExpr -> CoreExpr @@ -420,12 +383,11 @@ exprIsTrivial _ = False \begin{code} exprIsDupable :: CoreExpr -> Bool -exprIsDupable (Type _) = True -exprIsDupable (Var _) = True -exprIsDupable (Lit lit) = litIsDupable lit -exprIsDupable (Note InlineMe _) = True -exprIsDupable (Note _ e) = exprIsDupable e -exprIsDupable (Cast e _) = exprIsDupable e +exprIsDupable (Type _) = True +exprIsDupable (Var _) = True +exprIsDupable (Lit lit) = litIsDupable lit +exprIsDupable (Note _ e) = exprIsDupable e +exprIsDupable (Cast e _) = exprIsDupable e exprIsDupable expr = go expr 0 where @@ -472,7 +434,6 @@ exprIsCheap :: CoreExpr -> Bool exprIsCheap (Lit _) = True exprIsCheap (Type _) = True exprIsCheap (Var _) = True -exprIsCheap (Note InlineMe _) = True exprIsCheap (Note _ e) = exprIsCheap e exprIsCheap (Cast e _) = exprIsCheap e exprIsCheap (Lam x e) = isRuntimeVar x || exprIsCheap e @@ -619,8 +580,9 @@ isDivOp _ = False \end{code} \begin{code} +{- Never used -- omitting -- | True of expressions that are guaranteed to diverge upon execution -exprIsBottom :: CoreExpr -> Bool +exprIsBottom :: CoreExpr -> Bool -- True => definitely bottom exprIsBottom e = go 0 e where -- n is the number of args @@ -636,6 +598,7 @@ exprIsBottom e = go 0 e idAppIsBottom :: Id -> Int -> Bool idAppIsBottom id n_val_args = appIsBottom (idNewStrictness id) n_val_args +-} \end{code} \begin{code} @@ -882,12 +845,7 @@ exprIsConApp_maybe (Note (BinaryTickBox {}) expr) exprIsConApp_maybe (Note _ expr) = exprIsConApp_maybe expr - -- We ignore InlineMe notes in case we have - -- x = __inline_me__ (a,b) - -- All part of making sure that INLINE pragmas never hurt - -- Marcin tripped on this one when making dictionaries more inlinable - -- - -- In fact, we ignore all notes. For example, + -- We ignore all notes. For example, -- case _scc_ "foo" (C a b) of -- C a b -> e -- should be optimised away, but it will be only if we look @@ -923,50 +881,56 @@ exprIsConApp_maybe expr = analyse (collectArgs expr) -- ^ The Arity returned is the number of value args the -- expression can be applied to without doing much work exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity -{- -exprEtaExpandArity is used when eta expanding - e ==> \xy -> e x y +-- exprEtaExpandArity is used when eta expanding +-- e ==> \xy -> e x y +exprEtaExpandArity dflags e + = applyStateHack (exprType e) (arityDepth (arityType dicts_cheap e)) + where + dicts_cheap = dopt Opt_DictsCheap dflags -It returns 1 (or more) to: - case x of p -> \s -> ... -because for I/O ish things we really want to get that \s to the top. -We are prepared to evaluate x each time round the loop in order to get that +exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, StrictSig) +-- A cheap and cheerful function that identifies bottoming functions +-- and gives them a suitable strictness signatures. It's used during +-- float-out +exprBotStrictness_maybe e + = case arityType False e of + AT _ ATop -> Nothing + AT a ABot -> Just (a, mkStrictSig (mkTopDmdType (replicate a topDmd) BotRes)) +\end{code} -It's all a bit more subtle than it looks: -1. One-shot lambdas +Note [Definition of arity] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +The "arity" of an expression 'e' is n if + applying 'e' to *fewer* than n *value* arguments + converges rapidly -Consider one-shot lambdas - let x = expensive in \y z -> E -We want this to have arity 2 if the \y-abstraction is a 1-shot lambda -Hence the ArityType returned by arityType +Or, to put it another way -2. The state-transformer hack + there is no work lost in duplicating the partial + application (e x1 .. x(n-1)) -The one-shot lambda special cause is particularly important/useful for -IO state transformers, where we often get - let x = E in \ s -> ... +In the divegent case, no work is lost by duplicating because if the thing +is evaluated once, that's the end of the program. -and the \s is a real-world state token abstraction. Such abstractions -are almost invariably 1-shot, so we want to pull the \s out, past the -let x=E, even if E is expensive. So we treat state-token lambdas as -one-shot even if they aren't really. The hack is in Id.isOneShotBndr. +Or, to put it another way, in any context C -3. Dealing with bottom + C[ (\x1 .. xn. e x1 .. xn) ] + is as efficient as + C[ e ] -Consider also - f = \x -> error "foo" -Here, arity 1 is fine. But if it is - f = \x -> case x of - True -> error "foo" - False -> \y -> x+y -then we want to get arity 2. Tecnically, this isn't quite right, because - (f True) `seq` 1 -should diverge, but it'll converge if we eta-expand f. Nevertheless, we -do so; it improves some programs significantly, and increasing convergence -isn't a bad thing. Hence the ABot/ATop in ArityType. -Actually, the situation is worse. Consider +It's all a bit more subtle than it looks: + +Note [Arity of case expressions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We treat the arity of + case x of p -> \s -> ... +as 1 (or more) because for I/O ish things we really want to get that +\s to the top. We are prepared to evaluate x each time round the loop +in order to get that. + +This isn't really right in the presence of seq. Consider f = \x -> case x of True -> \y -> x+y False -> \y -> x-y @@ -978,8 +942,29 @@ This should diverge! But if we eta-expand, it won't. Again, we ignore this many programs. -4. Newtypes +1. Note [One-shot lambdas] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider one-shot lambdas + let x = expensive in \y z -> E +We want this to have arity 1 if the \y-abstraction is a 1-shot lambda. +3. Note [Dealing with bottom] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f = \x -> error "foo" +Here, arity 1 is fine. But if it is + f = \x -> case x of + True -> error "foo" + False -> \y -> x+y +then we want to get arity 2. Technically, this isn't quite right, because + (f True) `seq` 1 +should diverge, but it'll converge if we eta-expand f. Nevertheless, we +do so; it improves some programs significantly, and increasing convergence +isn't a bad thing. Hence the ABot/ATop in ArityType. + + +4. Note [Newtype arity] +~~~~~~~~~~~~~~~~~~~~~~~~ Non-recursive newtypes are transparent, and should not get in the way. We do (currently) eta-expand recursive newtypes too. So if we have, say @@ -997,75 +982,116 @@ HOWEVER, note that if you use coerce bogusly you can ge coerce Int negate And since negate has arity 2, you might try to eta expand. But you can't decopose Int to a function type. Hence the final case in eta_expand. --} -exprEtaExpandArity dflags e = arityDepth (arityType dflags e) +Note [The state-transformer hack] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose we have + f = e +where e has arity n. Then, if we know from the context that f has +a usage type like + t1 -> ... -> tn -1-> t(n+1) -1-> ... -1-> tm -> ... +then we can expand the arity to m. This usage type says that +any application (x e1 .. en) will be applied to uniquely to (m-n) more args +Consider f = \x. let y = + in case x of + True -> foo + False -> \(s:RealWorld) -> e +where foo has arity 1. Then we want the state hack to +apply to foo too, so we can eta expand the case. + +Then we expect that if f is applied to one arg, it'll be applied to two +(that's the hack -- we don't really know, and sometimes it's false) +See also Id.isOneShotBndr. --- A limited sort of function type -data ArityType = AFun Bool ArityType -- True <=> one-shot - | ATop -- Know nothing - | ABot -- Diverges - -arityDepth :: ArityType -> Arity -arityDepth (AFun _ ty) = 1 + arityDepth ty -arityDepth _ = 0 - -andArityType :: ArityType -> ArityType -> ArityType -andArityType ABot at2 = at2 -andArityType ATop _ = ATop -andArityType (AFun t1 at1) (AFun t2 at2) = AFun (t1 && t2) (andArityType at1 at2) -andArityType at1 at2 = andArityType at2 at1 +\begin{code} +applyStateHack :: Type -> Arity -> Arity +applyStateHack ty arity -- Note [The state-transformer hack] + | opt_NoStateHack = arity + | otherwise = go ty arity + where + go :: Type -> Arity -> Arity + go ty arity -- This case analysis should match that in eta_expand + | Just (_, ty') <- splitForAllTy_maybe ty = go ty' arity + + | Just (tc,tys) <- splitTyConApp_maybe ty + , Just (ty', _) <- instNewTyCon_maybe tc tys + , not (isRecursiveTyCon tc) = go ty' arity + -- Important to look through non-recursive newtypes, so that, eg + -- (f x) where f has arity 2, f :: Int -> IO () + -- Here we want to get arity 1 for the result! + + | Just (arg,res) <- splitFunTy_maybe ty + , arity > 0 || isStateHackType arg = 1 + go res (arity-1) + + | otherwise = WARN( arity > 0, ppr arity ) 0 +\end{code} -arityType :: DynFlags -> CoreExpr -> ArityType - -- (go1 e) = [b1,..,bn] - -- means expression can be rewritten \x_b1 -> ... \x_bn -> body - -- where bi is True <=> the lambda is one-shot -arityType dflags (Note _ e) = arityType dflags e --- Not needed any more: etaExpand is cleverer --- removed: | ok_note n = arityType dflags e --- removed: | otherwise = ATop +-------------------- Main arity code ---------------------------- +\begin{code} +-- If e has ArityType (AT as r), then the term 'e' +-- * Must be applied to at least (length as) *value* args +-- before doing any significant work +-- * It will not diverge before being applied to (length as) +-- value arguments +-- * If 'r' is ABot, then it guarantees to eventually diverge if +-- applied to enough arguments (perhaps more than (length as) + +data ArityType = AT Arity ArityRes +data ArityRes = ATop -- Know nothing + | ABot -- Diverges -arityType dflags (Cast e _) = arityType dflags e +vanillaArityType :: ArityType +vanillaArityType = AT 0 ATop -- Totally uninformative +arityDepth :: ArityType -> Arity +arityDepth (AT a _) = a + +incArity :: ArityType -> ArityType +incArity (AT a r) = AT (a+1) r + +decArity :: ArityType -> ArityType +decArity (AT 0 r) = AT 0 r +decArity (AT a r) = AT (a-1) r + +andArityType :: ArityType -> ArityType -> ArityType -- Used for branches of a 'case' +andArityType (AT a1 ATop) (AT a2 ATop) = AT (a1 `min` a2) ATop +andArityType (AT _ ABot) (AT a2 ATop) = AT a2 ATop +andArityType (AT a1 ATop) (AT _ ABot) = AT a1 ATop +andArityType (AT a1 ABot) (AT a2 ABot) = AT (a1 `max` a2) ABot + +trimArity :: Bool -> ArityType -> ArityType +-- We have something like (let x = E in b), where b has the given +-- arity type. Then +-- * If E is cheap we can push it inside as far as we like +-- * If b eventually diverges, we allow ourselves to push inside +-- arbitrarily, even though that is not quite right +trimArity _cheap (AT a ABot) = AT a ABot +trimArity True (AT a ATop) = AT a ATop +trimArity False (AT _ ATop) = AT 0 ATop -- Bale out + +--------------------------- +arityType :: Bool -> CoreExpr -> ArityType arityType _ (Var v) - = mk (idArity v) (arg_tys (idType v)) - where - mk :: Arity -> [Type] -> ArityType - -- The argument types are only to steer the "state hack" - -- Consider case x of - -- True -> foo - -- False -> \(s:RealWorld) -> e - -- where foo has arity 1. Then we want the state hack to - -- apply to foo too, so we can eta expand the case. - mk 0 tys | isBottomingId v = ABot - | (ty:_) <- tys, isStateHackType ty = AFun True ATop - | otherwise = ATop - mk n (ty:tys) = AFun (isStateHackType ty) (mk (n-1) tys) - mk n [] = AFun False (mk (n-1) []) - - arg_tys :: Type -> [Type] -- Ignore for-alls - arg_tys ty - | Just (_, ty') <- splitForAllTy_maybe ty = arg_tys ty' - | Just (arg,res) <- splitFunTy_maybe ty = arg : arg_tys res - | otherwise = [] + | Just strict_sig <- idNewStrictness_maybe v + , (ds, res) <- splitStrictSig strict_sig + , isBotRes res + = AT (length ds) ABot -- Function diverges + | otherwise + = AT (idArity v) ATop -- Lambdas; increase arity -arityType dflags (Lam x e) - | isId x = AFun (isOneShotBndr x) (arityType dflags e) - | otherwise = arityType dflags e +arityType dicts_cheap (Lam x e) + | isId x = incArity (arityType dicts_cheap e) + | otherwise = arityType dicts_cheap e -- Applications; decrease arity -arityType dflags (App f (Type _)) = arityType dflags f -arityType dflags (App f a) - = case arityType dflags f of - ABot -> ABot -- If function diverges, ignore argument - ATop -> ATop -- No no info about function - AFun _ xs - | exprIsCheap a -> xs - | otherwise -> ATop - +arityType dicts_cheap (App fun (Type _)) + = arityType dicts_cheap fun +arityType dicts_cheap (App fun arg ) + = trimArity (exprIsCheap arg) (decArity (arityType dicts_cheap fun)) + -- Case/Let; keep arity if either the expression is cheap -- or it's a 1-shot lambda -- The former is not really right for Haskell @@ -1073,22 +1099,16 @@ arityType dflags (App f a) -- ===> -- f x y = case x of { (a,b) -> e } -- The difference is observable using 'seq' -arityType dflags (Case scrut _ _ alts) - = case foldr1 andArityType [arityType dflags rhs | (_,_,rhs) <- alts] of - xs | exprIsCheap scrut -> xs - AFun one_shot _ | one_shot -> AFun True ATop - _ -> ATop - -arityType dflags (Let b e) - = case arityType dflags e of - xs | cheap_bind b -> xs - AFun one_shot _ | one_shot -> AFun True ATop - _ -> ATop +arityType dicts_cheap (Case scrut _ _ alts) + = trimArity (exprIsCheap scrut) + (foldr1 andArityType [arityType dicts_cheap rhs | (_,_,rhs) <- alts]) + +arityType dicts_cheap (Let b e) + = trimArity (cheap_bind b) (arityType dicts_cheap e) where cheap_bind (NonRec b e) = is_cheap (b,e) cheap_bind (Rec prs) = all is_cheap prs - is_cheap (b,e) = (dopt Opt_DictsCheap dflags && isDictId b) - || exprIsCheap e + is_cheap (b,e) = (dicts_cheap && isDictId b) || exprIsCheap e -- If the experimental -fdicts-cheap flag is on, we eta-expand through -- dictionary bindings. This improves arities. Thereby, it also -- means that full laziness is less prone to floating out the @@ -1106,21 +1126,9 @@ arityType dflags (Let b e) -- One could go further and make exprIsCheap reply True to any -- dictionary-typed expression, but that's more work. -arityType _ _ = ATop - -{- NOT NEEDED ANY MORE: etaExpand is cleverer -ok_note InlineMe = False -ok_note other = True - -- Notice that we do not look through __inline_me__ - -- This may seem surprising, but consider - -- f = _inline_me (\x -> e) - -- We DO NOT want to eta expand this to - -- f = \x -> (_inline_me (\x -> e)) x - -- because the _inline_me gets dropped now it is applied, - -- giving just - -- f = \x -> e - -- A Bad Idea --} +arityType dicts_cheap (Note _ e) = arityType dicts_cheap e +arityType dicts_cheap (Cast e _) = arityType dicts_cheap e +arityType _ _ = vanillaArityType \end{code} @@ -1147,8 +1155,7 @@ etaExpand :: Arity -- ^ Result should have this number of value args etaExpand n us expr ty | manifestArity expr >= n = expr -- The no-op case - | otherwise - = eta_expand n us expr ty + | otherwise = eta_expand n us expr ty -- manifestArity sees how many leading value lambdas there are manifestArity :: CoreExpr -> Arity @@ -1168,16 +1175,8 @@ manifestArity _ = 0 -- so perhaps the extra code isn't worth it eta_expand :: Int -> [Unique] -> CoreExpr -> Type -> CoreExpr -eta_expand n _ expr ty - | n == 0 && - -- The ILX code generator requires eta expansion for type arguments - -- too, but alas the 'n' doesn't tell us how many of them there - -- may be. So we eagerly eta expand any big lambdas, and just - -- cross our fingers about possible loss of sharing in the ILX case. - -- The Right Thing is probably to make 'arity' include - -- type variables throughout the compiler. (ToDo.) - not (isForAllTy ty) - -- Saturated, so nothing to do +eta_expand n _ expr _ + | n == 0 -- Saturated, so nothing to do = expr -- Short cut for the case where there already @@ -1346,6 +1345,7 @@ exprIsBig :: Expr b -> Bool exprIsBig (Lit _) = False exprIsBig (Var _) = False exprIsBig (Type _) = False +exprIsBig (Lam _ e) = exprIsBig e exprIsBig (App f a) = exprIsBig f || exprIsBig a exprIsBig (Cast e _) = exprIsBig e -- Hopefully coercions are not too big! exprIsBig _ = True @@ -1425,7 +1425,6 @@ exprSize (Type t) = seqType t `seq` 1 noteSize :: Note -> Int noteSize (SCC cc) = cc `seq` 1 -noteSize InlineMe = 1 noteSize (CoreNote s) = s `seq` 1 -- hdaume: core annotations varSize :: Var -> Int @@ -1581,7 +1580,7 @@ rhsIsStatic :: PackageId -> CoreExpr -> Bool -- This is a bit like CoreUtils.exprIsHNF, with the following differences: -- a) scc "foo" (\x -> ...) is updatable (so we catch the right SCC) -- --- b) (C x xs), where C is a contructors is updatable if the application is +-- b) (C x xs), where C is a contructor is updatable if the application is -- dynamic -- -- c) don't look through unfolding of f in (f x). diff --git a/compiler/coreSyn/MkExternalCore.lhs b/compiler/coreSyn/MkExternalCore.lhs index 717d3d8..d0d9dea 100644 --- a/compiler/coreSyn/MkExternalCore.lhs +++ b/compiler/coreSyn/MkExternalCore.lhs @@ -171,7 +171,6 @@ make_exp (Case e v ty alts) = do return $ C.Case scrut (make_vbind v) (make_ty ty) newAlts make_exp (Note (SCC _) e) = make_exp e >>= (return . C.Note "SCC") -- temporary make_exp (Note (CoreNote s) e) = make_exp e >>= (return . C.Note s) -- hdaume: core annotations -make_exp (Note InlineMe e) = make_exp e >>= (return . C.Note "InlineMe") make_exp _ = error "MkExternalCore died: make_exp" make_alt :: CoreAlt -> CoreM C.Alt diff --git a/compiler/coreSyn/PprCore.lhs b/compiler/coreSyn/PprCore.lhs index d641a9e..595b6d3 100644 --- a/compiler/coreSyn/PprCore.lhs +++ b/compiler/coreSyn/PprCore.lhs @@ -209,9 +209,6 @@ ppr_expr add_par (Let bind expr) ppr_expr add_par (Note (SCC cc) expr) = add_par (sep [pprCostCentreCore cc, pprCoreExpr expr]) -ppr_expr add_par (Note InlineMe expr) - = add_par $ ptext (sLit "__inline_me") <+> pprParendExpr expr - ppr_expr add_par (Note (CoreNote s) expr) = add_par $ sep [sep [ptext (sLit "__core_note"), pprHsString (mkFastString s)], @@ -268,6 +265,9 @@ pprCoreBinder LambdaBind bndr -- Case bound things don't get a signature or a herald, unless we have debug on pprCoreBinder CaseBind bndr + | isDeadBinder bndr -- False for tyvars + = ptext (sLit "_") + | otherwise = getPprStyle $ \ sty -> if debugStyle sty then parens (pprTypedBinder bndr) @@ -325,6 +325,10 @@ pprIdBndrInfo info \end{code} +----------------------------------------------------- +-- IdInfo +----------------------------------------------------- + \begin{code} pprIdDetails :: Id -> SDoc pprIdDetails id | isGlobalId id = ppr (globalIdDetails id) @@ -335,13 +339,13 @@ ppIdInfo :: Id -> IdInfo -> SDoc ppIdInfo _ info = brackets $ vcat [ ppArityInfo a, - ppWorkerInfo (workerInfo info), ppCafInfo (cafInfo info), #ifdef OLD_STRICTNESS ppStrictnessInfo s, ppCprInfo m, #endif pprNewStrictness (newStrictnessInfo info), + pprInlineInfo (unfoldingInfo info), if null rules then empty else ptext (sLit "RULES:") <+> vcat (map pprRule rules) -- Inline pragma, occ, demand, lbvar info @@ -357,6 +361,38 @@ ppIdInfo _ info rules = specInfoRules (specInfo info) \end{code} +----------------------------------------------------- +-- Unfolding and UnfoldingGuidance +----------------------------------------------------- + +\begin{code} +instance Outputable UnfoldingGuidance where + ppr UnfoldNever = ptext (sLit "NEVER") + ppr (UnfoldIfGoodArgs { ug_arity = v, ug_args = cs + , ug_size = size, ug_res = discount }) + = hsep [ ptext (sLit "IF_ARGS"), int v, + brackets (hsep (map int cs)), + int size, + int discount ] + +instance Outputable Unfolding where + ppr NoUnfolding = ptext (sLit "No unfolding") + ppr (OtherCon cs) = ptext (sLit "OtherCon") <+> ppr cs + ppr (CompulsoryUnfolding e) = ptext (sLit "Compulsory") <+> ppr e + ppr (InlineRule { uf_tmpl = e, uf_is_value = hnf, uf_arity = arity, uf_worker = wkr }) + = ptext (sLit "INLINE") <+> sep [ppr arity <+> ppr hnf <+> ppr wkr, ppr e] + ppr (CoreUnfolding e top hnf cheap g) + = ptext (sLit "Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr g, + ppr e] + +pprInlineInfo :: Unfolding -> SDoc -- Print an inline RULE +pprInlineInfo unf | isInlineRule unf = ppr unf + | otherwise = empty +\end{code} + +----------------------------------------------------- +-- Rules +----------------------------------------------------- \begin{code} instance Outputable CoreRule where diff --git a/compiler/deSugar/DsBinds.lhs b/compiler/deSugar/DsBinds.lhs index 4c144b8..e9ab4e8 100644 --- a/compiler/deSugar/DsBinds.lhs +++ b/compiler/deSugar/DsBinds.lhs @@ -29,6 +29,7 @@ import HsSyn -- lots of things import CoreSyn -- lots of things import MkCore import CoreUtils +import CoreUnfold import CoreFVs import TcHsSyn ( mkArbitraryType ) -- Mis-placed? @@ -48,7 +49,7 @@ import Bag import BasicTypes hiding ( TopLevel ) import FastString import StaticFlags ( opt_DsMultiTyVar ) -import Util ( mapSnd, mapAndUnzip, lengthExceeds ) +import Util ( mapSnd, count, mapAndUnzip, lengthExceeds ) import Control.Monad import Data.List @@ -70,6 +71,7 @@ dsLHsBinds binds = ds_lhs_binds NoSccs binds ------------------------ ds_lhs_binds :: AutoScc -> LHsBinds Id -> DsM [(Id,CoreExpr)] + -- scc annotation policy (see below) ds_lhs_binds auto_scc binds = foldM (dsLHsBind auto_scc) [] (bagToList binds) @@ -85,25 +87,30 @@ dsHsBind :: AutoScc -> HsBind Id -> DsM [(Id,CoreExpr)] -- Result -dsHsBind _ rest (VarBind var expr) = do - core_expr <- dsLExpr expr +dsHsBind _ rest (VarBind var expr inline_regardless) + = do { core_expr <- dsLExpr expr + + -- Dictionary bindings are always VarBinds, + -- so we only need do this here + ; core_expr' <- addDictScc var core_expr + ; let var' | inline_regardless = var `setIdUnfolding` mkCompulsoryUnfolding core_expr' + | otherwise = var - -- Dictionary bindings are always VarMonoBinds, so - -- we only need do this here - core_expr' <- addDictScc var core_expr - return ((var, core_expr') : rest) + ; return ((var', core_expr') : rest) } -dsHsBind _ rest (FunBind { fun_id = L _ fun, fun_matches = matches, - fun_co_fn = co_fn, fun_tick = tick, fun_infix = inf }) = do - (args, body) <- matchWrapper (FunRhs (idName fun) inf) matches - body' <- mkOptTickBox tick body - rhs <- dsCoercion co_fn (return (mkLams args body')) - return ((fun,rhs) : rest) +dsHsBind _ rest + (FunBind { fun_id = L _ fun, fun_matches = matches, + fun_co_fn = co_fn, fun_tick = tick, fun_infix = inf }) + = do { (args, body) <- matchWrapper (FunRhs (idName fun) inf) matches + ; body' <- mkOptTickBox tick body + ; rhs <- dsCoercion co_fn (return (mkLams args body')) + ; return ((fun,rhs) : rest) } -dsHsBind _ rest (PatBind { pat_lhs = pat, pat_rhs = grhss, pat_rhs_ty = ty }) = do - body_expr <- dsGuarded grhss ty - sel_binds <- mkSelectorBinds pat body_expr - return (sel_binds ++ rest) +dsHsBind _ rest + (PatBind { pat_lhs = pat, pat_rhs = grhss, pat_rhs_ty = ty }) + = do { body_expr <- dsGuarded grhss ty + ; sel_binds <- mkSelectorBinds pat body_expr + ; return (sel_binds ++ rest) } {- Note [Rules and inlining] ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -132,10 +139,14 @@ dsHsBind _ rest (PatBind { pat_lhs = pat, pat_rhs = grhss, pat_rhs_ty = ty }) = dsHsBind auto_scc rest (AbsBinds [] [] exports binds) = do { core_prs <- ds_lhs_binds NoSccs binds ; let env = mkABEnv exports - do_one (lcl_id, rhs) | Just (_, gbl_id, _, prags) <- lookupVarEnv env lcl_id - = addInlinePrags prags gbl_id $ - addAutoScc auto_scc gbl_id rhs - | otherwise = (lcl_id, rhs) + ar_env = mkArityEnv binds + do_one (lcl_id, rhs) + | Just (_, gbl_id, _, prags) <- lookupVarEnv env lcl_id + = makeCorePair gbl_id (lookupArity ar_env lcl_id) prags $ + addAutoScc auto_scc gbl_id rhs + + | otherwise = (lcl_id, rhs) + locals' = [(lcl_id, Var gbl_id) | (_, gbl_id, lcl_id, _) <- exports] -- Note [Rules and inlining] ; return (map do_one core_prs ++ locals' ++ rest) } @@ -203,17 +214,18 @@ dsHsBind auto_scc rest (AbsBinds tyvars [] exports binds) where fvs = exprSomeFreeVars (`elemVarSet` bndrs) rhs + ar_env = mkArityEnv binds env = mkABEnv exports do_one (lcl_id, rhs) | Just (id_tvs, gbl_id, _, prags) <- lookupVarEnv env lcl_id - = (NonRec lcl_id (mkTyApps (Var gbl_id) (mkTyVarTys id_tvs)), - addInlinePrags prags gbl_id $ - addAutoScc auto_scc gbl_id $ - mkLams id_tvs $ - mkLets [ NonRec tv (Type (lookupVarEnv_NF arby_env tv)) - | tv <- tyvars, not (tv `elem` id_tvs)] $ - add_lets rhs) + = let rhs' = addAutoScc auto_scc gbl_id $ + mkLams id_tvs $ + mkLets [ NonRec tv (Type (lookupVarEnv_NF arby_env tv)) + | tv <- tyvars, not (tv `elem` id_tvs)] $ + add_lets rhs + in (NonRec lcl_id (mkTyApps (Var gbl_id) (mkTyVarTys id_tvs)), + makeCorePair gbl_id (lookupArity ar_env lcl_id) prags rhs') | otherwise = (NonRec lcl_id (mkTyApps (Var non_exp_gbl_id) (mkTyVarTys tyvars)), (non_exp_gbl_id, mkLams tyvars (add_lets rhs))) @@ -224,30 +236,35 @@ dsHsBind auto_scc rest (AbsBinds tyvars [] exports binds) -- Another common case: one exported variable -- Non-recursive bindings come through this way + -- So do self-recursive bindings, and recursive bindings + -- that have been chopped up with type signatures dsHsBind auto_scc rest (AbsBinds all_tyvars dicts [(tyvars, global, local, prags)] binds) - = ASSERT( all (`elem` tyvars) all_tyvars ) do - core_prs <- ds_lhs_binds NoSccs binds - let - -- Always treat the binds as recursive, because the typechecker - -- makes rather mixed-up dictionary bindings - core_bind = Rec core_prs + = ASSERT( all (`elem` tyvars) all_tyvars ) + do { core_prs <- ds_lhs_binds NoSccs binds + + ; let -- Always treat the binds as recursive, because the typechecker + -- makes rather mixed-up dictionary bindings + core_bind = Rec core_prs + inl_arity = lookupArity (mkArityEnv binds) local - mb_specs <- mapM (dsSpec all_tyvars dicts tyvars global local core_bind) prags - let - (spec_binds, rules) = unzip (catMaybes mb_specs) - global' = addIdSpecialisations global rules - rhs' = mkLams tyvars $ mkLams dicts $ Let core_bind (Var local) - bind = addInlinePrags prags global' $ addAutoScc auto_scc global' rhs' + ; mb_specs <- mapM (dsSpec all_tyvars dicts tyvars global + local inl_arity core_bind) prags + + ; let (spec_binds, rules) = unzip (catMaybes mb_specs) + global' = addIdSpecialisations global rules + rhs = addAutoScc auto_scc global $ + mkLams tyvars $ mkLams dicts $ Let core_bind (Var local) + main_bind = makeCorePair global' (inl_arity + length dicts) prags rhs - return (bind : spec_binds ++ rest) + ; return (main_bind : spec_binds ++ rest) } dsHsBind auto_scc rest (AbsBinds all_tyvars dicts exports binds) = do { core_prs <- ds_lhs_binds NoSccs binds ; let env = mkABEnv exports - do_one (lcl_id,rhs) | Just (_, gbl_id, _, prags) <- lookupVarEnv env lcl_id - = addInlinePrags prags lcl_id $ - addAutoScc auto_scc gbl_id rhs + ar_env = mkArityEnv binds + do_one (lcl_id,rhs) | Just (_, gbl_id, _, _prags) <- lookupVarEnv env lcl_id + = (lcl_id, addAutoScc auto_scc gbl_id rhs) | otherwise = (lcl_id,rhs) -- Rec because of mixed-up dictionary bindings @@ -260,6 +277,12 @@ dsHsBind auto_scc rest (AbsBinds all_tyvars dicts exports binds) locals = [local | (_, _, local, _) <- exports] local_tys = map idType locals + inl_prags :: [(Id, SrcSpan)] + inl_prags = [(id, loc) | (_, id, _, prags) <- exports + , L loc (InlinePrag {}) <- prags ] + + ; mapM_ discardedInlineWarning inl_prags + ; poly_tup_id <- newSysLocalDs (exprType poly_tup_expr) ; let dict_args = map Var dicts @@ -271,7 +294,8 @@ dsHsBind auto_scc rest (AbsBinds all_tyvars dicts exports binds) ; let substitute = substTyWith all_tyvars ty_args ; locals' <- newSysLocalsDs (map substitute local_tys) ; tup_id <- newSysLocalDs (substitute tup_ty) - ; mb_specs <- mapM (dsSpec all_tyvars dicts tyvars global local core_bind) + ; mb_specs <- mapM (dsSpec all_tyvars dicts tyvars global local + (lookupArity ar_env local) core_bind) prags ; let (spec_binds, rules) = unzip (catMaybes mb_specs) global' = addIdSpecialisations global rules @@ -285,19 +309,60 @@ dsHsBind auto_scc rest (AbsBinds all_tyvars dicts exports binds) | otherwise = dsMkArbitraryType all_tyvar ; export_binds_s <- mapM mk_bind (exports `zip` [0..]) - -- don't scc (auto-)annotate the tuple itself. + -- Don't scc (auto-)annotate the tuple itself. ; return ((poly_tup_id, poly_tup_expr) : (concat export_binds_s ++ rest)) } -mkABEnv :: [([TyVar], Id, Id, [LPrag])] -> VarEnv ([TyVar], Id, Id, [LPrag]) +------------------------ +makeCorePair :: Id-> Arity -> [LPrag] -> CoreExpr -> (Id, CoreExpr) +makeCorePair gbl_id arity prags rhs + = (addInline gbl_id arity rhs prags, rhs) + +------------------------ +discardedInlineWarning :: (Id, SrcSpan) -> DsM () +discardedInlineWarning (id, loc) + = putSrcSpanDs loc $ + warnDs $ sep [ ptext (sLit "Discarding INLINE pragma for") <+> ppr id + , ptext (sLit "because it is bound by a pattern, or a mutual recursion") ] + +------------------------ +type AbsBindEnv = VarEnv ([TyVar], Id, Id, [LPrag]) + -- Maps the "lcl_id" for an AbsBind to + -- its "gbl_id" and associated pragmas, if any + +mkABEnv :: [([TyVar], Id, Id, [LPrag])] -> AbsBindEnv -- Takes the exports of a AbsBinds, and returns a mapping -- lcl_id -> (tyvars, gbl_id, lcl_id, prags) mkABEnv exports = mkVarEnv [ (lcl_id, export) | export@(_, _, lcl_id, _) <- exports] +mkArityEnv :: LHsBinds Id -> IdEnv Arity + -- Maps a local to the arity of its definition +mkArityEnv binds = mkVarEnv (mapCatMaybes get_arity (bagToList binds)) + where + get_arity (L _ (FunBind { fun_id = id, fun_matches = ms })) = Just (unLoc id, matchGroupArity ms) + get_arity _ = Nothing + +lookupArity :: IdEnv Arity -> Id -> Arity +lookupArity ar_env id = lookupVarEnv ar_env id `orElse` 0 + +addInline :: Id -> Arity -> CoreExpr -> [LPrag] -> Id +addInline id arity rhs prags + = case [inl | L _ (InlinePrag inl) <- prags] of + [] -> id + (inl_spec : _) -> addInlineToId id arity rhs inl_spec + +addInlineToId :: Id -> Arity -> CoreExpr -> InlineSpec -> Id +addInlineToId id inl_arity rhs (Inline phase is_inline) + = id `setInlinePragma` phase + `setIdUnfolding` inline_rule + where + inline_rule | is_inline = mkInlineRule rhs inl_arity + | otherwise = noUnfolding +------------------------ dsSpec :: [TyVar] -> [DictId] -> [TyVar] - -> Id -> Id -- Global, local + -> Id -> Id -> Arity -- Global, local, arity of local -> CoreBind -> LPrag -> DsM (Maybe ((Id,CoreExpr), -- Binding for specialised Id CoreRule)) -- Rule for the Global Id @@ -325,10 +390,10 @@ dsSpec :: [TyVar] -> [DictId] -> [TyVar] -- -- It is *possible* that 'es' does not mention all of the dictionaries 'ds' -- (a bit silly, because then the -dsSpec _ _ _ _ _ _ (L _ (InlinePrag {})) +dsSpec _ _ _ _ _ _ _ (L _ (InlinePrag {})) = return Nothing -dsSpec all_tvs dicts tvs poly_id mono_id mono_bind +dsSpec all_tvs dicts tvs poly_id mono_id inl_arity mono_bind (L loc (SpecPrag spec_expr spec_ty inl)) = putSrcSpanDs loc $ do { let poly_name = idName poly_id @@ -350,6 +415,8 @@ dsSpec all_tvs dicts tvs poly_id mono_id mono_bind -- Very important to make the 'f' non-exported, -- else it won't be inlined! spec_id = mkLocalId spec_name spec_ty + spec_id1 = addInlineToId spec_id (inl_arity + count isDictId bndrs) + spec_rhs inl spec_rhs = Let (NonRec local_poly poly_f_body) ds_spec_expr poly_f_body = mkLams (tvs ++ dicts) f_body @@ -362,7 +429,7 @@ dsSpec all_tvs dicts tvs poly_id mono_id mono_bind AlwaysActive poly_name (extra_dict_bndrs ++ bndrs) args (mkVarApps (Var spec_id) bndrs) - ; return (Just (addInlineInfo inl spec_id spec_rhs, rule)) + ; return (Just ((spec_id1, spec_rhs), rule)) } } } } where -- Bind to Any any of all_ptvs that aren't @@ -508,23 +575,6 @@ simpleSubst subst expr go (Let (Rec pairs) body) = Let (Rec (mapSnd go pairs)) (go body) go (Case scrut bndr ty alts) = Case (go scrut) bndr ty [(c,bs,go r) | (c,bs,r) <- alts] - -addInlinePrags :: [LPrag] -> Id -> CoreExpr -> (Id,CoreExpr) -addInlinePrags prags bndr rhs - = case [inl | L _ (InlinePrag inl) <- prags] of - [] -> (bndr, rhs) - (inl:_) -> addInlineInfo inl bndr rhs - -addInlineInfo :: InlineSpec -> Id -> CoreExpr -> (Id,CoreExpr) -addInlineInfo (Inline phase is_inline) bndr rhs - = (attach_phase bndr phase, wrap_inline is_inline rhs) - where - attach_phase bndr phase - | isAlwaysActive phase = bndr -- Default phase - | otherwise = bndr `setInlinePragma` phase - - wrap_inline True body = mkInlineMe body - wrap_inline False body = body \end{code} @@ -595,8 +645,6 @@ dsCoercion (WpApp v) thing_inside {- An Id -} ; return (App expr (Var v)) } dsCoercion (WpTyApp ty) thing_inside = do { expr <- thing_inside ; return (App expr (Type ty)) } -dsCoercion WpInline thing_inside = do { expr <- thing_inside - ; return (mkInlineMe expr) } dsCoercion (WpLet bs) thing_inside = do { prs <- dsLHsBinds bs ; expr <- thing_inside ; return (Let (Rec prs) expr) } diff --git a/compiler/deSugar/DsForeign.lhs b/compiler/deSugar/DsForeign.lhs index 080289e..007edb9 100644 --- a/compiler/deSugar/DsForeign.lhs +++ b/compiler/deSugar/DsForeign.lhs @@ -19,6 +19,7 @@ import DsMonad import HsSyn import DataCon import CoreUtils +import CoreUnfold import Id import Literal import Module @@ -230,9 +231,10 @@ dsFCall fn_id fcall = do -- Build the wrapper work_app = mkApps (mkVarApps (Var work_id) tvs) val_args wrapper_body = foldr ($) (res_wrapper work_app) arg_wrappers - wrap_rhs = mkInlineMe (mkLams (tvs ++ args) wrapper_body) + wrap_rhs = mkLams (tvs ++ args) wrapper_body + fn_id_w_inl = fn_id `setIdUnfolding` mkInlineRule wrap_rhs (length args) - return ([(work_id, work_rhs), (fn_id, wrap_rhs)], empty, empty) + return ([(work_id, work_rhs), (fn_id_w_inl, wrap_rhs)], empty, empty) \end{code} diff --git a/compiler/deSugar/DsMeta.hs b/compiler/deSugar/DsMeta.hs index 554a945..b0c314b 100644 --- a/compiler/deSugar/DsMeta.hs +++ b/compiler/deSugar/DsMeta.hs @@ -13,7 +13,7 @@ -- a Royal Pain (triggers other recompilation). ----------------------------------------------------------------------------- -{-# OPTIONS -fno-warn-unused-imports #-} +{-# OPTIONS -fwarn-unused-imports #-} -- The above warning supression flag is a temporary kludge. -- While working on this module you are encouraged to remove it and fix -- any warnings in the module. See @@ -33,7 +33,6 @@ module DsMeta( dsBracket, import {-# SOURCE #-} DsExpr ( dsExpr ) import MatchLit -import DsUtils import DsMonad import qualified Language.Haskell.TH as TH @@ -45,11 +44,11 @@ import PrelNames -- OccName.varName we do this by removing varName from the import of -- OccName above, making a qualified instance of OccName and using -- OccNameAlias.varName where varName ws previously used in this file. -import qualified OccName +import qualified OccName( isDataOcc, isVarOcc, isTcOcc, varName, tcName ) import Module import Id -import Name +import Name hiding( isVarOcc, isTcOcc, varName, tcName ) import NameEnv import TcType import TyCon diff --git a/compiler/hsSyn/HsBinds.lhs b/compiler/hsSyn/HsBinds.lhs index 83273f0..a9fa8e8 100644 --- a/compiler/hsSyn/HsBinds.lhs +++ b/compiler/hsSyn/HsBinds.lhs @@ -131,7 +131,9 @@ data HsBindLR idL idR | VarBind { -- Dictionary binding and suchlike var_id :: idL, -- All VarBinds are introduced by the type checker - var_rhs :: LHsExpr idR -- Located only for consistency + var_rhs :: LHsExpr idR, -- Located only for consistency + var_inline :: Bool -- True <=> inline this binding regardless + -- (used for implication constraints) } | AbsBinds { -- Binds abstraction; TRANSLATION @@ -353,7 +355,6 @@ data HsWrapper | WpLam Var -- \d. [] the 'd' is a type-class dictionary or coercion variable | WpTyLam TyVar -- \a. [] the 'a' is a type variable (not coercion var) - | WpInline -- inline_me [] Wrap inline around the thing -- Non-empty bindings, so that the identity coercion -- is always exactly WpHole @@ -374,7 +375,6 @@ pprHsWrapper it wrap = help it (WpLam id) = sep [ptext (sLit "\\") <> pprBndr LambdaBind id <> dot, it] help it (WpTyLam tv) = sep [ptext (sLit "/\\") <> pprBndr LambdaBind tv <> dot, it] help it (WpLet binds) = sep [ptext (sLit "let") <+> braces (ppr binds), it] - help it WpInline = sep [ptext (sLit "_inline_me_"), it] in -- in debug mode, print the wrapper -- otherwise just print what's inside diff --git a/compiler/hsSyn/HsUtils.lhs b/compiler/hsSyn/HsUtils.lhs index db9460e..958feb4 100644 --- a/compiler/hsSyn/HsUtils.lhs +++ b/compiler/hsSyn/HsUtils.lhs @@ -1,3 +1,4 @@ + % % (c) The University of Glasgow, 1992-2006 % @@ -299,8 +300,12 @@ mkFunBind fn ms = FunBind { fun_id = fn, fun_infix = False, fun_matches = mkMatc fun_tick = Nothing } -mkVarBind :: SrcSpan -> id -> LHsExpr id -> LHsBind id -mkVarBind loc var rhs = mk_easy_FunBind loc var [] rhs +mkHsVarBind :: SrcSpan -> id -> LHsExpr id -> LHsBind id +mkHsVarBind loc var rhs = mk_easy_FunBind loc var [] rhs + +mkVarBind :: id -> LHsExpr id -> LHsBind id +mkVarBind var rhs = L (getLoc rhs) $ + VarBind { var_id = var, var_rhs = rhs, var_inline = False } ------------ mk_easy_FunBind :: SrcSpan -> id -> [LPat id] diff --git a/compiler/iface/BinIface.hs b/compiler/iface/BinIface.hs index 9926b95..2ee8310 100644 --- a/compiler/iface/BinIface.hs +++ b/compiler/iface/BinIface.hs @@ -1124,10 +1124,6 @@ instance Binary IfaceInfoItem where put_ bh ad put_ bh HsNoCafRefs = do putByte bh 4 - put_ bh (HsWorker ae af) = do - putByte bh 5 - put_ bh ae - put_ bh af get bh = do h <- getByte bh case h of @@ -1139,17 +1135,36 @@ instance Binary IfaceInfoItem where return (HsUnfold ad) 3 -> do ad <- get bh return (HsInline ad) - 4 -> do return HsNoCafRefs - _ -> do ae <- get bh - af <- get bh - return (HsWorker ae af) + _ -> do return HsNoCafRefs + +instance Binary IfaceUnfolding where + put_ bh (IfCoreUnfold e) = do + putByte bh 0 + put_ bh e + put_ bh (IfInlineRule a e) = do + putByte bh 1 + put_ bh a + put_ bh e + put_ bh (IfWrapper a n) = do + putByte bh 2 + put_ bh a + put_ bh n + get bh = do + h <- getByte bh + case h of + 0 -> do e <- get bh + return (IfCoreUnfold e) + 1 -> do a <- get bh + e <- get bh + return (IfInlineRule a e) + _ -> do a <- get bh + n <- get bh + return (IfWrapper a n) instance Binary IfaceNote where put_ bh (IfaceSCC aa) = do putByte bh 0 put_ bh aa - put_ bh IfaceInlineMe = do - putByte bh 3 put_ bh (IfaceCoreNote s) = do putByte bh 4 put_ bh s @@ -1158,7 +1173,6 @@ instance Binary IfaceNote where case h of 0 -> do aa <- get bh return (IfaceSCC aa) - 3 -> do return IfaceInlineMe 4 -> do ac <- get bh return (IfaceCoreNote ac) _ -> panic ("get IfaceNote " ++ show h) diff --git a/compiler/iface/IfaceSyn.lhs b/compiler/iface/IfaceSyn.lhs index 7ef13a3..16c78fd 100644 --- a/compiler/iface/IfaceSyn.lhs +++ b/compiler/iface/IfaceSyn.lhs @@ -9,7 +9,7 @@ module IfaceSyn ( IfaceDecl(..), IfaceClassOp(..), IfaceConDecl(..), IfaceConDecls(..), IfaceExpr(..), IfaceAlt, IfaceNote(..), IfaceLetBndr(..), - IfaceBinding(..), IfaceConAlt(..), IfaceIdInfo(..), + IfaceBinding(..), IfaceConAlt(..), IfaceIdInfo(..), IfaceUnfolding(..), IfaceInfoItem(..), IfaceRule(..), IfaceAnnotation(..), IfaceAnnTarget, IfaceInst(..), IfaceFamInst(..), @@ -192,15 +192,18 @@ data IfaceInfoItem = HsArity Arity | HsStrictness StrictSig | HsInline Activation - | HsUnfold IfaceExpr + | HsUnfold IfaceUnfolding | HsNoCafRefs - | HsWorker Name Arity -- Worker, if any see IdInfo.WorkerInfo - -- for why we want arity here. - -- NB: we need IfaceExtName (not just OccName) because the worker - -- can simplify to a function in another module. + -- NB: Specialisations and rules come in separately and are -- only later attached to the Id. Partial reason: some are orphans. +data IfaceUnfolding + = IfCoreUnfold IfaceExpr + | IfInlineRule Arity IfaceExpr + | IfWrapper Arity Name -- NB: we need a Name (not just OccName) because the worker + -- can simplify to a function in another module. + -------------------------------- data IfaceExpr = IfaceLcl FastString @@ -218,7 +221,6 @@ data IfaceExpr | IfaceTick Module Int data IfaceNote = IfaceSCC CostCentre - | IfaceInlineMe | IfaceCoreNote String type IfaceAlt = (IfaceConAlt, [FastString], IfaceExpr) @@ -629,7 +631,6 @@ pprIfaceApp fun args = sep (pprIfaceExpr parens fun : args) ------------------ instance Outputable IfaceNote where ppr (IfaceSCC cc) = pprCostCentreCore cc - ppr IfaceInlineMe = ptext (sLit "__inline_me") ppr (IfaceCoreNote s) = ptext (sLit "__core_note") <+> pprHsString (mkFastString s) @@ -646,13 +647,16 @@ instance Outputable IfaceIdInfo where ppr (HasInfo is) = ptext (sLit "{-") <+> fsep (map ppr is) <+> ptext (sLit "-}") instance Outputable IfaceInfoItem where - ppr (HsUnfold unf) = ptext (sLit "Unfolding:") <+> - parens (pprIfaceExpr noParens unf) + ppr (HsUnfold unf) = ptext (sLit "Unfolding:") <+> ppr unf ppr (HsInline act) = ptext (sLit "Inline:") <+> ppr act ppr (HsArity arity) = ptext (sLit "Arity:") <+> int arity ppr (HsStrictness str) = ptext (sLit "Strictness:") <+> pprIfaceStrictSig str ppr HsNoCafRefs = ptext (sLit "HasNoCafRefs") - ppr (HsWorker w a) = ptext (sLit "Worker:") <+> ppr w <+> int a + +instance Outputable IfaceUnfolding where + ppr (IfCoreUnfold e) = parens (ppr e) + ppr (IfInlineRule a e) = ptext (sLit "INLINE:") <+> parens (ptext (sLit "arity") <+> int a) <+> parens (ppr e) + ppr (IfWrapper a wkr) = ptext (sLit "Worker:") <+> ppr wkr <+> parens (ptext (sLit "arity") <+> int a) -- ----------------------------------------------------------------------------- @@ -756,10 +760,14 @@ freeNamesIfIdInfo NoInfo = emptyNameSet freeNamesIfIdInfo (HasInfo i) = fnList freeNamesItem i freeNamesItem :: IfaceInfoItem -> NameSet -freeNamesItem (HsUnfold u) = freeNamesIfExpr u -freeNamesItem (HsWorker wkr _) = unitNameSet wkr +freeNamesItem (HsUnfold u) = freeNamesIfUnfold u freeNamesItem _ = emptyNameSet +freeNamesIfUnfold :: IfaceUnfolding -> NameSet +freeNamesIfUnfold (IfCoreUnfold e) = freeNamesIfExpr e +freeNamesIfUnfold (IfInlineRule _ e) = freeNamesIfExpr e +freeNamesIfUnfold (IfWrapper _ v) = unitNameSet v + freeNamesIfExpr :: IfaceExpr -> NameSet freeNamesIfExpr (IfaceExt v) = unitNameSet v freeNamesIfExpr (IfaceFCall _ ty) = freeNamesIfType ty diff --git a/compiler/iface/MkIface.lhs b/compiler/iface/MkIface.lhs index 4976e1f..c55f54f 100644 --- a/compiler/iface/MkIface.lhs +++ b/compiler/iface/MkIface.lhs @@ -1397,7 +1397,7 @@ instanceToIfaceInst (Instance { is_dfun = dfun_id, is_flag = oflag, (tvs, fds) = classTvsFds cls arg_names = [filterNameSet is_local (tyClsNamesOfType ty) | ty <- tys] orph | is_local cls_name = Just (nameOccName cls_name) - | all isJust mb_ns = head mb_ns + | all isJust mb_ns = ASSERT( not (null mb_ns) ) head mb_ns | otherwise = Nothing mb_ns :: [Maybe OccName] -- One for each fundep; a locally-defined name @@ -1445,7 +1445,7 @@ toIfaceLetBndr id = IfLetBndr (occNameFS (getOccName id)) toIfaceIdInfo :: IdInfo -> [IfaceInfoItem] toIfaceIdInfo id_info = catMaybes [arity_hsinfo, caf_hsinfo, strict_hsinfo, - inline_hsinfo, wrkr_hsinfo, unfold_hsinfo] + inline_hsinfo, unfold_hsinfo] where ------------ Arity -------------- arity_info = arityInfo id_info @@ -1464,33 +1464,29 @@ toIfaceIdInfo id_info Just sig | not (isTopSig sig) -> Just (HsStrictness sig) _other -> Nothing - ------------ Worker -------------- - work_info = workerInfo id_info - has_worker = workerExists work_info - wrkr_hsinfo = case work_info of - HasWorker work_id wrap_arity -> - Just (HsWorker ((idName work_id)) wrap_arity) - NoWorker -> Nothing - ------------ Unfolding -------------- - -- The unfolding is redundant if there is a worker - unfold_info = unfoldingInfo id_info - rhs = unfoldingTemplate unfold_info - no_unfolding = neverUnfold unfold_info - -- The CoreTidy phase retains unfolding info iff - -- we want to expose the unfolding, taking into account - -- unconditional NOINLINE, etc. See TidyPgm.addExternal - unfold_hsinfo | no_unfolding = Nothing - | has_worker = Nothing -- Unfolding is implicit - | otherwise = Just (HsUnfold (toIfaceExpr rhs)) + unfold_hsinfo = fmap HsUnfold $ toIfUnfolding (unfoldingInfo id_info) ------------ Inline prag -------------- inline_prag = inlinePragInfo id_info - inline_hsinfo | isAlwaysActive inline_prag = Nothing - | no_unfolding && not has_worker = Nothing + inline_hsinfo | isAlwaysActive inline_prag = Nothing + | isNothing unfold_hsinfo = Nothing -- If the iface file give no unfolding info, we -- don't need to say when inlining is OK! - | otherwise = Just (HsInline inline_prag) + | otherwise = Just (HsInline inline_prag) + +-------------------------- +toIfUnfolding :: Unfolding -> Maybe IfaceUnfolding +toIfUnfolding (CoreUnfolding { uf_tmpl = rhs, uf_guidance = guidance }) + = case guidance of + UnfoldNever -> Nothing + _ -> Just (IfCoreUnfold (toIfaceExpr rhs)) +toIfUnfolding (InlineRule { uf_worker = Just wkr, uf_arity = arity }) + = Just (IfWrapper arity (idName wkr)) +toIfUnfolding (InlineRule { uf_worker = Nothing, uf_tmpl = rhs, uf_arity = arity }) + = Just (IfInlineRule arity (toIfaceExpr rhs)) +toIfUnfolding _ + = Nothing -------------------------- coreRuleToIfaceRule :: Module -> CoreRule -> IfaceRule @@ -1547,7 +1543,6 @@ toIfaceExpr (Note n e) = IfaceNote (toIfaceNote n) (toIfaceExpr e) --------------------- toIfaceNote :: Note -> IfaceNote toIfaceNote (SCC cc) = IfaceSCC cc -toIfaceNote InlineMe = IfaceInlineMe toIfaceNote (CoreNote s) = IfaceCoreNote s --------------------- diff --git a/compiler/iface/TcIface.lhs b/compiler/iface/TcIface.lhs index 7f74cf2..48ca729 100644 --- a/compiler/iface/TcIface.lhs +++ b/compiler/iface/TcIface.lhs @@ -53,7 +53,6 @@ import SrcLoc import DynFlags import Util import FastString -import BasicTypes (Arity) import Control.Monad import Data.List @@ -848,7 +847,6 @@ tcIfaceExpr (IfaceCast expr co) = do tcIfaceExpr (IfaceNote note expr) = do expr' <- tcIfaceExpr expr case note of - IfaceInlineMe -> return (Note InlineMe expr') IfaceSCC cc -> return (Note (SCC cc) expr') IfaceCoreNote n -> return (Note (CoreNote n) expr') @@ -942,43 +940,39 @@ tcIdInfo ignore_prags name ty info tcPrag info HsNoCafRefs = return (info `setCafInfo` NoCafRefs) tcPrag info (HsArity arity) = return (info `setArityInfo` arity) tcPrag info (HsStrictness str) = return (info `setAllStrictnessInfo` Just str) + tcPrag info (HsInline prag) = return (info `setInlinePragInfo` prag) -- The next two are lazy, so they don't transitively suck stuff in - tcPrag info (HsWorker nm arity) = tcWorkerInfo ty info nm arity - tcPrag info (HsInline inline_prag) = return (info `setInlinePragInfo` inline_prag) - tcPrag info (HsUnfold expr) = do - maybe_expr' <- tcPragExpr name expr - let - -- maybe_expr' doesn't get looked at if the unfolding - -- is never inspected; so the typecheck doesn't even happen - unfold_info = case maybe_expr' of - Nothing -> noUnfolding - Just expr' -> mkTopUnfolding expr' - return (info `setUnfoldingInfoLazily` unfold_info) + tcPrag info (HsUnfold if_unf) = do { unf <- tcUnfolding name ty info if_unf + ; return (info `setUnfoldingInfoLazily` unf) } \end{code} \begin{code} -tcWorkerInfo :: Type -> IdInfo -> Name -> Arity -> IfL IdInfo -tcWorkerInfo ty info wkr arity +tcUnfolding :: Name -> Type -> IdInfo -> IfaceUnfolding -> IfL Unfolding +tcUnfolding name _ _ (IfCoreUnfold if_expr) + = do { mb_expr <- tcPragExpr name if_expr + ; return (case mb_expr of + Nothing -> NoUnfolding + Just expr -> mkTopUnfolding expr) } + +tcUnfolding name _ _ (IfInlineRule arity if_expr) + = do { mb_expr <- tcPragExpr name if_expr + ; return (case mb_expr of + Nothing -> NoUnfolding + Just expr -> mkInlineRule expr arity) } + +tcUnfolding name ty info (IfWrapper arity wkr) = do { mb_wkr_id <- forkM_maybe doc (tcIfaceExtId wkr) - - -- We return without testing maybe_wkr_id, but as soon as info is - -- looked at we will test it. That's ok, because its outside the - -- knot; and there seems no big reason to further defer the - -- tcIfaceId lookup. (Contrast with tcPragExpr, where postponing walking - -- over the unfolding until it's actually used does seem worth while.) ; us <- newUniqueSupply - ; return (case mb_wkr_id of - Nothing -> info - Just wkr_id -> add_wkr_info us wkr_id info) } + Nothing -> noUnfolding + Just wkr_id -> make_inline_rule wkr_id us) } where - doc = text "Worker for" <+> ppr wkr - add_wkr_info us wkr_id info - = info `setUnfoldingInfoLazily` mk_unfolding us wkr_id - `setWorkerInfo` HasWorker wkr_id arity + doc = text "Worker for" <+> ppr name - mk_unfolding us wkr_id = mkTopUnfolding (initUs_ us (mkWrapper ty strict_sig) wkr_id) + make_inline_rule wkr_id us + = mkWwInlineRule (initUs_ us (mkWrapper ty strict_sig) wkr_id) + arity wkr_id -- We are relying here on strictness info always appearing -- before worker info, fingers crossed .... diff --git a/compiler/main/TidyPgm.lhs b/compiler/main/TidyPgm.lhs index 16f389b..2f5d31a 100644 --- a/compiler/main/TidyPgm.lhs +++ b/compiler/main/TidyPgm.lhs @@ -208,7 +208,7 @@ unit. These are This exercise takes a sweep of the bindings bottom to top. Actually, in Step 2 we're also going to need to know which Ids should be exported with their unfoldings, so we produce not an IdSet but an -IdEnv Bool +ExtIdEnv = IdEnv Bool Step 2: Tidy the program @@ -446,10 +446,12 @@ tidyInstances tidy_dfun ispecs %************************************************************************ \begin{code} -findExternalIds :: Bool - -> [CoreBind] - -> IdEnv Bool -- In domain => external - -- Range = True <=> show unfolding +type ExtIdEnv = IdEnv Bool + -- In domain => Id is external + -- Range = True <=> show unfolding, + -- Always True for InlineRule + +findExternalIds :: Bool -> [CoreBind] -> ExtIdEnv -- Step 1 from the notes above findExternalIds omit_prags binds | omit_prags @@ -489,8 +491,7 @@ addExternal (id,rhs) needed -- "False" because we don't know we need the Id's unfolding -- Don't override existing bindings; we might have already set it to True - new_needed_ids = worker_ids `unionVarSet` - unfold_ids `unionVarSet` + new_needed_ids = (mb_unfold_ids `orElse` emptyVarSet) `unionVarSet` spec_ids idinfo = idInfo id @@ -498,29 +499,25 @@ addExternal (id,rhs) needed loop_breaker = isNonRuleLoopBreaker (occInfo idinfo) bottoming_fn = isBottomingSig (newStrictnessInfo idinfo `orElse` topSig) spec_ids = specInfoFreeVars (specInfo idinfo) - worker_info = workerInfo idinfo -- Stuff to do with the Id's unfolding - -- The simplifier has put an up-to-date unfolding - -- in the IdInfo, but the RHS will do just as well - unfolding = unfoldingInfo idinfo - rhs_is_small = not (neverUnfold unfolding) - -- We leave the unfolding there even if there is a worker -- In GHCI the unfolding is used by importers - -- When writing an interface file, we omit the unfolding - -- if there is a worker - show_unfold = not bottoming_fn && -- Not necessary - not dont_inline && - not loop_breaker && - rhs_is_small -- Small enough - - unfold_ids | show_unfold = exprSomeFreeVars isLocalId rhs - | otherwise = emptyVarSet - - worker_ids = case worker_info of - HasWorker work_id _ -> unitVarSet work_id - _otherwise -> emptyVarSet + show_unfold = isJust mb_unfold_ids + + mb_unfold_ids :: Maybe IdSet -- Nothing => don't unfold + mb_unfold_ids = case unfoldingInfo idinfo of + InlineRule { uf_worker = Just wkr_id } -> Just (unitVarSet wkr_id) + InlineRule { uf_tmpl = rhs } -> Just (exprFreeIds rhs) + CoreUnfolding { uf_guidance = guide } + | not bottoming_fn -- Not necessary + , not dont_inline + , not loop_breaker + , not (neverUnfoldGuidance guide) + -> Just (exprFreeIds rhs) -- The simplifier has put an up-to-date unfolding + -- in the IdInfo, but the RHS will do just as well + + _ -> Nothing \end{code} @@ -577,8 +574,7 @@ findExternalRules binds non_local_rules ext_ids tidyTopBinds :: HscEnv -> Module -> TypeEnv - -> IdEnv Bool -- Domain = Ids that should be external - -- True <=> their unfolding is external too + -> ExtIdEnv -> [CoreBind] -> IO (TidyEnv, [CoreBind]) @@ -617,8 +613,7 @@ tidyTopBinds hsc_env mod type_env ext_ids binds tidyTopBind :: PackageId -> Module -> IORef NameCache -- For allocating new unique names - -> IdEnv Bool -- Domain = Ids that should be external - -- True <=> their unfolding is external too + -> ExtIdEnv -> TidyEnv -> CoreBind -> IO (TidyEnv, CoreBind) @@ -741,7 +736,7 @@ tidyTopPair ext_ids rhs_tidy_env caf_info name' (bndr, rhs) rhs' = tidyExpr rhs_tidy_env rhs idinfo = idInfo bndr idinfo' = tidyTopIdInfo (isJust maybe_external) - idinfo unfold_info worker_info + idinfo unfold_info arity caf_info -- Expose an unfolding if ext_ids tells us to @@ -749,9 +744,21 @@ tidyTopPair ext_ids rhs_tidy_env caf_info name' (bndr, rhs) -- True to show the unfolding, False to hide it maybe_external = lookupVarEnv ext_ids bndr show_unfold = maybe_external `orElse` False - unfold_info | show_unfold = mkTopUnfolding rhs' + unfold_info | show_unfold = tidyUnfolding rhs_tidy_env rhs' (unfoldingInfo idinfo) | otherwise = noUnfolding - worker_info = tidyWorker rhs_tidy_env show_unfold (workerInfo idinfo) + -- NB: do *not* expose the worker if show_unfold is off, + -- because that means this thing is a loop breaker or + -- marked NOINLINE or something like that + -- This is important: if you expose the worker for a loop-breaker + -- then you can make the simplifier go into an infinite loop, because + -- in effect the unfolding is exposed. See Trac #1709 + -- + -- You might think that if show_unfold is False, then the thing should + -- not be w/w'd in the first place. But a legitimate reason is this: + -- the function returns bottom + -- In this case, show_unfold will be false (we don't expose unfoldings + -- for bottoming functions), but we might still have a worker/wrapper + -- split (see Note [Worker-wrapper for bottoming functions] in WorkWrap.lhs -- Usually the Id will have an accurate arity on it, because -- the simplifier has just run, but not always. @@ -775,9 +782,9 @@ tidyTopPair ext_ids rhs_tidy_env caf_info name' (bndr, rhs) -- unfoldings, which are inside Ids imported by GHCi. Ditto RULES. -- CoreToStg makes use of this when constructing SRTs. tidyTopIdInfo :: Bool -> IdInfo -> Unfolding - -> WorkerInfo -> ArityInfo -> CafInfo + -> ArityInfo -> CafInfo -> IdInfo -tidyTopIdInfo is_external idinfo unfold_info worker_info arity caf_info +tidyTopIdInfo is_external idinfo unfold_info arity caf_info | not is_external -- For internal Ids (not externally visible) = vanillaIdInfo -- we only need enough info for code generation -- Arity and strictness info are enough; @@ -793,32 +800,19 @@ tidyTopIdInfo is_external idinfo unfold_info worker_info arity caf_info `setAllStrictnessInfo` newStrictnessInfo idinfo `setInlinePragInfo` inlinePragInfo idinfo `setUnfoldingInfo` unfold_info - `setWorkerInfo` worker_info -- NB: we throw away the Rules -- They have already been extracted by findExternalRules ------------- Worker -------------- -tidyWorker :: TidyEnv -> Bool -> WorkerInfo -> WorkerInfo -tidyWorker _tidy_env _show_unfold NoWorker - = NoWorker -tidyWorker tidy_env show_unfold (HasWorker work_id wrap_arity) - | show_unfold = HasWorker (tidyVarOcc tidy_env work_id) wrap_arity - | otherwise = NoWorker - -- NB: do *not* expose the worker if show_unfold is off, - -- because that means this thing is a loop breaker or - -- marked NOINLINE or something like that - -- This is important: if you expose the worker for a loop-breaker - -- then you can make the simplifier go into an infinite loop, because - -- in effect the unfolding is exposed. See Trac #1709 - -- - -- You might think that if show_unfold is False, then the thing should - -- not be w/w'd in the first place. But a legitimate reason is this: - -- the function returns bottom - -- In this case, show_unfold will be false (we don't expose unfoldings - -- for bottoming functions), but we might still have a worker/wrapper - -- split (see Note [Worker-wrapper for bottoming functions] in WorkWrap.lhs +------------ Unfolding -------------- +tidyUnfolding :: TidyEnv -> CoreExpr -> Unfolding -> Unfolding +tidyUnfolding tidy_env _ unf@(InlineRule { uf_tmpl = rhs, uf_worker = mb_wkr }) + = unf { uf_tmpl = tidyExpr tidy_env rhs, + uf_worker = fmap (tidyVarOcc tidy_env) mb_wkr } +tidyUnfolding _ tidy_rhs (CoreUnfolding {}) + = mkTopUnfolding tidy_rhs +tidyUnfolding _ _ unf = unf \end{code} %************************************************************************ diff --git a/compiler/parser/ParserCore.y b/compiler/parser/ParserCore.y index 6d302fb..17d1098 100644 --- a/compiler/parser/ParserCore.y +++ b/compiler/parser/ParserCore.y @@ -275,11 +275,12 @@ exp :: { IfaceExpr } | '%case' '(' ty ')' aexp '%of' id_bndr '{' alts1 '}' { IfaceCase $5 (fst $7) $3 $9 } | '%cast' aexp aty { IfaceCast $2 $3 } - | '%note' STRING exp - { case $2 of - --"SCC" -> IfaceNote (IfaceSCC "scc") $3 - "InlineMe" -> IfaceNote IfaceInlineMe $3 - } +-- No InlineMe any more +-- | '%note' STRING exp +-- { case $2 of +-- --"SCC" -> IfaceNote (IfaceSCC "scc") $3 +-- "InlineMe" -> IfaceNote IfaceInlineMe $3 +-- } | '%external' STRING aty { IfaceFCall (ForeignCall.CCall (CCallSpec (StaticTarget (mkFastString $2)) CCallConv (PlaySafe False))) diff --git a/compiler/simplCore/CSE.lhs b/compiler/simplCore/CSE.lhs index 9b90220..90bd421 100644 --- a/compiler/simplCore/CSE.lhs +++ b/compiler/simplCore/CSE.lhs @@ -114,7 +114,7 @@ Note [CSE for INLINE and NOINLINE] We are careful to do no CSE inside functions that the user has marked as INLINE or NOINLINE. In terms of Core, that means - a) we do not do CSE inside (Note InlineMe e) + a) we do not do CSE inside an InlineRule b) we do not do CSE on the RHS of a binding b=e unless b's InlinePragma is AlwaysActive @@ -218,7 +218,6 @@ cseExpr _ (Type t) = Type t cseExpr _ (Lit lit) = Lit lit cseExpr env (Var v) = Var (lookupSubst env v) cseExpr env (App f a) = App (cseExpr env f) (tryForCSE env a) -cseExpr _ (Note InlineMe e) = Note InlineMe e -- See Note [CSE for INLINE and NOINLINE] cseExpr env (Note n e) = Note n (cseExpr env e) cseExpr env (Cast e co) = Cast (cseExpr env e) co cseExpr env (Lam b e) = let (env', b') = addBinder env b diff --git a/compiler/simplCore/FloatIn.lhs b/compiler/simplCore/FloatIn.lhs index 1146c77..6688797 100644 --- a/compiler/simplCore/FloatIn.lhs +++ b/compiler/simplCore/FloatIn.lhs @@ -213,10 +213,6 @@ fiExpr to_drop (_, AnnNote note@(SCC _) expr) = -- Wimp out for now mkCoLets' to_drop (Note note (fiExpr [] expr)) -fiExpr to_drop (_, AnnNote InlineMe expr) - = -- Ditto... don't float anything into an INLINE expression - mkCoLets' to_drop (Note InlineMe (fiExpr [] expr)) - fiExpr to_drop (_, AnnNote note@(CoreNote _) expr) = Note note (fiExpr to_drop expr) \end{code} @@ -359,8 +355,7 @@ fiExpr to_drop (_, AnnCase scrut case_bndr ty alts) fi_alt to_drop (con, args, rhs) = (con, args, fiExpr to_drop rhs) noFloatIntoRhs :: AnnExpr' Var (UniqFM Var) -> Bool -noFloatIntoRhs (AnnNote InlineMe _) = True -noFloatIntoRhs (AnnLam b _) = not (is_one_shot b) +noFloatIntoRhs (AnnLam b _) = not (is_one_shot b) -- IMPORTANT: don't say 'True' for a RHS with a one-shot lambda at the top. -- This makes a big difference for things like -- f x# = let x = I# x# diff --git a/compiler/simplCore/FloatOut.lhs b/compiler/simplCore/FloatOut.lhs index 6562c84..290c623 100644 --- a/compiler/simplCore/FloatOut.lhs +++ b/compiler/simplCore/FloatOut.lhs @@ -297,13 +297,6 @@ floatExpr lvl (Note note@(SCC cc) expr) ann_bind (Rec pairs) = Rec [(binder, mkSCC dupd_cc rhs) | (binder, rhs) <- pairs] -floatExpr _ (Note InlineMe expr) -- Other than SCCs - = (zeroStats, [], Note InlineMe (unTag expr)) - -- Do no floating at all inside INLINE. - -- The SetLevels pass did not clone the bindings, so it's - -- unsafe to do any floating, even if we dump the results - -- inside the Note (which is what we used to do). - floatExpr lvl (Note note expr) -- Other than SCCs = case (floatExpr lvl expr) of { (fs, floating_defns, expr') -> (fs, floating_defns, Note note expr') } @@ -344,22 +337,6 @@ floatList _ [] = (zeroStats, [], []) floatList f (a:as) = case f a of { (fs_a, binds_a, b) -> case floatList f as of { (fs_as, binds_as, bs) -> (fs_a `add_stats` fs_as, binds_a ++ binds_as, b:bs) }} - -unTagBndr :: TaggedBndr tag -> CoreBndr -unTagBndr (TB b _) = b - -unTag :: TaggedExpr tag -> CoreExpr -unTag (Var v) = Var v -unTag (Lit l) = Lit l -unTag (Type ty) = Type ty -unTag (Note n e) = Note n (unTag e) -unTag (App e1 e2) = App (unTag e1) (unTag e2) -unTag (Lam b e) = Lam (unTagBndr b) (unTag e) -unTag (Cast e co) = Cast (unTag e) co -unTag (Let (Rec prs) e) = Let (Rec [(unTagBndr b,unTag r) | (b, r) <- prs]) (unTag e) -unTag (Let (NonRec b r) e) = Let (NonRec (unTagBndr b) (unTag r)) (unTag e) -unTag (Case e b ty alts) = Case (unTag e) (unTagBndr b) ty - [(c, map unTagBndr bs, unTag r) | (c,bs,r) <- alts] \end{code} %************************************************************************ diff --git a/compiler/simplCore/OccurAnal.lhs b/compiler/simplCore/OccurAnal.lhs index 6af776a..26d5112 100644 --- a/compiler/simplCore/OccurAnal.lhs +++ b/compiler/simplCore/OccurAnal.lhs @@ -22,7 +22,6 @@ import CoreFVs import CoreUtils ( exprIsTrivial, isDefaultAlt ) import Coercion ( mkSymCoercion ) import Id -import IdInfo import BasicTypes import VarSet @@ -399,11 +398,6 @@ occAnalRec (CyclicSCC nodes) (body_usage, binds) where new_fvs = extendFvs env emptyVarSet fvs -idRuleRhsVars :: Id -> VarSet --- Just the variables free on the *rhs* of a rule --- See Note [Choosing loop breakers] -idRuleRhsVars id = foldr (unionVarSet . ruleRhsFreeVars) emptyVarSet (idCoreRules id) - extendFvs :: IdEnv IdSet -> IdSet -> IdSet -> IdSet -- (extendFVs env fvs s) returns (fvs `union` env(s)) extendFvs env fvs id_set @@ -499,8 +493,8 @@ reOrderCycle (bind : binds) score :: Node Details -> Int -- Higher score => less likely to be picked as loop breaker score (ND bndr rhs _ _, _, _) - | workerExists (idWorkerInfo bndr) = 10 - -- Note [Worker inline loop] + | isInlineRule (idUnfolding bndr) = 10 + -- Note [INLINE pragmas] | exprIsTrivial rhs = 5 -- Practically certain to be inlined -- Used to have also: && not (isExportedId bndr) @@ -518,34 +512,14 @@ reOrderCycle (bind : binds) -- so it probably isn't worth the time to test on every binder -- | isNeverActive (idInlinePragma bndr) = -10 - | inlineCandidate bndr rhs = 2 -- Likely to be inlined - -- Note [Inline candidates] + | isOneOcc (idOccInfo bndr) = 1 -- Likely to be inlined - | not (neverUnfold (idUnfolding bndr)) = 1 + | canUnfold (idUnfolding bndr) = 1 -- the Id has some kind of unfolding | otherwise = 0 - inlineCandidate :: Id -> CoreExpr -> Bool - inlineCandidate _ (Note InlineMe _) = True - inlineCandidate id _ = isOneOcc (idOccInfo id) - - -- Note [conapp] - -- - -- It's really really important to inline dictionaries. Real - -- example (the Enum Ordering instance from GHC.Base): - -- - -- rec f = \ x -> case d of (p,q,r) -> p x - -- g = \ x -> case d of (p,q,r) -> q x - -- d = (v, f, g) - -- - -- Here, f and g occur just once; but we can't inline them into d. - -- On the other hand we *could* simplify those case expressions if - -- we didn't stupidly choose d as the loop breaker. - -- But we won't because constructor args are marked "Many". - -- Inlining dictionaries is really essential to unravelling - -- the loops in static numeric dictionaries, see GHC.Float. - + -- Checking for a constructor application -- Cheap and cheerful; the simplifer moves casts out of the way -- The lambda case is important to spot x = /\a. C (f a) -- which comes up when C is a dictionary constructor and @@ -724,11 +698,6 @@ occAnal _ expr@(Lit _) = (emptyDetails, expr) \end{code} \begin{code} -occAnal env (Note InlineMe body) - = case occAnal env body of { (usage, body') -> - (mapVarEnv markMany usage, Note InlineMe body') - } - occAnal env (Note note@(SCC _) body) = case occAnal env body of { (usage, body') -> (mapVarEnv markInsideSCC usage, Note note body') diff --git a/compiler/simplCore/SetLevels.lhs b/compiler/simplCore/SetLevels.lhs index 270ce17..c32b83d 100644 --- a/compiler/simplCore/SetLevels.lhs +++ b/compiler/simplCore/SetLevels.lhs @@ -48,7 +48,7 @@ module SetLevels ( Level(..), tOP_LEVEL, LevelledBind, LevelledExpr, - incMinorLvl, ltMajLvl, ltLvl, isTopLvl, isInlineCtxt + incMinorLvl, ltMajLvl, ltLvl, isTopLvl ) where #include "HsVersions.h" @@ -56,13 +56,14 @@ module SetLevels ( import CoreSyn import DynFlags ( FloatOutSwitches(..) ) -import CoreUtils ( exprType, exprIsTrivial, mkPiTypes ) +import CoreUtils ( exprType, exprIsTrivial, exprBotStrictness_maybe, mkPiTypes ) import CoreFVs -- all of it -import CoreSubst ( Subst, emptySubst, extendInScope, extendIdSubst, - cloneIdBndr, cloneRecIdBndrs ) +import CoreSubst ( Subst, emptySubst, extendInScope, extendInScopeList, + extendIdSubst, cloneIdBndr, cloneRecIdBndrs ) import Id ( Id, idType, mkSysLocal, isOneShotLambda, zapDemandIdInfo, transferPolyIdInfo, - idSpecialisation, idWorkerInfo, setIdInfo + idSpecialisation, idUnfolding, setIdInfo, + setIdNewStrictness, setIdArity ) import IdInfo import Var @@ -85,9 +86,7 @@ import FastString %************************************************************************ \begin{code} -data Level = InlineCtxt -- A level that's used only for - -- the context parameter ctxt_lvl - | Level Int -- Level number of enclosing lambdas +data Level = Level Int -- Level number of enclosing lambdas Int -- Number of big-lambda and/or case expressions between -- here and the nearest enclosing lambda \end{code} @@ -150,55 +149,37 @@ the worker at all. type LevelledExpr = TaggedExpr Level type LevelledBind = TaggedBind Level -tOP_LEVEL, iNLINE_CTXT :: Level +tOP_LEVEL :: Level tOP_LEVEL = Level 0 0 -iNLINE_CTXT = InlineCtxt incMajorLvl :: Level -> Level --- For InlineCtxt we ignore any inc's; we don't want --- to do any floating at all; see notes above -incMajorLvl InlineCtxt = InlineCtxt incMajorLvl (Level major _) = Level (major + 1) 0 incMinorLvl :: Level -> Level -incMinorLvl InlineCtxt = InlineCtxt incMinorLvl (Level major minor) = Level major (minor+1) maxLvl :: Level -> Level -> Level -maxLvl InlineCtxt l2 = l2 -maxLvl l1 InlineCtxt = l1 maxLvl l1@(Level maj1 min1) l2@(Level maj2 min2) | (maj1 > maj2) || (maj1 == maj2 && min1 > min2) = l1 | otherwise = l2 ltLvl :: Level -> Level -> Bool -ltLvl _ InlineCtxt = False -ltLvl InlineCtxt (Level _ _) = True ltLvl (Level maj1 min1) (Level maj2 min2) = (maj1 < maj2) || (maj1 == maj2 && min1 < min2) ltMajLvl :: Level -> Level -> Bool -- Tells if one level belongs to a difft *lambda* level to another -ltMajLvl _ InlineCtxt = False -ltMajLvl InlineCtxt (Level maj2 _) = 0 < maj2 ltMajLvl (Level maj1 _) (Level maj2 _) = maj1 < maj2 isTopLvl :: Level -> Bool isTopLvl (Level 0 0) = True isTopLvl _ = False -isInlineCtxt :: Level -> Bool -isInlineCtxt InlineCtxt = True -isInlineCtxt _ = False - instance Outputable Level where - ppr InlineCtxt = text "" ppr (Level maj min) = hcat [ char '<', int maj, char ',', int min, char '>' ] instance Eq Level where - InlineCtxt == InlineCtxt = True (Level maj1 min1) == (Level maj2 min2) = maj1 == maj2 && min1 == min2 - _ == _ = False \end{code} @@ -215,21 +196,17 @@ setLevels :: FloatOutSwitches -> [LevelledBind] setLevels float_lams binds us - = initLvl us (do_them binds) + = initLvl us (do_them init_env binds) where - -- "do_them"'s main business is to thread the monad along - -- It gives each top binding the same empty envt, because - -- things unbound in the envt have level number zero implicitly - do_them :: [CoreBind] -> LvlM [LevelledBind] - - do_them [] = return [] - do_them (b:bs) = do - (lvld_bind, _) <- lvlTopBind init_env b - lvld_binds <- do_them bs - return (lvld_bind : lvld_binds) - init_env = initialEnv float_lams + do_them :: LevelEnv -> [CoreBind] -> LvlM [LevelledBind] + do_them _ [] = return [] + do_them env (b:bs) + = do { (lvld_bind, env') <- lvlTopBind env b + ; lvld_binds <- do_them env' bs + ; return (lvld_bind : lvld_binds) } + lvlTopBind :: LevelEnv -> Bind Id -> LvlM (LevelledBind, LevelEnv) lvlTopBind env (NonRec binder rhs) = lvlBind TopLevel tOP_LEVEL env (AnnNonRec binder (freeVars rhs)) @@ -283,11 +260,6 @@ lvlExpr ctxt_lvl env (_, AnnApp fun arg) = do -- We don't do MFE on partial applications generally, -- but we do if the function is big and hairy, like a case -lvlExpr _ env (_, AnnNote InlineMe expr) = do --- Don't float anything out of an InlineMe; hence the iNLINE_CTXT - expr' <- lvlExpr iNLINE_CTXT env expr - return (Note InlineMe expr') - lvlExpr ctxt_lvl env (_, AnnNote note expr) = do expr' <- lvlExpr ctxt_lvl env expr return (Note note expr') @@ -359,13 +331,25 @@ lvlExpr ctxt_lvl env (_, AnnCase expr case_bndr ty alts) = do the expression, so that it can itself be floated. Note [Unlifted MFEs] -~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~ We don't float unlifted MFEs, which potentially loses big opportunites. For example: \x -> f (h y) where h :: Int -> Int# is expensive. We'd like to float the (h y) outside the \x, but we don't because it's unboxed. Possible solution: box it. +Note [Bottoming floats] +~~~~~~~~~~~~~~~~~~~~~~~ +If we see + f = \x. g (error "urk") +we'd like to float the call to error, to get + lvl = error "urk" + f = \x. g lvl +But, it's very helpful for lvl to get a strictness signature, so that, +for example, its unfolding is not exposed in interface files (unnecessary). +But this float-out might occur after strictness analysis. So we use the +cheap-and-cheerful exprBotStrictness_maybe function. + \begin{code} lvlMFE :: Bool -- True <=> strict context [body of case or let] -> Level -- Level of innermost enclosing lambda/tylam @@ -376,17 +360,20 @@ lvlMFE :: Bool -- True <=> strict context [body of case or let] lvlMFE _ _ _ (_, AnnType ty) = return (Type ty) --- No point in floating out an expression wrapped in a coercion; +-- No point in floating out an expression wrapped in a coercion or note -- If we do we'll transform lvl = e |> co -- to lvl' = e; lvl = lvl' |> co -- and then inline lvl. Better just to float out the payload. +lvlMFE strict_ctxt ctxt_lvl env (_, AnnNote n e) + = do { e' <- lvlMFE strict_ctxt ctxt_lvl env e + ; return (Note n e') } + lvlMFE strict_ctxt ctxt_lvl env (_, AnnCast e co) - = do { expr' <- lvlMFE strict_ctxt ctxt_lvl env e - ; return (Cast expr' co) } + = do { e' <- lvlMFE strict_ctxt ctxt_lvl env e + ; return (Cast e' co) } lvlMFE strict_ctxt ctxt_lvl env ann_expr@(fvs, _) | isUnLiftedType ty -- Can't let-bind it; see Note [Unlifted MFEs] - || isInlineCtxt ctxt_lvl -- Don't float out of an __inline__ context || exprIsTrivial expr -- Never float if it's trivial || not good_destination = -- Don't float it out @@ -395,8 +382,13 @@ lvlMFE strict_ctxt ctxt_lvl env ann_expr@(fvs, _) | otherwise -- Float it out! = do expr' <- lvlFloatRhs abs_vars dest_lvl env ann_expr var <- newLvlVar "lvl" abs_vars ty - return (Let (NonRec (TB var dest_lvl) expr') - (mkVarApps (Var var) abs_vars)) + -- Note [Bottoming floats] + let var_w_str = case exprBotStrictness_maybe expr of + Just (arity,str) -> var `setIdArity` arity + `setIdNewStrictness` str + Nothing -> var + return (Let (NonRec (TB var_w_str dest_lvl) expr') + (mkVarApps (Var var_w_str) abs_vars)) where expr = deAnnotate ann_expr ty = exprType expr @@ -491,7 +483,6 @@ lvlBind :: TopLevelFlag -- Used solely to decide whether to clone lvlBind top_lvl ctxt_lvl env (AnnNonRec bndr rhs@(rhs_fvs,_)) | isTyVar bndr -- Don't do anything for TyVar binders -- (simplifier gets rid of them pronto) - || isInlineCtxt ctxt_lvl -- Don't do anything inside InlineMe = do rhs' <- lvlExpr ctxt_lvl env rhs return (NonRec (TB bndr ctxt_lvl) rhs', env) @@ -516,10 +507,6 @@ lvlBind top_lvl ctxt_lvl env (AnnNonRec bndr rhs@(rhs_fvs,_)) \begin{code} lvlBind top_lvl ctxt_lvl env (AnnRec pairs) - | isInlineCtxt ctxt_lvl -- Don't do anything inside InlineMe - = do rhss' <- mapM (lvlExpr ctxt_lvl env) rhss - return (Rec ([TB b ctxt_lvl | b <- bndrs] `zip` rhss'), env) - | null abs_vars = do (new_env, new_bndrs) <- cloneRecVars top_lvl env bndrs ctxt_lvl dest_lvl new_rhss <- mapM (lvlExpr ctxt_lvl new_env) rhss @@ -721,6 +708,12 @@ extendLvlEnv (float_lams, lvl_env, subst, id_env) prs -- incorrectly, because the SubstEnv was still lying around. Ouch! -- KSW 2000-07. +extendInScopeEnv :: LevelEnv -> Var -> LevelEnv +extendInScopeEnv (fl, le, subst, ids) v = (fl, le, extendInScope subst v, ids) + +extendInScopeEnvList :: LevelEnv -> [Var] -> LevelEnv +extendInScopeEnvList (fl, le, subst, ids) vs = (fl, le, extendInScopeList subst vs, ids) + -- extendCaseBndrLvlEnv adds the mapping case-bndr->scrut-var if it can -- (see point 4 of the module overview comment) extendCaseBndrLvlEnv :: LevelEnv -> Expr (TaggedBndr Level) -> Var -> Level @@ -808,7 +801,7 @@ abstractVars dest_lvl (_, lvl_env, _, id_env) fvs -- We are going to lambda-abstract, so nuke any IdInfo, -- and add the tyvars of the Id (if necessary) - zap v | isIdVar v = WARN( workerExists (idWorkerInfo v) || + zap v | isIdVar v = WARN( isInlineRule (idUnfolding v) || not (isEmptySpecInfo (idSpecialisation v)), text "absVarsOf: discarding info on" <+> ppr v ) setIdInfo v vanillaIdInfo @@ -869,7 +862,9 @@ newLvlVar str vars body_ty = do cloneVar :: TopLevelFlag -> LevelEnv -> Id -> Level -> Level -> LvlM (LevelEnv, Id) cloneVar TopLevel env v _ _ - = return (env, v) -- Don't clone top level things + = return (extendInScopeEnv env v, v) -- Don't clone top level things + -- But do extend the in-scope env, to satisfy the in-scope invariant + cloneVar NotTopLevel env@(_,_,subst,_) v ctxt_lvl dest_lvl = ASSERT( isIdVar v ) do us <- getUniqueSupplyM @@ -881,7 +876,7 @@ cloneVar NotTopLevel env@(_,_,subst,_) v ctxt_lvl dest_lvl cloneRecVars :: TopLevelFlag -> LevelEnv -> [Id] -> Level -> Level -> LvlM (LevelEnv, [Id]) cloneRecVars TopLevel env vs _ _ - = return (env, vs) -- Don't clone top level things + = return (extendInScopeEnvList env vs, vs) -- Don't clone top level things cloneRecVars NotTopLevel env@(_,_,subst,_) vs ctxt_lvl dest_lvl = ASSERT( all isIdVar vs ) do us <- getUniqueSupplyM diff --git a/compiler/simplCore/SimplCore.lhs b/compiler/simplCore/SimplCore.lhs index 5c3c789..27ada80 100644 --- a/compiler/simplCore/SimplCore.lhs +++ b/compiler/simplCore/SimplCore.lhs @@ -28,7 +28,7 @@ import Rules ( RuleBase, emptyRuleBase, mkRuleBase, unionRuleBase, import PprCore ( pprCoreBindings, pprCoreExpr, pprRules ) import OccurAnal ( occurAnalysePgm, occurAnalyseExpr ) import IdInfo ( setNewStrictnessInfo, newStrictnessInfo, - setWorkerInfo, workerInfo, setSpecInfoHead, + setUnfoldingInfo, unfoldingInfo, setSpecInfoHead, setInlinePragInfo, inlinePragInfo, setSpecInfo, specInfo, specInfoRules ) import CoreUtils ( coreBindsSize ) @@ -368,45 +368,34 @@ prepareRules :: HscEnv ModGuts) -- Modified fields are -- (a) Bindings have rules attached, + -- and INLINE rules simplified -- (b) Rules are now just orphan rules prepareRules hsc_env@(HscEnv { hsc_dflags = dflags, hsc_HPT = hpt }) guts@(ModGuts { mg_binds = binds, mg_deps = deps , mg_rules = local_rules, mg_rdr_env = rdr_env }) us - = do { let -- Simplify the local rules; boringly, we need to make an in-scope set + = do { us <- mkSplitUniqSupply 'w' + + ; let -- Simplify the local rules; boringly, we need to make an in-scope set -- from the local binders, to avoid warnings from Simplify.simplVar local_ids = mkInScopeSet (mkVarSet (bindersOfBinds binds)) env = setInScopeSet gentleSimplEnv local_ids - (better_rules,_) = initSmpl dflags emptyRuleBase emptyFamInstEnvs us $ - (mapM (simplRule env) local_rules) - home_pkg_rules = hptRules hsc_env (dep_mods deps) - - -- Find the rules for locally-defined Ids; then we can attach them - -- to the binders in the top-level bindings - -- - -- Reason - -- - It makes the rules easier to look up - -- - It means that transformation rules and specialisations for - -- locally defined Ids are handled uniformly - -- - It keeps alive things that are referred to only from a rule - -- (the occurrence analyser knows about rules attached to Ids) - -- - It makes sure that, when we apply a rule, the free vars - -- of the RHS are more likely to be in scope - -- - The imported rules are carried in the in-scope set - -- which is extended on each iteration by the new wave of - -- local binders; any rules which aren't on the binding will - -- thereby get dropped - (rules_for_locals, rules_for_imps) = partition isLocalRule better_rules - local_rule_base = extendRuleBaseList emptyRuleBase rules_for_locals - binds_w_rules = updateBinders local_rule_base binds - - hpt_rule_base = mkRuleBase home_pkg_rules - imp_rule_base = extendRuleBaseList hpt_rule_base rules_for_imps + (simpl_rules, _) = initSmpl dflags emptyRuleBase emptyFamInstEnvs us $ + mapM (simplRule env) local_rules + + ; let (rules_for_locals, rules_for_imps) = partition isLocalRule simpl_rules + + home_pkg_rules = hptRules hsc_env (dep_mods deps) + hpt_rule_base = mkRuleBase home_pkg_rules + imp_rule_base = extendRuleBaseList hpt_rule_base rules_for_imps + + binds_w_rules = updateBinders rules_for_locals binds + ; Err.dumpIfSet_dyn dflags Opt_D_dump_rules "Transformation rules" (withPprStyle (mkUserStyle (mkPrintUnqualified dflags rdr_env) AllTheWay) $ - vcat [text "Local rules", pprRules better_rules, + vcat [text "Local rules", pprRules simpl_rules, text "", text "Imported rules", pprRuleBase imp_rule_base]) @@ -414,18 +403,41 @@ prepareRules hsc_env@(HscEnv { hsc_dflags = dflags, hsc_HPT = hpt }) mg_rules = rules_for_imps }) } -updateBinders :: RuleBase -> [CoreBind] -> [CoreBind] -updateBinders local_rules binds - = map update_bndrs binds +-- Note [Attach rules to local ids] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Find the rules for locally-defined Ids; then we can attach them +-- to the binders in the top-level bindings +-- +-- Reason +-- - It makes the rules easier to look up +-- - It means that transformation rules and specialisations for +-- locally defined Ids are handled uniformly +-- - It keeps alive things that are referred to only from a rule +-- (the occurrence analyser knows about rules attached to Ids) +-- - It makes sure that, when we apply a rule, the free vars +-- of the RHS are more likely to be in scope +-- - The imported rules are carried in the in-scope set +-- which is extended on each iteration by the new wave of +-- local binders; any rules which aren't on the binding will +-- thereby get dropped + +updateBinders :: [CoreRule] -> [CoreBind] -> [CoreBind] +updateBinders rules_for_locals binds + = map update_bind binds where - update_bndrs (NonRec b r) = NonRec (update_bndr b) r - update_bndrs (Rec prs) = Rec [(update_bndr b, r) | (b,r) <- prs] - - update_bndr bndr = case lookupNameEnv local_rules (idName bndr) of - Nothing -> bndr - Just rules -> bndr `addIdSpecialisations` rules - -- The binder might have some existing rules, - -- arising from specialisation pragmas + local_rules = extendRuleBaseList emptyRuleBase rules_for_locals + + update_bind (NonRec b r) = NonRec (add_rules b) r + update_bind (Rec prs) = Rec (mapFst add_rules prs) + + -- See Note [Attach rules to local ids] + -- NB: the binder might have some existing rules, + -- arising from specialisation pragmas + add_rules bndr + | Just rules <- lookupNameEnv local_rules (idName bndr) + = bndr `addIdSpecialisations` rules + | otherwise + = bndr \end{code} Note [Simplifying the left-hand side of a RULE] @@ -442,6 +454,9 @@ we do not want to get otherwise we don't match when given an argument like augment (\a. h a a) (build h) +The simplifier does indeed do eta reduction (it's in +Simplify.completeLam) but only if -O is on. + \begin{code} simplRule env rule@(BuiltinRule {}) = return rule @@ -450,17 +465,6 @@ simplRule env rule@(Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs }) args' <- mapM (simplExprGently env) args rhs' <- simplExprGently env rhs return (rule { ru_bndrs = bndrs', ru_args = args', ru_rhs = rhs' }) - --- It's important that simplExprGently does eta reduction. --- For example, in a rule like: --- augment g (build h) --- we do not want to get --- augment (\a. g a) (build h) --- otherwise we don't match when given an argument like --- (\a. h a a) --- --- The simplifier does indeed do eta reduction (it's in --- Simplify.completeLam) but only if -O is on. \end{code} \begin{code} @@ -853,7 +857,7 @@ transferIdInfo exported_id local_id where local_info = idInfo local_id transfer exp_info = exp_info `setNewStrictnessInfo` newStrictnessInfo local_info - `setWorkerInfo` workerInfo local_info + `setUnfoldingInfo` unfoldingInfo local_info `setInlinePragInfo` inlinePragInfo local_info `setSpecInfo` addSpecInfo (specInfo exp_info) new_info new_info = setSpecInfoHead (idName exported_id) diff --git a/compiler/simplCore/SimplEnv.lhs b/compiler/simplCore/SimplEnv.lhs index a2e06a0..12b3ce5 100644 --- a/compiler/simplCore/SimplEnv.lhs +++ b/compiler/simplCore/SimplEnv.lhs @@ -29,7 +29,7 @@ module SimplEnv ( simplNonRecBndr, simplRecBndrs, simplLamBndr, simplLamBndrs, simplBinder, simplBinders, addBndrRules, - substExpr, substWorker, substTy, + substExpr, substTy, substUnfolding, -- Floats Floats, emptyFloats, isEmptyFloats, addNonRec, addFloats, extendFloats, @@ -49,7 +49,7 @@ import VarEnv import VarSet import OrdList import Id -import qualified CoreSubst ( Subst, mkSubst, substExpr, substSpec, substWorker ) +import qualified CoreSubst ( Subst, mkSubst, substExpr, substSpec, substUnfolding ) import qualified Type ( substTy, substTyVarBndr ) import Type hiding ( substTy, substTyVarBndr ) import Coercion @@ -528,7 +528,7 @@ simplLamBndr env bndr where old_unf = idUnfolding bndr (env1, id1) = substIdBndr env bndr - id2 = id1 `setIdUnfolding` substUnfolding env old_unf + id2 = id1 `setIdUnfolding` substUnfolding env False old_unf env2 = modifyInScope env1 id2 --------------- @@ -660,29 +660,6 @@ addBndrRules env in_id out_id old_rules = idSpecialisation in_id new_rules = CoreSubst.substSpec subst out_id old_rules final_id = out_id `setIdSpecialisation` new_rules - ------------------- -substIdType :: SimplEnv -> Id -> Id -substIdType (SimplEnv { seInScope = in_scope, seTvSubst = tv_env}) id - | isEmptyVarEnv tv_env || isEmptyVarSet (tyVarsOfType old_ty) = id - | otherwise = Id.setIdType id (Type.substTy (TvSubst in_scope tv_env) old_ty) - -- The tyVarsOfType is cheaper than it looks - -- because we cache the free tyvars of the type - -- in a Note in the id's type itself - where - old_ty = idType id - ------------------- -substUnfolding :: SimplEnv -> Unfolding -> Unfolding -substUnfolding _ NoUnfolding = NoUnfolding -substUnfolding _ (OtherCon cons) = OtherCon cons -substUnfolding env (CompulsoryUnfolding rhs) = CompulsoryUnfolding (substExpr env rhs) -substUnfolding env (CoreUnfolding rhs t v w g) = CoreUnfolding (substExpr env rhs) t v w g - ------------------- -substWorker :: SimplEnv -> WorkerInfo -> WorkerInfo -substWorker _ NoWorker = NoWorker -substWorker env wkr_info = CoreSubst.substWorker (mkCoreSubst env) wkr_info \end{code} @@ -718,9 +695,28 @@ mkCoreSubst (SimplEnv { seInScope = in_scope, seTvSubst = tv_env, seIdSubst = id fiddle (DoneId v) = Var v fiddle (ContEx tv id e) = CoreSubst.substExpr (mk_subst tv id) e +------------------ +substIdType :: SimplEnv -> Id -> Id +substIdType (SimplEnv { seInScope = in_scope, seTvSubst = tv_env}) id + | isEmptyVarEnv tv_env || isEmptyVarSet (tyVarsOfType old_ty) = id + | otherwise = Id.setIdType id (Type.substTy (TvSubst in_scope tv_env) old_ty) + -- The tyVarsOfType is cheaper than it looks + -- because we cache the free tyvars of the type + -- in a Note in the id's type itself + where + old_ty = idType id + +------------------ substExpr :: SimplEnv -> CoreExpr -> CoreExpr substExpr env expr = CoreSubst.substExpr (mkCoreSubst env) expr -- Do *not* short-cut in the case of an empty substitution -- See CoreSubst: Note [Extending the Subst] + +substUnfolding :: SimplEnv -> Bool -> Unfolding -> Unfolding +substUnfolding env is_top_lvl unf + | InlineRule {} <- unf' = unf' { uf_is_top = is_top_lvl } + | otherwise = unf' + where + unf' = CoreSubst.substUnfolding (mkCoreSubst env) unf \end{code} diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs index 0f6cf73..53c9149 100644 --- a/compiler/simplCore/SimplUtils.lhs +++ b/compiler/simplCore/SimplUtils.lhs @@ -10,12 +10,12 @@ module SimplUtils ( -- Inlining, preInlineUnconditionally, postInlineUnconditionally, - activeInline, activeRule, inlineMode, + activeInline, activeRule, -- The continuation type SimplCont(..), DupFlag(..), ArgInfo(..), contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, - countValArgs, countArgs, splitInlineCont, + countValArgs, countArgs, mkBoringStop, mkLazyArgStop, contIsRhsOrArg, interestingCallContext, interestingArgContext, @@ -214,24 +214,6 @@ dropArgs :: Int -> SimplCont -> SimplCont dropArgs 0 cont = cont dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont dropArgs n other = pprPanic "dropArgs" (ppr n <+> ppr other) - --------------------- -splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont) --- Returns Nothing if the continuation should dissolve an InlineMe Note --- Return Just (c1,c2) otherwise, --- where c1 is the continuation to put inside the InlineMe --- and c2 outside - --- Example: (__inline_me__ (/\a. e)) ty --- Here we want to do the beta-redex without dissolving the InlineMe --- See test simpl017 (and Trac #1627) for a good example of why this is important - -splitInlineCont (ApplyTo dup (Type ty) se c) - | Just (c1, c2) <- splitInlineCont c = Just (ApplyTo dup (Type ty) se c1, c2) -splitInlineCont cont@(Stop {}) = Just (mkBoringStop, cont) -splitInlineCont cont@(StrictBind {}) = Just (mkBoringStop, cont) -splitInlineCont cont@(StrictArg {}) = Just (mkBoringStop, cont) -splitInlineCont _ = Nothing \end{code} @@ -359,7 +341,7 @@ mkArgInfo fun n_val_args call_cont vanilla_discounts, arg_discounts :: [Int] vanilla_discounts = repeat 0 arg_discounts = case idUnfolding fun of - CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _) + CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_args = discounts}} -> discounts ++ vanilla_discounts _ -> vanilla_discounts @@ -480,13 +462,7 @@ unboxed tuples and suchlike. INLINE pragmas ~~~~~~~~~~~~~~ -SimplGently is also used as the mode to simplify inside an InlineMe note. - -\begin{code} -inlineMode :: SimplifierMode -inlineMode = SimplGently -\end{code} - +We don't simplify inside InlineRules (which come from INLINE pragmas). It really is important to switch off inlinings inside such expressions. Consider the following example diff --git a/compiler/simplCore/Simplify.lhs b/compiler/simplCore/Simplify.lhs index 22c7a5a..5bcda0c 100644 --- a/compiler/simplCore/Simplify.lhs +++ b/compiler/simplCore/Simplify.lhs @@ -352,7 +352,7 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se do { tick LetFloatFromLet ; (poly_binds, body3) <- abstractFloats tvs' body_env2 body2 ; rhs' <- mkLam tvs' body3 - ; let env' = foldl (addPolyBind top_lvl) env poly_binds + ; env' <- foldlM (addPolyBind top_lvl) env poly_binds ; return (env', rhs') } ; completeBind env' top_lvl bndr bndr1 rhs' } @@ -567,29 +567,23 @@ completeBind :: SimplEnv -- * or by adding to the floats in the envt completeBind env top_lvl old_bndr new_bndr new_rhs - | postInlineUnconditionally env top_lvl new_bndr occ_info new_rhs unfolding - -- Inline and discard the binding - = do { tick (PostInlineUnconditionally old_bndr) - ; -- pprTrace "postInlineUnconditionally" (ppr old_bndr <+> ppr new_bndr <+> ppr new_rhs) $ - return (extendIdSubst env old_bndr (DoneEx new_rhs)) } - -- Use the substitution to make quite, quite sure that the - -- substitution will happen, since we are going to discard the binding + = do { let old_info = idInfo old_bndr + old_unf = unfoldingInfo old_info + occ_info = occInfo old_info - | otherwise - = return (addNonRecWithUnf env new_bndr new_rhs unfolding wkr) - where - unfolding | omit_unfolding = NoUnfolding - | otherwise = mkUnfolding (isTopLevel top_lvl) new_rhs - old_info = idInfo old_bndr - occ_info = occInfo old_info - wkr = substWorker env (workerInfo old_info) - omit_unfolding = isNonRuleLoopBreaker occ_info - -- or not (activeInline env old_bndr) - -- Do *not* trim the unfolding in SimplGently, else - -- the specialiser can't see it! - ------------------ -addPolyBind :: TopLevelFlag -> SimplEnv -> OutBind -> SimplEnv + ; new_unfolding <- simplUnfolding env top_lvl old_bndr occ_info old_unf new_rhs + + ; if postInlineUnconditionally env top_lvl new_bndr occ_info new_rhs new_unfolding + -- Inline and discard the binding + then do { tick (PostInlineUnconditionally old_bndr) + ; return (extendIdSubst env old_bndr (DoneEx new_rhs)) } + -- Use the substitution to make quite, quite sure that the + -- substitution will happen, since we are going to discard the binding + + else return (addNonRecWithUnf env new_bndr new_rhs new_unfolding) } + +------------------------------ +addPolyBind :: TopLevelFlag -> SimplEnv -> OutBind -> SimplM SimplEnv -- Add a new binding to the environment, complete with its unfolding -- but *do not* do postInlineUnconditionally, because we have already -- processed some of the scope of the binding @@ -602,72 +596,95 @@ addPolyBind :: TopLevelFlag -> SimplEnv -> OutBind -> SimplEnv -- opportunity to inline 'y' too. addPolyBind top_lvl env (NonRec poly_id rhs) - = addNonRecWithUnf env poly_id rhs unfolding NoWorker - where - unfolding | not (activeInline env poly_id) = NoUnfolding - | otherwise = mkUnfolding (isTopLevel top_lvl) rhs - -- addNonRecWithInfo adds the new binding in the - -- proper way (ie complete with unfolding etc), - -- and extends the in-scope set + = do { unfolding <- simplUnfolding env top_lvl poly_id NoOccInfo noUnfolding rhs + -- Assumes that poly_id did not have an INLINE prag + -- which is perhaps wrong. ToDo: think about this + ; return (addNonRecWithUnf env poly_id rhs unfolding) } -addPolyBind _ env bind@(Rec _) = extendFloats env bind +addPolyBind _ env bind@(Rec _) = return (extendFloats env bind) -- Hack: letrecs are more awkward, so we extend "by steam" -- without adding unfoldings etc. At worst this leads to -- more simplifier iterations ------------------ +------------------------------ addNonRecWithUnf :: SimplEnv - -> OutId -> OutExpr -- New binder and RHS - -> Unfolding -> WorkerInfo -- and unfolding - -> SimplEnv --- Add suitable IdInfo to the Id, add the binding to the floats, and extend the in-scope set -addNonRecWithUnf env new_bndr rhs unfolding wkr - = ASSERT( isId new_bndr ) - WARN( new_arity < old_arity || new_arity < dmd_arity, - (ppr final_id <+> ppr old_arity <+> ppr new_arity <+> ppr dmd_arity) $$ ppr rhs ) - final_id `seq` -- This seq forces the Id, and hence its IdInfo, - -- and hence any inner substitutions - addNonRec env final_id rhs - -- The addNonRec adds it to the in-scope set too - where - dmd_arity = length $ fst $ splitStrictSig $ idNewStrictness new_bndr + -> OutId -> OutExpr -- New binder and RHS + -> Unfolding -- New unfolding + -> SimplEnv +addNonRecWithUnf env new_bndr new_rhs new_unfolding + = let new_arity = exprArity new_rhs old_arity = idArity new_bndr - - -- Arity info - new_arity = exprArity rhs - new_bndr_info = idInfo new_bndr `setArityInfo` new_arity - - -- Unfolding info - -- Add the unfolding *only* for non-loop-breakers - -- Making loop breakers not have an unfolding at all - -- means that we can avoid tests in exprIsConApp, for example. - -- This is important: if exprIsConApp says 'yes' for a recursive - -- thing, then we can get into an infinite loop - - -- Demand info - -- If the unfolding is a value, the demand info may - -- go pear-shaped, so we nuke it. Example: - -- let x = (a,b) in - -- case x of (p,q) -> h p q x - -- Here x is certainly demanded. But after we've nuked - -- the case, we'll get just - -- let x = (a,b) in h a b x - -- and now x is not demanded (I'm assuming h is lazy) - -- This really happens. Similarly - -- let f = \x -> e in ...f..f... - -- After inlining f at some of its call sites the original binding may - -- (for example) be no longer strictly demanded. - -- The solution here is a bit ad hoc... - info_w_unf = new_bndr_info `setUnfoldingInfo` unfolding - `setWorkerInfo` wkr - - final_info | isEvaldUnfolding unfolding = zapDemandInfo info_w_unf `orElse` info_w_unf - | otherwise = info_w_unf + info1 = idInfo new_bndr `setArityInfo` new_arity - final_id = new_bndr `setIdInfo` final_info + -- Unfolding info: Note [Setting the new unfolding] + info2 = info1 `setUnfoldingInfo` new_unfolding + + -- Demand info: Note [Setting the demand info] + info3 | isEvaldUnfolding new_unfolding = zapDemandInfo info2 `orElse` info2 + | otherwise = info2 + + final_id = new_bndr `setIdInfo` info3 + dmd_arity = length $ fst $ splitStrictSig $ idNewStrictness new_bndr + in + ASSERT( isId new_bndr ) + WARN( new_arity < old_arity || new_arity < dmd_arity, + (ppr final_id <+> ppr old_arity <+> ppr new_arity <+> ppr dmd_arity) $$ ppr new_rhs ) + + final_id `seq` -- This seq forces the Id, and hence its IdInfo, + -- and hence any inner substitutions + -- pprTrace "Binding" (ppr final_id <+> ppr unfolding) $ + addNonRec env final_id new_rhs + -- The addNonRec adds it to the in-scope set too + + +------------------------------ +simplUnfolding :: SimplEnv-> TopLevelFlag + -> Id -- Debug output only + -> OccInfo -> Unfolding -> OutExpr + -> SimplM Unfolding +simplUnfolding env top_lvl bndr occ_info old_unf new_rhs -- Note [Setting the new unfolding] + | omit_unfolding = WARN( is_inline_rule, ppr bndr ) return NoUnfolding + | is_inline_rule = return (substUnfolding env is_top_lvl old_unf) + | otherwise = return (mkUnfolding is_top_lvl new_rhs) + where + is_top_lvl = isTopLevel top_lvl + is_inline_rule = isInlineRule old_unf + omit_unfolding = isNonRuleLoopBreaker occ_info \end{code} +Note [Setting the new unfolding] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* If there's an INLINE pragma, we use substUnfolding to retain the + supplied inlining + +* If not, we make an unfolding from the new RHS. But *only* for + non-loop-breakers. Making loop breakers not have an unfolding at all + means that we can avoid tests in exprIsConApp, for example. This is + important: if exprIsConApp says 'yes' for a recursive thing, then we + can get into an infinite loop + +If there's an INLINE pragma on a loop breaker, we simply discard it +(with a DEBUG warning). The desugarer complains about binding groups +that look likely to trigger this behaviour. + + +Note [Setting the demand info] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If the unfolding is a value, the demand info may +go pear-shaped, so we nuke it. Example: + let x = (a,b) in + case x of (p,q) -> h p q x +Here x is certainly demanded. But after we've nuked +the case, we'll get just + let x = (a,b) in h a b x +and now x is not demanded (I'm assuming h is lazy) +This really happens. Similarly + let f = \x -> e in ...f..f... +After inlining f at some of its call sites the original binding may +(for example) be no longer strictly demanded. +The solution here is a bit ad hoc... + %************************************************************************ %* * @@ -925,7 +942,7 @@ simplLam env bndrs body cont ------------------ simplNonRecE :: SimplEnv - -> InId -- The binder + -> InBndr -- The binder -> (InExpr, SimplEnv) -- Rhs of binding (or arg of lambda) -> ([InBndr], InExpr) -- Body of the let/lambda -- \xs.e @@ -984,21 +1001,9 @@ simplNote env (SCC cc) e cont = do { e' <- simplExpr (setEnclosingCC env currentCCS) e ; rebuild env (mkSCC cc e') cont } --- See notes with SimplMonad.inlineMode -simplNote env InlineMe e cont - | Just (inside, outside) <- splitInlineCont cont -- Boring boring continuation; see notes above - = do { -- Don't inline inside an INLINE expression - e' <- simplExprC (setMode inlineMode env) e inside - ; rebuild env (mkInlineMe e') outside } - - | otherwise -- Dissolve the InlineMe note if there's - -- an interesting context of any kind to combine with - -- (even a type application -- anything except Stop) - = simplExprF env e cont - -simplNote env (CoreNote s) e cont = do - e' <- simplExpr env e - rebuild env (Note (CoreNote s) e') cont +simplNote env (CoreNote s) e cont + = do { e' <- simplExpr env e + ; rebuild env (Note (CoreNote s) e') cont } \end{code} @@ -1095,7 +1100,7 @@ completeCall env var cont Just unfolding -- There is an inlining! -> do { tick (UnfoldingDone var) ; (if dopt Opt_D_dump_inlinings dflags then - pprTrace ("Inlining done" ++ showSDoc (ppr var)) (vcat [ + pprTrace ("Inlining done: " ++ showSDoc (ppr var)) (vcat [ text "Before:" <+> ppr var <+> sep (map pprParendExpr args), text "Inlined fn: " <+> nest 2 (ppr unfolding), text "Cont: " <+> ppr call_cont]) @@ -1408,6 +1413,19 @@ The point is that we bring into the envt a binding after the outer case, and that makes (a,b) alive. At least we do unless the case binder is guaranteed dead. +In practice, the scrutinee is almost always a variable, so we pretty +much always zap the OccInfo of the binders. It doesn't matter much though. + + +Note [Case of cast] +~~~~~~~~~~~~~~~~~~~ +Consider case (v `cast` co) of x { I# -> + ... (case (v `cast` co) of {...}) ... +We'd like to eliminate the inner case. We can get this neatly by +arranging that inside the outer case we add the unfolding + v |-> x `cast` (sym co) +to v. Then we should inline v at the inner case, cancel the casts, and away we go + Note [Improving seq] ~~~~~~~~~~~~~~~~~~~ Consider diff --git a/compiler/specialise/Specialise.lhs b/compiler/specialise/Specialise.lhs index 4d8efdd..4a1cc4c 100644 --- a/compiler/specialise/Specialise.lhs +++ b/compiler/specialise/Specialise.lhs @@ -16,7 +16,7 @@ module Specialise ( specProgram ) where import Id ( Id, idName, idType, mkUserLocal, idCoreRules, idInlinePragma, setInlinePragma, setIdUnfolding, - isLocalId ) + isLocalId, idUnfolding ) import TcType ( Type, mkTyVarTy, tcSplitSigmaTy, tyVarsOfTypes, tyVarsOfTheta, isClassPred, tcCmpType, isUnLiftedType @@ -26,7 +26,7 @@ import CoreSubst ( Subst, mkEmptySubst, extendTvSubstList, lookupIdSubst, cloneIdBndr, cloneIdBndrs, cloneRecIdBndrs, extendIdSubst ) -import CoreUnfold ( mkUnfolding ) +import CoreUnfold ( mkUnfolding, mkInlineRule ) import SimplUtils ( interestingArg ) import Var ( DictId ) import VarSet @@ -43,6 +43,7 @@ import Name import MkId ( voidArgId, realWorldPrimId ) import FiniteMap import Maybes ( catMaybes, isJust ) +import BasicTypes ( Arity ) import Bag import Util import Outputable @@ -831,10 +832,14 @@ specDefn subst calls fn rhs n_dicts = length theta inline_prag = idInlinePragma fn - -- It's important that we "see past" any INLINE pragma - -- else we'll fail to specialise an INLINE thing - (inline_rhs, rhs_inside) = dropInline rhs - (rhs_tyvars, rhs_ids, rhs_body) = collectTyAndValBinders rhs_inside + -- Figure out whether the function has an INLINE pragma + -- See Note [Inline specialisations] + fn_has_inline_rule :: Maybe Arity -- Gives arity of the *specialised* inline rule + fn_has_inline_rule = case idUnfolding fn of + InlineRule { uf_arity = arity } -> Just (arity - n_dicts) + _other -> Nothing + + (rhs_tyvars, rhs_ids, rhs_body) = collectTyAndValBinders rhs rhs_dict_ids = take n_dicts rhs_ids body = mkLams (drop n_dicts rhs_ids) rhs_body @@ -922,10 +927,13 @@ specDefn subst calls fn rhs -- Add the { d1' = dx1; d2' = dx2 } usage stuff final_uds = foldr addDictBind rhs_uds dx_binds - spec_pr | inline_rhs = (spec_f `setInlinePragma` inline_prag, Note InlineMe spec_rhs) - | otherwise = (spec_f, spec_rhs) - - ; return (Just (spec_pr, final_uds, spec_env_rule)) } } + -- See Note [Inline specialisations] + final_spec_f | Just spec_arity <- fn_has_inline_rule + = spec_f `setInlinePragma` inline_prag + `setIdUnfolding` mkInlineRule spec_rhs spec_arity + | otherwise + = spec_f + ; return (Just ((final_spec_f, spec_rhs), final_uds, spec_env_rule)) } } where my_zipEqual xs ys zs | debugIsOn && not (equalLength xs ys && equalLength ys zs) @@ -1090,11 +1098,6 @@ specialised version. A case in point is dictionary functions, which are current marked INLINE, but which are worth specialising. -\begin{code} -dropInline :: CoreExpr -> (Bool, CoreExpr) -dropInline (Note InlineMe rhs) = (True, rhs) -dropInline rhs = (False, rhs) -\end{code} %************************************************************************ %* * diff --git a/compiler/stranal/WorkWrap.lhs b/compiler/stranal/WorkWrap.lhs index 438afd6..6ddbbd8 100644 --- a/compiler/stranal/WorkWrap.lhs +++ b/compiler/stranal/WorkWrap.lhs @@ -16,18 +16,16 @@ module WorkWrap ( wwTopBinds, mkWrapper ) where #include "HsVersions.h" import CoreSyn -import CoreUnfold ( certainlyWillInline ) +import CoreUnfold ( certainlyWillInline, mkWwInlineRule ) import CoreLint ( showPass, endPass ) import CoreUtils ( exprType, exprIsHNF, exprArity ) import Id ( Id, idType, isOneShotLambda, setIdNewStrictness, mkWorkerId, - setIdWorkerInfo, setInlinePragma, - setIdArity, idInfo ) + setInlinePragma, setIdUnfolding, setIdArity, idInfo ) import MkId ( lazyIdKey, lazyIdUnfolding ) import Type ( Type ) -import IdInfo ( WorkerInfo(..), arityInfo, - newDemandInfo, newStrictnessInfo, unfoldingInfo, inlinePragInfo - ) +import IdInfo ( arityInfo, newDemandInfo, newStrictnessInfo, + unfoldingInfo, inlinePragInfo ) import NewDemand ( Demand(..), StrictSig(..), DmdType(..), DmdResult(..), Demands(..), mkTopDmdType, isBotRes, returnsCPR, topSig, isAbsent ) @@ -114,16 +112,12 @@ matching by looking for strict arguments of the correct type. \begin{code} wwExpr :: CoreExpr -> UniqSM CoreExpr -wwExpr e@(Type _) = return e -wwExpr e@(Lit _) = return e -wwExpr e@(Note InlineMe expr) = return e - -- Don't w/w inside InlineMe's - +wwExpr e@(Type _) = return e +wwExpr e@(Lit _) = return e wwExpr e@(Var v) | v `hasKey` lazyIdKey = return lazyIdUnfolding | otherwise = return e -- HACK alert: Inline 'lazy' after strictness analysis - -- (but not inside InlineMe's) wwExpr (Lam binder expr) = Lam binder <$> wwExpr expr @@ -172,7 +166,10 @@ The only reason this is monadised is for the unique supply. Note [Don't w/w inline things] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ It's very important to refrain from w/w-ing an INLINE function -If we do so by mistake we transform +because the wrapepr will then overwrite the InlineRule unfolding. + +It was wrong with the old InlineMe Note too: if we do so by mistake +we transform f = __inline (\x -> E) into f = __inline (\x -> case x of (a,b) -> fw E) @@ -268,7 +265,7 @@ splitFun fn_id fn_info wrap_dmds res_info inline_prag rhs -- arity is consistent with the demand type goes through wrap_rhs = wrap_fn work_id - wrap_id = fn_id `setIdWorkerInfo` HasWorker work_id arity + wrap_id = fn_id `setIdUnfolding` mkWwInlineRule wrap_rhs arity work_id ; return ([(work_id, work_rhs), (wrap_id, wrap_rhs)]) }) -- Worker first, because wrapper mentions it diff --git a/compiler/stranal/WwLib.lhs b/compiler/stranal/WwLib.lhs index 0bde744..43aabc3 100644 --- a/compiler/stranal/WwLib.lhs +++ b/compiler/stranal/WwLib.lhs @@ -134,8 +134,8 @@ mkWwBodies fun_ty demands res_info one_shots ; let (work_lam_args, work_call_args) = mkWorkerArgs work_args res_ty ; return ([idNewDemandInfo v | v <- work_call_args, isIdVar v], - Note InlineMe . wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var, - mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args) } + wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var, + mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args) } -- We use an INLINE unconditionally, even if the wrapper turns out to be -- something trivial like -- fw = ... diff --git a/compiler/typecheck/Inst.lhs b/compiler/typecheck/Inst.lhs index b5eeff0..c942435 100644 --- a/compiler/typecheck/Inst.lhs +++ b/compiler/typecheck/Inst.lhs @@ -210,7 +210,9 @@ tyVarsOfLIE lie = tyVarsOfInsts (lieToList lie) -------------------------- instToDictBind :: Inst -> LHsExpr TcId -> TcDictBinds instToDictBind inst rhs - = unitBag (L (instSpan inst) (VarBind (instToId inst) rhs)) + = unitBag (L (instSpan inst) (VarBind { var_id = instToId inst + , var_rhs = rhs + , var_inline = False })) addInstToDictBind :: TcDictBinds -> Inst -> LHsExpr TcId -> TcDictBinds addInstToDictBind binds inst rhs = binds `unionBags` instToDictBind inst rhs diff --git a/compiler/typecheck/TcBinds.lhs b/compiler/typecheck/TcBinds.lhs index b4c0d1a..a5b15f3 100644 --- a/compiler/typecheck/TcBinds.lhs +++ b/compiler/typecheck/TcBinds.lhs @@ -352,7 +352,7 @@ tcPolyBinds top_lvl sig_fn prag_fn rec_group rec_tc binds -- BUILD THE POLYMORPHIC RESULT IDs ; let dict_vars = map instToVar dicts -- May include equality constraints - ; exports <- mapM (mkExport top_lvl prag_fn tyvars_to_gen (map varType dict_vars)) + ; exports <- mapM (mkExport top_lvl rec_group prag_fn tyvars_to_gen (map varType dict_vars)) mono_bind_infos ; let poly_ids = [poly_id | (_, poly_id, _, _) <- exports] @@ -367,7 +367,7 @@ tcPolyBinds top_lvl sig_fn prag_fn rec_group rec_tc binds -------------- -mkExport :: TopLevelFlag -> TcPragFun -> [TyVar] -> [TcType] +mkExport :: TopLevelFlag -> RecFlag -> TcPragFun -> [TyVar] -> [TcType] -> MonoBindInfo -> TcM ([TyVar], Id, Id, [LPrag]) -- mkExport generates exports with @@ -381,13 +381,13 @@ mkExport :: TopLevelFlag -> TcPragFun -> [TyVar] -> [TcType] -- Pre-condition: the inferred_tvs are already zonked -mkExport top_lvl prag_fn inferred_tvs dict_tys (poly_name, mb_sig, mono_id) +mkExport top_lvl rec_group prag_fn inferred_tvs dict_tys (poly_name, mb_sig, mono_id) = do { warn_missing_sigs <- doptM Opt_WarnMissingSigs ; let warn = isTopLevel top_lvl && warn_missing_sigs ; (tvs, poly_id) <- mk_poly_id warn mb_sig -- poly_id has a zonked type - ; prags <- tcPrags poly_id (prag_fn poly_name) + ; prags <- tcPrags rec_group poly_id (prag_fn poly_name) -- tcPrags requires a zonked poly_id ; return (tvs, poly_id, mono_id, prags) } @@ -413,24 +413,34 @@ mkPragFun sigs = \n -> lookupNameEnv env n `orElse` [] env = foldl add emptyNameEnv prs add env (n,p) = extendNameEnv_Acc (:) singleton env n p -tcPrags :: Id -> [LSig Name] -> TcM [LPrag] -tcPrags poly_id prags = mapM (wrapLocM tc_prag) prags +tcPrags :: RecFlag -> Id -> [LSig Name] -> TcM [LPrag] +-- Pre-condition: the poly_id is zonked +-- Reason: required by tcSubExp +tcPrags rec_group poly_id prags = mapM tc_lprag prags where - tc_prag prag = addErrCtxt (pragSigCtxt prag) $ - tcPrag poly_id prag + tc_lprag :: LSig Name -> TcM LPrag + tc_lprag (L loc prag) = setSrcSpan loc $ + addErrCtxt (pragSigCtxt prag) $ + do { prag' <- tc_prag prag + ; return (L loc prag') } + + tc_prag (SpecSig _ hs_ty inl) = tcSpecPrag poly_id hs_ty inl + tc_prag (SpecInstSig hs_ty) = tcSpecPrag poly_id hs_ty defaultInlineSpec + tc_prag (InlineSig _ inl) = do { warnIfRecInline rec_group inl poly_id + ; return (InlinePrag inl) } + tc_prag (FixSig {}) = panic "tcPrag FixSig" + tc_prag (TypeSig {}) = panic "tcPrag TypeSig" pragSigCtxt :: Sig Name -> SDoc pragSigCtxt prag = hang (ptext (sLit "In the pragma")) 2 (ppr prag) -tcPrag :: TcId -> Sig Name -> TcM Prag --- Pre-condition: the poly_id is zonked --- Reason: required by tcSubExp -tcPrag poly_id (SpecSig _ hs_ty inl) = tcSpecPrag poly_id hs_ty inl -tcPrag poly_id (SpecInstSig hs_ty) = tcSpecPrag poly_id hs_ty defaultInlineSpec -tcPrag _ (InlineSig _ inl) = return (InlinePrag inl) -tcPrag _ (FixSig {}) = panic "tcPrag FixSig" -tcPrag _ (TypeSig {}) = panic "tcPrag TypeSig" - +warnIfRecInline :: RecFlag -> InlineSpec -> TcId -> TcM () +warnIfRecInline rec_group (Inline _ is_inline) poly_id + | is_inline && isRec rec_group = addWarnTc warn + | otherwise = return () + where + warn = ptext (sLit "INLINE pragma for recursive binder") <+> quotes (ppr poly_id) + <+> ptext (sLit "may be discarded") tcSpecPrag :: TcId -> LHsType Name -> InlineSpec -> TcM Prag tcSpecPrag poly_id hs_ty inl diff --git a/compiler/typecheck/TcClassDcl.lhs b/compiler/typecheck/TcClassDcl.lhs index 3814f23..cb27a98 100644 --- a/compiler/typecheck/TcClassDcl.lhs +++ b/compiler/typecheck/TcClassDcl.lhs @@ -224,6 +224,8 @@ tcDefMeth rigid_info clas tyvars theta this_dict binds_in sig_fn prag_fn sel_id meth_sig_fn _ = sig_fn sel_name meth_prag_fn _ = prag_fn sel_name + -- See Note [Silly default-method bind] + -- (possibly out of date) ; (top_dm_id, bind) <- tcInstanceMethodBody rigid_info clas tyvars [this_dict] theta (mkTyVarTys tyvars) Nothing sel_id @@ -366,7 +368,6 @@ gives rise to the instance declarations instance C 1 where op Unit = ... - \begin{code} mkGenericDefMethBind :: Class -> [Type] -> Id -> Name -> TcM (LHsBind Name) mkGenericDefMethBind clas inst_tys sel_id meth_name diff --git a/compiler/typecheck/TcExpr.lhs b/compiler/typecheck/TcExpr.lhs index 51d6f4b..687f3d5 100644 --- a/compiler/typecheck/TcExpr.lhs +++ b/compiler/typecheck/TcExpr.lhs @@ -809,12 +809,12 @@ tcId :: InstOrigin -> BoxyRhoType -- Result type -> TcM (HsExpr TcId) tcId orig fun_name res_ty - = do { traceTc (text "tcId" <+> ppr fun_name <+> ppr res_ty) - ; (fun, fun_ty) <- lookupFun orig fun_name - + = do { (fun, fun_ty) <- lookupFun orig fun_name + ; traceTc (text "tcId" <+> ppr fun_name <+> (ppr fun_ty $$ ppr res_ty)) + -- Split up the function type ; let (tv_theta_prs, fun_tau) = tcMultiSplitSigmaTy fun_ty - qtvs = concatMap fst tv_theta_prs -- Quantified tyvars + qtvs = concatMap fst tv_theta_prs -- Quantified tyvars tau_qtvs = exactTyVarsOfType fun_tau -- Mentioned in the tau part ; qtv_tys <- preSubType qtvs tau_qtvs fun_tau res_ty @@ -822,6 +822,8 @@ tcId orig fun_name res_ty ; let res_subst = zipTopTvSubst qtvs qtv_tys fun_tau' = substTy res_subst fun_tau + ; traceTc (text "tcId2" <+> ppr fun_name <+> (ppr qtvs $$ ppr qtv_tys)) + ; co_fn <- tcSubExp orig fun_tau' res_ty -- And pack up the results diff --git a/compiler/typecheck/TcForeign.lhs b/compiler/typecheck/TcForeign.lhs index b1dda2d..23f959b 100644 --- a/compiler/typecheck/TcForeign.lhs +++ b/compiler/typecheck/TcForeign.lhs @@ -233,7 +233,7 @@ tcFExport fo@(ForeignExport (L loc nm) hs_ty spec) = -- is *stable* (i.e. the compiler won't change it later), -- because this name will be referred to by the C code stub. id <- mkStableIdFromName nm sig_ty loc mkForeignExportOcc - return (L loc (VarBind id rhs), ForeignExport (L loc id) undefined spec) + return (mkVarBind id rhs, ForeignExport (L loc id) undefined spec) tcFExport d = pprPanic "tcFExport" (ppr d) \end{code} diff --git a/compiler/typecheck/TcGenDeriv.lhs b/compiler/typecheck/TcGenDeriv.lhs index 9826f2f..dcf230a 100644 --- a/compiler/typecheck/TcGenDeriv.lhs +++ b/compiler/typecheck/TcGenDeriv.lhs @@ -558,8 +558,8 @@ gen_Bounded_binds loc tycon data_cons = tyConDataCons tycon ----- enum-flavored: --------------------------- - min_bound_enum = mkVarBind loc minBound_RDR (nlHsVar data_con_1_RDR) - max_bound_enum = mkVarBind loc maxBound_RDR (nlHsVar data_con_N_RDR) + min_bound_enum = mkHsVarBind loc minBound_RDR (nlHsVar data_con_1_RDR) + max_bound_enum = mkHsVarBind loc maxBound_RDR (nlHsVar data_con_N_RDR) data_con_1 = head data_cons data_con_N = last data_cons @@ -569,9 +569,9 @@ gen_Bounded_binds loc tycon ----- single-constructor-flavored: ------------- arity = dataConSourceArity data_con_1 - min_bound_1con = mkVarBind loc minBound_RDR $ + min_bound_1con = mkHsVarBind loc minBound_RDR $ nlHsVarApps data_con_1_RDR (nOfThem arity minBound_RDR) - max_bound_1con = mkVarBind loc maxBound_RDR $ + max_bound_1con = mkHsVarBind loc maxBound_RDR $ nlHsVarApps data_con_1_RDR (nOfThem arity maxBound_RDR) \end{code} @@ -801,16 +801,16 @@ gen_Read_binds get_fixity loc tycon where ----------------------------------------------------------------------- default_readlist - = mkVarBind loc readList_RDR (nlHsVar readListDefault_RDR) + = mkHsVarBind loc readList_RDR (nlHsVar readListDefault_RDR) default_readlistprec - = mkVarBind loc readListPrec_RDR (nlHsVar readListPrecDefault_RDR) + = mkHsVarBind loc readListPrec_RDR (nlHsVar readListPrecDefault_RDR) ----------------------------------------------------------------------- data_cons = tyConDataCons tycon (nullary_cons, non_nullary_cons) = partition isNullarySrcDataCon data_cons - read_prec = mkVarBind loc readPrec_RDR + read_prec = mkHsVarBind loc readPrec_RDR (nlHsApp (nlHsVar parens_RDR) read_cons) read_cons = foldr1 mk_alt (read_nullary_cons ++ read_non_nullary_cons) @@ -955,7 +955,7 @@ gen_Show_binds get_fixity loc tycon = (listToBag [shows_prec, show_list], []) where ----------------------------------------------------------------------- - show_list = mkVarBind loc showList_RDR + show_list = mkHsVarBind loc showList_RDR (nlHsApp (nlHsVar showList___RDR) (nlHsPar (nlHsApp (nlHsVar showsPrec_RDR) (nlHsIntLit 0)))) ----------------------------------------------------------------------- shows_prec = mk_FunBind loc showsPrec_RDR (map pats_etc (tyConDataCons tycon)) @@ -1189,7 +1189,6 @@ gen_Data_binds loc tycon [nlWildPat] (nlHsVar (mk_data_type_name tycon)) - gfoldl_RDR, gunfold_RDR, toConstr_RDR, dataTypeOf_RDR, mkConstr_RDR, mkDataType_RDR, conIndex_RDR, prefix_RDR, infix_RDR :: RdrName gfoldl_RDR = varQual_RDR gENERICS (fsLit "gfoldl") @@ -1265,7 +1264,7 @@ genAuxBind loc (GenTag2Con tycon) rdr_name = tag2con_RDR tycon genAuxBind loc (GenMaxTag tycon) - = mkVarBind loc rdr_name + = mkHsVarBind loc rdr_name (nlHsApp (nlHsVar intDataCon_RDR) (nlHsLit (HsIntPrim max_tag))) where rdr_name = maxtag_RDR tycon @@ -1273,16 +1272,16 @@ genAuxBind loc (GenMaxTag tycon) data_cons -> toInteger ((length data_cons) - fIRST_TAG) genAuxBind loc (MkTyCon tycon) -- $dT - = mkVarBind loc (mk_data_type_name tycon) - ( nlHsVar mkDataType_RDR + = mkHsVarBind loc (mk_data_type_name tycon) + ( nlHsVar mkDataType_RDR `nlHsApp` nlHsLit (mkHsString (showSDoc (ppr tycon))) `nlHsApp` nlList constrs ) where constrs = [nlHsVar (mk_constr_name con) | con <- tyConDataCons tycon] genAuxBind loc (MkDataCon dc) -- $cT1 etc - = mkVarBind loc (mk_constr_name dc) - (nlHsApps mkConstr_RDR constr_args) + = mkHsVarBind loc (mk_constr_name dc) + (nlHsApps mkConstr_RDR constr_args) where constr_args = [ -- nlHsIntLit (toInteger (dataConTag dc)), -- Tag diff --git a/compiler/typecheck/TcHsSyn.lhs b/compiler/typecheck/TcHsSyn.lhs index 491ca27..46ac794 100644 --- a/compiler/typecheck/TcHsSyn.lhs +++ b/compiler/typecheck/TcHsSyn.lhs @@ -344,10 +344,10 @@ zonk_bind env bind@(PatBind { pat_lhs = pat, pat_rhs = grhss, pat_rhs_ty = ty}) ; new_ty <- zonkTcTypeToType env ty ; return (bind { pat_lhs = new_pat, pat_rhs = new_grhss, pat_rhs_ty = new_ty }) } -zonk_bind env (VarBind { var_id = var, var_rhs = expr }) +zonk_bind env (VarBind { var_id = var, var_rhs = expr, var_inline = inl }) = zonkIdBndr env var `thenM` \ new_var -> zonkLExpr env expr `thenM` \ new_expr -> - returnM (VarBind { var_id = new_var, var_rhs = new_expr }) + returnM (VarBind { var_id = new_var, var_rhs = new_expr, var_inline = inl }) zonk_bind env bind@(FunBind { fun_id = var, fun_matches = ms, fun_co_fn = co_fn }) = wrapLocM (zonkIdBndr env) var `thenM` \ new_var -> @@ -608,7 +608,6 @@ zonk_cmd_top env (HsCmdTop cmd stack_tys ty ids) ------------------------------------------------------------------------- zonkCoFn :: ZonkEnv -> HsWrapper -> TcM (ZonkEnv, HsWrapper) zonkCoFn env WpHole = return (env, WpHole) -zonkCoFn env WpInline = return (env, WpInline) zonkCoFn env (WpCompose c1 c2) = do { (env1, c1') <- zonkCoFn env c1 ; (env2, c2') <- zonkCoFn env1 c2 ; return (env2, WpCompose c1' c2') } diff --git a/compiler/typecheck/TcInstDcls.lhs b/compiler/typecheck/TcInstDcls.lhs index e7c472b..965db15 100644 --- a/compiler/typecheck/TcInstDcls.lhs +++ b/compiler/typecheck/TcInstDcls.lhs @@ -626,7 +626,7 @@ tcInstDecl2 (InstInfo { iSpec = ispec, iBinds = NewTypeDerived }) ; let coerced_rep_dict = wrapId the_coercion (instToId rep_dict) ; body <- make_body cls_tycon cls_inst_tys sc_dicts coerced_rep_dict - ; let dict_bind = noLoc $ VarBind (instToId this_dict) (noLoc body) + ; let dict_bind = mkVarBind (instToId this_dict) (noLoc body) ; return (unitBag $ noLoc $ AbsBinds inst_tvs' (map instToVar dfun_dicts) @@ -744,7 +744,7 @@ tcInstDecl2 (InstInfo { iSpec = ispec, iBinds = VanillaInst monobinds uprags }) checkSigTyVars inst_tyvars' -- Deal with 'SPECIALISE instance' pragmas - prags <- tcPrags dfun_id (filter isSpecInstLSig uprags) + prags <- tcPrags NonRecursive dfun_id (filter isSpecInstLSig uprags) -- Create the result bindings let @@ -763,7 +763,7 @@ tcInstDecl2 (InstInfo { iSpec = ispec, iBinds = VanillaInst monobinds uprags }) -- See Note [Inline dfuns] below sc_dict_vars = map instToVar sc_dicts - dict_bind = L loc (VarBind this_dict_id dict_rhs) + dict_bind = mkVarBind this_dict_id dict_rhs dict_rhs = foldl (\ f a -> L loc (HsApp f (L loc a))) inst_constr meth_exprs inst_constr = L loc $ wrapId (mkWpApps sc_dict_vars <.> mkWpTyApps inst_tys') (dataConWrapId dict_constr) @@ -774,7 +774,6 @@ tcInstDecl2 (InstInfo { iSpec = ispec, iBinds = VanillaInst monobinds uprags }) -- member) are dealt with by the common MkId.mkDataConWrapId code rather -- than needing to be repeated here. - main_bind = noLoc $ AbsBinds inst_tyvars' dfun_lam_vars @@ -825,7 +824,7 @@ tcInstanceMethod loc clas tyvars dfun_dicts theta inst_tys -- then clashes with its friends ; uniq1 <- newUnique ; let local_meth_name = mkInternalName uniq1 sel_occ loc -- Same OccName - this_dict_bind = L loc $ VarBind (instToId cloned_this) $ + this_dict_bind = mkVarBind (instToId cloned_this) $ L loc $ wrapId meth_wrapper dfun_id mb_this_bind | null tyvars = Nothing | otherwise = Just (cloned_this, this_dict_bind) diff --git a/compiler/typecheck/TcRnDriver.lhs b/compiler/typecheck/TcRnDriver.lhs index e0d8632..591ea5e 100644 --- a/compiler/typecheck/TcRnDriver.lhs +++ b/compiler/typecheck/TcRnDriver.lhs @@ -547,6 +547,15 @@ checkHiBootIface -- Check the exports of the boot module, one by one ; mapM_ check_export boot_exports + -- Check instance declarations + ; mb_dfun_prs <- mapM check_inst boot_insts + ; let tcg_env' = tcg_env { tcg_binds = binds `unionBags` dfun_binds, + tcg_type_env = extendTypeEnvWithIds local_type_env boot_dfuns } + dfun_prs = catMaybes mb_dfun_prs + boot_dfuns = map fst dfun_prs + dfun_binds = listToBag [ mkVarBind boot_dfun (nlHsVar dfun) + | (boot_dfun, dfun) <- dfun_prs ] + -- Check for no family instances ; unless (null boot_fam_insts) $ panic ("TcRnDriver.checkHiBootIface: Cannot handle family " ++ @@ -561,7 +570,7 @@ checkHiBootIface final_type_env = extendTypeEnvWithIds local_type_env boot_dfuns dfun_prs = catMaybes mb_dfun_prs boot_dfuns = map fst dfun_prs - dfun_binds = listToBag [ noLoc $ VarBind boot_dfun (nlHsVar dfun) + dfun_binds = listToBag [ mkVarBind boot_dfun (nlHsVar dfun) | (boot_dfun, dfun) <- dfun_prs ] ; failIfErrsM @@ -905,7 +914,7 @@ check_main dflags tcg_env (mkTyConApp ioTyCon [res_ty]) ; co = mkWpTyApps [res_ty] ; rhs = nlHsApp (mkLHsWrap co (nlHsVar run_main_id)) main_expr - ; main_bind = noLoc (VarBind root_main_id rhs) } + ; main_bind = mkVarBind root_main_id rhs } ; return (tcg_env { tcg_binds = tcg_binds tcg_env `snocBag` main_bind, diff --git a/compiler/typecheck/TcSimplify.lhs b/compiler/typecheck/TcSimplify.lhs index 932cb68..98e5aa5 100644 --- a/compiler/typecheck/TcSimplify.lhs +++ b/compiler/typecheck/TcSimplify.lhs @@ -710,6 +710,13 @@ tcSimplifyInfer doc tau_tvs wanted -- irreds2 will be empty. But we don't want to generalise over b! ; let preds2 = fdPredsOfInsts irreds2 -- irreds2 is zonked qtvs = grow preds2 tau_tvs2 `minusVarSet` oclose preds2 gbl_tvs2 + --------------------------------------------------- + -- BUG WARNING: there's a nasty bug lurking here + -- fdPredsOfInsts may return preds that mention variables quantified in + -- one of the implication constraints in irreds2; and that is clearly wrong: + -- we might quantify over too many variables through accidental capture + --------------------------------------------------- + ; let (free, irreds3) = partition (isFreeWhenInferring qtvs) irreds2 ; extendLIEs free @@ -1016,16 +1023,17 @@ makeImplicationBind loc all_tvs <.> mkWpTyApps eq_cotvs <.> mkWpTyApps (mkTyVarTys all_tvs) bind | [dict_irred_id] <- dict_irred_ids - = VarBind dict_irred_id rhs + = mkVarBind dict_irred_id rhs | otherwise - = PatBind { pat_lhs = lpat + = L span $ + PatBind { pat_lhs = lpat , pat_rhs = unguardedGRHSs rhs , pat_rhs_ty = hsLPatType lpat , bind_fvs = placeHolderNames } ; traceTc $ text "makeImplicationBind" <+> ppr implic_inst - ; return ([implic_inst], unitBag (L span bind)) + ; return ([implic_inst], unitBag bind) } ----------------------------------------------------------- @@ -2353,11 +2361,7 @@ reduceImplication env eq_cotvs = map instToVar extra_eq_givens dict_ids = map instToId extra_dict_givens - -- Note [Always inline implication constraints] - wrap_inline | null dict_ids = idHsWrapper - | otherwise = WpInline - co = wrap_inline - <.> mkWpTyLams tvs + co = mkWpTyLams tvs <.> mkWpTyLams eq_cotvs <.> mkWpLams dict_ids <.> WpLet (binds `unionBags` bind) @@ -2369,12 +2373,15 @@ reduceImplication env . filter (not . isEqInst) $ wanteds payload = mkBigLHsTup dict_bndrs - ; traceTc (vcat [text "reduceImplication" <+> ppr name, ppr simpler_implic_insts, text "->" <+> ppr rhs]) - ; return (unitBag (L loc (VarBind (instToId orig_implic) rhs)), + ; return (unitBag (L loc (VarBind { var_id= instToId orig_implic + , var_rhs = rhs + , var_inline = not (null dict_ids) } + -- See Note [Always inline implication constraints] + )), simpler_implic_insts) } } diff --git a/compiler/vectorise/VectType.hs b/compiler/vectorise/VectType.hs index 9952121..ae39cac 100644 --- a/compiler/vectorise/VectType.hs +++ b/compiler/vectorise/VectType.hs @@ -943,6 +943,13 @@ buildPADict repr vect_tc prepr_tc arr_tc _ var <- newLocalVar name (exprType body) return (var, mkInlineMe body) +-- The InlineMe note has gone away. Instead, you need to use +-- CoreUnfold.mkInlineRule to make an InlineRule for the thing, and +-- attach *that* as the unfolding for the dictionary binder +mkInlineMe :: CoreExpr -> CoreExpr +mkInlineMe expr = pprTrace "VectType: Roman, you need to use the new InlineRule story" + (ppr expr) expr + paMethods :: [(FastString, Repr -> TyCon -> TyCon -> TyCon -> VM CoreExpr)] paMethods = [(fsLit "toPRepr", buildToPRepr), (fsLit "fromPRepr", buildFromPRepr), -- 1.7.10.4