X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FsimplCore%2FSimplUtils.lhs;h=56d2795e310d9db0f200e5e3432ee0d2abde31bc;hp=6d2250550ebf376ce0e142124d613791afc7955c;hb=b84ba676034763b3082bbd9405794a4fde499d14;hpb=30c122df62ec75f9ed7f392f24c2925675bf1d06 diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs index 6d22505..56d2795 100644 --- a/compiler/simplCore/SimplUtils.lhs +++ b/compiler/simplCore/SimplUtils.lhs @@ -4,27 +4,21 @@ \section[SimplUtils]{The simplifier utilities} \begin{code} -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - module SimplUtils ( -- Rebuilding - mkLam, mkCase, prepareAlts, bindCaseBndr, + mkLam, mkCase, prepareAlts, -- Inlining, preInlineUnconditionally, postInlineUnconditionally, - activeInline, activeRule, inlineMode, + activeUnfolding, activeUnfInRule, activeRule, + simplEnvForGHCi, simplEnvForRules, updModeForInlineRules, -- The continuation type SimplCont(..), DupFlag(..), ArgInfo(..), contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, - countValArgs, countArgs, splitInlineCont, - mkBoringStop, mkLazyArgStop, mkRhsStop, contIsRhsOrArg, - interestingCallContext, interestingArgContext, + pushArgs, countValArgs, countArgs, addArgTo, + mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg, + interestingCallContext, interestingArg, mkArgInfo, @@ -41,17 +35,16 @@ import qualified CoreSubst import PprCore import CoreFVs import CoreUtils -import Literal +import CoreArity ( etaExpand, exprEtaExpandArity ) import CoreUnfold -import MkId import Name import Id import Var ( isCoVar ) -import NewDemand +import Demand import SimplMonad import Type hiding( substTy ) +import Coercion ( coercionKind ) import TyCon -import DataCon import Unify ( dataConCannotMatch ) import VarSet import BasicTypes @@ -60,7 +53,7 @@ import MonadUtils import Outputable import FastString -import List( nub ) +import Data.List \end{code} @@ -93,7 +86,6 @@ Key points: \begin{code} data SimplCont = Stop -- An empty context, or hole, [] - OutType -- Type of the result CallCtxt -- True <=> There is something interesting about -- the context, and hence the inliner -- should be a bit keener (see interestingCallContext) @@ -107,105 +99,127 @@ data SimplCont | ApplyTo -- C arg DupFlag - InExpr SimplEnv -- The argument and its static env + InExpr StaticEnv -- The argument and its static env SimplCont | Select -- case C of alts DupFlag - InId [InAlt] SimplEnv -- The case binder, alts, and subst-env + InId [InAlt] StaticEnv -- The case binder, alts, and subst-env SimplCont -- The two strict forms have no DupFlag, because we never duplicate them | StrictBind -- (\x* \xs. e) C InId [InBndr] -- let x* = [] in e - InExpr SimplEnv -- is a special case + InExpr StaticEnv -- is a special case SimplCont - | StrictArg -- e C - OutExpr OutType -- e and its type - CallCtxt -- Whether *this* argument position is interesting - ArgInfo -- Whether the function at the head of e has rules, etc - SimplCont -- plus strictness flags for *further* args + | StrictArg -- f e1 ..en C + ArgInfo -- Specifies f, e1..en, Whether f has rules, etc + -- plus strictness flags for *further* args + CallCtxt -- Whether *this* argument position is interesting + SimplCont data ArgInfo = ArgInfo { - ai_rules :: Bool, -- Function has rules (recursively) - -- => be keener to inline in all args - ai_strs :: [Bool], -- Strictness of arguments + ai_fun :: Id, -- The function + ai_args :: [OutExpr], -- ...applied to these args (which are in *reverse* order) + ai_rules :: [CoreRule], -- Rules for this function + + ai_encl :: Bool, -- Flag saying whether this function + -- or an enclosing one has rules (recursively) + -- True => be keener to inline in all args + + ai_strs :: [Bool], -- Strictness of remaining arguments -- Usually infinite, but if it is finite it guarantees -- that the function diverges after being given -- that number of args - ai_discs :: [Int] -- Discounts for arguments; non-zero => be keener to inline + ai_discs :: [Int] -- Discounts for remaining arguments; non-zero => be keener to inline -- Always infinite } +addArgTo :: ArgInfo -> OutExpr -> ArgInfo +addArgTo ai arg = ai { ai_args = arg : ai_args ai } + instance Outputable SimplCont where - ppr (Stop ty _) = ptext SLIT("Stop") <+> ppr ty - ppr (ApplyTo dup arg se cont) = ((ptext SLIT("ApplyTo") <+> ppr dup <+> pprParendExpr arg) + ppr (Stop interesting) = ptext (sLit "Stop") <> brackets (ppr interesting) + ppr (ApplyTo dup arg _ cont) = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg) {- $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont - ppr (StrictBind b _ _ _ cont) = (ptext SLIT("StrictBind") <+> ppr b) $$ ppr cont - ppr (StrictArg f _ _ _ cont) = (ptext SLIT("StrictArg") <+> ppr f) $$ ppr cont - ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$ + ppr (StrictBind b _ _ _ cont) = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont + ppr (StrictArg ai _ cont) = (ptext (sLit "StrictArg") <+> ppr (ai_fun ai)) $$ ppr cont + ppr (Select dup bndr alts _ cont) = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$ (nest 4 (ppr alts)) $$ ppr cont - ppr (CoerceIt co cont) = (ptext SLIT("CoerceIt") <+> ppr co) $$ ppr cont + ppr (CoerceIt co cont) = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont data DupFlag = OkToDup | NoDup instance Outputable DupFlag where - ppr OkToDup = ptext SLIT("ok") - ppr NoDup = ptext SLIT("nodup") + ppr OkToDup = ptext (sLit "ok") + ppr NoDup = ptext (sLit "nodup") ------------------- -mkBoringStop :: OutType -> SimplCont -mkBoringStop ty = Stop ty BoringCtxt +mkBoringStop :: SimplCont +mkBoringStop = Stop BoringCtxt -mkLazyArgStop :: OutType -> CallCtxt -> SimplCont -mkLazyArgStop ty cci = Stop ty cci +mkRhsStop :: SimplCont -- See Note [RHS of lets] in CoreUnfold +mkRhsStop = Stop (ArgCtxt False) -mkRhsStop :: OutType -> SimplCont -mkRhsStop ty = Stop ty BoringCtxt +mkLazyArgStop :: CallCtxt -> SimplCont +mkLazyArgStop cci = Stop cci ------------------- -contIsRhsOrArg (Stop {}) = True -contIsRhsOrArg (StrictBind {}) = True -contIsRhsOrArg (StrictArg {}) = True -contIsRhsOrArg other = False +contIsRhsOrArg :: SimplCont -> Bool +contIsRhsOrArg (Stop {}) = True +contIsRhsOrArg (StrictBind {}) = True +contIsRhsOrArg (StrictArg {}) = True +contIsRhsOrArg _ = False ------------------- contIsDupable :: SimplCont -> Bool -contIsDupable (Stop {}) = True +contIsDupable (Stop {}) = True contIsDupable (ApplyTo OkToDup _ _ _) = True contIsDupable (Select OkToDup _ _ _ _) = True contIsDupable (CoerceIt _ cont) = contIsDupable cont -contIsDupable other = False +contIsDupable _ = False ------------------- contIsTrivial :: SimplCont -> Bool -contIsTrivial (Stop {}) = True +contIsTrivial (Stop {}) = True contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont -contIsTrivial (CoerceIt _ cont) = contIsTrivial cont -contIsTrivial other = False +contIsTrivial (CoerceIt _ cont) = contIsTrivial cont +contIsTrivial _ = False ------------------- -contResultType :: SimplCont -> OutType -contResultType (Stop to_ty _) = to_ty -contResultType (StrictArg _ _ _ _ cont) = contResultType cont -contResultType (StrictBind _ _ _ _ cont) = contResultType cont -contResultType (ApplyTo _ _ _ cont) = contResultType cont -contResultType (CoerceIt _ cont) = contResultType cont -contResultType (Select _ _ _ _ cont) = contResultType cont +contResultType :: SimplEnv -> OutType -> SimplCont -> OutType +contResultType env ty cont + = go cont ty + where + subst_ty se ty = substTy (se `setInScope` env) ty + + go (Stop {}) ty = ty + go (CoerceIt co cont) _ = go cont (snd (coercionKind co)) + go (StrictBind _ bs body se cont) _ = go cont (subst_ty se (exprType (mkLams bs body))) + go (StrictArg ai _ cont) _ = go cont (funResultTy (argInfoResultTy ai)) + go (Select _ _ alts se cont) _ = go cont (subst_ty se (coreAltsType alts)) + go (ApplyTo _ arg se cont) ty = go cont (apply_to_arg ty arg se) + + apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg) + apply_to_arg ty _ _ = funResultTy ty + +argInfoResultTy :: ArgInfo -> OutType +argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args }) + = foldr (\arg fn_ty -> applyTypeToArg fn_ty arg) (idType fun) args ------------------- countValArgs :: SimplCont -> Int -countValArgs (ApplyTo _ (Type ty) se cont) = countValArgs cont -countValArgs (ApplyTo _ val_arg se cont) = 1 + countValArgs cont -countValArgs other = 0 +countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont +countValArgs (ApplyTo _ _ _ cont) = 1 + countValArgs cont +countValArgs _ = 0 countArgs :: SimplCont -> Int -countArgs (ApplyTo _ arg se cont) = 1 + countArgs cont -countArgs other = 0 +countArgs (ApplyTo _ _ _ cont) = 1 + countArgs cont +countArgs _ = 0 contArgs :: SimplCont -> ([OutExpr], SimplCont) -- Uses substitution to turn each arg into an OutExpr @@ -214,69 +228,19 @@ contArgs cont = go [] cont go args (ApplyTo _ arg se cont) = go (substExpr se arg : args) cont go args cont = (reverse args, cont) +pushArgs :: SimplEnv -> [CoreExpr] -> SimplCont -> SimplCont +pushArgs _env [] cont = cont +pushArgs env (arg:args) cont = ApplyTo NoDup arg env (pushArgs env args cont) + dropArgs :: Int -> SimplCont -> SimplCont dropArgs 0 cont = cont dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont dropArgs n other = pprPanic "dropArgs" (ppr n <+> ppr other) - --------------------- -splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont) --- Returns Nothing if the continuation should dissolve an InlineMe Note --- Return Just (c1,c2) otherwise, --- where c1 is the continuation to put inside the InlineMe --- and c2 outside - --- Example: (__inline_me__ (/\a. e)) ty --- Here we want to do the beta-redex without dissolving the InlineMe --- See test simpl017 (and Trac #1627) for a good example of why this is important - -splitInlineCont (ApplyTo dup (Type ty) se c) - | Just (c1, c2) <- splitInlineCont c = Just (ApplyTo dup (Type ty) se c1, c2) -splitInlineCont cont@(Stop ty _) = Just (mkBoringStop ty, cont) -splitInlineCont cont@(StrictBind bndr _ _ se _) = Just (mkBoringStop (substTy se (idType bndr)), cont) -splitInlineCont cont@(StrictArg _ fun_ty _ _ _) = Just (mkBoringStop (funArgTy fun_ty), cont) -splitInlineCont other = Nothing - -- NB: the calculation of the type for mkBoringStop is an annoying - -- duplication of the same calucation in mkDupableCont -\end{code} - - -\begin{code} -interestingArg :: OutExpr -> Bool - -- An argument is interesting if it has *some* structure - -- We are here trying to avoid unfolding a function that - -- is applied only to variables that have no unfolding - -- (i.e. they are probably lambda bound): f x y z - -- There is little point in inlining f here. -interestingArg (Var v) = hasSomeUnfolding (idUnfolding v) - -- Was: isValueUnfolding (idUnfolding v') - -- But that seems over-pessimistic - || isDataConWorkId v - -- This accounts for an argument like - -- () or [], which is definitely interesting -interestingArg (Type _) = False -interestingArg (App fn (Type _)) = interestingArg fn -interestingArg (Note _ a) = interestingArg a - --- Idea (from Sam B); I'm not sure if it's a good idea, so commented out for now --- interestingArg expr | isUnLiftedType (exprType expr) --- -- Unlifted args are only ever interesting if we know what they are --- = case expr of --- Lit lit -> True --- _ -> False - -interestingArg other = True - -- Consider let x = 3 in f x - -- The substitution will contain (x -> ContEx 3), and we want to - -- to say that x is an interesting argument. - -- But consider also (\x. f x y) y - -- The substitution will contain (x -> ContEx y), and we want to say - -- that x is not interesting (assuming y has no unfolding) \end{code} -Comment about interestingCallContext -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Note [Interesting call context] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We want to avoid inlining an expression where there can't possibly be any gain, such as in an argument position. Hence, if the continuation is interesting (eg. a case scrutinee, application etc.) then we @@ -311,25 +275,27 @@ default case. \begin{code} interestingCallContext :: SimplCont -> CallCtxt +-- See Note [Interesting call context] interestingCallContext cont = interesting cont where - interestingCtxt = ArgCtxt False 2 -- Give *some* incentive! - interesting (Select _ bndr _ _ _) - | isDeadBinder bndr = CaseCtxt - | otherwise = interestingCtxt + | isDeadBinder bndr = CaseCtxt + | otherwise = ArgCtxt False -- If the binder is used, this + -- is like a strict let + -- See Note [RHS of lets] in CoreUnfold - interesting (ApplyTo {}) = interestingCtxt - -- Can happen if we have (coerce t (f x)) y - -- Perhaps interestingCtxt is a bit over-keen, but I've - -- seen (coerce f) x, where f has an INLINE prag, - -- So we have to give some motivation for inlining it - - interesting (StrictArg _ _ cci _ _) = cci - interesting (StrictBind {}) = BoringCtxt - interesting (Stop ty cci) = cci - interesting (CoerceIt _ cont) = interesting cont + interesting (ApplyTo _ arg _ cont) + | isTypeArg arg = interesting cont + | otherwise = ValAppCtxt -- Can happen if we have (f Int |> co) y + -- If f has an INLINE prag we need to give it some + -- motivation to inline. See Note [Cast then apply] + -- in CoreUnfold + + interesting (StrictArg _ cci _) = cci + interesting (StrictBind {}) = BoringCtxt + interesting (Stop cci) = cci + interesting (CoerceIt _ cont) = interesting cont -- If this call is the arg of a strict function, the context -- is a bit interesting. If we inline here, we may get useful -- evaluation information to avoid repeated evals: e.g. @@ -348,27 +314,35 @@ interestingCallContext cont ------------------- mkArgInfo :: Id + -> [CoreRule] -- Rules for function -> Int -- Number of value args - -> SimplCont -- Context of the cal + -> SimplCont -- Context of the call -> ArgInfo -mkArgInfo fun n_val_args call_cont - = ArgInfo { ai_rules = interestingArgContext fun call_cont - , ai_strs = arg_stricts +mkArgInfo fun rules n_val_args call_cont + | n_val_args < idArity fun -- Note [Unsaturated functions] + = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules + , ai_encl = False + , ai_strs = vanilla_stricts + , ai_discs = vanilla_discounts } + | otherwise + = ArgInfo { ai_fun = fun, ai_args = [], ai_rules = rules + , ai_encl = interestingArgContext rules call_cont + , ai_strs = add_type_str (idType fun) arg_stricts , ai_discs = arg_discounts } where vanilla_discounts, arg_discounts :: [Int] vanilla_discounts = repeat 0 arg_discounts = case idUnfolding fun of - CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _) + CoreUnfolding {uf_guidance = UnfIfGoodArgs {ug_args = discounts}} -> discounts ++ vanilla_discounts - other -> vanilla_discounts + _ -> vanilla_discounts vanilla_stricts, arg_stricts :: [Bool] vanilla_stricts = repeat False arg_stricts - = case splitStrictSig (idNewStrictness fun) of + = case splitStrictSig (idStrictness fun) of (demands, result_info) | not (demands `lengthExceeds` n_val_args) -> -- Enough args, use the strictness given. @@ -382,10 +356,39 @@ mkArgInfo fun n_val_args call_cont map isStrictDmd demands -- Finite => result is bottom else map isStrictDmd demands ++ vanilla_stricts + | otherwise + -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun) + <+> ppr n_val_args <+> ppr demands ) + vanilla_stricts -- Not enough args, or no strictness + + add_type_str :: Type -> [Bool] -> [Bool] + -- If the function arg types are strict, record that in the 'strictness bits' + -- No need to instantiate because unboxed types (which dominate the strict + -- types) can't instantiate type variables. + -- add_type_str is done repeatedly (for each call); might be better + -- once-for-all in the function + -- But beware primops/datacons with no strictness + add_type_str _ [] = [] + add_type_str fun_ty strs -- Look through foralls + | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty -- Includes coercions + = add_type_str fun_ty' strs + add_type_str fun_ty (str:strs) -- Add strict-type info + | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty + = (str || isStrictType arg_ty) : add_type_str fun_ty' strs + add_type_str _ strs + = strs + +{- Note [Unsaturated functions] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider (test eyeball/inline4) + x = a:as + y = f x +where f has arity 2. Then we do not want to inline 'x', because +it'll just be floated out again. Even if f has lots of discounts +on its first argument -- it must be saturated for these to kick in +-} - other -> vanilla_stricts -- Not enough args, or no strictness - -interestingArgContext :: Id -> SimplCont -> Bool +interestingArgContext :: [CoreRule] -> SimplCont -> Bool -- If the argument has form (f x y), where x,y are boring, -- and f is marked INLINE, then we don't want to inline f. -- But if the context of the argument is @@ -396,25 +399,27 @@ interestingArgContext :: Id -> SimplCont -> Bool -- where h has rules, then we do want to inline f; hence the -- call_cont argument to interestingArgContext -- --- The interesting_arg_ctxt flag makes this happen; if it's +-- The ai-rules flag makes this happen; if it's -- set, the inliner gets just enough keener to inline f -- regardless of how boring f's arguments are, if it's marked INLINE -- -- The alternative would be to *always* inline an INLINE function, -- regardless of how boring its context is; but that seems overkill -- For example, it'd mean that wrapper functions were always inlined -interestingArgContext fn call_cont - = idHasRules fn || go call_cont +interestingArgContext rules call_cont + = notNull rules || enclosing_fn_has_rules where - go (Select {}) = False - go (ApplyTo {}) = False - go (StrictArg _ _ cci _ _) = interesting cci - go (StrictBind {}) = False -- ?? - go (CoerceIt _ c) = go c - go (Stop _ cci) = interesting cci - - interesting (ArgCtxt rules _) = rules - interesting other = False + enclosing_fn_has_rules = go call_cont + + go (Select {}) = False + go (ApplyTo {}) = False + go (StrictArg _ cci _) = interesting cci + go (StrictBind {}) = False -- ?? + go (CoerceIt _ c) = go c + go (Stop cci) = interesting cci + + interesting (ArgCtxt rules) = rules + interesting _ = False \end{code} @@ -425,18 +430,61 @@ interestingArgContext fn call_cont %* * %************************************************************************ -Inlining is controlled partly by the SimplifierMode switch. This has two -settings: +\begin{code} +simplEnvForGHCi :: SimplEnv +simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $ + SimplGently { sm_rules = False, sm_inline = False } + -- Do not do any inlining, in case we expose some unboxed + -- tuple stuff that confuses the bytecode interpreter + +simplEnvForRules :: SimplEnv +simplEnvForRules = mkSimplEnv allOffSwitchChecker $ + SimplGently { sm_rules = True, sm_inline = False } + +updModeForInlineRules :: SimplifierMode -> SimplifierMode +updModeForInlineRules mode + = case mode of + SimplGently {} -> mode -- Don't modify mode if we already gentle + SimplPhase {} -> SimplGently { sm_rules = True, sm_inline = True } + -- Simplify as much as possible, subject to the usual "gentle" rules +\end{code} +Inlining is controlled partly by the SimplifierMode switch. This has two +settings + SimplGently (a) Simplifying before specialiser/full laziness - (b) Simplifiying inside INLINE pragma + (b) Simplifiying inside InlineRules (c) Simplifying the LHS of a rule (d) Simplifying a GHCi expression or Template Haskell splice SimplPhase n _ Used at all other times -The key thing about SimplGently is that it does no call-site inlining. +Note [Gentle mode] +~~~~~~~~~~~~~~~~~~ +Gentle mode has a separate boolean flag to control + a) inlining (sm_inline flag) + b) rules (sm_rules flag) +A key invariant about Gentle mode is that it is treated as the EARLIEST +phase. Something is inlined if the sm_inline flag is on AND the thing +is inlinable in the earliest phase. This is important. Example + + {-# INLINE [~1] g #-} + g = ... + + {-# INLINE f #-} + f x = g (g x) + +If we were to inline g into f's inlining, then an importing module would +never be able to do + f e --> g (g e) ---> RULE fires +because the InlineRule for f has had g inlined into it. + +On the other hand, it is bad not to do ANY inlining into an +InlineRule, because then recursive knots in instance declarations +don't get unravelled. + +However, *sometimes* SimplGently must do no call-site inlining at all. Before full laziness we must be careful not to inline wrappers, because doing so inhibits floating e.g. ...(case f x of ...)... @@ -450,17 +498,24 @@ running it, we don't want to use -O2. Indeed, we don't want to inline anything, because the byte-code interpreter might get confused about unboxed tuples and suchlike. -INLINE pragmas -~~~~~~~~~~~~~~ -SimplGently is also used as the mode to simplify inside an InlineMe note. +Note [RULEs enabled in SimplGently] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +RULES are enabled when doing "gentle" simplification. Two reasons: -\begin{code} -inlineMode :: SimplifierMode -inlineMode = SimplGently -\end{code} + * We really want the class-op cancellation to happen: + op (df d1 d2) --> $cop3 d1 d2 + because this breaks the mutual recursion between 'op' and 'df' + + * I wanted the RULE + lift String ===> ... + to work in Template Haskell when simplifying + splices, so we get simpler code for literal strings -It really is important to switch off inlinings inside such -expressions. Consider the following example +Note [Simplifying gently inside InlineRules] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We don't do much simplification inside InlineRules (which come from +INLINE pragmas). It really is important to switch off inlinings +inside such expressions. Consider the following example let f = \pq -> BIG in @@ -469,16 +524,14 @@ expressions. Consider the following example in ...g...g...g...g...g... Now, if that's the ONLY occurrence of f, it will be inlined inside g, -and thence copied multiple times when g is inlined. +and thence copied multiple times when g is inlined. - -This function may be inlinined in other modules, so we -don't want to remove (by inlining) calls to functions that have -specialisations, or that may have transformation rules in an importing -scope. +This function may be inlinined in other modules, so we don't want to +remove (by inlining) calls to functions that have specialisations, or +that may have transformation rules in an importing scope. E.g. {-# INLINE f #-} - f x = ...g... + f x = ...g... and suppose that g is strict *and* has specialisations. If we inline g's wrapper, we deny f the chance of getting the specialised version @@ -496,15 +549,14 @@ continuation. That's why the keep_inline predicate returns True for ArgOf continuations. It shouldn't do any harm not to dissolve the inline-me note under these circumstances. -Note that the result is that we do very little simplification -inside an InlineMe. +Although we do very little simplification inside an InlineRule, +the RHS is simplified as normal. For example: all xs = foldr (&&) True xs any p = all . map p {-# INLINE any #-} -Problem: any won't get deforested, and so if it's exported and the -importer doesn't use the inlining, (eg passes it as an arg) then we -won't get deforestation at all. We havn't solved this problem yet! +The RHS of 'any' will get optimised and deforested; but the InlineRule +will still mention the original RHS. preInlineUnconditionally @@ -571,27 +623,46 @@ seems a bit fragile. Conclusion: inline top level things gaily until Phase 0 (the last phase), at which point don't. +Note [pre/postInlineUnconditionally in gentle mode] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Even in gentle mode we want to do preInlineUnconditionally. The +reason is that too little clean-up happens if you don't inline +use-once things. Also a bit of inlining is *good* for full laziness; +it can expose constant sub-expressions. Example in +spectral/mandel/Mandel.hs, where the mandelset function gets a useful +let-float if you inline windowToViewport + +However, as usual for Gentle mode, do not inline things that are +inactive in the intial stages. See Note [Gentle mode]. + +Note [Top-level botomming Ids] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Don't inline top-level Ids that are bottoming, even if they are used just +once, because FloatOut has gone to some trouble to extract them out. +Inlining them won't make the program run faster! + \begin{code} preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool preInlineUnconditionally env top_lvl bndr rhs - | not active = False - | opt_SimplNoPreInlining = False + | not active = False + | isTopLevel top_lvl && isBottomingId bndr = False -- Note [Top-level bottoming Ids] + | opt_SimplNoPreInlining = False | otherwise = case idOccInfo bndr of IAmDead -> True -- Happens in ((\x.1) v) OneOcc in_lam True int_cxt -> try_once in_lam int_cxt - other -> False + _ -> False where phase = getMode env active = case phase of - SimplGently -> isAlwaysActive prag - SimplPhase n _ -> isActive n prag - prag = idInlinePragma bndr - + SimplGently {} -> isEarlyActive act + -- See Note [pre/postInlineUnconditionally in gentle mode] + SimplPhase n _ -> isActive n act + act = idInlineActivation bndr try_once in_lam int_cxt -- There's one textual occurrence | not in_lam = isNotTopLevel top_lvl || early_phase | otherwise = int_cxt && canInlineInLam rhs --- Be very careful before inlining inside a lambda, becuase (a) we must not +-- Be very careful before inlining inside a lambda, because (a) we must not -- invalidate occurrence information, and (b) we want to avoid pushing a -- single allocation (here) into multiple allocations (inside lambda). -- Inlining a *function* with a single *saturated* call would be ok, mind you. @@ -612,14 +683,14 @@ preInlineUnconditionally env top_lvl bndr rhs -- canInlineInLam => free vars of rhs are (Once in_lam) or Many, -- so substituting rhs inside a lambda doesn't change the occ info. -- Sadly, not quite the same as exprIsHNF. - canInlineInLam (Lit l) = True + canInlineInLam (Lit _) = True canInlineInLam (Lam b e) = isRuntimeVar b || canInlineInLam e canInlineInLam (Note _ e) = canInlineInLam e canInlineInLam _ = False early_phase = case phase of SimplPhase 0 _ -> False - other -> True + _ -> True -- If we don't have this early_phase test, consider -- x = length [1,2,3] -- The full laziness pass carefully floats all the cons cells to @@ -668,17 +739,19 @@ story for now. \begin{code} postInlineUnconditionally :: SimplEnv -> TopLevelFlag - -> InId -- The binder (an OutId would be fine too) + -> OutId -- The binder (an InId would be fine too) -> OccInfo -- From the InId -> OutExpr -> Unfolding -> Bool postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding - | not active = False - | isLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, dont' inline + | not active = False + | isLoopBreaker occ_info = False -- If it's a loop-breaker of any kind, don't inline -- because it might be referred to "earlier" - | isExportedId bndr = False - | exprIsTrivial rhs = True + | isExportedId bndr = False + | isStableUnfolding unfolding = False -- Note [InlineRule and postInlineUnconditionally] + | exprIsTrivial rhs = True + | isTopLevel top_lvl = False -- Note [Top level and postInlineUnconditionally] | otherwise = case occ_info of -- The point of examining occ_info here is that for *non-values* @@ -691,8 +764,9 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding -- case v of -- True -> case x of ... -- False -> case x of ... - -- I'm not sure how important this is in practice - OneOcc in_lam one_br int_cxt -- OneOcc => no code-duplication issue + -- This is very important in practice; e.g. wheel-seive1 doubles + -- in allocation if you miss this out + OneOcc in_lam _one_br int_cxt -- OneOcc => no code-duplication issue -> smallEnoughToInline unfolding -- Small enough to dup -- ToDo: consider discount on smallEnoughToInline if int_cxt is true -- @@ -704,8 +778,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding -- PRINCIPLE: when we've already simplified an expression once, -- make sure that we only inline it if it's reasonably small. - && ((isNotTopLevel top_lvl && not in_lam) || - -- But outside a lambda, we want to be reasonably aggressive + && (not in_lam || + -- Outside a lambda, we want to be reasonably aggressive -- about inlining into multiple branches of case -- e.g. let x = -- in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... } @@ -723,65 +797,116 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding -- Here x isn't mentioned in the RHS, so we don't want to -- create the (dead) let-binding let x = (a,b) in ... - other -> False + _ -> False -- Here's an example that we don't handle well: -- let f = if b then Left (\x.BIG) else Right (\y.BIG) -- in \y. ....case f of {...} .... -- Here f is used just once, and duplicating the case work is fine (exprIsCheap). -- But --- * We can't preInlineUnconditionally because that woud invalidate --- the occ info for b. --- * We can't postInlineUnconditionally because the RHS is big, and --- that risks exponential behaviour --- * We can't call-site inline, because the rhs is big +-- - We can't preInlineUnconditionally because that woud invalidate +-- the occ info for b. +-- - We can't postInlineUnconditionally because the RHS is big, and +-- that risks exponential behaviour +-- - We can't call-site inline, because the rhs is big -- Alas! where active = case getMode env of - SimplGently -> isAlwaysActive prag - SimplPhase n _ -> isActive n prag - prag = idInlinePragma bndr + SimplGently {} -> isEarlyActive act + -- See Note [pre/postInlineUnconditionally in gentle mode] + SimplPhase n _ -> isActive n act + act = idInlineActivation bndr -activeInline :: SimplEnv -> OutId -> Bool -activeInline env id +activeUnfolding :: SimplEnv -> IdUnfoldingFun +activeUnfolding env = case getMode env of - SimplGently -> False - -- No inlining at all when doing gentle stuff, - -- except for local things that occur once - -- The reason is that too little clean-up happens if you - -- don't inline use-once things. Also a bit of inlining is *good* for - -- full laziness; it can expose constant sub-expressions. - -- Example in spectral/mandel/Mandel.hs, where the mandelset - -- function gets a useful let-float if you inline windowToViewport - - -- NB: we used to have a second exception, for data con wrappers. - -- On the grounds that we use gentle mode for rule LHSs, and - -- they match better when data con wrappers are inlined. - -- But that only really applies to the trivial wrappers (like (:)), - -- and they are now constructed as Compulsory unfoldings (in MkId) - -- so they'll happen anyway. - - SimplPhase n _ -> isActive n prag + SimplGently { sm_inline = False } -> active_unfolding_minimal + SimplGently { sm_inline = True } -> active_unfolding_gentle + SimplPhase n _ -> active_unfolding n + +activeUnfInRule :: SimplEnv -> IdUnfoldingFun +-- When matching in RULE, we want to "look through" an unfolding +-- if *rules* are on, even if *inlinings* are not. A notable example +-- is DFuns, which really we want to match in rules like (op dfun) +-- in gentle mode. +activeUnfInRule env + = case getMode env of + SimplGently { sm_rules = False } -> active_unfolding_minimal + SimplGently { sm_rules = True } -> active_unfolding_gentle + SimplPhase n _ -> active_unfolding n + +active_unfolding_minimal :: IdUnfoldingFun +-- Compuslory unfoldings only +-- Ignore SimplGently, because we want to inline regardless; +-- the Id has no top-level binding at all +-- +-- NB: we used to have a second exception, for data con wrappers. +-- On the grounds that we use gentle mode for rule LHSs, and +-- they match better when data con wrappers are inlined. +-- But that only really applies to the trivial wrappers (like (:)), +-- and they are now constructed as Compulsory unfoldings (in MkId) +-- so they'll happen anyway. +active_unfolding_minimal id + | isCompulsoryUnfolding unf = unf + | otherwise = NoUnfolding where - prag = idInlinePragma id + unf = realIdUnfolding id -- Never a loop breaker + +active_unfolding_gentle :: IdUnfoldingFun +-- Anything that is early-active +-- See Note [Gentle mode] +active_unfolding_gentle id + | isEarlyActive (idInlineActivation id) = idUnfolding id + | otherwise = NoUnfolding + -- idUnfolding checks for loop-breakers + -- Things with an INLINE pragma may have + -- an unfolding *and* be a loop breaker + -- (maybe the knot is not yet untied) + +active_unfolding :: CompilerPhase -> IdUnfoldingFun +active_unfolding n id + | isActive n (idInlineActivation id) = idUnfolding id + | otherwise = NoUnfolding activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool) -- Nothing => No rules at all activeRule dflags env - | not (dopt Opt_RewriteRules dflags) + | not (dopt Opt_EnableRewriteRules dflags) = Nothing -- Rewriting is off | otherwise = case getMode env of - SimplGently -> Just isAlwaysActive - -- Used to be Nothing (no rules in gentle mode) - -- Main motivation for changing is that I wanted - -- lift String ===> ... - -- to work in Template Haskell when simplifying - -- splices, so we get simpler code for literal strings - SimplPhase n _ -> Just (isActive n) + SimplGently { sm_rules = rules_on } + | rules_on -> Just isEarlyActive -- Note [RULEs enabled in SimplGently] + | otherwise -> Nothing + SimplPhase n _ -> Just (isActive n) \end{code} +Note [Top level and postInlineUnconditionally] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We don't do postInlineUnconditionally for top-level things (except +ones that are trivial). There is no point, because the main goal is +to get rid of local bindings used in multiple case branches. And +doing so risks replacing a single global allocation with local allocations. + + +Note [InlineRule and postInlineUnconditionally] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise +we lose the unfolding. Example + + -- f has InlineRule with rhs (e |> co) + -- where 'e' is big + f = e |> co + +Then there's a danger we'll optimise to + + f' = e + f = f' |> co + +and now postInlineUnconditionally, losing the InlineRule on f. Now f' +won't inline because 'e' is too big. + %************************************************************************ %* * @@ -790,14 +915,14 @@ activeRule dflags env %************************************************************************ \begin{code} -mkLam :: [OutBndr] -> OutExpr -> SimplM OutExpr +mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr -- mkLam tries three things -- a) eta reduction, if that gives a trivial expression -- b) eta expansion [only if there are some value lambdas] -mkLam [] body +mkLam _b [] body = return body -mkLam bndrs body +mkLam env bndrs body = do { dflags <- getDOptsSmpl ; mkLam' dflags bndrs body } where @@ -818,8 +943,10 @@ mkLam bndrs body ; return etad_lam } | dopt Opt_DoLambdaEtaExpansion dflags, - any isRuntimeVar bndrs - = do { body' <- tryEtaExpansion dflags body + not (inGentleMode env), -- In gentle mode don't eta-expansion + any isRuntimeVar bndrs -- because it can clutter up the code + -- with casts etc that may not be removed + = do { let body' = tryEtaExpansion dflags body ; return (mkLams bndrs body') } | otherwise @@ -906,22 +1033,33 @@ There are some particularly delicate points here: So it's important to to the right thing. -* We need to be careful if we just look at f's arity. Currently (Dec07), - f's arity is visible in its own RHS (see Note [Arity robustness] in - SimplEnv) so we must *not* trust the arity when checking that 'f' is - a value. Instead, look at the unfolding. +* Note [Arity care]: we need to be careful if we just look at f's + arity. Currently (Dec07), f's arity is visible in its own RHS (see + Note [Arity robustness] in SimplEnv) so we must *not* trust the + arity when checking that 'f' is a value. Otherwise we will + eta-reduce + f = \x. f x + to + f = f + Which might change a terminiating program (think (f `seq` e)) to a + non-terminating one. So we check for being a loop breaker first. However for GlobalIds we can look at the arity; and for primops we must, since they have no unfolding. -* Regardless of whether 'f' is a vlaue, we always want to +* Regardless of whether 'f' is a value, we always want to reduce (/\a -> f a) to f This came up in a RULE: foldr (build (/\a -> g a)) - did not match foldr (build (/\b -> ...something complex...)) + did not match foldr (build (/\b -> ...something complex...)) The type checker can insert these eta-expanded versions, with both type and dictionary lambdas; hence the slightly ad-hoc isDictId +* Never *reduce* arity. For example + f = \xy. g x y + Then if h has arity 1 we don't want to eta-reduce because then + f's arity would decrease, and that is bad + These delicacies are why we don't use exprIsTrivial and exprIsHNF here. Alas. @@ -930,6 +1068,8 @@ tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr tryEtaReduce bndrs body = go (reverse bndrs) body where + incoming_arity = count isId bndrs + go (b : bs) (App fun arg) | ok_arg b arg = go bs fun -- Loop round go [] fun | ok_fun fun = Just fun -- Success! go _ _ = Nothing -- Failure! @@ -943,10 +1083,11 @@ tryEtaReduce bndrs body && (ok_fun_id fun_id || all ok_lam bndrs) ok_fun _fun = False - ok_fun_id fun - | isLocalId fun = isEvaldUnfolding (idUnfolding fun) - | isDataConWorkId fun = True - | isGlobalId fun = idArity fun > 0 + ok_fun_id fun = fun_arity fun >= incoming_arity + + fun_arity fun -- See Note [Arity care] + | isLocalId fun && isLoopBreaker (idOccInfo fun) = 0 + | otherwise = idArity fun ok_lam v = isTyVar v || isDictId v @@ -990,11 +1131,10 @@ when computing arity; and etaExpand adds the coerces as necessary when actually computing the expansion. \begin{code} -tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr +tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr -- There is at least one runtime binder in the binders -tryEtaExpansion dflags body = do - us <- getUniquesM - return (etaExpand fun_arity us body (exprType body)) +tryEtaExpansion dflags body + = etaExpand fun_arity body where fun_arity = exprEtaExpandArity dflags body \end{code} @@ -1013,7 +1153,7 @@ Consider this: We'd like to float this to y1 = /\a. e1 y2 = /\a. e2 - x = /\a. C (y1 a) (y2 a) + x = /\a. C (y1 a) (y2 a) for the usual reasons: we want to inline x rather vigorously. You may think that this kind of thing is rare. But in some programs it is @@ -1147,7 +1287,8 @@ abstractFloats main_tvs body_env body = do { uniq <- getUniqueM ; let poly_name = setNameUnique (idName var) uniq -- Keep same name poly_ty = mkForAllTys tvs_here (idType var) -- But new type of course - poly_id = mkLocalId poly_name poly_ty + poly_id = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.lhs + mkLocalId poly_name poly_ty ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) } -- In the olden days, it was crucial to copy the occInfo of the original var, -- because we were looking at occurrence-analysed but as yet unsimplified code! @@ -1200,83 +1341,61 @@ Historical note: if you use let-bindings instead of a substitution, beware of th prepareAlts tries these things: -1. If several alternatives are identical, merge them into - a single DEFAULT alternative. I've occasionally seen this - making a big difference: +1. Eliminate alternatives that cannot match, including the + DEFAULT alternative. - case e of =====> case e of - C _ -> f x D v -> ....v.... - D v -> ....v.... DEFAULT -> f x - DEFAULT -> f x - - The point is that we merge common RHSs, at least for the DEFAULT case. - [One could do something more elaborate but I've never seen it needed.] - To avoid an expensive test, we just merge branches equal to the *first* - alternative; this picks up the common cases - a) all branches equal - b) some branches equal to the DEFAULT (which occurs first) - -2. Case merging: - case e of b { ==> case e of b { - p1 -> rhs1 p1 -> rhs1 - ... ... - pm -> rhsm pm -> rhsm - _ -> case b of b' { pn -> let b'=b in rhsn - pn -> rhsn ... - ... po -> let b'=b in rhso - po -> rhso _ -> let b'=b in rhsd - _ -> rhsd - } - - which merges two cases in one case when -- the default alternative of - the outer case scrutises the same variable as the outer case This - transformation is called Case Merging. It avoids that the same - variable is scrutinised multiple times. +2. If the DEFAULT alternative can match only one possible constructor, + then make that constructor explicit. + e.g. + case e of x { DEFAULT -> rhs } + ===> + case e of x { (a,b) -> rhs } + where the type is a single constructor type. This gives better code + when rhs also scrutinises x or e. +3. Returns a list of the constructors that cannot holds in the + DEFAULT alternative (if there is one) -The case where transformation (1) showed up was like this (lib/std/PrelCError.lhs): +Here "cannot match" includes knowledge from GADTs - x | p `is` 1 -> e1 - | p `is` 2 -> e2 - ...etc... +It's a good idea do do this stuff before simplifying the alternatives, to +avoid simplifying alternatives we know can't happen, and to come up with +the list of constructors that are handled, to put into the IdInfo of the +case binder, for use when simplifying the alternatives. -where @is@ was something like - - p `is` n = p /= (-1) && p == n +Eliminating the default alternative in (1) isn't so obvious, but it can +happen: -This gave rise to a horrible sequence of cases +data Colour = Red | Green | Blue - case p of - (-1) -> $j p - 1 -> e1 - DEFAULT -> $j p +f x = case x of + Red -> .. + Green -> .. + DEFAULT -> h x -and similarly in cascade for all the join points! +h y = case y of + Blue -> .. + DEFAULT -> [ case y of ... ] -Note [Dead binders] -~~~~~~~~~~~~~~~~~~~~ -We do this *here*, looking at un-simplified alternatives, because we -have to check that r doesn't mention the variables bound by the -pattern in each alternative, so the binder-info is rather useful. +If we inline h into f, the default case of the inlined h can't happen. +If we don't notice this, we may end up filtering out *all* the cases +of the inner case y, which give us nowhere to go! \begin{code} -prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt]) -prepareAlts env scrut case_bndr' alts - = do { dflags <- getDOptsSmpl - ; alts <- combineIdenticalAlts case_bndr' alts - - ; let (alts_wo_default, maybe_deflt) = findDefault alts +prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt]) +prepareAlts scrut case_bndr' alts + = do { let (alts_wo_default, maybe_deflt) = findDefault alts alt_cons = [con | (con,_,_) <- alts_wo_default] imposs_deflt_cons = nub (imposs_cons ++ alt_cons) -- "imposs_deflt_cons" are handled -- EITHER by the context, -- OR by a non-DEFAULT branch in this case expression. - ; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app + ; default_alts <- prepareDefault case_bndr' mb_tc_app imposs_deflt_cons maybe_deflt ; let trimmed_alts = filterOut impossible_alt alts_wo_default - merged_alts = mergeAlts trimmed_alts default_alts + merged_alts = mergeAlts trimmed_alts default_alts -- We need the mergeAlts in case the new default_alt -- has turned into a constructor alternative. -- The merge keeps the inner DEFAULT at the front, if there is one @@ -1289,37 +1408,15 @@ prepareAlts env scrut case_bndr' alts imposs_cons = case scrut of Var v -> otherCons (idUnfolding v) - other -> [] + _ -> [] impossible_alt :: CoreAlt -> Bool impossible_alt (con, _, _) | con `elem` imposs_cons = True impossible_alt (DataAlt con, _, _) = dataConCannotMatch inst_tys con - impossible_alt alt = False - - --------------------------------------------------- --- 1. Merge identical branches --------------------------------------------------- -combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt] - -combineIdenticalAlts case_bndr alts@((con1,bndrs1,rhs1) : con_alts) - | all isDeadBinder bndrs1, -- Remember the default - length filtered_alts < length con_alts -- alternative comes first - -- Also Note [Dead binders] - = do { tick (AltMerge case_bndr) - ; return ((DEFAULT, [], rhs1) : filtered_alts) } - where - filtered_alts = filter keep con_alts - keep (con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1) + impossible_alt _ = False -combineIdenticalAlts case_bndr alts = return alts -------------------------------------------------------------------------- --- Prepare the default alternative -------------------------------------------------------------------------- -prepareDefault :: DynFlags - -> SimplEnv - -> OutId -- Case binder; need just for its type. Note that as an +prepareDefault :: OutId -- Case binder; need just for its type. Note that as an -- OutId, it has maximum information; this is important. -- Test simpl013 is an example -> Maybe (TyCon, [Type]) -- Type of scrutinee, decomposed @@ -1327,42 +1424,9 @@ prepareDefault :: DynFlags -> Maybe InExpr -- Rhs -> SimplM [InAlt] -- Still unsimplified -- We use a list because it's what mergeAlts expects, - -- And becuase case-merging can cause many to show up - -------- Merge nested cases ---------- -prepareDefault dflags env outer_bndr bndr_ty imposs_cons (Just deflt_rhs) - | dopt Opt_CaseMerge dflags - , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs - , DoneId inner_scrut_var' <- substId env inner_scrut_var - -- Remember, inner_scrut_var is an InId, but outer_bndr is an OutId - , inner_scrut_var' == outer_bndr - -- NB: the substId means that if the outer scrutinee was a - -- variable, and inner scrutinee is the same variable, - -- then inner_scrut_var' will be outer_bndr - -- via the magic of simplCaseBinder - = do { tick (CaseMerge outer_bndr) - - ; let munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs - ; return [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts, - not (con `elem` imposs_cons) ] - -- NB: filter out any imposs_cons. Example: - -- case x of - -- A -> e1 - -- DEFAULT -> case x of - -- A -> e2 - -- B -> e3 - -- When we merge, we must ensure that e1 takes - -- precedence over e2 as the value for A! - } - -- Warning: don't call prepareAlts recursively! - -- Firstly, there's no point, because inner alts have already had - -- mkCase applied to them, so they won't have a case in their default - -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr - -- in munge_rhs may put a case into the DEFAULT branch! - --------- Fill in known constructor ----------- -prepareDefault dflags env case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs) +prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs) | -- This branch handles the case where we are -- scrutinisng an algebraic data type isAlgTyCon tycon -- It's a data type, tuple, or unboxed tuples. @@ -1381,7 +1445,7 @@ prepareDefault dflags env case_bndr (Just (tycon, inst_tys)) imposs_cons (Just d -- which would be quite legitmate. But it's a really obscure corner, and -- not worth wasting code on. , let imposs_data_cons = [con | DataAlt con <- imposs_cons] -- We now know it's a data type - impossible con = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con + impossible con = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con = case filterOut impossible all_cons of [] -> return [] -- Eliminate the default alternative -- altogether if it can't match @@ -1393,25 +1457,51 @@ prepareDefault dflags env case_bndr (Just (tycon, inst_tys)) imposs_cons (Just d dataConRepInstPat us con inst_tys ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] } - two_or_more -> return [(DEFAULT, [], deflt_rhs)] + _ -> return [(DEFAULT, [], deflt_rhs)] + + | debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon) + -- Check for no data constructors + -- This can legitimately happen for type families, so don't report that + = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon) + $ return [(DEFAULT, [], deflt_rhs)] --------- Catch-all cases ----------- -prepareDefault dflags env case_bndr bndr_ty imposs_cons (Just deflt_rhs) +prepareDefault _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs) = return [(DEFAULT, [], deflt_rhs)] -prepareDefault dflags env case_bndr bndr_ty imposs_cons Nothing +prepareDefault _case_bndr _bndr_ty _imposs_cons Nothing = return [] -- No default branch \end{code} -================================================================================= +%************************************************************************ +%* * + mkCase +%* * +%************************************************************************ mkCase tries these things -1. Eliminate the case altogether if possible +1. Merge Nested Cases + + case e of b { ==> case e of b { + p1 -> rhs1 p1 -> rhs1 + ... ... + pm -> rhsm pm -> rhsm + _ -> case b of b' { pn -> let b'=b in rhsn + pn -> rhsn ... + ... po -> let b'=b in rhso + po -> rhso _ -> let b'=b in rhsd + _ -> rhsd + } + + which merges two cases in one case when -- the default alternative of + the outer case scrutises the same variable as the outer case. This + transformation is called Case Merging. It avoids that the same + variable is scrutinised multiple times. -2. Case-identity: +2. Eliminate Identity Case case e of ===> e True -> True; @@ -1419,34 +1509,99 @@ mkCase tries these things and similar friends. +3. Merge identical alternatives. + If several alternatives are identical, merge them into + a single DEFAULT alternative. I've occasionally seen this + making a big difference: + + case e of =====> case e of + C _ -> f x D v -> ....v.... + D v -> ....v.... DEFAULT -> f x + DEFAULT -> f x + + The point is that we merge common RHSs, at least for the DEFAULT case. + [One could do something more elaborate but I've never seen it needed.] + To avoid an expensive test, we just merge branches equal to the *first* + alternative; this picks up the common cases + a) all branches equal + b) some branches equal to the DEFAULT (which occurs first) + +The case where Merge Identical Alternatives transformation showed up +was like this (base/Foreign/C/Err/Error.lhs): + + x | p `is` 1 -> e1 + | p `is` 2 -> e2 + ...etc... + +where @is@ was something like + + p `is` n = p /= (-1) && p == n + +This gave rise to a horrible sequence of cases + + case p of + (-1) -> $j p + 1 -> e1 + DEFAULT -> $j p + +and similarly in cascade for all the join points! + \begin{code} -mkCase :: OutExpr -> OutId -> OutType - -> [OutAlt] -- Increasing order - -> SimplM OutExpr +mkCase, mkCase1, mkCase2 + :: DynFlags + -> OutExpr -> OutId + -> [OutAlt] -- Alternatives in standard (increasing) order + -> SimplM OutExpr -------------------------------------------------- --- 1. Check for empty alternatives +-- 1. Merge Nested Cases -------------------------------------------------- --- This isn't strictly an error. It's possible that the simplifer might "see" --- that an inner case has no accessible alternatives before it "sees" that the --- entire branch of an outer case is inaccessible. So we simply --- put an error case here insteadd -mkCase scrut case_bndr ty [] - = pprTrace "mkCase: null alts" (ppr case_bndr <+> ppr scrut) $ - return (mkApps (Var rUNTIME_ERROR_ID) - [Type ty, Lit (mkStringLit "Impossible alternative")]) +mkCase dflags scrut outer_bndr ((DEFAULT, _, deflt_rhs) : outer_alts) + | dopt Opt_CaseMerge dflags + , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs + , inner_scrut_var == outer_bndr + = do { tick (CaseMerge outer_bndr) + + ; let wrap_alt (con, args, rhs) = ASSERT( outer_bndr `notElem` args ) + (con, args, wrap_rhs rhs) + -- Simplifier's no-shadowing invariant should ensure + -- that outer_bndr is not shadowed by the inner patterns + wrap_rhs rhs = Let (NonRec inner_bndr (Var outer_bndr)) rhs + -- The let is OK even for unboxed binders, + + wrapped_alts | isDeadBinder inner_bndr = inner_alts + | otherwise = map wrap_alt inner_alts + merged_alts = mergeAlts outer_alts wrapped_alts + -- NB: mergeAlts gives priority to the left + -- case x of + -- A -> e1 + -- DEFAULT -> case x of + -- A -> e2 + -- B -> e3 + -- When we merge, we must ensure that e1 takes + -- precedence over e2 as the value for A! + + ; mkCase1 dflags scrut outer_bndr merged_alts + } + -- Warning: don't call mkCase recursively! + -- Firstly, there's no point, because inner alts have already had + -- mkCase applied to them, so they won't have a case in their default + -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr + -- in munge_rhs may put a case into the DEFAULT branch! + +mkCase dflags scrut bndr alts = mkCase1 dflags scrut bndr alts -------------------------------------------------- --- 2. Identity case +-- 2. Eliminate Identity Case -------------------------------------------------- -mkCase scrut case_bndr ty alts -- Identity case +mkCase1 _dflags scrut case_bndr alts -- Identity case | all identity_alt alts - = do tick (CaseIdentity case_bndr) - return (re_cast scrut) + = do { tick (CaseIdentity case_bndr) + ; return (re_cast scrut) } where identity_alt (con, args, rhs) = check_eq con args (de_cast rhs) @@ -1454,7 +1609,7 @@ mkCase scrut case_bndr ty alts -- Identity case check_eq (LitAlt lit') _ (Lit lit) = lit == lit' check_eq (DataAlt con) args rhs = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args) || rhs `cheapEqExpr` Var case_bndr - check_eq con args rhs = False + check_eq _ _ _ = False arg_tys = map Type (tyConAppArgs (idType case_bndr)) @@ -1472,23 +1627,95 @@ mkCase scrut case_bndr ty alts -- Identity case re_cast scrut = case head alts of (_,_,Cast _ co) -> Cast scrut co - other -> scrut + _ -> scrut +-------------------------------------------------- +-- 3. Merge Identical Alternatives +-------------------------------------------------- +mkCase1 dflags scrut case_bndr ((_con1,bndrs1,rhs1) : con_alts) + | all isDeadBinder bndrs1 -- Remember the default + , length filtered_alts < length con_alts -- alternative comes first + -- Also Note [Dead binders] + = do { tick (AltMerge case_bndr) + ; mkCase2 dflags scrut case_bndr alts' } + where + alts' = (DEFAULT, [], rhs1) : filtered_alts + filtered_alts = filter keep con_alts + keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1) +mkCase1 dflags scrut bndr alts = mkCase2 dflags scrut bndr alts -------------------------------------------------- -- Catch-all -------------------------------------------------- -mkCase scrut bndr ty alts = return (Case scrut bndr ty alts) +mkCase2 _dflags scrut bndr alts + = return (Case scrut bndr (coreAltsType alts) alts) \end{code} - -When adding auxiliary bindings for the case binder, it's worth checking if -its dead, because it often is, and occasionally these mkCase transformations -cascade rather nicely. - -\begin{code} -bindCaseBndr bndr rhs body - | isDeadBinder bndr = body - | otherwise = bindNonRec bndr rhs body -\end{code} +Note [Dead binders] +~~~~~~~~~~~~~~~~~~~~ +Note that dead-ness is maintained by the simplifier, so that it is +accurate after simplification as well as before. + + +Note [Cascading case merge] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Case merging should cascade in one sweep, because it +happens bottom-up + + case e of a { + DEFAULT -> case a of b + DEFAULT -> case b of c { + DEFAULT -> e + A -> ea + B -> eb + C -> ec +==> + case e of a { + DEFAULT -> case a of b + DEFAULT -> let c = b in e + A -> let c = b in ea + B -> eb + C -> ec +==> + case e of a { + DEFAULT -> let b = a in let c = b in e + A -> let b = a in let c = b in ea + B -> let b = a in eb + C -> ec + + +However here's a tricky case that we still don't catch, and I don't +see how to catch it in one pass: + + case x of c1 { I# a1 -> + case a1 of c2 -> + 0 -> ... + DEFAULT -> case x of c3 { I# a2 -> + case a2 of ... + +After occurrence analysis (and its binder-swap) we get this + + case x of c1 { I# a1 -> + let x = c1 in -- Binder-swap addition + case a1 of c2 -> + 0 -> ... + DEFAULT -> case x of c3 { I# a2 -> + case a2 of ... + +When we simplify the inner case x, we'll see that +x=c1=I# a1. So we'll bind a2 to a1, and get + + case x of c1 { I# a1 -> + case a1 of c2 -> + 0 -> ... + DEFAULT -> case a1 of ... + +This is corect, but we can't do a case merge in this sweep +because c2 /= a1. Reason: the binding c1=I# a1 went inwards +without getting changed to c1=I# c2. + +I don't think this is worth fixing, even if I knew how. It'll +all come out in the next pass anyway. + + \ No newline at end of file