X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2FcoreSyn%2FCoreUnfold.lhs;h=f32d5b1482c386aaf21d6979c22b8b7ad99e0962;hp=b7086398152d1a8c23f8a2fe6843a39816dcc97d;hb=72462499b891d5779c19f3bda03f96e24f9554ae;hpb=7fc749a43b4b6b85d234fa95d4928648259584f4 diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index b708639..f32d5b1 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -15,26 +15,23 @@ literal''). In the corner of a @CoreUnfolding@ unfolding, you will find, unsurprisingly, a Core expression. \begin{code} -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - module CoreUnfold ( Unfolding, UnfoldingGuidance, -- Abstract types - noUnfolding, mkTopUnfolding, mkUnfolding, mkCompulsoryUnfolding, seqUnfolding, - evaldUnfolding, mkOtherCon, otherCons, - unfoldingTemplate, maybeUnfoldingTemplate, - isEvaldUnfolding, isValueUnfolding, isCheapUnfolding, isCompulsoryUnfolding, - hasUnfolding, hasSomeUnfolding, neverUnfold, + noUnfolding, mkImplicitUnfolding, + mkTopUnfolding, mkUnfolding, mkCoreUnfolding, + mkInlineRule, mkWwInlineRule, + mkCompulsoryUnfolding, mkDFunUnfolding, + + interestingArg, ArgSummary(..), couldBeSmallEnoughToInline, certainlyWillInline, smallEnoughToInline, - callSiteInline + callSiteInline, CallCtxt(..), + + exprIsConApp_maybe + ) where #include "HsVersions.h" @@ -44,19 +41,25 @@ import DynFlags import CoreSyn import PprCore () -- Instances import OccurAnal +import CoreSubst hiding( substTy ) import CoreUtils import Id import DataCon +import TyCon import Literal import PrimOp import IdInfo -import Type +import BasicTypes ( Arity ) +import TcType ( tcSplitDFunTy ) +import Type +import Coercion import PrelNames import Bag +import Util import FastTypes +import FastString import Outputable -import GHC.Exts ( Int# ) \end{code} @@ -67,19 +70,39 @@ import GHC.Exts ( Int# ) %************************************************************************ \begin{code} +mkTopUnfolding :: CoreExpr -> Unfolding mkTopUnfolding expr = mkUnfolding True {- Top level -} expr -mkUnfolding top_lvl expr - = CoreUnfolding (occurAnalyseExpr expr) - top_lvl - - (exprIsHNF expr) - -- Already evaluated +mkImplicitUnfolding :: CoreExpr -> Unfolding +-- For implicit Ids, do a tiny bit of optimising first +mkImplicitUnfolding expr = mkTopUnfolding (simpleOptExpr expr) - (exprIsCheap expr) - -- OK to inline inside a lambda +mkWwInlineRule :: Id -> CoreExpr -> Arity -> Unfolding +mkWwInlineRule id = mkInlineRule (InlWrapper id) - (calcUnfoldingGuidance opt_UF_CreationThreshold expr) +mkInlineRule :: InlineRuleInfo -> CoreExpr -> Arity -> Unfolding +mkInlineRule inl_info expr arity + = mkCoreUnfolding True -- Note [Top-level flag on inline rules] + expr' arity + (InlineRule { ug_ir_info = inl_info, ug_small = small }) + where + expr' = simpleOptExpr expr + small = case calcUnfoldingGuidance (arity+1) expr' of + (arity_e, UnfoldIfGoodArgs { ug_size = size_e }) + -> uncondInline arity_e size_e + _other {- actually UnfoldNever -} -> False + +-- Note [Top-level flag on inline rules] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Slight hack: note that mk_inline_rules conservatively sets the +-- top-level flag to True. It gets set more accurately by the simplifier +-- Simplify.simplUnfolding. + +mkUnfolding :: Bool -> CoreExpr -> Unfolding +mkUnfolding top_lvl expr + = mkCoreUnfolding top_lvl expr arity guidance + where + (arity, guidance) = calcUnfoldingGuidance opt_UF_CreationThreshold expr -- Sometimes during simplification, there's a large let-bound thing -- which has been substituted, and so is now dead; so 'expr' contains -- two copies of the thing while the occurrence-analysed expression doesn't @@ -89,16 +112,23 @@ mkUnfolding top_lvl expr -- This can occasionally mean that the guidance is very pessimistic; -- it gets fixed up next round -instance Outputable Unfolding where - ppr NoUnfolding = ptext SLIT("No unfolding") - ppr (OtherCon cs) = ptext SLIT("OtherCon") <+> ppr cs - ppr (CompulsoryUnfolding e) = ptext SLIT("Compulsory") <+> ppr e - ppr (CoreUnfolding e top hnf cheap g) - = ptext SLIT("Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr g, - ppr e] - +mkCoreUnfolding :: Bool -> CoreExpr -> Arity -> UnfoldingGuidance -> Unfolding +-- Occurrence-analyses the expression before capturing it +mkCoreUnfolding top_lvl expr arity guidance + = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr, + uf_arity = arity, + uf_is_top = top_lvl, + uf_is_value = exprIsHNF expr, + uf_is_cheap = exprIsCheap expr, + uf_expandable = exprIsExpandable expr, + uf_guidance = guidance } + +mkDFunUnfolding :: DataCon -> [Id] -> Unfolding +mkDFunUnfolding con ops = DFunUnfolding con (map Var ops) + +mkCompulsoryUnfolding :: CoreExpr -> Unfolding mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded - = CompulsoryUnfolding (occurAnalyseExpr expr) + = mkCoreUnfolding True expr 0 UnfoldAlways -- Arity of unfolding doesn't matter \end{code} @@ -109,106 +139,104 @@ mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded %************************************************************************ \begin{code} -instance Outputable UnfoldingGuidance where - ppr UnfoldNever = ptext SLIT("NEVER") - ppr (UnfoldIfGoodArgs v cs size discount) - = hsep [ ptext SLIT("IF_ARGS"), int v, - brackets (hsep (map int cs)), - int size, - int discount ] -\end{code} - - -\begin{code} calcUnfoldingGuidance :: Int -- bomb out if size gets bigger than this -> CoreExpr -- expression to look at - -> UnfoldingGuidance + -> (Arity, UnfoldingGuidance) calcUnfoldingGuidance bOMB_OUT_SIZE expr - = case collect_val_bndrs expr of { (inline, val_binders, body) -> + = case collectBinders expr of { (binders, body) -> let + val_binders = filter isId binders n_val_binders = length val_binders - - max_inline_size = n_val_binders+2 - -- The idea is that if there is an INLINE pragma (inline is True) - -- and there's a big body, we give a size of n_val_binders+2. This - -- This is just enough to fail the no-size-increase test in callSiteInline, - -- so that INLINE things don't get inlined into entirely boring contexts, - -- but no more. - in case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of - - TooBig - | not inline -> UnfoldNever - -- A big function with an INLINE pragma must - -- have an UnfoldIfGoodArgs guidance - | otherwise -> UnfoldIfGoodArgs n_val_binders - (map (const 0) val_binders) - max_inline_size 0 - + TooBig -> (n_val_binders, UnfoldNever) SizeIs size cased_args scrut_discount - -> UnfoldIfGoodArgs - n_val_binders - (map discount_for val_binders) - final_size - (iBox scrut_discount) + -> (n_val_binders, UnfoldIfGoodArgs { ug_args = map discount_for val_binders + , ug_size = iBox size + , ug_res = iBox scrut_discount }) where - boxed_size = iBox size + discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc) + 0 cased_args + } +\end{code} - final_size | inline = boxed_size `min` max_inline_size - | otherwise = boxed_size +Note [Computing the size of an expression] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The basic idea of sizeExpr is obvious enough: count nodes. But getting the +heuristics right has taken a long time. Here's the basic strategy: - -- Sometimes an INLINE thing is smaller than n_val_binders+2. - -- A particular case in point is a constructor, which has size 1. - -- We want to inline this regardless, hence the `min` + * Variables, literals: 0 + (Exception for string literals, see litSize.) - discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc) - 0 cased_args - } - where - collect_val_bndrs e = go False [] e - -- We need to be a bit careful about how we collect the - -- value binders. In ptic, if we see - -- __inline_me (\x y -> e) - -- We want to say "2 value binders". Why? So that - -- we take account of information given for the arguments - - go inline rev_vbs (Note InlineMe e) = go True rev_vbs e - go inline rev_vbs (Lam b e) | isId b = go inline (b:rev_vbs) e - | otherwise = go inline rev_vbs e - go inline rev_vbs e = (inline, reverse rev_vbs, e) + * Function applications (f e1 .. en): 1 + #value args + + * Constructor applications: 1, regardless of #args + + * Let(rec): 1 + size of components + + * Note, cast: 0 + +Examples + + Size Term + -------------- + 0 42# + 0 x + 2 f x + 1 Just x + 4 f (g x) + +Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's +a function call to account for. Notice also that constructor applications +are very cheap, because exposing them to a caller is so valuable. + +Note [Unconditional inlining] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We inline *unconditionally* if inlined thing is smaller (using sizeExpr) +than the thing it's replacing. Notice that + (f x) --> (g 3) -- YES, unconditionally + (f x) --> x : [] -- YES, *even though* there are two + -- arguments to the cons + x --> g 3 -- NO + x --> Just v -- NO + +It's very important not to unconditionally replace a variable by +a non-atomic term. + +\begin{code} +uncondInline :: Arity -> Int -> Bool +-- Inline unconditionally if there no size increase +-- Size of call is arity (+1 for the function) +-- See Note [Unconditional inlining] +uncondInline arity size + | arity == 0 = size == 0 + | otherwise = size <= arity + 1 \end{code} + \begin{code} -sizeExpr :: Int# -- Bomb out if it gets bigger than this +sizeExpr :: FastInt -- Bomb out if it gets bigger than this -> [Id] -- Arguments; we're interested in which of these -- get case'd -> CoreExpr -> ExprSize +-- Note [Computing the size of an expression] + sizeExpr bOMB_OUT_SIZE top_args expr = size_up expr where - size_up (Type t) = sizeZero -- Types cost nothing - size_up (Var v) = sizeOne - - size_up (Note InlineMe body) = sizeOne -- Inline notes make it look very small - -- This can be important. If you have an instance decl like this: - -- instance Foo a => Foo [a] where - -- {-# INLINE op1, op2 #-} - -- op1 = ... - -- op2 = ... - -- then we'll get a dfun which is a pair of two INLINE lambdas - - size_up (Note _ body) = size_up body -- Other notes cost nothing - - size_up (Cast e _) = size_up e - - size_up (App fun (Type t)) = size_up fun + size_up (Cast e _) = size_up e + size_up (Note _ e) = size_up e + size_up (Type _) = sizeZero -- Types cost nothing + size_up (Lit lit) = sizeN (litSize lit) + size_up (Var f) = size_up_call f [] -- Make sure we get constructor + -- discounts even on nullary constructors + + size_up (App fun (Type _)) = size_up fun size_up (App fun arg) = size_up_app fun [arg] - - size_up (Lit lit) = sizeN (litSize lit) + `addSize` nukeScrutDiscount (size_up arg) size_up (Lam b e) | isId b = lamScrutDiscount (size_up e `addSizeN` 1) | otherwise = size_up e @@ -229,54 +257,32 @@ sizeExpr bOMB_OUT_SIZE top_args expr size_up (Case (Var v) _ _ alts) | v `elem` top_args -- We are scrutinising an argument variable - = -{- I'm nuking this special case; BUT see the comment with case alternatives. - - (a) It's too eager. We don't want to inline a wrapper into a - context with no benefit. - E.g. \ x. f (x+x) no point in inlining (+) here! - - (b) It's ineffective. Once g's wrapper is inlined, its case-expressions - aren't scrutinising arguments any more - - case alts of - - [alt] -> size_up_alt alt `addSize` SizeIs 0# (unitBag (v, 1)) 0# - -- We want to make wrapper-style evaluation look cheap, so that - -- when we inline a wrapper it doesn't make call site (much) bigger - -- Otherwise we get nasty phase ordering stuff: - -- f x = g x x - -- h y = ...(f e)... - -- If we inline g's wrapper, f looks big, and doesn't get inlined - -- into h; if we inline f first, while it looks small, then g's - -- wrapper will get inlined later anyway. To avoid this nasty - -- ordering difference, we make (case a of (x,y) -> ...), - -- *where a is one of the arguments* look free. - - other -> --} - alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the scrutinee - (foldr1 maxSize alt_sizes) - + = alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the case itself + (foldr1 maxSize alt_sizes) -- Good to inline if an arg is scrutinised, because -- that may eliminate allocation in the caller -- And it eliminates the case itself - where alt_sizes = map size_up_alt alts -- alts_size tries to compute a good discount for -- the case when we are scrutinising an argument variable - alts_size (SizeIs tot tot_disc tot_scrut) -- Size of all alternatives - (SizeIs max max_disc max_scrut) -- Size of biggest alternative - = SizeIs tot (unitBag (v, iBox (_ILIT 1 +# tot -# max)) `unionBags` max_disc) max_scrut + alts_size (SizeIs tot tot_disc _tot_scrut) -- Size of all alternatives + (SizeIs max _max_disc max_scrut) -- Size of biggest alternative + = SizeIs tot (unitBag (v, iBox (_ILIT(1) +# tot -# max)) `unionBags` tot_disc) max_scrut -- If the variable is known, we produce a discount that - -- will take us back to 'max', the size of rh largest alternative + -- will take us back to 'max', the size of the largest alternative -- The 1+ is a little discount for reduced allocation in the caller + -- + -- Notice though, that we return tot_disc, the total discount from + -- all branches. I think that's right. + alts_size tot_size _ = tot_size - size_up (Case e _ _ alts) = nukeScrutDiscount (size_up e) `addSize` - foldr (addSize . size_up_alt) sizeZero alts + size_up (Case e _ _ alts) = foldr (addSize . size_up_alt) + (nukeScrutDiscount (size_up e)) + alts + `addSizeN` 1 -- Add 1 for the case itself -- We don't charge for the case itself -- It's a strict thing, and the price of the call -- is paid by scrut. Also consider @@ -284,63 +290,32 @@ sizeExpr bOMB_OUT_SIZE top_args expr -- This is just ';'! Don't charge for it. ------------ - size_up_app (App fun arg) args - | isTypeArg arg = size_up_app fun args - | otherwise = size_up_app fun (arg:args) - size_up_app fun args = foldr (addSize . nukeScrutDiscount . size_up) - (size_up_fun fun args) - args - - -- A function application with at least one value argument - -- so if the function is an argument give it an arg-discount - -- - -- Also behave specially if the function is a build - -- - -- Also if the function is a constant Id (constr or primop) - -- compute discounts specially - size_up_fun (Var fun) args - | fun `hasKey` buildIdKey = buildSize - | fun `hasKey` augmentIdKey = augmentSize - | otherwise - = case globalIdDetails fun of - DataConWorkId dc -> conSizeN dc (valArgCount args) - - FCallId fc -> sizeN opt_UF_DearOp - PrimOpId op -> primOpSize op (valArgCount args) - -- foldr addSize (primOpSize op) (map arg_discount args) - -- At one time I tried giving an arg-discount if a primop - -- is applied to one of the function's arguments, but it's - -- not good. At the moment, any unlifted-type arg gets a - -- 'True' for 'yes I'm evald', so we collect the discount even - -- if we know nothing about it. And just having it in a primop - -- doesn't help at all if we don't know something more. - - other -> fun_discount fun `addSizeN` - (1 + length (filter (not . exprIsTrivial) args)) - -- The 1+ is for the function itself - -- Add 1 for each non-trivial arg; - -- the allocation cost, as in let(rec) - -- Slight hack here: for constructors the args are almost always - -- trivial; and for primops they are almost always prim typed - -- We should really only count for non-prim-typed args in the - -- general case, but that seems too much like hard work - - size_up_fun other args = size_up other + -- size_up_app is used when there's ONE OR MORE value args + size_up_app (App fun arg) args + | isTypeArg arg = size_up_app fun args + | otherwise = size_up_app fun (arg:args) + `addSize` nukeScrutDiscount (size_up arg) + size_up_app (Var fun) args = size_up_call fun args + size_up_app other args = size_up other `addSizeN` length args + + ------------ + size_up_call :: Id -> [CoreExpr] -> ExprSize + size_up_call fun val_args + = case idDetails fun of + FCallId _ -> sizeN opt_UF_DearOp + DataConWorkId dc -> conSize dc (length val_args) + PrimOpId op -> primOpSize op (length val_args) + ClassOpId _ -> classOpSize top_args val_args + _ -> funSize top_args fun (length val_args) ------------ - size_up_alt (con, bndrs, rhs) = size_up rhs + size_up_alt (_con, _bndrs, rhs) = size_up rhs -- Don't charge for args, so that wrappers look cheap -- (See comments about wrappers with Case) ------------ - -- We want to record if we're case'ing, or applying, an argument - fun_discount v | v `elem` top_args = SizeIs 0# (unitBag (v, opt_UF_FunAppDiscount)) 0# - fun_discount other = sizeZero - - ------------ -- These addSize things have to be here because -- I don't want to give them bOMB_OUT_SIZE as an argument - addSizeN TooBig _ = TooBig addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d @@ -350,48 +325,82 @@ sizeExpr bOMB_OUT_SIZE top_args expr = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2) \end{code} -Code for manipulating sizes - \begin{code} -data ExprSize = TooBig - | SizeIs FastInt -- Size found - (Bag (Id,Int)) -- Arguments cased herein, and discount for each such - FastInt -- Size to subtract if result is scrutinised - -- by a case expression - --- subtract the discount before deciding whether to bale out. eg. we --- want to inline a large constructor application into a selector: --- tup = (a_1, ..., a_99) --- x = case tup of ... --- -mkSizeIs max n xs d | (n -# d) ># max = TooBig - | otherwise = SizeIs n xs d - -maxSize TooBig _ = TooBig -maxSize _ TooBig = TooBig -maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1 - | otherwise = s2 - -sizeZero = SizeIs (_ILIT 0) emptyBag (_ILIT 0) -sizeOne = SizeIs (_ILIT 1) emptyBag (_ILIT 0) -sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT 0) -conSizeN dc n - | isUnboxedTupleCon dc = SizeIs (_ILIT 0) emptyBag (iUnbox n +# _ILIT 1) - | otherwise = SizeIs (_ILIT 1) emptyBag (iUnbox n +# _ILIT 1) - -- Treat constructors as size 1; we are keen to expose them +-- | Finds a nominal size of a string literal. +litSize :: Literal -> Int +-- Used by CoreUnfold.sizeExpr +litSize (MachStr str) = 1 + ((lengthFS str + 3) `div` 4) + -- If size could be 0 then @f "x"@ might be too small + -- [Sept03: make literal strings a bit bigger to avoid fruitless + -- duplication of little strings] +litSize _other = 0 -- Must match size of nullary constructors + -- Key point: if x |-> 4, then x must inline unconditionally + -- (eg via case binding) + +classOpSize :: [Id] -> [CoreExpr] -> ExprSize +-- See Note [Conlike is interesting] +classOpSize _ [] + = sizeZero +classOpSize top_args (arg1 : other_args) + = SizeIs (iUnbox size) arg_discount (_ILIT(0)) + where + size = 2 + length other_args + -- If the class op is scrutinising a lambda bound dictionary then + -- give it a discount, to encourage the inlining of this function + -- The actual discount is rather arbitrarily chosen + arg_discount = case arg1 of + Var dict | dict `elem` top_args + -> unitBag (dict, opt_UF_DictDiscount) + _other -> emptyBag + +funSize :: [Id] -> Id -> Int -> ExprSize +-- Size for functions that are not constructors or primops +-- Note [Function applications] +funSize top_args fun n_val_args + | fun `hasKey` buildIdKey = buildSize + | fun `hasKey` augmentIdKey = augmentSize + | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount) + where + some_val_args = n_val_args > 0 + + arg_discount | some_val_args && fun `elem` top_args + = unitBag (fun, opt_UF_FunAppDiscount) + | otherwise = emptyBag + -- If the function is an argument and is applied + -- to some values, give it an arg-discount + + res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount + | otherwise = 0 + -- If the function is partially applied, show a result discount + + size | some_val_args = 1 + n_val_args + | otherwise = 0 + -- The 1+ is for the function itself + -- Add 1 for each non-trivial arg; + -- the allocation cost, as in let(rec) + + +conSize :: DataCon -> Int -> ExprSize +conSize dc n_val_args + | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) + | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1)) + | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1)) + -- Treat a constructors application as size 1, regardless of how + -- many arguments it has; we are keen to expose them -- (and we charge separately for their args). We can't treat - -- them as size zero, else we find that (iBox x) has size 1, + -- them as size zero, else we find that (Just x) has size 0, -- which is the same as a lone variable; and hence 'v' will - -- always be replaced by (iBox x), where v is bound to iBox x. + -- always be replaced by (Just x), where v is bound to Just x. -- -- However, unboxed tuples count as size zero -- I found occasions where we had -- f x y z = case op# x y z of { s -> (# s, () #) } -- and f wasn't getting inlined -primOpSize op n_args +primOpSize :: PrimOp -> Int -> ExprSize +primOpSize op n_val_args | not (primOpIsDupable op) = sizeN opt_UF_DearOp - | not (primOpOutOfLine op) = sizeN (2 - n_args) + | not (primOpOutOfLine op) = sizeN 1 -- Be very keen to inline simple primops. -- We give a discount of 1 for each arg so that (op# x y z) costs 2. -- We can't make it cost 1, else we'll inline let v = (op# x y z) @@ -401,9 +410,12 @@ primOpSize op n_args -- let x = +# p q in C {x} -- Even though x get's an occurrence of 'many', its RHS looks cheap, -- and there's a good chance it'll get inlined back into C's RHS. Urgh! - | otherwise = sizeOne -buildSize = SizeIs (-2#) emptyBag 4# + | otherwise = sizeN n_val_args + + +buildSize :: ExprSize +buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4)) -- We really want to inline applications of build -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later) -- Indeed, we should add a result_discount becuause build is @@ -411,70 +423,139 @@ buildSize = SizeIs (-2#) emptyBag 4# -- build is saturated (it usually is). The "-2" discounts for the \c n, -- The "4" is rather arbitrary. -augmentSize = SizeIs (-2#) emptyBag 4# +augmentSize :: ExprSize +augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4)) -- Ditto (augment t (\cn -> e) ys) should cost only the cost of -- e plus ys. The -2 accounts for the \cn - -nukeScrutDiscount (SizeIs n vs d) = SizeIs n vs 0# -nukeScrutDiscount TooBig = TooBig + +nukeScrutDiscount :: ExprSize -> ExprSize +nukeScrutDiscount (SizeIs n vs _) = SizeIs n vs (_ILIT(0)) +nukeScrutDiscount TooBig = TooBig -- When we return a lambda, give a discount if it's used (applied) -lamScrutDiscount (SizeIs n vs d) = case opt_UF_FunAppDiscount of { d -> SizeIs n vs (iUnbox d) } -lamScrutDiscount TooBig = TooBig +lamScrutDiscount :: ExprSize -> ExprSize +lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount) +lamScrutDiscount TooBig = TooBig +\end{code} + +Note [Discounts and thresholds] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Constants for discounts and thesholds are defined in main/StaticFlags, +all of form opt_UF_xxxx. They are: + +opt_UF_CreationThreshold (45) + At a definition site, if the unfolding is bigger than this, we + may discard it altogether + +opt_UF_UseThreshold (6) + At a call site, if the unfolding, less discounts, is smaller than + this, then it's small enough inline + +opt_UF_KeennessFactor (1.5) + Factor by which the discounts are multiplied before + subtracting from size + +opt_UF_DictDiscount (1) + The discount for each occurrence of a dictionary argument + as an argument of a class method. Should be pretty small + else big functions may get inlined + +opt_UF_FunAppDiscount (6) + Discount for a function argument that is applied. Quite + large, because if we inline we avoid the higher-order call. + +opt_UF_DearOp (4) + The size of a foreign call or not-dupable PrimOp + + +Note [Function applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In a function application (f a b) + + - If 'f' is an argument to the function being analysed, + and there's at least one value arg, record a FunAppDiscount for f + + - If the application if a PAP (arity > 2 in this example) + record a *result* discount (because inlining + with "extra" args in the call may mean that we now + get a saturated application) + +Code for manipulating sizes + +\begin{code} +data ExprSize = TooBig + | SizeIs FastInt -- Size found + (Bag (Id,Int)) -- Arguments cased herein, and discount for each such + FastInt -- Size to subtract if result is scrutinised + -- by a case expression + +instance Outputable ExprSize where + ppr TooBig = ptext (sLit "TooBig") + ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c)) + +-- subtract the discount before deciding whether to bale out. eg. we +-- want to inline a large constructor application into a selector: +-- tup = (a_1, ..., a_99) +-- x = case tup of ... +-- +mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize +mkSizeIs max n xs d | (n -# d) ># max = TooBig + | otherwise = SizeIs n xs d + +maxSize :: ExprSize -> ExprSize -> ExprSize +maxSize TooBig _ = TooBig +maxSize _ TooBig = TooBig +maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1 + | otherwise = s2 + +sizeZero, sizeOne :: ExprSize +sizeN :: Int -> ExprSize + +sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0)) +sizeOne = SizeIs (_ILIT(1)) emptyBag (_ILIT(0)) +sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0)) \end{code} + + %************************************************************************ %* * \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding} %* * %************************************************************************ -We have very limited information about an unfolding expression: (1)~so -many type arguments and so many value arguments expected---for our -purposes here, we assume we've got those. (2)~A ``size'' or ``cost,'' -a single integer. (3)~An ``argument info'' vector. For this, what we -have at the moment is a Boolean per argument position that says, ``I -will look with great favour on an explicit constructor in this -position.'' (4)~The ``discount'' to subtract if the expression -is being scrutinised. - -Assuming we have enough type- and value arguments (if not, we give up -immediately), then we see if the ``discounted size'' is below some -(semi-arbitrary) threshold. It works like this: for every argument -position where we're looking for a constructor AND WE HAVE ONE in our -hands, we get a (again, semi-arbitrary) discount [proportion to the -number of constructors in the type being scrutinized]. - -If we're in the context of a scrutinee ( \tr{(case of A .. -> ...;.. )}) -and the expression in question will evaluate to a constructor, we use -the computed discount size *for the result only* rather than -computing the argument discounts. Since we know the result of -the expression is going to be taken apart, discounting its size -is more accurate (see @sizeExpr@ above for how this discount size -is computed). - -We use this one to avoid exporting inlinings that we ``couldn't possibly -use'' on the other side. Can be overridden w/ flaggery. -Just the same as smallEnoughToInline, except that it has no actual arguments. +We use 'couldBeSmallEnoughToInline' to avoid exporting inlinings that +we ``couldn't possibly use'' on the other side. Can be overridden w/ +flaggery. Just the same as smallEnoughToInline, except that it has no +actual arguments. \begin{code} couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool -couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold rhs of - UnfoldNever -> False - other -> True - -certainlyWillInline :: Unfolding -> Bool - -- Sees if the unfolding is pretty certain to inline -certainlyWillInline (CoreUnfolding _ _ _ is_cheap (UnfoldIfGoodArgs n_vals _ size _)) - = is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold -certainlyWillInline other - = False +couldBeSmallEnoughToInline threshold rhs + = case calcUnfoldingGuidance threshold rhs of + (_, UnfoldNever) -> False + _ -> True +---------------- smallEnoughToInline :: Unfolding -> Bool -smallEnoughToInline (CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ _ size _)) +smallEnoughToInline (CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_size = size}}) = size <= opt_UF_UseThreshold -smallEnoughToInline other +smallEnoughToInline _ + = False + +---------------- +certainlyWillInline :: Unfolding -> Bool + -- Sees if the unfolding is pretty certain to inline +certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance }) + = case guidance of + UnfoldAlways {} -> True + UnfoldNever -> False + InlineRule {} -> True + UnfoldIfGoodArgs { ug_size = size} + -> is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold + +certainlyWillInline _ = False \end{code} @@ -504,130 +585,562 @@ StrictAnal.addStrictnessInfoToTopId callSiteInline :: DynFlags -> Bool -- True <=> the Id can be inlined -> Id -- The Id - -> [Bool] -- One for each value arg; True if it is interesting - -> Bool -- True <=> continuation is interesting + -> Bool -- True if there are are no arguments at all (incl type args) + -> [ArgSummary] -- One for each value arg; True if it is interesting + -> CallCtxt -- True <=> continuation is interesting -> Maybe CoreExpr -- Unfolding, if any -callSiteInline dflags active_inline id arg_infos interesting_cont - = case idUnfolding id of { - NoUnfolding -> Nothing ; - OtherCon cs -> Nothing ; +instance Outputable ArgSummary where + ppr TrivArg = ptext (sLit "TrivArg") + ppr NonTrivArg = ptext (sLit "NonTrivArg") + ppr ValueArg = ptext (sLit "ValueArg") - CompulsoryUnfolding unf_template -> Just unf_template ; - -- CompulsoryUnfolding => there is no top-level binding - -- for these things, so we must inline it. - -- Only a couple of primop-like things have - -- compulsory unfoldings (see MkId.lhs). - -- We don't allow them to be inactive +data CallCtxt = BoringCtxt - CoreUnfolding unf_template is_top is_value is_cheap guidance -> + | ArgCtxt Bool -- We're somewhere in the RHS of function with rules + -- => be keener to inline + Int -- We *are* the argument of a function with this arg discount + -- => be keener to inline + -- INVARIANT: ArgCtxt False 0 ==> BoringCtxt + | ValAppCtxt -- We're applied to at least one value arg + -- This arises when we have ((f x |> co) y) + -- Then the (f x) has argument 'x' but in a ValAppCtxt + + | CaseCtxt -- We're the scrutinee of a case + -- that decomposes its scrutinee + +instance Outputable CallCtxt where + ppr BoringCtxt = ptext (sLit "BoringCtxt") + ppr (ArgCtxt rules disc) = ptext (sLit "ArgCtxt") <> ppr (rules,disc) + ppr CaseCtxt = ptext (sLit "CaseCtxt") + ppr ValAppCtxt = ptext (sLit "ValAppCtxt") + +callSiteInline dflags active_inline id lone_variable arg_infos cont_info + = let + n_val_args = length arg_infos + in + case idUnfolding id of { + NoUnfolding -> Nothing ; + OtherCon _ -> Nothing ; + DFunUnfolding {} -> Nothing ; -- Never unfold a DFun + CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value, + uf_is_cheap = is_cheap, uf_arity = uf_arity, uf_guidance = guidance } -> + -- uf_arity will typically be equal to (idArity id), + -- but may be less for InlineRules let result | yes_or_no = Just unf_template | otherwise = Nothing - n_val_args = length arg_infos - - yes_or_no - | not active_inline = False - | otherwise = is_cheap && consider_safe False - -- We consider even the once-in-one-branch - -- occurrences, because they won't all have been - -- caught by preInlineUnconditionally. In particular, - -- if the occurrence is once inside a lambda, and the - -- rhs is cheap but not a manifest lambda, then - -- pre-inline will not have inlined it for fear of - -- invalidating the occurrence info in the rhs. - - consider_safe once - -- consider_safe decides whether it's a good idea to - -- inline something, given that there's no - -- work-duplication issue (the caller checks that). + interesting_args = any nonTriv arg_infos + -- NB: (any nonTriv arg_infos) looks at the + -- over-saturated args too which is "wrong"; + -- but if over-saturated we inline anyway. + + -- some_benefit is used when the RHS is small enough + -- and the call has enough (or too many) value + -- arguments (ie n_val_args >= arity). But there must + -- be *something* interesting about some argument, or the + -- result context, to make it worth inlining + some_benefit = interesting_args + || n_val_args > uf_arity -- Over-saturated + || interesting_saturated_call -- Exactly saturated + + interesting_saturated_call + = case cont_info of + BoringCtxt -> not is_top && uf_arity > 0 -- Note [Nested functions] + CaseCtxt -> not (lone_variable && is_value) -- Note [Lone variables] + ArgCtxt {} -> uf_arity > 0 -- Note [Inlining in ArgCtxt] + ValAppCtxt -> True -- Note [Cast then apply] + + yes_or_no = case guidance of UnfoldNever -> False - UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount - - | enough_args && size <= (n_vals_wanted + 1) - -- Inline unconditionally if there no size increase - -- Size of call is n_vals_wanted (+1 for the function) - -> True - - | otherwise - -> some_benefit && small_enough - - where - some_benefit = or arg_infos || really_interesting_cont || - (not is_top && ({- once || -} (n_vals_wanted > 0 && enough_args))) - -- [was (once && not in_lam)] - -- If it occurs more than once, there must be - -- something interesting about some argument, or the - -- result context, to make it worth inlining - -- - -- If a function has a nested defn we also record - -- some-benefit, on the grounds that we are often able - -- to eliminate the binding, and hence the allocation, - -- for the function altogether; this is good for join - -- points. But this only makes sense for *functions*; - -- inlining a constructor doesn't help allocation - -- unless the result is scrutinised. UNLESS the - -- constructor occurs just once, albeit possibly in - -- multiple case branches. Then inlining it doesn't - -- increase allocation, but it does increase the - -- chance that the constructor won't be allocated at - -- all in the branches that don't use it. - - enough_args = n_val_args >= n_vals_wanted - really_interesting_cont | n_val_args < n_vals_wanted = False -- Too few args - | n_val_args == n_vals_wanted = interesting_cont - | otherwise = True -- Extra args - -- really_interesting_cont tells if the result of the - -- call is in an interesting context. - - small_enough = (size - discount) <= opt_UF_UseThreshold - discount = computeDiscount n_vals_wanted arg_discounts res_discount - arg_infos really_interesting_cont + + UnfoldAlways -> True + -- UnfoldAlways => there is no top-level binding for + -- these things, so we must inline it. Only a few + -- primop-like things have compulsory unfoldings (see + -- MkId.lhs). Ignore is_active because we want to + -- inline even if SimplGently is on. + + InlineRule { ug_ir_info = inl_info, ug_small = uncond_inline } + | not active_inline -> False + | n_val_args < uf_arity -> yes_unsat -- Not enough value args + | uncond_inline -> True -- Note [INLINE for small functions] + | otherwise -> some_benefit -- Saturated or over-saturated + where + -- See Note [Inlining an InlineRule] + yes_unsat = case inl_info of + InlSat -> False + _other -> interesting_args + + UnfoldIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size } + | not active_inline -> False + | not is_cheap -> False + | n_val_args < uf_arity -> interesting_args && small_enough + -- Note [Unsaturated applications] + | uncondInline uf_arity size -> True + | otherwise -> some_benefit && small_enough + + where + small_enough = (size - discount) <= opt_UF_UseThreshold + discount = computeDiscount uf_arity arg_discounts + res_discount arg_infos cont_info in if dopt Opt_D_dump_inlinings dflags then - pprTrace "Considering inlining" - (ppr id <+> vcat [text "active:" <+> ppr active_inline, - text "arg infos" <+> ppr arg_infos, - text "interesting continuation" <+> ppr interesting_cont, - text "is value:" <+> ppr is_value, - text "is cheap:" <+> ppr is_cheap, - text "guidance" <+> ppr guidance, - text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) + pprTrace ("Considering inlining: " ++ showSDoc (ppr id)) + (vcat [text "active:" <+> ppr active_inline, + text "arg infos" <+> ppr arg_infos, + text "interesting continuation" <+> ppr cont_info, + text "is value:" <+> ppr is_value, + text "is cheap:" <+> ppr is_cheap, + text "guidance" <+> ppr guidance, + text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) result else result } +\end{code} -computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Bool -> Int -computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used +Note [Unsaturated applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When a call is not saturated, we *still* inline if one of the +arguments has interesting structure. That's sometimes very important. +A good example is the Ord instance for Bool in Base: + + Rec { + $fOrdBool =GHC.Classes.D:Ord + @ Bool + ... + $cmin_ajX + + $cmin_ajX [Occ=LoopBreaker] :: Bool -> Bool -> Bool + $cmin_ajX = GHC.Classes.$dmmin @ Bool $fOrdBool + } + +But the defn of GHC.Classes.$dmmin is: + + $dmmin :: forall a. GHC.Classes.Ord a => a -> a -> a + {- Arity: 3, HasNoCafRefs, Strictness: SLL, + Unfolding: (\ @ a $dOrd :: GHC.Classes.Ord a x :: a y :: a -> + case @ a GHC.Classes.<= @ a $dOrd x y of wild { + GHC.Bool.False -> y GHC.Bool.True -> x }) -} + +We *really* want to inline $dmmin, even though it has arity 3, in +order to unravel the recursion. + + +Note [INLINE for small functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider {-# INLINE f #-} + f x = Just x + g y = f y +Then f's RHS is no larger than its LHS, so we should inline it +into even the most boring context. (We do so if there is no INLINE +pragma!) That's the reason for the 'inl_small' flag on an InlineRule. + + +Note [Things to watch] +~~~~~~~~~~~~~~~~~~~~~~ +* { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... } + Assume x is exported, so not inlined unconditionally. + Then we want x to inline unconditionally; no reason for it + not to, and doing so avoids an indirection. + +* { x = I# 3; ....f x.... } + Make sure that x does not inline unconditionally! + Lest we get extra allocation. + +Note [Inlining an InlineRule] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An InlineRules is used for + (a) pogrammer INLINE pragmas + (b) inlinings from worker/wrapper + +For (a) the RHS may be large, and our contract is that we *only* inline +when the function is applied to all the arguments on the LHS of the +source-code defn. (The uf_arity in the rule.) + +However for worker/wrapper it may be worth inlining even if the +arity is not satisfied (as we do in the CoreUnfolding case) so we don't +require saturation. + + +Note [Nested functions] +~~~~~~~~~~~~~~~~~~~~~~~ +If a function has a nested defn we also record some-benefit, on the +grounds that we are often able to eliminate the binding, and hence the +allocation, for the function altogether; this is good for join points. +But this only makes sense for *functions*; inlining a constructor +doesn't help allocation unless the result is scrutinised. UNLESS the +constructor occurs just once, albeit possibly in multiple case +branches. Then inlining it doesn't increase allocation, but it does +increase the chance that the constructor won't be allocated at all in +the branches that don't use it. + +Note [Cast then apply] +~~~~~~~~~~~~~~~~~~~~~~ +Consider + myIndex = __inline_me ( (/\a. ) |> co ) + co :: (forall a. a -> a) ~ (forall a. T a) + ... /\a.\x. case ((myIndex a) |> sym co) x of { ... } ... + +We need to inline myIndex to unravel this; but the actual call (myIndex a) has +no value arguments. The ValAppCtxt gives it enough incentive to inline. + +Note [Inlining in ArgCtxt] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +The condition (arity > 0) here is very important, because otherwise +we end up inlining top-level stuff into useless places; eg + x = I# 3# + f = \y. g x +This can make a very big difference: it adds 16% to nofib 'integer' allocs, +and 20% to 'power'. + +At one stage I replaced this condition by 'True' (leading to the above +slow-down). The motivation was test eyeball/inline1.hs; but that seems +to work ok now. + +Note [Lone variables] +~~~~~~~~~~~~~~~~~~~~~ +The "lone-variable" case is important. I spent ages messing about +with unsatisfactory varaints, but this is nice. The idea is that if a +variable appears all alone + + as an arg of lazy fn, or rhs BoringCtxt + as scrutinee of a case CaseCtxt + as arg of a fn ArgCtxt +AND + it is bound to a value + +then we should not inline it (unless there is some other reason, +e.g. is is the sole occurrence). That is what is happening at +the use of 'lone_variable' in 'interesting_saturated_call'. + +Why? At least in the case-scrutinee situation, turning + let x = (a,b) in case x of y -> ... +into + let x = (a,b) in case (a,b) of y -> ... +and thence to + let x = (a,b) in let y = (a,b) in ... +is bad if the binding for x will remain. + +Another example: I discovered that strings +were getting inlined straight back into applications of 'error' +because the latter is strict. + s = "foo" + f = \x -> ...(error s)... + +Fundamentally such contexts should not encourage inlining because the +context can ``see'' the unfolding of the variable (e.g. case or a +RULE) so there's no gain. If the thing is bound to a value. + +However, watch out: + + * Consider this: + foo = _inline_ (\n. [n]) + bar = _inline_ (foo 20) + baz = \n. case bar of { (m:_) -> m + n } + Here we really want to inline 'bar' so that we can inline 'foo' + and the whole thing unravels as it should obviously do. This is + important: in the NDP project, 'bar' generates a closure data + structure rather than a list. + + So the non-inlining of lone_variables should only apply if the + unfolding is regarded as cheap; because that is when exprIsConApp_maybe + looks through the unfolding. Hence the "&& is_cheap" in the + InlineRule branch. + + * Even a type application or coercion isn't a lone variable. + Consider + case $fMonadST @ RealWorld of { :DMonad a b c -> c } + We had better inline that sucker! The case won't see through it. + + For now, I'm treating treating a variable applied to types + in a *lazy* context "lone". The motivating example was + f = /\a. \x. BIG + g = /\a. \y. h (f a) + There's no advantage in inlining f here, and perhaps + a significant disadvantage. Hence some_val_args in the Stop case + +\begin{code} +computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int +computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info -- We multiple the raw discounts (args_discount and result_discount) -- ty opt_UnfoldingKeenessFactor because the former have to do with -- *size* whereas the discounts imply that there's some extra -- *efficiency* to be gained (e.g. beta reductions, case reductions) -- by inlining. - -- we also discount 1 for each argument passed, because these will - -- reduce with the lambdas in the function (we count 1 for a lambda - -- in size_up). - = 1 + -- Discount of 1 because the result replaces the call - -- so we count 1 for the function itself - length (take n_vals_wanted arg_infos) + - -- Discount of 1 for each arg supplied, because the - -- result replaces the call - round (opt_UF_KeenessFactor * - fromIntegral (arg_discount + result_discount)) + = 1 -- Discount of 1 because the result replaces the call + -- so we count 1 for the function itself + + + length (take n_vals_wanted arg_infos) + -- Discount of (un-scaled) 1 for each arg supplied, + -- because the result replaces the call + + + round (opt_UF_KeenessFactor * + fromIntegral (arg_discount + res_discount')) where arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos) - mk_arg_discount discount is_evald | is_evald = discount - | otherwise = 0 + mk_arg_discount _ TrivArg = 0 + mk_arg_discount _ NonTrivArg = 1 + mk_arg_discount discount ValueArg = discount + + res_discount' = case cont_info of + BoringCtxt -> 0 + CaseCtxt -> res_discount + _other -> 4 `min` res_discount + -- res_discount can be very large when a function returns + -- construtors; but we only want to invoke that large discount + -- when there's a case continuation. + -- Otherwise we, rather arbitrarily, threshold it. Yuk. + -- But we want to aovid inlining large functions that return + -- constructors into contexts that are simply "interesting" +\end{code} + +%************************************************************************ +%* * + Interesting arguments +%* * +%************************************************************************ + +Note [Interesting arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An argument is interesting if it deserves a discount for unfoldings +with a discount in that argument position. The idea is to avoid +unfolding a function that is applied only to variables that have no +unfolding (i.e. they are probably lambda bound): f x y z There is +little point in inlining f here. + +Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But +we must look through lets, eg (let x = e in C a b), because the let will +float, exposing the value, if we inline. That makes it different to +exprIsHNF. + +Before 2009 we said it was interesting if the argument had *any* structure +at all; i.e. (hasSomeUnfolding v). But does too much inlining; see Trac #3016. + +But we don't regard (f x y) as interesting, unless f is unsaturated. +If it's saturated and f hasn't inlined, then it's probably not going +to now! + +Note [Conlike is interesting] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + f d = ...((*) d x y)... + ... f (df d')... +where df is con-like. Then we'd really like to inline so that the +rule for (*) (df d) can fire. To do this + a) we give a discount for being an argument of a class-op (eg (*) d) + b) we say that a con-like argument (eg (df d)) is interesting - -- Don't give a result discount unless there are enough args - result_discount | result_used = res_discount -- Over-applied, or case scrut - | otherwise = 0 +\begin{code} +data ArgSummary = TrivArg -- Nothing interesting + | NonTrivArg -- Arg has structure + | ValueArg -- Arg is a con-app or PAP + -- ..or con-like. Note [Conlike is interesting] + +interestingArg :: CoreExpr -> ArgSummary +-- See Note [Interesting arguments] +interestingArg e = go e 0 + where + -- n is # value args to which the expression is applied + go (Lit {}) _ = ValueArg + go (Var v) n + | isConLikeId v = ValueArg -- Experimenting with 'conlike' rather that + -- data constructors here + | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding + | n > 0 = NonTrivArg -- Saturated or unknown call + | evald_unfolding = ValueArg -- n==0; look for a value + | otherwise = TrivArg -- n==0, no useful unfolding + where + evald_unfolding = isEvaldUnfolding (idUnfolding v) + + go (Type _) _ = TrivArg + go (App fn (Type _)) n = go fn n + go (App fn _) n = go fn (n+1) + go (Note _ a) n = go a n + go (Cast e _) n = go e n + go (Lam v e) n + | isTyVar v = go e n + | n>0 = go e (n-1) + | otherwise = ValueArg + go (Let _ e) n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg } + go (Case {}) _ = NonTrivArg + +nonTriv :: ArgSummary -> Bool +nonTriv TrivArg = False +nonTriv _ = True +\end{code} + +%************************************************************************ +%* * + exprIsConApp_maybe +%* * +%************************************************************************ + +Note [exprIsConApp_maybe] +~~~~~~~~~~~~~~~~~~~~~~~~~ +exprIsConApp_maybe is a very important function. There are two principal +uses: + * case e of { .... } + * cls_op e, where cls_op is a class operation + +In both cases you want to know if e is of form (C e1..en) where C is +a data constructor. + +However e might not *look* as if + +\begin{code} +-- | Returns @Just (dc, [t1..tk], [x1..xn])@ if the argument expression is +-- a *saturated* constructor application of the form @dc t1..tk x1 .. xn@, +-- where t1..tk are the *universally-qantified* type args of 'dc' +exprIsConApp_maybe :: CoreExpr -> Maybe (DataCon, [Type], [CoreExpr]) + +exprIsConApp_maybe (Note _ expr) + = exprIsConApp_maybe expr + -- We ignore all notes. For example, + -- case _scc_ "foo" (C a b) of + -- C a b -> e + -- should be optimised away, but it will be only if we look + -- through the SCC note. + +exprIsConApp_maybe (Cast expr co) + = -- Here we do the KPush reduction rule as described in the FC paper + -- The transformation applies iff we have + -- (C e1 ... en) `cast` co + -- where co :: (T t1 .. tn) ~ to_ty + -- The left-hand one must be a T, because exprIsConApp returned True + -- but the right-hand one might not be. (Though it usually will.) + + case exprIsConApp_maybe expr of { + Nothing -> Nothing ; + Just (dc, _dc_univ_args, dc_args) -> + + let (_from_ty, to_ty) = coercionKind co + dc_tc = dataConTyCon dc + in + case splitTyConApp_maybe to_ty of { + Nothing -> Nothing ; + Just (to_tc, to_tc_arg_tys) + | dc_tc /= to_tc -> Nothing + -- These two Nothing cases are possible; we might see + -- (C x y) `cast` (g :: T a ~ S [a]), + -- where S is a type function. In fact, exprIsConApp + -- will probably not be called in such circumstances, + -- but there't nothing wrong with it + + | otherwise -> + let + tc_arity = tyConArity dc_tc + dc_univ_tyvars = dataConUnivTyVars dc + dc_ex_tyvars = dataConExTyVars dc + arg_tys = dataConRepArgTys dc + + dc_eqs :: [(Type,Type)] -- All equalities from the DataCon + dc_eqs = [(mkTyVarTy tv, ty) | (tv,ty) <- dataConEqSpec dc] ++ + [getEqPredTys eq_pred | eq_pred <- dataConEqTheta dc] + + (ex_args, rest1) = splitAtList dc_ex_tyvars dc_args + (co_args, val_args) = splitAtList dc_eqs rest1 + + -- Make the "theta" from Fig 3 of the paper + gammas = decomposeCo tc_arity co + theta = zipOpenTvSubst (dc_univ_tyvars ++ dc_ex_tyvars) + (gammas ++ stripTypeArgs ex_args) + + -- Cast the existential coercion arguments + cast_co (ty1, ty2) (Type co) + = Type $ mkSymCoercion (substTy theta ty1) + `mkTransCoercion` co + `mkTransCoercion` (substTy theta ty2) + cast_co _ other_arg = pprPanic "cast_co" (ppr other_arg) + new_co_args = zipWith cast_co dc_eqs co_args + + -- Cast the value arguments (which include dictionaries) + new_val_args = zipWith cast_arg arg_tys val_args + cast_arg arg_ty arg = mkCoerce (substTy theta arg_ty) arg + in +#ifdef DEBUG + let dump_doc = vcat [ppr dc, ppr dc_univ_tyvars, ppr dc_ex_tyvars, + ppr arg_tys, ppr dc_args, ppr _dc_univ_args, + ppr ex_args, ppr val_args] + ASSERT2( coreEqType from_ty (mkTyConApp dc_tc _dc_univ_args), dump_doc ) + ASSERT2( all isTypeArg (ex_args ++ co_args), dump_doc ) + ASSERT2( equalLength val_args arg_tys, dump_doc ) +#endif + + Just (dc, to_tc_arg_tys, ex_args ++ new_co_args ++ new_val_args) + }} + +exprIsConApp_maybe expr + = analyse expr [] + where + analyse (App fun arg) args = analyse fun (arg:args) + analyse fun@(Lam {}) args = beta fun [] args + + analyse (Var fun) args + | Just con <- isDataConWorkId_maybe fun + , is_saturated + , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars con) args + = Just (con, stripTypeArgs univ_ty_args, rest_args) + + -- Look through dictionary functions; see Note [Unfolding DFuns] + | DFunUnfolding con ops <- unfolding + , is_saturated + , let (dfun_tvs, _cls, dfun_res_tys) = tcSplitDFunTy (idType fun) + subst = zipOpenTvSubst dfun_tvs (stripTypeArgs (takeList dfun_tvs args)) + = Just (con, substTys subst dfun_res_tys, + [mkApps op args | op <- ops]) + + -- Look through unfoldings, but only cheap ones, because + -- we are effectively duplicating the unfolding + | CoreUnfolding { uf_expandable = expand_me, uf_tmpl = rhs } <- unfolding + , expand_me = -- pprTrace "expanding" (ppr fun $$ ppr rhs) $ + analyse rhs args + where + is_saturated = count isValArg args == idArity fun + unfolding = idUnfolding fun + + analyse _ _ = Nothing + + ----------- + beta (Lam v body) pairs (arg : args) + | isTypeArg arg + = beta body ((v,arg):pairs) args + + beta (Lam {}) _ _ -- Un-saturated, or not a type lambda + = Nothing + + beta fun pairs args + = case analyse (substExpr (mkOpenSubst pairs) fun) args of + Nothing -> -- pprTrace "Bale out! exprIsConApp_maybe" doc $ + Nothing + Just ans -> -- pprTrace "Woo-hoo! exprIsConApp_maybe" doc $ + Just ans + where + -- doc = vcat [ppr fun, ppr expr, ppr pairs, ppr args] + + +stripTypeArgs :: [CoreExpr] -> [Type] +stripTypeArgs args = ASSERT2( all isTypeArg args, ppr args ) + [ty | Type ty <- args] \end{code} + +Note [Unfolding DFuns] +~~~~~~~~~~~~~~~~~~~~~~ +DFuns look like + + df :: forall a b. (Eq a, Eq b) -> Eq (a,b) + df a b d_a d_b = MkEqD (a,b) ($c1 a b d_a d_b) + ($c2 a b d_a d_b) + +So to split it up we just need to apply the ops $c1, $c2 etc +to the very same args as the dfun. It takes a little more work +to compute the type arguments to the dictionary constructor. +