X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=compiler%2FcoreSyn%2FCoreUnfold.lhs;h=0c7e9e485b4a0a2381a77924c3e1a2ab9e65bf5c;hb=d436c70d43fb905c63220040168295e473f4b90a;hp=4cbe04a27178b360ba20afecc48ea3d83969c9c2;hpb=a0994660b38d62d2614bf79ba4a133905cf7b144;p=ghc-hetmet.git diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index 4cbe04a..0c7e9e4 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -18,15 +18,19 @@ find, unsurprisingly, a Core expression. module CoreUnfold ( Unfolding, UnfoldingGuidance, -- Abstract types - noUnfolding, mkImplicitUnfolding, - mkTopUnfolding, mkUnfolding, - mkInlineRule, mkWwInlineRule, - mkCompulsoryUnfolding, + noUnfolding, mkTopUnfolding, mkImplicitUnfolding, mkUnfolding, + mkCompulsoryUnfolding, seqUnfolding, + evaldUnfolding, mkOtherCon, otherCons, + unfoldingTemplate, maybeUnfoldingTemplate, + isEvaldUnfolding, isValueUnfolding, isExpandableUnfolding, isCompulsoryUnfolding, + hasUnfolding, hasSomeUnfolding, neverUnfold, + + interestingArg, ArgSummary(..), couldBeSmallEnoughToInline, certainlyWillInline, smallEnoughToInline, - callSiteInline, CallCtxt(..) + callSiteInline, CallCtxt(..), ) where @@ -35,16 +39,15 @@ import DynFlags import CoreSyn import PprCore () -- Instances import OccurAnal -import CoreSubst +import CoreSubst ( Subst, emptySubst, substTy, extendIdSubst, extendTvSubst + , lookupIdSubst, substBndr, substBndrs, substRecBndrs ) import CoreUtils import Id import DataCon import Literal import PrimOp import IdInfo -import BasicTypes ( Arity ) import Type hiding( substTy, extendTvSubst ) -import Maybes import PrelNames import Bag import FastTypes @@ -67,37 +70,27 @@ mkTopUnfolding expr = mkUnfolding True {- Top level -} expr mkImplicitUnfolding :: CoreExpr -> Unfolding -- For implicit Ids, do a tiny bit of optimising first mkImplicitUnfolding expr - = CoreUnfolding (simpleOptExpr expr) + = CoreUnfolding (simpleOptExpr emptySubst expr) True (exprIsHNF expr) - (exprIsCheap expr) + (exprIsCheap expr) + (exprIsExpandable expr) (calcUnfoldingGuidance opt_UF_CreationThreshold expr) -mkInlineRule :: CoreExpr -> Arity -> Unfolding -mkInlineRule expr arity - = InlineRule { uf_tmpl = simpleOptExpr expr, - uf_is_top = True, -- Conservative; this gets set more - -- accuately by the simplifier (slight hack) - -- in SimplEnv.substUnfolding - uf_arity = arity, - uf_is_value = exprIsHNF expr, - uf_worker = Nothing } - -mkWwInlineRule :: CoreExpr -> Arity -> Id -> Unfolding -mkWwInlineRule expr arity wkr - = InlineRule { uf_tmpl = simpleOptExpr expr, - uf_is_top = True, -- Conservative; see mkInlineRule - uf_arity = arity, - uf_is_value = exprIsHNF expr, - uf_worker = Just wkr } - mkUnfolding :: Bool -> CoreExpr -> Unfolding mkUnfolding top_lvl expr - = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr, - uf_is_top = top_lvl, - uf_is_value = exprIsHNF expr, - uf_is_cheap = exprIsCheap expr, - uf_guidance = calcUnfoldingGuidance opt_UF_CreationThreshold expr } + = CoreUnfolding (occurAnalyseExpr expr) + top_lvl + + (exprIsHNF expr) + -- Already evaluated + + (exprIsCheap expr) + -- OK to inline inside a lambda + + (exprIsExpandable expr) + + (calcUnfoldingGuidance opt_UF_CreationThreshold expr) -- Sometimes during simplification, there's a large let-bound thing -- which has been substituted, and so is now dead; so 'expr' contains -- two copies of the thing while the occurrence-analysed expression doesn't @@ -107,6 +100,14 @@ mkUnfolding top_lvl expr -- This can occasionally mean that the guidance is very pessimistic; -- it gets fixed up next round +instance Outputable Unfolding where + ppr NoUnfolding = ptext (sLit "No unfolding") + ppr (OtherCon cs) = ptext (sLit "OtherCon") <+> ppr cs + ppr (CompulsoryUnfolding e) = ptext (sLit "Compulsory") <+> ppr e + ppr (CoreUnfolding e top hnf cheap expable g) + = ptext (sLit "Unf") <+> sep [ppr top <+> ppr hnf <+> ppr cheap <+> ppr expable <+> ppr g, + ppr e] + mkCompulsoryUnfolding :: CoreExpr -> Unfolding mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded = CompulsoryUnfolding (occurAnalyseExpr expr) @@ -120,29 +121,121 @@ mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded %************************************************************************ \begin{code} +instance Outputable UnfoldingGuidance where + ppr UnfoldNever = ptext (sLit "NEVER") + ppr (UnfoldIfGoodArgs v cs size discount) + = hsep [ ptext (sLit "IF_ARGS"), int v, + brackets (hsep (map int cs)), + int size, + int discount ] +\end{code} + + +\begin{code} calcUnfoldingGuidance :: Int -- bomb out if size gets bigger than this -> CoreExpr -- expression to look at -> UnfoldingGuidance calcUnfoldingGuidance bOMB_OUT_SIZE expr - = case collectBinders expr of { (binders, body) -> + = case collect_val_bndrs expr of { (inline, val_binders, body) -> let - val_binders = filter isId binders n_val_binders = length val_binders + + max_inline_size = n_val_binders+2 + -- The idea is that if there is an INLINE pragma (inline is True) + -- and there's a big body, we give a size of n_val_binders+2. This + -- This is just enough to fail the no-size-increase test in callSiteInline, + -- so that INLINE things don't get inlined into entirely boring contexts, + -- but no more. + in case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of - TooBig -> UnfoldNever + + TooBig + | not inline -> UnfoldNever + -- A big function with an INLINE pragma must + -- have an UnfoldIfGoodArgs guidance + | otherwise -> UnfoldIfGoodArgs n_val_binders + (map (const 0) val_binders) + max_inline_size 0 + SizeIs size cased_args scrut_discount - -> UnfoldIfGoodArgs { ug_arity = n_val_binders - , ug_args = map discount_for val_binders - , ug_size = iBox size - , ug_res = iBox scrut_discount } + -> UnfoldIfGoodArgs + n_val_binders + (map discount_for val_binders) + final_size + (iBox scrut_discount) where + boxed_size = iBox size + + final_size | inline = boxed_size `min` max_inline_size + | otherwise = boxed_size + + -- Sometimes an INLINE thing is smaller than n_val_binders+2. + -- A particular case in point is a constructor, which has size 1. + -- We want to inline this regardless, hence the `min` + discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc) 0 cased_args } + where + collect_val_bndrs e = go False [] e + -- We need to be a bit careful about how we collect the + -- value binders. In ptic, if we see + -- __inline_me (\x y -> e) + -- We want to say "2 value binders". Why? So that + -- we take account of information given for the arguments + + go _ rev_vbs (Note InlineMe e) = go True rev_vbs e + go inline rev_vbs (Lam b e) | isId b = go inline (b:rev_vbs) e + | otherwise = go inline rev_vbs e + go inline rev_vbs e = (inline, reverse rev_vbs, e) \end{code} +Note [Computing the size of an expression] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The basic idea of sizeExpr is obvious enough: count nodes. But getting the +heuristics right has taken a long time. Here's the basic strategy: + + * Variables, literals: 0 + (Exception for string literals, see litSize.) + + * Function applications (f e1 .. en): 1 + #value args + + * Constructor applications: 1, regardless of #args + + * Let(rec): 1 + size of components + + * Note, cast: 0 + +Examples + + Size Term + -------------- + 0 42# + 0 x + 2 f x + 1 Just x + 4 f (g x) + +Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's +a function call to account for. Notice also that constructor applications +are very cheap, because exposing them to a caller is so valuable. + +Thing to watch out for + +* We inline *unconditionally* if inlined thing is smaller (using sizeExpr) + than the thing it's replacing. Notice that + (f x) --> (g 3) -- YES, unconditionally + (f x) --> x : [] -- YES, *even though* there are two + -- arguments to the cons + x --> g 3 -- NO + x --> Just v -- NO + + It's very important not to unconditionally replace a variable by + a non-atomic term. + + \begin{code} sizeExpr :: FastInt -- Bomb out if it gets bigger than this -> [Id] -- Arguments; we're interested in which of these @@ -150,17 +243,29 @@ sizeExpr :: FastInt -- Bomb out if it gets bigger than this -> CoreExpr -> ExprSize +-- Note [Computing the size of an expression] + sizeExpr bOMB_OUT_SIZE top_args expr = size_up expr where - size_up (Type _) = sizeZero -- Types cost nothing - size_up (Var _) = sizeOne - size_up (Note _ body) = size_up body -- Notes cost nothing - size_up (Cast e _) = size_up e + size_up (Type _) = sizeZero -- Types cost nothing + size_up (Lit lit) = sizeN (litSize lit) + size_up (Var f) = size_up_call f 0 -- Make sure we get constructor + -- discounts even on nullary constructors + size_up (Cast e _) = size_up e + + size_up (Note InlineMe _) = sizeOne -- Inline notes make it look very small + -- This can be important. If you have an instance decl like this: + -- instance Foo a => Foo [a] where + -- {-# INLINE op1, op2 #-} + -- op1 = ... + -- op2 = ... + -- then we'll get a dfun which is a pair of two INLINE lambdas + size_up (Note _ body) = size_up body -- Other notes cost nothing + size_up (App fun (Type _)) = size_up fun size_up (App fun arg) = size_up_app fun [arg] - - size_up (Lit lit) = sizeN (litSize lit) + `addSize` nukeScrutDiscount (size_up arg) size_up (Lam b e) | isId b = lamScrutDiscount (size_up e `addSizeN` 1) | otherwise = size_up e @@ -181,54 +286,32 @@ sizeExpr bOMB_OUT_SIZE top_args expr size_up (Case (Var v) _ _ alts) | v `elem` top_args -- We are scrutinising an argument variable - = -{- I'm nuking this special case; BUT see the comment with case alternatives. - - (a) It's too eager. We don't want to inline a wrapper into a - context with no benefit. - E.g. \ x. f (x+x) no point in inlining (+) here! - - (b) It's ineffective. Once g's wrapper is inlined, its case-expressions - aren't scrutinising arguments any more - - case alts of - - [alt] -> size_up_alt alt `addSize` SizeIs (_ILIT(0)) (unitBag (v, 1)) (_ILIT(0)) - -- We want to make wrapper-style evaluation look cheap, so that - -- when we inline a wrapper it doesn't make call site (much) bigger - -- Otherwise we get nasty phase ordering stuff: - -- f x = g x x - -- h y = ...(f e)... - -- If we inline g's wrapper, f looks big, and doesn't get inlined - -- into h; if we inline f first, while it looks small, then g's - -- wrapper will get inlined later anyway. To avoid this nasty - -- ordering difference, we make (case a of (x,y) -> ...), - -- *where a is one of the arguments* look free. - - other -> --} - alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the scrutinee - (foldr1 maxSize alt_sizes) - + = alts_size (foldr addSize sizeOne alt_sizes) -- The 1 is for the case itself + (foldr1 maxSize alt_sizes) -- Good to inline if an arg is scrutinised, because -- that may eliminate allocation in the caller -- And it eliminates the case itself - where alt_sizes = map size_up_alt alts -- alts_size tries to compute a good discount for -- the case when we are scrutinising an argument variable - alts_size (SizeIs tot _tot_disc _tot_scrut) -- Size of all alternatives - (SizeIs max max_disc max_scrut) -- Size of biggest alternative - = SizeIs tot (unitBag (v, iBox (_ILIT(1) +# tot -# max)) `unionBags` max_disc) max_scrut + alts_size (SizeIs tot tot_disc _tot_scrut) -- Size of all alternatives + (SizeIs max _max_disc max_scrut) -- Size of biggest alternative + = SizeIs tot (unitBag (v, iBox (_ILIT(1) +# tot -# max)) `unionBags` tot_disc) max_scrut -- If the variable is known, we produce a discount that - -- will take us back to 'max', the size of rh largest alternative + -- will take us back to 'max', the size of the largest alternative -- The 1+ is a little discount for reduced allocation in the caller + -- + -- Notice though, that we return tot_disc, the total discount from + -- all branches. I think that's right. + alts_size tot_size _ = tot_size - size_up (Case e _ _ alts) = nukeScrutDiscount (size_up e) `addSize` - foldr (addSize . size_up_alt) sizeZero alts + size_up (Case e _ _ alts) = foldr (addSize . size_up_alt) + (nukeScrutDiscount (size_up e)) + alts + `addSizeN` 1 -- Add 1 for the case itself -- We don't charge for the case itself -- It's a strict thing, and the price of the call -- is paid by scrut. Also consider @@ -236,48 +319,22 @@ sizeExpr bOMB_OUT_SIZE top_args expr -- This is just ';'! Don't charge for it. ------------ - size_up_app (App fun arg) args - | isTypeArg arg = size_up_app fun args - | otherwise = size_up_app fun (arg:args) - size_up_app fun args = foldr (addSize . nukeScrutDiscount . size_up) - (size_up_fun fun args) - args - - -- A function application with at least one value argument - -- so if the function is an argument give it an arg-discount - -- - -- Also behave specially if the function is a build - -- - -- Also if the function is a constant Id (constr or primop) - -- compute discounts specially - size_up_fun (Var fun) args - | fun `hasKey` buildIdKey = buildSize - | fun `hasKey` augmentIdKey = augmentSize - | otherwise - = case globalIdDetails fun of - DataConWorkId dc -> conSizeN dc (valArgCount args) - - FCallId _ -> sizeN opt_UF_DearOp - PrimOpId op -> primOpSize op (valArgCount args) - -- foldr addSize (primOpSize op) (map arg_discount args) - -- At one time I tried giving an arg-discount if a primop - -- is applied to one of the function's arguments, but it's - -- not good. At the moment, any unlifted-type arg gets a - -- 'True' for 'yes I'm evald', so we collect the discount even - -- if we know nothing about it. And just having it in a primop - -- doesn't help at all if we don't know something more. - - _ -> fun_discount fun `addSizeN` - (1 + length (filter (not . exprIsTrivial) args)) - -- The 1+ is for the function itself - -- Add 1 for each non-trivial arg; - -- the allocation cost, as in let(rec) - -- Slight hack here: for constructors the args are almost always - -- trivial; and for primops they are almost always prim typed - -- We should really only count for non-prim-typed args in the - -- general case, but that seems too much like hard work - - size_up_fun other _ = size_up other + -- size_up_app is used when there's ONE OR MORE value args + size_up_app (App fun arg) args + | isTypeArg arg = size_up_app fun args + | otherwise = size_up_app fun (arg:args) + `addSize` nukeScrutDiscount (size_up arg) + size_up_app (Var fun) args = size_up_call fun (length args) + size_up_app other args = size_up other `addSizeN` length args + + ------------ + size_up_call :: Id -> Int -> ExprSize + size_up_call fun n_val_args + = case idDetails fun of + FCallId _ -> sizeN opt_UF_DearOp + DataConWorkId dc -> conSize dc n_val_args + PrimOpId op -> primOpSize op n_val_args + _ -> funSize top_args fun n_val_args ------------ size_up_alt (_con, _bndrs, rhs) = size_up rhs @@ -285,14 +342,8 @@ sizeExpr bOMB_OUT_SIZE top_args expr -- (See comments about wrappers with Case) ------------ - -- We want to record if we're case'ing, or applying, an argument - fun_discount v | v `elem` top_args = SizeIs (_ILIT(0)) (unitBag (v, opt_UF_FunAppDiscount)) (_ILIT(0)) - fun_discount _ = sizeZero - - ------------ -- These addSize things have to be here because -- I don't want to give them bOMB_OUT_SIZE as an argument - addSizeN TooBig _ = TooBig addSizeN (SizeIs n xs d) m = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d @@ -302,45 +353,56 @@ sizeExpr bOMB_OUT_SIZE top_args expr = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2) \end{code} -Code for manipulating sizes - \begin{code} -data ExprSize = TooBig - | SizeIs FastInt -- Size found - (Bag (Id,Int)) -- Arguments cased herein, and discount for each such - FastInt -- Size to subtract if result is scrutinised - -- by a case expression - --- subtract the discount before deciding whether to bale out. eg. we --- want to inline a large constructor application into a selector: --- tup = (a_1, ..., a_99) --- x = case tup of ... --- -mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize -mkSizeIs max n xs d | (n -# d) ># max = TooBig - | otherwise = SizeIs n xs d - -maxSize :: ExprSize -> ExprSize -> ExprSize -maxSize TooBig _ = TooBig -maxSize _ TooBig = TooBig -maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1 - | otherwise = s2 - -sizeZero, sizeOne :: ExprSize -sizeN :: Int -> ExprSize -conSizeN :: DataCon ->Int -> ExprSize - -sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0)) -sizeOne = SizeIs (_ILIT(1)) emptyBag (_ILIT(0)) -sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0)) -conSizeN dc n - | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n +# _ILIT(1)) - | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n +# _ILIT(1)) - -- Treat constructors as size 1; we are keen to expose them +-- | Finds a nominal size of a string literal. +litSize :: Literal -> Int +-- Used by CoreUnfold.sizeExpr +litSize (MachStr str) = 1 + ((lengthFS str + 3) `div` 4) + -- If size could be 0 then @f "x"@ might be too small + -- [Sept03: make literal strings a bit bigger to avoid fruitless + -- duplication of little strings] +litSize _other = 0 -- Must match size of nullary constructors + -- Key point: if x |-> 4, then x must inline unconditionally + -- (eg via case binding) + +funSize :: [Id] -> Id -> Int -> ExprSize +-- Size for functions that are not constructors or primops +-- Note [Function applications] +funSize top_args fun n_val_args + | fun `hasKey` buildIdKey = buildSize + | fun `hasKey` augmentIdKey = augmentSize + | otherwise = SizeIs (iUnbox size) arg_discount (iUnbox res_discount) + where + some_val_args = n_val_args > 0 + + arg_discount | some_val_args && fun `elem` top_args + = unitBag (fun, opt_UF_FunAppDiscount) + | otherwise = emptyBag + -- If the function is an argument and is applied + -- to some values, give it an arg-discount + + res_discount | idArity fun > n_val_args = opt_UF_FunAppDiscount + | otherwise = 0 + -- If the function is partially applied, show a result discount + + size | some_val_args = 1 + n_val_args + | otherwise = 0 + -- The 1+ is for the function itself + -- Add 1 for each non-trivial arg; + -- the allocation cost, as in let(rec) + + +conSize :: DataCon -> Int -> ExprSize +conSize dc n_val_args + | n_val_args == 0 = SizeIs (_ILIT(0)) emptyBag (_ILIT(1)) + | isUnboxedTupleCon dc = SizeIs (_ILIT(0)) emptyBag (iUnbox n_val_args +# _ILIT(1)) + | otherwise = SizeIs (_ILIT(1)) emptyBag (iUnbox n_val_args +# _ILIT(1)) + -- Treat a constructors application as size 1, regardless of how + -- many arguments it has; we are keen to expose them -- (and we charge separately for their args). We can't treat - -- them as size zero, else we find that (iBox x) has size 1, + -- them as size zero, else we find that (Just x) has size 0, -- which is the same as a lone variable; and hence 'v' will - -- always be replaced by (iBox x), where v is bound to iBox x. + -- always be replaced by (Just x), where v is bound to Just x. -- -- However, unboxed tuples count as size zero -- I found occasions where we had @@ -348,9 +410,9 @@ conSizeN dc n -- and f wasn't getting inlined primOpSize :: PrimOp -> Int -> ExprSize -primOpSize op n_args +primOpSize op n_val_args | not (primOpIsDupable op) = sizeN opt_UF_DearOp - | not (primOpOutOfLine op) = sizeN (2 - n_args) + | not (primOpOutOfLine op) = sizeN 1 -- Be very keen to inline simple primops. -- We give a discount of 1 for each arg so that (op# x y z) costs 2. -- We can't make it cost 1, else we'll inline let v = (op# x y z) @@ -360,10 +422,12 @@ primOpSize op n_args -- let x = +# p q in C {x} -- Even though x get's an occurrence of 'many', its RHS looks cheap, -- and there's a good chance it'll get inlined back into C's RHS. Urgh! - | otherwise = sizeOne + + | otherwise = sizeN n_val_args + buildSize :: ExprSize -buildSize = SizeIs (_ILIT(-2)) emptyBag (_ILIT(4)) +buildSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4)) -- We really want to inline applications of build -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later) -- Indeed, we should add a result_discount becuause build is @@ -372,7 +436,7 @@ buildSize = SizeIs (_ILIT(-2)) emptyBag (_ILIT(4)) -- The "4" is rather arbitrary. augmentSize :: ExprSize -augmentSize = SizeIs (_ILIT(-2)) emptyBag (_ILIT(4)) +augmentSize = SizeIs (_ILIT(0)) emptyBag (_ILIT(4)) -- Ditto (augment t (\cn -> e) ys) should cost only the cost of -- e plus ys. The -2 accounts for the \cn @@ -382,11 +446,62 @@ nukeScrutDiscount TooBig = TooBig -- When we return a lambda, give a discount if it's used (applied) lamScrutDiscount :: ExprSize -> ExprSize -lamScrutDiscount (SizeIs n vs _) = case opt_UF_FunAppDiscount of { d -> SizeIs n vs (iUnbox d) } +lamScrutDiscount (SizeIs n vs _) = SizeIs n vs (iUnbox opt_UF_FunAppDiscount) lamScrutDiscount TooBig = TooBig \end{code} +Note [Function applications] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In a function application (f a b) + + - If 'f' is an argument to the function being analysed, + and there's at least one value arg, record a FunAppDiscount for f + + - If the application if a PAP (arity > 2 in this example) + record a *result* discount (because inlining + with "extra" args in the call may mean that we now + get a saturated application) + +Code for manipulating sizes + +\begin{code} +data ExprSize = TooBig + | SizeIs FastInt -- Size found + (Bag (Id,Int)) -- Arguments cased herein, and discount for each such + FastInt -- Size to subtract if result is scrutinised + -- by a case expression + +instance Outputable ExprSize where + ppr TooBig = ptext (sLit "TooBig") + ppr (SizeIs a _ c) = brackets (int (iBox a) <+> int (iBox c)) + +-- subtract the discount before deciding whether to bale out. eg. we +-- want to inline a large constructor application into a selector: +-- tup = (a_1, ..., a_99) +-- x = case tup of ... +-- +mkSizeIs :: FastInt -> FastInt -> Bag (Id, Int) -> FastInt -> ExprSize +mkSizeIs max n xs d | (n -# d) ># max = TooBig + | otherwise = SizeIs n xs d + +maxSize :: ExprSize -> ExprSize -> ExprSize +maxSize TooBig _ = TooBig +maxSize _ TooBig = TooBig +maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2 = s1 + | otherwise = s2 + +sizeZero, sizeOne :: ExprSize +sizeN :: Int -> ExprSize + +sizeZero = SizeIs (_ILIT(0)) emptyBag (_ILIT(0)) +sizeOne = SizeIs (_ILIT(1)) emptyBag (_ILIT(0)) +sizeN n = SizeIs (iUnbox n) emptyBag (_ILIT(0)) +\end{code} + + + + %************************************************************************ %* * \subsection[considerUnfolding]{Given all the info, do (not) do the unfolding} @@ -429,17 +544,13 @@ couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold certainlyWillInline :: Unfolding -> Bool -- Sees if the unfolding is pretty certain to inline -certainlyWillInline (CompulsoryUnfolding {}) = True -certainlyWillInline (InlineRule {}) = True -certainlyWillInline (CoreUnfolding - { uf_is_cheap = is_cheap - , uf_guidance = UnfoldIfGoodArgs {ug_arity = n_vals, ug_size = size}}) - = is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold +certainlyWillInline (CoreUnfolding _ _ _ is_cheap _ (UnfoldIfGoodArgs n_vals _ size _)) + = is_cheap && size - (n_vals+1) <= opt_UF_UseThreshold certainlyWillInline _ = False smallEnoughToInline :: Unfolding -> Bool -smallEnoughToInline (CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_size = size}}) +smallEnoughToInline (CoreUnfolding _ _ _ _ _ (UnfoldIfGoodArgs _ _ size _)) = size <= opt_UF_UseThreshold smallEnoughToInline _ = False @@ -472,11 +583,16 @@ callSiteInline :: DynFlags -> Bool -- True <=> the Id can be inlined -> Id -- The Id -> Bool -- True if there are are no arguments at all (incl type args) - -> [Bool] -- One for each value arg; True if it is interesting + -> [ArgSummary] -- One for each value arg; True if it is interesting -> CallCtxt -- True <=> continuation is interesting -> Maybe CoreExpr -- Unfolding, if any +instance Outputable ArgSummary where + ppr TrivArg = ptext (sLit "TrivArg") + ppr NonTrivArg = ptext (sLit "NonTrivArg") + ppr ValueArg = ptext (sLit "ValueArg") + data CallCtxt = BoringCtxt | ArgCtxt Bool -- We're somewhere in the RHS of function with rules @@ -499,10 +615,7 @@ instance Outputable CallCtxt where ppr ValAppCtxt = ptext (sLit "ValAppCtxt") callSiteInline dflags active_inline id lone_variable arg_infos cont_info - = let - n_val_args = length arg_infos - in - case idUnfolding id of { + = case idUnfolding id of { NoUnfolding -> Nothing ; OtherCon _ -> Nothing ; @@ -513,45 +626,14 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info -- compulsory unfoldings (see MkId.lhs). -- We don't allow them to be inactive - InlineRule { uf_tmpl = unf_template, uf_arity = arity, uf_is_top = is_top - , uf_is_value = is_value, uf_worker = mb_worker } - -> let yes_or_no | not active_inline = False - | n_val_args < arity = yes_unsat -- Not enough value args - | n_val_args == arity = yes_exact -- Exactly saturated - | otherwise = True -- Over-saturated - result | yes_or_no = Just unf_template - | otherwise = Nothing - - -- See Note [Inlining an InlineRule] - is_wrapper = isJust mb_worker - yes_unsat | is_wrapper = or arg_infos - | otherwise = False - - yes_exact = or arg_infos || interesting_saturated_call - interesting_saturated_call - = case cont_info of - BoringCtxt -> not is_top -- Note [Nested functions] - CaseCtxt -> not lone_variable || not is_value -- Note [Lone variables] - ArgCtxt {} -> arity > 0 -- Note [Inlining in ArgCtxt] - ValAppCtxt -> True -- Note [Cast then apply] - in - if dopt Opt_D_dump_inlinings dflags then - pprTrace ("Considering InlineRule for: " ++ showSDoc (ppr id)) - (vcat [text "active:" <+> ppr active_inline, - text "arg infos" <+> ppr arg_infos, - text "interesting call" <+> ppr interesting_saturated_call, - text "is value:" <+> ppr is_value, - text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) - result - else result ; - - CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top, uf_is_value = is_value, - uf_is_cheap = is_cheap, uf_guidance = guidance } -> + CoreUnfolding unf_template is_top is_value is_cheap is_expable guidance -> let result | yes_or_no = Just unf_template | otherwise = Nothing + n_val_args = length arg_infos + yes_or_no = active_inline && is_cheap && consider_safe -- We consider even the once-in-one-branch -- occurrences, because they won't all have been @@ -567,26 +649,30 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info -- work-duplication issue (the caller checks that). = case guidance of UnfoldNever -> False - UnfoldIfGoodArgs { ug_arity = n_vals_wanted, ug_args = arg_discounts - , ug_res = res_discount, ug_size = size } - | enough_args && size <= (n_vals_wanted + 1) + UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount + | uncond_inline -> True + | otherwise -> some_benefit && small_enough && inline_enough_args + + where -- Inline unconditionally if there no size increase -- Size of call is n_vals_wanted (+1 for the function) - -> True - - | otherwise - -> some_benefit && small_enough && inline_enough_args + uncond_inline + | n_vals_wanted == 0 = size == 0 + | otherwise = enough_args && (size <= n_vals_wanted + 1) - where enough_args = n_val_args >= n_vals_wanted inline_enough_args = not (dopt Opt_InlineIfEnoughArgs dflags) || enough_args - some_benefit = or arg_infos || really_interesting_cont + some_benefit = any nonTriv arg_infos || really_interesting_cont -- There must be something interesting -- about some argument, or the result -- context, to make it worth inlining + + -- NB: (any nonTriv arg_infos) looks at the over-saturated + -- args too which is wrong; but if over-saturated + -- we'll probably inline anyway. really_interesting_cont | n_val_args < n_vals_wanted = False -- Too few args @@ -604,17 +690,7 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info small_enough = (size - discount) <= opt_UF_UseThreshold discount = computeDiscount n_vals_wanted arg_discounts - res_discount' arg_infos - res_discount' = case cont_info of - BoringCtxt -> 0 - CaseCtxt -> res_discount - _other -> 4 `min` res_discount - -- res_discount can be very large when a function returns - -- construtors; but we only want to invoke that large discount - -- when there's a case continuation. - -- Otherwise we, rather arbitrarily, threshold it. Yuk. - -- But we want to aovid inlining large functions that return - -- constructors into contexts that are simply "interesting" + res_discount arg_infos cont_info in if dopt Opt_D_dump_inlinings dflags then @@ -623,7 +699,8 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info text "arg infos" <+> ppr arg_infos, text "interesting continuation" <+> ppr cont_info, text "is value:" <+> ppr is_value, - text "is cheap:" <+> ppr is_cheap, + text "is cheap:" <+> ppr is_cheap, + text "is expandable:" <+> ppr is_expable, text "guidance" <+> ppr guidance, text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) result @@ -632,20 +709,16 @@ callSiteInline dflags active_inline id lone_variable arg_infos cont_info } \end{code} -Note [Inlining an InlineRule] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -An InlineRules is used for - (a) pogrammer INLINE pragmas - (b) inlinings from worker/wrapper - -For (a) the RHS may be large, and our contract is that we *only* inline -when the function is applied to all the arguments on the LHS of the -source-code defn. (The uf_arity in the rule.) - -However for worker/wrapper it may be worth inlining even if the -arity is not satisfied (as we do in the CoreUnfolding case) so we don't -require saturation. +Note [Things to watch] +~~~~~~~~~~~~~~~~~~~~~~ +* { y = I# 3; x = y `cast` co; ...case (x `cast` co) of ... } + Assume x is exported, so not inlined unconditionally. + Then we want x to inline unconditionally; no reason for it + not to, and doing so avoids an indirection. +* { x = I# 3; ....f x.... } + Make sure that x does not inline unconditionally! + Lest we get extra allocation. Note [Nested functions] ~~~~~~~~~~~~~~~~~~~~~~~ @@ -683,7 +756,7 @@ slow-down). The motivation was test eyeball/inline1.hs; but that seems to work ok now. Note [Lone variables] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~ The "lone-variable" case is important. I spent ages messing about with unsatisfactory varaints, but this is nice. The idea is that if a variable appears all alone @@ -738,28 +811,174 @@ However, watch out: a significant disadvantage. Hence some_val_args in the Stop case \begin{code} -computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Int -computeDiscount n_vals_wanted arg_discounts result_discount arg_infos +computeDiscount :: Int -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int +computeDiscount n_vals_wanted arg_discounts res_discount arg_infos cont_info -- We multiple the raw discounts (args_discount and result_discount) -- ty opt_UnfoldingKeenessFactor because the former have to do with -- *size* whereas the discounts imply that there's some extra -- *efficiency* to be gained (e.g. beta reductions, case reductions) -- by inlining. - -- we also discount 1 for each argument passed, because these will - -- reduce with the lambdas in the function (we count 1 for a lambda - -- in size_up). - = 1 + -- Discount of 1 because the result replaces the call - -- so we count 1 for the function itself - length (take n_vals_wanted arg_infos) + - -- Discount of 1 for each arg supplied, because the - -- result replaces the call - round (opt_UF_KeenessFactor * - fromIntegral (arg_discount + result_discount)) + = 1 -- Discount of 1 because the result replaces the call + -- so we count 1 for the function itself + + + length (take n_vals_wanted arg_infos) + -- Discount of (un-scaled) 1 for each arg supplied, + -- because the result replaces the call + + + round (opt_UF_KeenessFactor * + fromIntegral (arg_discount + res_discount')) where arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos) - mk_arg_discount discount is_evald | is_evald = discount - | otherwise = 0 + mk_arg_discount _ TrivArg = 0 + mk_arg_discount _ NonTrivArg = 1 + mk_arg_discount discount ValueArg = discount + + res_discount' = case cont_info of + BoringCtxt -> 0 + CaseCtxt -> res_discount + _other -> 4 `min` res_discount + -- res_discount can be very large when a function returns + -- construtors; but we only want to invoke that large discount + -- when there's a case continuation. + -- Otherwise we, rather arbitrarily, threshold it. Yuk. + -- But we want to aovid inlining large functions that return + -- constructors into contexts that are simply "interesting" +\end{code} + +%************************************************************************ +%* * + Interesting arguments +%* * +%************************************************************************ + +Note [Interesting arguments] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An argument is interesting if it deserves a discount for unfoldings +with a discount in that argument position. The idea is to avoid +unfolding a function that is applied only to variables that have no +unfolding (i.e. they are probably lambda bound): f x y z There is +little point in inlining f here. + +Generally, *values* (like (C a b) and (\x.e)) deserve discounts. But +we must look through lets, eg (let x = e in C a b), because the let will +float, exposing the value, if we inline. That makes it different to +exprIsHNF. + +Before 2009 we said it was interesting if the argument had *any* structure +at all; i.e. (hasSomeUnfolding v). But does too much inlining; see Trac #3016. + +But we don't regard (f x y) as interesting, unless f is unsaturated. +If it's saturated and f hasn't inlined, then it's probably not going +to now! + +\begin{code} +data ArgSummary = TrivArg -- Nothing interesting + | NonTrivArg -- Arg has structure + | ValueArg -- Arg is a con-app or PAP + +interestingArg :: CoreExpr -> ArgSummary +-- See Note [Interesting arguments] +interestingArg e = go e 0 + where + -- n is # value args to which the expression is applied + go (Lit {}) _ = ValueArg + go (Var v) n + | isDataConWorkId v = ValueArg + | idArity v > n = ValueArg -- Catches (eg) primops with arity but no unfolding + | n > 0 = NonTrivArg -- Saturated or unknown call + | evald_unfolding = ValueArg -- n==0; look for a value + | otherwise = TrivArg -- n==0, no useful unfolding + where + evald_unfolding = isEvaldUnfolding (idUnfolding v) + + go (Type _) _ = TrivArg + go (App fn (Type _)) n = go fn n + go (App fn _) n = go fn (n+1) + go (Note _ a) n = go a n + go (Cast e _) n = go e n + go (Lam v e) n + | isTyVar v = go e n + | n>0 = go e (n-1) + | otherwise = ValueArg + go (Let _ e) n = case go e n of { ValueArg -> ValueArg; _ -> NonTrivArg } + go (Case {}) _ = NonTrivArg + +nonTriv :: ArgSummary -> Bool +nonTriv TrivArg = False +nonTriv _ = True \end{code} + +%************************************************************************ +%* * + The Very Simple Optimiser +%* * +%************************************************************************ + + +\begin{code} +simpleOptExpr :: Subst -> CoreExpr -> CoreExpr +-- Return an occur-analysed and slightly optimised expression +-- The optimisation is very straightforward: just +-- inline non-recursive bindings that are used only once, +-- or wheere the RHS is trivial + +simpleOptExpr subst expr + = go subst (occurAnalyseExpr expr) + where + go subst (Var v) = lookupIdSubst subst v + go subst (App e1 e2) = App (go subst e1) (go subst e2) + go subst (Type ty) = Type (substTy subst ty) + go _ (Lit lit) = Lit lit + go subst (Note note e) = Note note (go subst e) + go subst (Cast e co) = Cast (go subst e) (substTy subst co) + go subst (Let bind body) = go_bind subst bind body + go subst (Lam bndr body) = Lam bndr' (go subst' body) + where + (subst', bndr') = substBndr subst bndr + + go subst (Case e b ty as) = Case (go subst e) b' + (substTy subst ty) + (map (go_alt subst') as) + where + (subst', b') = substBndr subst b + + + ---------------------- + go_alt subst (con, bndrs, rhs) = (con, bndrs', go subst' rhs) + where + (subst', bndrs') = substBndrs subst bndrs + + ---------------------- + go_bind subst (Rec prs) body = Let (Rec (bndrs' `zip` rhss')) + (go subst' body) + where + (bndrs, rhss) = unzip prs + (subst', bndrs') = substRecBndrs subst bndrs + rhss' = map (go subst') rhss + + go_bind subst (NonRec b r) body = go_nonrec subst b (go subst r) body + + ---------------------- + go_nonrec subst b (Type ty') body + | isTyVar b = go (extendTvSubst subst b ty') body + -- let a::* = TYPE ty in + go_nonrec subst b r' body + | isId b -- let x = e in + , exprIsTrivial r' || safe_to_inline (idOccInfo b) + = go (extendIdSubst subst b r') body + go_nonrec subst b r' body + = Let (NonRec b' r') (go subst' body) + where + (subst', b') = substBndr subst b + + ---------------------- + -- Unconditionally safe to inline + safe_to_inline :: OccInfo -> Bool + safe_to_inline IAmDead = True + safe_to_inline (OneOcc in_lam one_br _) = not in_lam && one_br + safe_to_inline (IAmALoopBreaker {}) = False + safe_to_inline NoOccInfo = False +\end{code} \ No newline at end of file