+
+
+%************************************************************************
+%* *
+\subsection{callSiteInline}
+%* *
+%************************************************************************
+
+This is the key function. It decides whether to inline a variable at a call site.
+
+callSiteInline is used at call sites, so it is a bit more generous.
+It's a very important function that embodies lots of heuristics.
+A non-WHNF can be inlined if it doesn't occur inside a lambda,
+and occurs exactly once or
+ occurs once in each branch of a case and is small
+
+If the thing is in WHNF, there's no danger of duplicating work,
+so we can inline if it occurs once, or is small
+
+\begin{code}
+-- callSiteInline decides whether the unfolding of an Id should replace an
+-- occurrence of that Id at one particular call site.  It returns
+-- (Just unf_template), the template held in the Id's CoreUnfolding, when
+-- the answer is yes, and Nothing otherwise.  An Id whose unfolding is
+-- NoUnfolding or OtherCon always gives Nothing.
+callSiteInline :: Bool -- True <=> the Id is black listed
+ -> Bool -- 'inline' note at call site
+ -> Id -- The Id
+ -> [CoreExpr] -- Arguments
+ -> Bool -- True <=> continuation is interesting
+ -> Maybe CoreExpr -- Unfolding, if any
+
+
+callSiteInline black_listed inline_call id args interesting_cont
+ = case getIdUnfolding id of {
+ NoUnfolding -> Nothing ;
+ OtherCon _ -> Nothing ;
+ CoreUnfolding form guidance unf_template ->
+
+ let
+ -- The final answer: hand back the template only when yes_or_no says so
+ result | yes_or_no = Just unf_template
+ | otherwise = Nothing
+
+ inline_prag = getInlinePragma id
+ -- One Bool per value argument, as judged by interestingArg
+ arg_infos = map interestingArg val_args
+ -- Only the arguments for which isValArg holds
+ val_args = filter isValArg args
+ -- Derived from the unfolding's form; used below to guard inlining
+ -- when the occurrence is inside a lambda
+ whnf = whnfOrBottom form
+
+ -- Dispatch on the Id's inline pragma.  Only the last two cases go on to
+ -- the occurrence/size heuristics in 'consider'; the others are decided
+ -- outright here.
+ yes_or_no =
+ case inline_prag of
+ IAmDead -> pprTrace "callSiteInline: dead" (ppr id) False
+ IMustNotBeINLINEd -> False
+ IAmALoopBreaker -> False
+ IMustBeINLINEd -> True -- Overrides absolutely everything, including the black list
+ ICanSafelyBeINLINEd in_lam one_br -> consider in_lam one_br
+ -- No pragma info: proceed with in_lam = InsideLam, one_branch = False
+ NoInlinePragInfo -> consider InsideLam False
+
+ -- The guards are tried in order: the black list vetoes first, then an
+ -- 'inline' note at the call site forces a yes, and only after that are
+ -- the single-occurrence and size-based cases examined.
+ consider in_lam one_branch
+ | black_listed = False
+ | inline_call = True
+ | one_branch -- Be very keen to inline something if this is its unique occurrence; that
+ -- gives a good chance of eliminating the original binding for the thing.
+ -- The only time we hold back is when substituting inside a lambda;
+ -- then if the context is totally uninteresting (not applied, not scrutinised)
+ -- there is no point in substituting because it might just increase allocation.
+ = WARN( case in_lam of { NotInsideLam -> True; other -> False },
+ text "callSiteInline:oneOcc" <+> ppr id )
+ -- If it has one occurrence, not inside a lambda, PreInlineUnconditionally
+ -- should have zapped it already
+ whnf && (not (null args) || interesting_cont)
+
+ | otherwise -- Occurs (textually) more than once, so look at its size
+ = case guidance of
+ UnfoldAlways -> True
+ UnfoldNever -> False
+ UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount
+ | enough_args && size <= (n_vals_wanted + 1)
+ -- No size increase
+ -- Size of call is n_vals_wanted (+1 for the function)
+ -> case in_lam of
+ NotInsideLam -> True
+ InsideLam -> whnf
+
+ | not (or arg_infos || really_interesting_cont)
+ -- If it occurs more than once, there must be something interesting
+ -- about some argument, or the result, to make it worth inlining
+ -> False
+
+ | otherwise
+ -> case in_lam of
+ NotInsideLam -> small_enough
+ InsideLam -> whnf && small_enough
+
+ where
+ -- Number of value arguments actually supplied at this call site
+ n_args = length arg_infos
+ enough_args = n_args >= n_vals_wanted
+ really_interesting_cont | n_args < n_vals_wanted = False -- Too few args
+ | n_args == n_vals_wanted = interesting_cont
+ | otherwise = True -- Extra args
+ -- This rather elaborate defn for really_interesting_cont is important
+ -- Consider an I# = INLINE (\x -> I# {x})
+ -- The unfolding guidance deems it to have size 2, and no arguments.
+ -- So in an application (I# y) we must take the extra arg 'y' as
+ -- evidence of an interesting context!
+
+ -- Inline when the size, less the discount earned at this call site,
+ -- is within opt_UF_UseThreshold
+ small_enough = (size - discount) <= opt_UF_UseThreshold
+ discount = computeDiscount n_vals_wanted arg_discounts res_discount
+ arg_infos really_interesting_cont
+
+
+ in
+ -- When compiled with DEBUG and opt_D_dump_inlinings is set, trace the
+ -- inputs to the decision and its outcome; the value returned is 'result'
+ -- either way.
+#ifdef DEBUG
+ if opt_D_dump_inlinings then
+ pprTrace "Considering inlining"
+ (ppr id <+> vcat [text "black listed" <+> ppr black_listed,
+ text "inline prag:" <+> ppr inline_prag,
+ text "arg infos" <+> ppr arg_infos,
+ text "interesting continuation" <+> ppr interesting_cont,
+ text "whnf" <+> ppr whnf,
+ text "guidance" <+> ppr guidance,
+ text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO",
+ if yes_or_no then
+ text "Unfolding =" <+> pprCoreExpr unf_template
+ else empty])
+ result
+ else
+#endif
+ result
+ }
+
+-- interestingArg: does this argument show *some* structure?
+-- The aim is to avoid unfolding a function whose arguments are all
+-- variables without an unfolding (probably lambda-bound), as in
+--      f x y z
+-- where inlining f gains little.
+--   * a type argument is never interesting
+--   * a type application is as interesting as the thing being applied
+--   * a variable is interesting exactly when it has an unfolding
+--   * anything else is interesting
+interestingArg arg
+  = case arg of
+      Type _           -> False
+      App fun (Type _) -> interestingArg fun
+      Var var          -> hasUnfolding (getIdUnfolding var)
+      _                -> True
+
+
+computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Bool -> Int
+computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
+  = args_supplied + scaled_discount
+  where
+        -- One unit of discount per argument supplied (capped at the number
+        -- the function wants): each such argument reduces against a lambda
+        -- in the function body, and a lambda is counted as 1 in the size.
+    args_supplied = length (take n_vals_wanted arg_infos)
+
+        -- The raw argument and result discounts are multiplied by
+        -- opt_UF_KeenessFactor: they measure extra *efficiency* to be had
+        -- from inlining (e.g. beta reductions, case reductions), whereas
+        -- the quantity they are set against is a *size*.
+    scaled_discount = round (opt_UF_KeenessFactor *
+                             fromInt (args_part + result_part))
+
+        -- An argument's discount counts only when its info flag is True
+    args_part = sum [ d | (d, True) <- zip arg_discounts arg_infos ]
+
+        -- No result discount unless the result is used
+        -- (over-applied, or case scrutinee)
+    result_part = if result_used then res_discount else 0
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Black-listing}
+%* *
+%************************************************************************
+
+Inlining is controlled by the "Inline phase" number, which is set
+by the per-simplification-pass '-finline-phase' flag.
+
+For optimisation we use phase 1, then phase 2, and finally no phase at all
+(i.e. no -finline-phase flag), in that order. The meaning of each phase is
+determined by the @blackListed@ function here.
+
+\begin{code}
+blackListed :: IdSet           -- Used in transformation rules
+            -> Maybe Int       -- Inline phase
+            -> Id -> Bool      -- True <=> blacklisted
+
+-- blackListed says whether a variable should *not* be inlined because of
+-- the inline phase we are in.  The inline phase number is inspected
+-- nowhere else.
+--
+--   Phase 0:   'no inlinings please' -- everything is black listed
+--   Phase 1:   black list rule-y things, things with specialisations,
+--              and runSTRep
+--   Phase 2:   as phase 1, except that build/augment and things with
+--              specialisations may now be inlined
+--   Otherwise: nothing is black listed
+blackListed rule_vars phase
+  = case phase of
+      Just 0 -> const True
+      Just 1 -> \ var -> in_rules var || has_specs var || is_runST_rep var
+      Just 2 -> \ var -> (in_rules var && not (is_build_or_augment var))
+                         || is_runST_rep var
+      _      -> const False
+  where
+    in_rules var     = var `elemVarSet` rule_vars
+    has_specs var    = not (isEmptyCoreRules (getIdSpecialisation var))
+    is_runST_rep var = idUnique var == runSTRepIdKey
+    is_build_or_augment var
+      = uniq == buildIdKey || uniq == augmentIdKey
+      where
+        uniq = idUnique var
+\end{code}
+
+
+SLPJ 95/04: Why @runST@ must be inlined very late:
+\begin{verbatim}
+f x =
+ runST ( \ s -> let
+ (a, s') = newArray# 100 [] s
+ (_, s'') = fill_in_array_or_something a x s'
+ in
+ freezeArray# a s'' )
+\end{verbatim}
+If we inline @runST@, we'll get:
+\begin{verbatim}
+f x = let
+ (a, s') = newArray# 100 [] realWorld#{-NB-}
+ (_, s'') = fill_in_array_or_something a x s'
+ in
+ freezeArray# a s''
+\end{verbatim}
+And now the @newArray#@ binding can be floated to become a CAF, which
+is totally and utterly wrong:
+\begin{verbatim}
+f = let
+ (a, s') = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
+ in
+ \ x ->
+ let (_, s'') = fill_in_array_or_something a x s' in
+ freezeArray# a s''
+\end{verbatim}
+All calls to @f@ will share a {\em single} array!
+
+Yet we do want to inline runST sometime, so we can avoid
+needless code. Solution: black list it until the last moment.
+