+
+
+%************************************************************************
+%* *
+\subsection{callSiteInline}
+%* *
+%************************************************************************
+
+This is the key function. It decides whether to inline a variable at a call site
+
+callSiteInline is used at call sites, so it is a bit more generous.
+It's a very important function that embodies lots of heuristics.
+A non-WHNF can be inlined if it doesn't occur inside a lambda,
+and occurs exactly once or
+ occurs once in each branch of a case and is small
+
+If the thing is in WHNF, there's no danger of duplicating work,
+so we can inline if it occurs once, or is small
+
+NOTE: we don't want to inline top-level functions that always diverge.
+It just makes the code bigger. It turns out that the convenient way to prevent
+them inlining is to give them a NOINLINE pragma, which we do in
+StrictAnal.addStrictnessInfoToTopId
+
+\begin{code}
+callSiteInline :: Bool -- True <=> the Id is black listed
+ -> Bool -- 'inline' note at call site
+ -> OccInfo -- Occurrence info; examined in yes_or_no below
+ -> Id -- The Id
+ -> [Bool] -- One for each value arg; True if it is interesting
+ -> Bool -- True <=> continuation is interesting
+ -> Maybe CoreExpr -- Unfolding, if any
+ -- Returns Just the unfolding template when the decision is to inline at this
+ -- call site, and Nothing otherwise (including when the Id has no usable unfolding).
+callSiteInline black_listed inline_call occ id arg_infos interesting_cont
+ = case idUnfolding id of {
+ NoUnfolding -> Nothing ;
+ OtherCon cs -> Nothing ;
+ CompulsoryUnfolding unf_template | black_listed -> Nothing
+ | otherwise -> Just unf_template ;
+ -- Constructors have compulsory unfoldings, but
+ -- may have rules, in which case they are
+ -- black listed till later
+ CoreUnfolding unf_template is_top is_cheap is_value is_bot guidance ->
+ -- Ordinary unfolding: the answer is yes_or_no, computed below
+ let
+ result | yes_or_no = Just unf_template
+ | otherwise = Nothing
+
+ n_val_args = length arg_infos -- number of value args at this call site
+
+ ok_inside_lam = is_value || is_bot || (is_cheap && not is_top)
+ -- I'm experimenting with is_cheap && not is_top
+
+ yes_or_no
+ | black_listed = False
+ | otherwise = case occ of
+ IAmDead -> pprTrace "callSiteInline: dead" (ppr id) False -- trace it and refuse to inline
+ IAmALoopBreaker -> False
+ OneOcc in_lam one_br -> (not in_lam || ok_inside_lam) && consider_safe in_lam True one_br
+ NoOccInfo -> ok_inside_lam && consider_safe True False False -- treated as: in a lambda, not once, not a unique occurrence
+
+ consider_safe in_lam once once_in_one_branch
+ -- consider_safe decides whether it's a good idea to inline something,
+ -- given that there's no work-duplication issue (the caller checks that).
+ -- once_in_one_branch = True means there's a unique textual occurrence
+ | inline_call = True -- an 'inline' note at the call site overrides the heuristics below
+
+ | once_in_one_branch
+ -- Be very keen to inline something if this is its unique occurrence:
+ --
+ -- a) Inlining gives a good chance of eliminating the original
+ -- binding (and hence the allocation) for the thing.
+ -- (Provided it's not a top level binding, in which case the
+ -- allocation costs nothing.)
+ --
+ -- b) Inlining a function that is called only once exposes the
+ -- body function to the call site.
+ --
+ -- The only time we hold back is when substituting inside a lambda;
+ -- then if the context is totally uninteresting (not applied, not scrutinised)
+ -- there is no point in substituting because it might just increase allocation,
+ -- by allocating the function itself many times
+ --
+ -- Note: there used to be a '&& not top_level' in the guard above,
+ -- but that stopped us inlining top-level functions used only once,
+ -- which is stupid
+ = not in_lam || not (null arg_infos) || interesting_cont
+
+ | otherwise
+ = case guidance of
+ UnfoldNever -> False ;
+ UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount
+
+ | enough_args && size <= (n_vals_wanted + 1)
+ -- No size increase
+ -- Size of call is n_vals_wanted (+1 for the function)
+ -> True
+
+ | otherwise
+ -> some_benefit && small_enough
+
+ where
+ some_benefit = or arg_infos || really_interesting_cont ||
+ (not is_top && (once || (n_vals_wanted > 0 && enough_args)))
+ -- If it occurs more than once, there must be something interesting
+ -- about some argument, or the result context, to make it worth inlining
+ --
+ -- If a function has a nested defn we also record some-benefit,
+ -- on the grounds that we are often able to eliminate the binding,
+ -- and hence the allocation, for the function altogether; this is good
+ -- for join points. But this only makes sense for *functions*;
+ -- inlining a constructor doesn't help allocation unless the result is
+ -- scrutinised. UNLESS the constructor occurs just once, albeit possibly
+ -- in multiple case branches. Then inlining it doesn't increase allocation,
+ -- but it does increase the chance that the constructor won't be allocated at all
+ -- in the branches that don't use it.
+
+ enough_args = n_val_args >= n_vals_wanted
+ really_interesting_cont | n_val_args < n_vals_wanted = False -- Too few args
+ | n_val_args == n_vals_wanted = interesting_cont
+ | otherwise = True -- Extra args
+ -- really_interesting_cont tells if the result of the
+ -- call is in an interesting context.
+
+ small_enough = (size - discount) <= opt_UF_UseThreshold -- size after discounts must fit the use threshold
+ discount = computeDiscount n_vals_wanted arg_discounts res_discount
+ arg_infos really_interesting_cont
+ -- Both branches below yield 'result'; the DEBUG branch only adds a trace of the decision
+ in
+#ifdef DEBUG
+ if opt_D_dump_inlinings then
+ pprTrace "Considering inlining"
+ (ppr id <+> vcat [text "black listed" <+> ppr black_listed,
+ text "occ info:" <+> ppr occ,
+ text "arg infos" <+> ppr arg_infos,
+ text "interesting continuation" <+> ppr interesting_cont,
+ text "is value:" <+> ppr is_value,
+ text "is cheap:" <+> ppr is_cheap,
+ text "is bottom:" <+> ppr is_bot,
+ text "is top-level:" <+> ppr is_top,
+ text "guidance" <+> ppr guidance,
+ text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO",
+ if yes_or_no then
+ text "Unfolding =" <+> pprCoreExpr unf_template
+ else empty])
+ result
+ else
+#endif
+ result
+ }
+
+computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Bool -> Int
+-- Total discount to subtract from an unfolding's size at a call site.
+--
+-- The raw discounts (per-argument and result) are multiplied by
+-- opt_UF_KeenessFactor: sizes measure code *size*, whereas a discount
+-- signals some extra *efficiency* to be gained by inlining (e.g. beta
+-- reductions, case reductions).
+--
+-- On top of the scaled part we discount 1 for the function itself, since
+-- the result replaces the call, and 1 for each wanted argument actually
+-- supplied, since those reduce with the lambdas in the function (we count
+-- 1 for a lambda in size_up).
+computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
+  = 1 + supplied_args + round (opt_UF_KeenessFactor * fromInt raw_discount)
+  where
+    -- At most n_vals_wanted of the supplied arguments earn the per-arg discount
+    supplied_args = length (take n_vals_wanted arg_infos)
+
+    raw_discount = interesting_args + result_part
+
+    -- An argument's discount counts only when its arg_infos flag is True
+    interesting_args
+      = sum (zipWith (\d interesting -> if interesting then d else 0)
+                     arg_discounts arg_infos)
+
+    -- No result discount unless the caller says the result is used
+    -- (over-applied, or case scrutinee)
+    result_part = if result_used then res_discount else 0
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Black-listing}
+%* *
+%************************************************************************
+
+Inlining is controlled by the "Inline phase" number, which is set
+by the per-simplification-pass '-finline-phase' flag.
+
+For optimisation we run phase 1, then phase 2, and finally a pass with no
+phase number (i.e. no -finline-phase flag). The meanings of these are
+determined by the @blackListed@ function here.
+
+The final simplification doesn't have a phase number.
+
+Pragmas
+~~~~~~~
+ Pragma Black list if
+
+(least black listing, most inlining)
+ INLINE n foo phase is Just p *and* p<n *and* foo appears on LHS of rule
+ INLINE foo phase is Just p *and* foo appears on LHS of rule
+ NOINLINE n foo phase is Just p *and* (p<n *or* foo appears on LHS of rule)
+ NOINLINE foo always
+(most black listing, least inlining)
+
+\begin{code}
+blackListed :: IdSet      -- Ids used in transformation rules
+            -> Maybe Int  -- Inline phase; Nothing in the last phase
+            -> Id -> Bool -- True <=> blacklisted
+
+-- Decides whether a variable should *not* be inlined because of the
+-- inline phase we are in. The phase number is examined only here
+-- (and in the helper normal_case that this function delegates to).
+
+blackListed rule_vars mb_phase
+  = case mb_phase of
+      -- Last phase: only a never-inline pragma keeps the Id black listed
+      Nothing    -> \var -> isNeverInlinePrag (idInlinePragma var)
+      -- Numbered phase: pragma, phase number and rules all play a part
+      Just phase -> normal_case rule_vars phase
+
+-- Black-listing decision for a numbered inline phase.
+-- As called from blackListed: IdSet -> Int -> Id -> Bool.
+normal_case rule_vars phase v
+  = case idInlinePragma v of
+      -- No pragma: black listed only while rules mention the Id
+      NoInlinePragInfo -> rule_target
+
+      -- The Bool says whether the pragma came from INLINE (True) or
+      -- NOINLINE (False); the Maybe is the optional phase threshold.
+      IMustNotBeINLINEd is_inline_prag mb_threshold
+        | is_inline_prag -> before_threshold && rule_target
+        | otherwise      -> before_threshold || rule_target
+        where
+          -- With no threshold the pragma applies in every numbered phase,
+          -- so INLINE reduces to rule_target and NOINLINE to True.
+          before_threshold = case mb_threshold of
+                               Nothing        -> True
+                               Just threshold -> phase < threshold
+  where
+    -- The Id is in the rule-variable set, or has specialisation rules
+    rule_target = v `elemVarSet` rule_vars
+                  || not (isEmptyCoreRules (idSpecialisation v))
+\end{code}
+
+
+SLPJ 95/04: Why @runST@ must be inlined very late:
+\begin{verbatim}
+f x =
+ runST ( \ s -> let
+ (a, s') = newArray# 100 [] s
+ (_, s'') = fill_in_array_or_something a x s'
+ in
+ freezeArray# a s'' )
+\end{verbatim}
+If we inline @runST@, we'll get:
+\begin{verbatim}
+f x = let
+ (a, s') = newArray# 100 [] realWorld#{-NB-}
+ (_, s'') = fill_in_array_or_something a x s'
+ in
+ freezeArray# a s''
+\end{verbatim}
+And now the @newArray#@ binding can be floated to become a CAF, which
+is totally and utterly wrong:
+\begin{verbatim}
+f = let
+ (a, s') = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
+ in
+ \ x ->
+ let (_, s'') = fill_in_array_or_something a x s' in
+ freezeArray# a s''
+\end{verbatim}
+All calls to @f@ will share a {\em single} array!
+
+Yet we do want to inline runST sometime, so we can avoid
+needless code. Solution: black list it until the last moment.
+