[project @ 2005-08-04 11:23:42 by simonmar]

[ghc-hetmet.git] / ghc / compiler / coreSyn / CoreUnfold.lhs
diff --git a/ghc/compiler/coreSyn/CoreUnfold.lhs b/ghc/compiler/coreSyn/CoreUnfold.lhs

index c59b937..d840f45 100644 (file)
--- a/ghc/compiler/coreSyn/CoreUnfold.lhs
+++ b/ghc/compiler/coreSyn/CoreUnfold.lhs
@@ -14,247 +14,192 @@ find, unsurprisingly, a Core expression.
  
  \begin{code}
  module CoreUnfold (
  
  \begin{code}
  module CoreUnfold (
-       Unfolding, UnfoldingGuidance, -- types
+       Unfolding, UnfoldingGuidance,   -- Abstract types
  
  
-       noUnfolding, mkUnfolding, seqUnfolding,
-       mkOtherCon, otherCons,
+       noUnfolding, mkTopUnfolding, mkUnfolding, mkCompulsoryUnfolding, seqUnfolding,
+       evaldUnfolding, mkOtherCon, otherCons,
         unfoldingTemplate, maybeUnfoldingTemplate,
         unfoldingTemplate, maybeUnfoldingTemplate,
-       isEvaldUnfolding, isCheapUnfolding,
-       hasUnfolding, hasSomeUnfolding,
+       isEvaldUnfolding, isValueUnfolding, isCheapUnfolding, isCompulsoryUnfolding,
+       hasUnfolding, hasSomeUnfolding, neverUnfold,
  
         couldBeSmallEnoughToInline, 
  
         couldBeSmallEnoughToInline, 
-       certainlySmallEnoughToInline, 
-       okToUnfoldInHiFile,
+       certainlyWillInline, 
  
  
-       calcUnfoldingGuidance, 
-
-       callSiteInline, blackListed
+       callSiteInline
      ) where
  
  #include "HsVersions.h"
  
      ) where
  
  #include "HsVersions.h"
  
-import CmdLineOpts     ( opt_UF_CreationThreshold,
-                         opt_UF_UseThreshold,
-                         opt_UF_ScrutConDiscount,
-                         opt_UF_FunAppDiscount,
-                         opt_UF_PrimArgDiscount,
-                         opt_UF_KeenessFactor,
-                         opt_UF_CheapOp, opt_UF_DearOp, opt_UF_NoRepLit,
-                         opt_UnfoldCasms, opt_PprStyle_Debug,
-                         opt_D_dump_inlinings
+import StaticFlags     ( opt_UF_CreationThreshold, opt_UF_UseThreshold,
+                         opt_UF_FunAppDiscount, opt_UF_KeenessFactor,
+                         opt_UF_DearOp,
                         )
                         )
+import DynFlags                ( DynFlags, DynFlag(..), dopt )
  import CoreSyn
  import PprCore         ( pprCoreExpr )
  import CoreSyn
  import PprCore         ( pprCoreExpr )
-import OccurAnal       ( occurAnalyseGlobalExpr )
-import BinderInfo      ( )
-import CoreUtils       ( coreExprType, exprIsTrivial, exprIsValue, exprIsCheap )
-import Id              ( Id, idType, idUnique, isId, 
-                         getIdSpecialisation, getInlinePragma, getIdUnfolding
+import OccurAnal       ( occurAnalyseExpr )
+import CoreUtils       ( exprIsValue, exprIsCheap, exprIsTrivial )
+import Id              ( Id, idType, isId,
+                         idUnfolding, globalIdDetails
                         )
                         )
-import VarSet
-import Name            ( isLocallyDefined )
-import Const           ( Con(..), isLitLitLit, isWHNFCon )
-import PrimOp          ( PrimOp(..), primOpIsDupable )
-import IdInfo          ( ArityInfo(..), InlinePragInfo(..), OccInfo(..) )
-import TyCon           ( tyConFamilySize )
-import Type            ( splitAlgTyConApp_maybe, splitFunTy_maybe, isUnLiftedType )
-import Const           ( isNoRepLit )
-import Unique          ( Unique, buildIdKey, augmentIdKey, runSTRepIdKey )
-import Maybes          ( maybeToBool )
+import DataCon         ( isUnboxedTupleCon )
+import Literal         ( litSize )
+import PrimOp          ( primOpIsDupable, primOpOutOfLine )
+import IdInfo          ( OccInfo(..), GlobalIdDetails(..) )
+import Type            ( isUnLiftedType )
+import PrelNames       ( hasKey, buildIdKey, augmentIdKey )
  import Bag
  import Bag
-import Util            ( isIn, lengthExceeds )
+import FastTypes
  import Outputable
  import Outputable
+import Util
+
+#if __GLASGOW_HASKELL__ >= 404
+import GLAEXTS         ( Int# )
+#endif
  \end{code}
  
  \end{code}
  
+
  %************************************************************************
  %*                                                                     *
  %************************************************************************
  %*                                                                     *
-\subsection{@Unfolding@ and @UnfoldingGuidance@ types}
+\subsection{Making unfoldings}
  %*                                                                     *
  %************************************************************************
  
  \begin{code}
  %*                                                                     *
  %************************************************************************
  
  \begin{code}
-data Unfolding
-  = NoUnfolding
-
-  | OtherCon [Con]             -- It ain't one of these
-                               -- (OtherCon xs) also indicates that something has been evaluated
-                               -- and hence there's no point in re-evaluating it.
-                               -- OtherCon [] is used even for non-data-type values
-                               -- to indicated evaluated-ness.  Notably:
-                               --      data C = C !(Int -> Int)
-                               --      case x of { C f -> ... }
-                               -- Here, f gets an OtherCon [] unfolding.
-
-  | CoreUnfolding                      -- An unfolding with redundant cached information
-               CoreExpr                -- Template; binder-info is correct
-               Bool                    -- exprIsCheap template (cached); it won't duplicate (much) work 
-                                       --      if you inline this in more than one place
-               Bool                    -- exprIsValue template (cached); it is ok to discard a `seq` on
-                                       --      this variable
-               UnfoldingGuidance       -- Tells about the *size* of the template.
-
-seqUnfolding :: Unfolding -> ()
-seqUnfolding (CoreUnfolding e b1 b2 g)
-  = seqExpr e `seq` b1 `seq` b2 `seq` seqGuidance g
-seqUnfolding other = ()
-\end{code}
+mkTopUnfolding expr = mkUnfolding True {- Top level -} expr
  
  
-\begin{code}
-noUnfolding = NoUnfolding
-mkOtherCon  = OtherCon
+mkUnfolding top_lvl expr
+  = CoreUnfolding (occurAnalyseExpr expr)
+                 top_lvl
  
  
-mkUnfolding expr
-  = CoreUnfolding (occurAnalyseGlobalExpr expr)
-                 (exprIsCheap expr)
                   (exprIsValue expr)
                   (exprIsValue expr)
-                 (calcUnfoldingGuidance opt_UF_CreationThreshold expr)
-
-unfoldingTemplate :: Unfolding -> CoreExpr
-unfoldingTemplate (CoreUnfolding expr _ _ _) = expr
-unfoldingTemplate other = panic "getUnfoldingTemplate"
-
-maybeUnfoldingTemplate :: Unfolding -> Maybe CoreExpr
-maybeUnfoldingTemplate (CoreUnfolding expr _ _ _) = Just expr
-maybeUnfoldingTemplate other                     = Nothing
+                       -- Already evaluated
  
  
-otherCons (OtherCon cons) = cons
-otherCons other                  = []
-
-isEvaldUnfolding :: Unfolding -> Bool
-isEvaldUnfolding (OtherCon _)                  = True
-isEvaldUnfolding (CoreUnfolding _ _ is_evald _) = is_evald
-isEvaldUnfolding other                         = False
-
-isCheapUnfolding :: Unfolding -> Bool
-isCheapUnfolding (CoreUnfolding _ is_cheap _ _) = is_cheap
-isCheapUnfolding other                         = False
-
-hasUnfolding :: Unfolding -> Bool
-hasUnfolding (CoreUnfolding _ _ _ _) = True
-hasUnfolding other                  = False
-
-hasSomeUnfolding :: Unfolding -> Bool
-hasSomeUnfolding NoUnfolding = False
-hasSomeUnfolding other      = True
-
-data UnfoldingGuidance
-  = UnfoldNever
-  | UnfoldAlways               -- There is no "original" definition,
-                               -- so you'd better unfold.  Or: something
-                               -- so cheap to unfold (e.g., 1#) that
-                               -- you should do it absolutely always.
-
-  | UnfoldIfGoodArgs   Int     -- and "n" value args
-
-                       [Int]   -- Discount if the argument is evaluated.
-                               -- (i.e., a simplification will definitely
-                               -- be possible).  One elt of the list per *value* arg.
+                 (exprIsCheap expr)
+                       -- OK to inline inside a lambda
  
  
-                       Int     -- The "size" of the unfolding; to be elaborated
-                               -- later. ToDo
+                 (calcUnfoldingGuidance opt_UF_CreationThreshold expr)
+       -- Sometimes during simplification, there's a large let-bound thing     
+       -- which has been substituted, and so is now dead; so 'expr' contains
+       -- two copies of the thing while the occurrence-analysed expression doesn't
+       -- Nevertheless, we don't occ-analyse before computing the size because the
+       -- size computation bales out after a while, whereas occurrence analysis does not.
+       --
+       -- This can occasionally mean that the guidance is very pessimistic;
+       -- it gets fixed up next round
+
+mkCompulsoryUnfolding expr     -- Used for things that absolutely must be unfolded
+  = CompulsoryUnfolding (occurAnalyseExpr expr)
+\end{code}
  
  
-                       Int     -- Scrutinee discount: the discount to substract if the thing is in
-                               -- a context (case (thing args) of ...),
-                               -- (where there are the right number of arguments.)
  
  
-seqGuidance (UnfoldIfGoodArgs n ns a b) = n `seq` sum ns `seq` a `seq` b `seq` ()
-seqGuidance other                      = ()
-\end{code}
+%************************************************************************
+%*                                                                     *
+\subsection{The UnfoldingGuidance type}
+%*                                                                     *
+%************************************************************************
  
  \begin{code}
  instance Outputable UnfoldingGuidance where
  
  \begin{code}
  instance Outputable UnfoldingGuidance where
-    ppr UnfoldAlways    = ptext SLIT("ALWAYS")
      ppr UnfoldNever    = ptext SLIT("NEVER")
      ppr (UnfoldIfGoodArgs v cs size discount)
      ppr UnfoldNever    = ptext SLIT("NEVER")
      ppr (UnfoldIfGoodArgs v cs size discount)
-      = hsep [ptext SLIT("IF_ARGS"), int v,
-              if null cs       -- always print *something*
-               then char 'X'
-               else hcat (map (text . show) cs),
+      = hsep [ ptext SLIT("IF_ARGS"), int v,
+              brackets (hsep (map int cs)),
                int size,
                int discount ]
  \end{code}
  
  
                int size,
                int discount ]
  \end{code}
  
  
-%************************************************************************
-%*                                                                     *
-\subsection[calcUnfoldingGuidance]{Calculate ``unfolding guidance'' for an expression}
-%*                                                                     *
-%************************************************************************
-
  \begin{code}
  calcUnfoldingGuidance
         :: Int                  -- bomb out if size gets bigger than this
         -> CoreExpr             -- expression to look at
         -> UnfoldingGuidance
  calcUnfoldingGuidance bOMB_OUT_SIZE expr
  \begin{code}
  calcUnfoldingGuidance
         :: Int                  -- bomb out if size gets bigger than this
         -> CoreExpr             -- expression to look at
         -> UnfoldingGuidance
  calcUnfoldingGuidance bOMB_OUT_SIZE expr
-  | exprIsTrivial expr         -- Often trivial expressions are never bound
-                               -- to an expression, but it can happen.  For
-                               -- example, the Id for a nullary constructor has
-                               -- a trivial expression as its unfolding, and
-                               -- we want to make sure that we always unfold it.
-  = UnfoldAlways
- 
-  | otherwise
-  = case collectBinders expr of { (binders, body) ->
+  = case collect_val_bndrs expr of { (inline, val_binders, body) ->
      let
      let
-       val_binders = filter isId binders
+       n_val_binders = length val_binders
+
+       max_inline_size = n_val_binders+2
+       -- The idea is that if there is an INLINE pragma (inline is True)
+       -- and there's a big body, we give a size of n_val_binders+2.  This
+       -- This is just enough to fail the no-size-increase test in callSiteInline,
+       --   so that INLINE things don't get inlined into entirely boring contexts,
+       --   but no more.
+
      in
      in
-    case (sizeExpr bOMB_OUT_SIZE val_binders body) of
+    case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of
  
  
-      TooBig -> UnfoldNever
+      TooBig 
+       | not inline -> UnfoldNever
+               -- A big function with an INLINE pragma must
+               -- have an UnfoldIfGoodArgs guidance
+       | otherwise  -> UnfoldIfGoodArgs n_val_binders
+                                        (map (const 0) val_binders)
+                                        max_inline_size 0
  
        SizeIs size cased_args scrut_discount
         -> UnfoldIfGoodArgs
  
        SizeIs size cased_args scrut_discount
         -> UnfoldIfGoodArgs
-                       (length val_binders)
+                       n_val_binders
                         (map discount_for val_binders)
                         (map discount_for val_binders)
-                       (I# size)
-                       (I# scrut_discount)
+                       final_size
+                       (iBox scrut_discount)
         where        
         where        
-           discount_for b 
-               | num_cases == 0 = 0
-               | is_fun_ty      = num_cases * opt_UF_FunAppDiscount
-               | is_data_ty     = num_cases * opt_UF_ScrutConDiscount
-               | otherwise      = num_cases * opt_UF_PrimArgDiscount
-               where
-                 num_cases           = foldlBag (\n b' -> if b==b' then n+1 else n) 0 cased_args
-                                       -- Count occurrences of b in cased_args
-                 arg_ty              = idType b
-                 is_fun_ty           = maybeToBool (splitFunTy_maybe arg_ty)
-                 (is_data_ty, tycon) = case (splitAlgTyConApp_maybe (idType b)) of
-                                         Nothing       -> (False, panic "discount")
-                                         Just (tc,_,_) -> (True,  tc)
+           boxed_size    = iBox size
+
+           final_size | inline     = boxed_size `min` max_inline_size
+                      | otherwise  = boxed_size
+
+               -- Sometimes an INLINE thing is smaller than n_val_binders+2.
+               -- A particular case in point is a constructor, which has size 1.
+               -- We want to inline this regardless, hence the `min`
+
+           discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc) 
+                                     0 cased_args
         }
         }
+  where
+    collect_val_bndrs e = go False [] e
+       -- We need to be a bit careful about how we collect the
+       -- value binders.  In ptic, if we see 
+       --      __inline_me (\x y -> e)
+       -- We want to say "2 value binders".  Why?  So that 
+       -- we take account of information given for the arguments
+
+    go inline rev_vbs (Note InlineMe e)     = go True   rev_vbs     e
+    go inline rev_vbs (Lam b e) | isId b    = go inline (b:rev_vbs) e
+                               | otherwise = go inline rev_vbs     e
+    go inline rev_vbs e                            = (inline, reverse rev_vbs, e)
  \end{code}
  
  \begin{code}
  \end{code}
  
  \begin{code}
-sizeExpr :: Int            -- Bomb out if it gets bigger than this
+sizeExpr :: Int#           -- Bomb out if it gets bigger than this
          -> [Id]            -- Arguments; we're interested in which of these
                             -- get case'd
          -> CoreExpr
          -> ExprSize
  
          -> [Id]            -- Arguments; we're interested in which of these
                             -- get case'd
          -> CoreExpr
          -> ExprSize
  
-sizeExpr (I# bOMB_OUT_SIZE) args expr
+sizeExpr bOMB_OUT_SIZE top_args expr
    = size_up expr
    where
      size_up (Type t)         = sizeZero        -- Types cost nothing
      size_up (Var v)           = sizeOne
  
    = size_up expr
    where
      size_up (Type t)         = sizeZero        -- Types cost nothing
      size_up (Var v)           = sizeOne
  
-    size_up (Note InlineMe _) = sizeTwo                -- The idea is that this is one more
-                                               -- than the size of the "call" (i.e. 1)
-                                               -- We want to reply "no" to noSizeIncrease
-                                               -- for a bare reference (i.e. applied to no args) 
-                                               -- to an INLINE thing
+    size_up (Note InlineMe body) = sizeOne     -- Inline notes make it look very small
+       -- This can be important.  If you have an instance decl like this:
+       --      instance Foo a => Foo [a] where
+       --         {-# INLINE op1, op2 #-}
+       --         op1 = ...
+       --         op2 = ...
+       -- then we'll get a dfun which is a pair of two INLINE lambdas
  
  
-    size_up (Note _ body)     = size_up body   -- Notes cost nothing
+    size_up (Note _        body) = size_up body        -- Other notes cost nothing
  
  
-    size_up (App fun (Type t))  = size_up fun
-    size_up (App fun arg)       = size_up_app fun [arg]
+    size_up (App fun (Type t)) = size_up fun
+    size_up (App fun arg)      = size_up_app fun [arg]
  
  
-    size_up (Con con args) = foldr (addSize . size_up) 
-                                  (size_up_con con args)
-                                  args
+    size_up (Lit lit)         = sizeN (litSize lit)
  
  
-    size_up (Lam b e) | isId b    = size_up e `addSizeN` 1
+    size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 1)
                       | otherwise = size_up e
  
      size_up (Let (NonRec binder rhs) body)
                       | otherwise = size_up e
  
      size_up (Let (NonRec binder rhs) body)
@@ -271,96 +216,182 @@ sizeExpr (I# bOMB_OUT_SIZE) args expr
        where
         rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
  
        where
         rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
  
-    size_up (Case scrut _ alts)
-      = nukeScrutDiscount (size_up scrut)              `addSize`
-       arg_discount scrut                              `addSize`
-       foldr (addSize . size_up_alt) sizeZero alts     
-         `addSizeN` 1  -- charge one for the case itself.
-
--- Just charge for the alts that exist, not the ones that might exist
---     `addSizeN`
---     case (splitAlgTyConApp_maybe (coreExprType scrut)) of
---             Nothing       -> 1
---             Just (tc,_,_) -> tyConFamilySize tc
+    size_up (Case (Var v) _ _ alts) 
+       | v `elem` top_args             -- We are scrutinising an argument variable
+       = 
+{-     I'm nuking this special case; BUT see the comment with case alternatives.
+
+       (a) It's too eager.  We don't want to inline a wrapper into a
+           context with no benefit.  
+           E.g.  \ x. f (x+x)          no point in inlining (+) here!
+
+       (b) It's ineffective. Once g's wrapper is inlined, its case-expressions 
+           aren't scrutinising arguments any more
+
+           case alts of
+
+               [alt] -> size_up_alt alt `addSize` SizeIs 0# (unitBag (v, 1)) 0#
+               -- We want to make wrapper-style evaluation look cheap, so that
+               -- when we inline a wrapper it doesn't make call site (much) bigger
+               -- Otherwise we get nasty phase ordering stuff: 
+               --      f x = g x x
+               --      h y = ...(f e)...
+               -- If we inline g's wrapper, f looks big, and doesn't get inlined
+               -- into h; if we inline f first, while it looks small, then g's 
+               -- wrapper will get inlined later anyway.  To avoid this nasty
+               -- ordering difference, we make (case a of (x,y) -> ...), 
+               --  *where a is one of the arguments* look free.
+
+               other -> 
+-}
+                        alts_size (foldr addSize sizeOne alt_sizes)    -- The 1 is for the scrutinee
+                                  (foldr1 maxSize alt_sizes)
+
+               -- Good to inline if an arg is scrutinised, because
+               -- that may eliminate allocation in the caller
+               -- And it eliminates the case itself
+
+       where
+         alt_sizes = map size_up_alt alts
+
+               -- alts_size tries to compute a good discount for
+               -- the case when we are scrutinising an argument variable
+         alts_size (SizeIs tot tot_disc tot_scrut)             -- Size of all alternatives
+                   (SizeIs max max_disc max_scrut)             -- Size of biggest alternative
+               = SizeIs tot (unitBag (v, iBox (_ILIT 1 +# tot -# max)) `unionBags` max_disc) max_scrut
+                       -- If the variable is known, we produce a discount that
+                       -- will take us back to 'max', the size of rh largest alternative
+                       -- The 1+ is a little discount for reduced allocation in the caller
+         alts_size tot_size _ = tot_size
+
+-- gaw 2004
+    size_up (Case e _ _ alts) = nukeScrutDiscount (size_up e) `addSize` 
+                                foldr (addSize . size_up_alt) sizeZero alts
+               -- We don't charge for the case itself
+               -- It's a strict thing, and the price of the call
+               -- is paid by scrut.  Also consider
+               --      case f x of DEFAULT -> e
+               -- This is just ';'!  Don't charge for it.
  
      ------------ 
  
      ------------ 
-    size_up_app (App fun arg) args   = size_up_app fun (arg:args)
-    size_up_app fun          args   = foldr (addSize . size_up) (fun_discount fun) args
+    size_up_app (App fun arg) args   
+       | isTypeArg arg              = size_up_app fun args
+       | otherwise                  = size_up_app fun (arg:args)
+    size_up_app fun          args   = foldr (addSize . nukeScrutDiscount . size_up) 
+                                            (size_up_fun fun args)
+                                            args
  
         -- A function application with at least one value argument
         -- so if the function is an argument give it an arg-discount
  
         -- A function application with at least one value argument
         -- so if the function is an argument give it an arg-discount
+       --
         -- Also behave specially if the function is a build
         -- Also behave specially if the function is a build
-    fun_discount (Var fun) | idUnique fun == buildIdKey   = buildSize
-                          | idUnique fun == augmentIdKey = augmentSize
-                          | fun `is_elem` args         = scrutArg fun
-    fun_discount other                                 = sizeZero
+       --
+       -- Also if the function is a constant Id (constr or primop)
+       -- compute discounts specially
+    size_up_fun (Var fun) args
+      | fun `hasKey` buildIdKey   = buildSize
+      | fun `hasKey` augmentIdKey = augmentSize
+      | otherwise 
+      = case globalIdDetails fun of
+         DataConWorkId dc -> conSizeN dc (valArgCount args)
+
+         FCallId fc   -> sizeN opt_UF_DearOp
+         PrimOpId op  -> primOpSize op (valArgCount args)
+                         -- foldr addSize (primOpSize op) (map arg_discount args)
+                         -- At one time I tried giving an arg-discount if a primop 
+                         -- is applied to one of the function's arguments, but it's
+                         -- not good.  At the moment, any unlifted-type arg gets a
+                         -- 'True' for 'yes I'm evald', so we collect the discount even
+                         -- if we know nothing about it.  And just having it in a primop
+                         -- doesn't help at all if we don't know something more.
+
+         other        -> fun_discount fun `addSizeN` 
+                         (1 + length (filter (not . exprIsTrivial) args))
+                               -- The 1+ is for the function itself
+                               -- Add 1 for each non-trivial arg;
+                               -- the allocation cost, as in let(rec)
+                               -- Slight hack here: for constructors the args are almost always
+                               --      trivial; and for primops they are almost always prim typed
+                               --      We should really only count for non-prim-typed args in the
+                               --      general case, but that seems too much like hard work
+
+    size_up_fun other args = size_up other
  
      ------------ 
      size_up_alt (con, bndrs, rhs) = size_up rhs
  
      ------------ 
      size_up_alt (con, bndrs, rhs) = size_up rhs
-           -- Don't charge for args, so that wrappers look cheap
+       -- Don't charge for args, so that wrappers look cheap
+       -- (See comments about wrappers with Case)
  
      ------------
  
      ------------
-    size_up_con (Literal lit) args | isNoRepLit lit = sizeN opt_UF_NoRepLit
-                                  | otherwise      = sizeOne
-
-    size_up_con (DataCon dc) args = conSizeN (valArgCount args)
-                            
-    size_up_con (PrimOp op) args = foldr addSize (sizeN op_cost) (map arg_discount args)
-               -- Give an arg-discount if a primop is applies to
-               -- one of the function's arguments
-      where
-       op_cost | primOpIsDupable op = opt_UF_CheapOp
-               | otherwise          = opt_UF_DearOp
-
         -- We want to record if we're case'ing, or applying, an argument
         -- We want to record if we're case'ing, or applying, an argument
-    arg_discount (Var v) | v `is_elem` args = scrutArg v
-    arg_discount other                     = sizeZero
-
-    ------------
-    is_elem :: Id -> [Id] -> Bool
-    is_elem = isIn "size_up_scrut"
+    fun_discount v | v `elem` top_args = SizeIs 0# (unitBag (v, opt_UF_FunAppDiscount)) 0#
+    fun_discount other                = sizeZero
  
      ------------
         -- These addSize things have to be here because
         -- I don't want to give them bOMB_OUT_SIZE as an argument
  
  
      ------------
         -- These addSize things have to be here because
         -- I don't want to give them bOMB_OUT_SIZE as an argument
  
-    addSizeN TooBig          _ = TooBig
-    addSizeN (SizeIs n xs d) (I# m)
-      | n_tot -# d <# bOMB_OUT_SIZE = SizeIs n_tot xs d
-      | otherwise                  = TooBig
-      where
-       n_tot = n +# m
+    addSizeN TooBig          _  = TooBig
+    addSizeN (SizeIs n xs d) m         = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
      
      
-    addSize TooBig _ = TooBig
-    addSize _ TooBig = TooBig
-    addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
-      | (n_tot -# d_tot) <# bOMB_OUT_SIZE = SizeIs n_tot xys d_tot
-      | otherwise                        = TooBig
-      where
-       n_tot = n1 +# n2
-       d_tot = d1 +# d2
-       xys   = xs `unionBags` ys
+    addSize TooBig           _                 = TooBig
+    addSize _                TooBig            = TooBig
+    addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2) 
+       = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2)
  \end{code}
  
  Code for manipulating sizes
  
  \begin{code}
  \end{code}
  
  Code for manipulating sizes
  
  \begin{code}
-
  data ExprSize = TooBig
  data ExprSize = TooBig
-             | SizeIs Int#     -- Size found
-                      (Bag Id) -- Arguments cased herein
-                      Int#     -- Size to subtract if result is scrutinised 
-                               -- by a case expression
-
-sizeZero       = SizeIs 0# emptyBag 0#
-sizeOne        = SizeIs 1# emptyBag 0#
-sizeTwo        = SizeIs 2# emptyBag 0#
-sizeN (I# n)   = SizeIs n  emptyBag 0#
-conSizeN (I# n) = SizeIs 1# emptyBag (n +# 1#)
-       -- Treat constructors as size 1, that unfoldAlways responsds 'False'
-       -- when asked about 'x' when x is bound to (C 3#).
-       -- This avoids gratuitous 'ticks' when x itself appears as an
-       -- atomic constructor argument.
+             | SizeIs FastInt          -- Size found
+                      (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
+                      FastInt          -- Size to subtract if result is scrutinised 
+                                       -- by a case expression
+
+-- subtract the discount before deciding whether to bale out. eg. we
+-- want to inline a large constructor application into a selector:
+--     tup = (a_1, ..., a_99)
+--     x = case tup of ...
+--
+mkSizeIs max n xs d | (n -# d) ># max = TooBig
+                   | otherwise       = SizeIs n xs d
+ 
+maxSize TooBig         _                                 = TooBig
+maxSize _              TooBig                            = TooBig
+maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2  = s1
+                                             | otherwise = s2
+
+sizeZero       = SizeIs (_ILIT 0)  emptyBag (_ILIT 0)
+sizeOne        = SizeIs (_ILIT 1)  emptyBag (_ILIT 0)
+sizeN n        = SizeIs (iUnbox n) emptyBag (_ILIT 0)
+conSizeN dc n   
+  | isUnboxedTupleCon dc = SizeIs (_ILIT 0) emptyBag (iUnbox n +# _ILIT 1)
+  | otherwise           = SizeIs (_ILIT 1) emptyBag (iUnbox n +# _ILIT 1)
+       -- Treat constructors as size 1; we are keen to expose them
+       -- (and we charge separately for their args).  We can't treat
+       -- them as size zero, else we find that (iBox x) has size 1,
+       -- which is the same as a lone variable; and hence 'v' will 
+       -- always be replaced by (iBox x), where v is bound to iBox x.
+       --
+       -- However, unboxed tuples count as size zero
+       -- I found occasions where we had 
+       --      f x y z = case op# x y z of { s -> (# s, () #) }
+       -- and f wasn't getting inlined
+
+primOpSize op n_args
+ | not (primOpIsDupable op) = sizeN opt_UF_DearOp
+ | not (primOpOutOfLine op) = sizeN (2 - n_args)
+       -- Be very keen to inline simple primops.
+       -- We give a discount of 1 for each arg so that (op# x y z) costs 2.
+       -- We can't make it cost 1, else we'll inline let v = (op# x y z) 
+       -- at every use of v, which is excessive.
+       --
+       -- A good example is:
+       --      let x = +# p q in C {x}
+       -- Even though x get's an occurrence of 'many', its RHS looks cheap,
+       -- and there's a good chance it'll get inlined back into C's RHS. Urgh!
+ | otherwise               = sizeOne
  
  buildSize = SizeIs (-2#) emptyBag 4#
         -- We really want to inline applications of build
  
  buildSize = SizeIs (-2#) emptyBag 4#
         -- We really want to inline applications of build
@@ -374,10 +405,12 @@ augmentSize = SizeIs (-2#) emptyBag 4#
         -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
         -- e plus ys. The -2 accounts for the \cn 
                                                 
         -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
         -- e plus ys. The -2 accounts for the \cn 
                                                 
-scrutArg v     = SizeIs 0# (unitBag v) 0#
-
  nukeScrutDiscount (SizeIs n vs d) = SizeIs n vs 0#
  nukeScrutDiscount TooBig         = TooBig
  nukeScrutDiscount (SizeIs n vs d) = SizeIs n vs 0#
  nukeScrutDiscount TooBig         = TooBig
+
+-- When we return a lambda, give a discount if it's used (applied)
+lamScrutDiscount  (SizeIs n vs d) = case opt_UF_FunAppDiscount of { d -> SizeIs n vs (iUnbox d) }
+lamScrutDiscount TooBig                  = TooBig
  \end{code}
  
  
  \end{code}
  
  
@@ -416,49 +449,19 @@ use'' on the other side.  Can be overridden w/ flaggery.
  Just the same as smallEnoughToInline, except that it has no actual arguments.
  
  \begin{code}
  Just the same as smallEnoughToInline, except that it has no actual arguments.
  
  \begin{code}
-couldBeSmallEnoughToInline :: UnfoldingGuidance -> Bool
-couldBeSmallEnoughToInline UnfoldNever = False
-couldBeSmallEnoughToInline other       = True
-
-certainlySmallEnoughToInline :: UnfoldingGuidance -> Bool
-certainlySmallEnoughToInline UnfoldNever                  = False
-certainlySmallEnoughToInline UnfoldAlways                 = True
-certainlySmallEnoughToInline (UnfoldIfGoodArgs _ _ size _) = size <= opt_UF_UseThreshold
+couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
+couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold rhs of
+                                               UnfoldNever -> False
+                                               other       -> True
+
+certainlyWillInline :: Unfolding -> Bool
+  -- Sees if the unfolding is pretty certain to inline 
+certainlyWillInline (CoreUnfolding _ _ _ is_cheap (UnfoldIfGoodArgs n_vals _ size _))
+  = is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
+certainlyWillInline other
+  = False
  \end{code}
  
  \end{code}
  
-@okToUnfoldInHifile@ is used when emitting unfolding info into an interface
-file to determine whether an unfolding candidate really should be unfolded.
-The predicate is needed to prevent @_casm_@s (+ lit-lits) from being emitted
-into interface files. 
-
-The reason for inlining expressions containing _casm_s into interface files
-is that these fragments of C are likely to mention functions/#defines that
-will be out-of-scope when inlined into another module. This is not an
-unfixable problem for the user (just need to -#include the approp. header
-file), but turning it off seems to the simplest thing to do.
-
-\begin{code}
-okToUnfoldInHiFile :: CoreExpr -> Bool
-okToUnfoldInHiFile e = opt_UnfoldCasms || go e
- where
-    -- Race over an expression looking for CCalls..
-    go (Var _)                = True
-    go (Con (Literal lit) _)  = not (isLitLitLit lit)
-    go (Con (PrimOp op) args) = okToUnfoldPrimOp op && all go args
-    go (Con con args)         = True -- con args are always atomic
-    go (App fun arg)          = go fun && go arg
-    go (Lam _ body)           = go body
-    go (Let binds body)       = and (map go (body :rhssOfBind binds))
-    go (Case scrut bndr alts) = and (map go (scrut:rhssOfAlts alts))
-    go (Note _ body)          = go body
-    go (Type _)                      = True
-
-    -- ok to unfold a PrimOp as long as it's not a _casm_
-    okToUnfoldPrimOp (CCallOp _ is_casm _ _) = not is_casm
-    okToUnfoldPrimOp _                       = True
-\end{code}
-
-
  %************************************************************************
  %*                                                                     *
  \subsection{callSiteInline}
  %************************************************************************
  %*                                                                     *
  \subsection{callSiteInline}
@@ -476,109 +479,120 @@ and occurs exactly once or
  If the thing is in WHNF, there's no danger of duplicating work, 
  so we can inline if it occurs once, or is small
  
  If the thing is in WHNF, there's no danger of duplicating work, 
  so we can inline if it occurs once, or is small
  
+NOTE: we don't want to inline top-level functions that always diverge.
+It just makes the code bigger.  Tt turns out that the convenient way to prevent
+them inlining is to give them a NOINLINE pragma, which we do in 
+StrictAnal.addStrictnessInfoToTopId
+
  \begin{code}
  \begin{code}
-callSiteInline :: Bool                 -- True <=> the Id is black listed
+callSiteInline :: DynFlags
+              -> Bool                  -- True <=> the Id can be inlined
                -> Bool                  -- 'inline' note at call site
                -> Bool                  -- 'inline' note at call site
+              -> OccInfo
                -> Id                    -- The Id
                -> [Bool]                -- One for each value arg; True if it is interesting
                -> Bool                  -- True <=> continuation is interesting
                -> Maybe CoreExpr        -- Unfolding, if any
  
  
                -> Id                    -- The Id
                -> [Bool]                -- One for each value arg; True if it is interesting
                -> Bool                  -- True <=> continuation is interesting
                -> Maybe CoreExpr        -- Unfolding, if any
  
  
-callSiteInline black_listed inline_call id arg_infos interesting_cont
-  = case getIdUnfolding id of {
+callSiteInline dflags active_inline inline_call occ id arg_infos interesting_cont
+  = case idUnfolding id of {
         NoUnfolding -> Nothing ;
         NoUnfolding -> Nothing ;
-       OtherCon _  -> Nothing ;
-       CoreUnfolding unf_template is_cheap _ guidance ->
+       OtherCon cs -> Nothing ;
+
+       CompulsoryUnfolding unf_template -> Just unf_template ;
+               -- CompulsoryUnfolding => there is no top-level binding
+               -- for these things, so we must inline it.
+               -- Only a couple of primop-like things have 
+               -- compulsory unfoldings (see MkId.lhs).
+               -- We don't allow them to be inactive
+
+       CoreUnfolding unf_template is_top is_value is_cheap guidance ->
  
      let
         result | yes_or_no = Just unf_template
                | otherwise = Nothing
  
  
      let
         result | yes_or_no = Just unf_template
                | otherwise = Nothing
  
-       inline_prag = getInlinePragma id
         n_val_args  = length arg_infos
  
         n_val_args  = length arg_infos
  
-       yes_or_no =
-           case inline_prag of
-               IAmDead           -> pprTrace "callSiteInline: dead" (ppr id) False
-               IMustNotBeINLINEd -> False
-               IAmALoopBreaker   -> False
-               IMustBeINLINEd    -> True       -- Overrides absolutely everything, including the black list
-               ICanSafelyBeINLINEd in_lam one_br -> consider in_lam    True  one_br
-               NoInlinePragInfo                  -> consider InsideLam False False
-
-       consider in_lam once once_in_one_branch
-         | black_listed = False
+       yes_or_no 
+         | not active_inline = False
+         | otherwise = case occ of
+                               IAmDead              -> pprTrace "callSiteInline: dead" (ppr id) False
+                               IAmALoopBreaker      -> False
+                               OneOcc in_lam _ _    -> (not in_lam || is_cheap) && consider_safe True
+                               other                -> is_cheap && consider_safe False
+               -- we consider even the once-in-one-branch
+               -- occurrences, because they won't all have been
+               -- caught by preInlineUnconditionally.  In particular,
+               -- if the occurrence is once inside a lambda, and the
+               -- rhs is cheap but not a manifest lambda, then
+               -- pre-inline will not have inlined it for fear of
+               -- invalidating the occurrence info in the rhs.
+
+       consider_safe once
+               -- consider_safe decides whether it's a good idea to
+               -- inline something, given that there's no
+               -- work-duplication issue (the caller checks that).
           | inline_call  = True
           | inline_call  = True
-         | once_in_one_branch  -- Be very keen to inline something if this is its unique occurrence; that
-                               -- gives a good chance of eliminating the original binding for the thing.
-                               -- The only time we hold back is when substituting inside a lambda;
-                               -- then if the context is totally uninteresting (not applied, not scrutinised)
-                               -- there is no point in substituting because it might just increase allocation.
-         = WARN( case in_lam of { NotInsideLam -> True; other -> False },
-                 text "callSiteInline:oneOcc" <+> ppr id )
-               -- If it has one occurrence, not inside a lambda, PreInlineUnconditionally
-               -- should have zapped it already
-           is_cheap && (not (null arg_infos) || interesting_cont)
-
-         | otherwise   -- Occurs (textually) more than once, so look at its size
+
+         | otherwise
           = case guidance of
           = case guidance of
-             UnfoldAlways -> True
               UnfoldNever  -> False
               UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount
               UnfoldNever  -> False
               UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount
-               | enough_args && size <= (n_vals_wanted + 1)
-                       -- No size increase
+
+                 | enough_args && size <= (n_vals_wanted + 1)
+                       -- Inline unconditionally if there no size increase
                         -- Size of call is n_vals_wanted (+1 for the function)
                         -- Size of call is n_vals_wanted (+1 for the function)
-               -> case in_lam of
-                       NotInsideLam -> True
-                       InsideLam    -> is_cheap
-
-               | not (or arg_infos || really_interesting_cont || once)
-                       -- If it occurs more than once, there must be something interesting 
-                       -- about some argument, or the result, to make it worth inlining
-                       -- We also drop this case if the thing occurs once, although perhaps in 
-                       -- several branches.  In this case we are keener about inlining in the hope
-                       -- that we'll be able to drop the allocation for the function altogether.
-               -> False
-  
-               | otherwise
-               -> case in_lam of
-                       NotInsideLam -> small_enough
-                       InsideLam    -> is_cheap && small_enough
-
-               where
-                 enough_args             = n_val_args >= n_vals_wanted
-                 really_interesting_cont | n_val_args <  n_vals_wanted = False -- Too few args
-                                         | n_val_args == n_vals_wanted = interesting_cont
-                                         | otherwise                   = True  -- Extra args
-                       -- This rather elaborate defn for really_interesting_cont is important
-                       -- Consider an I# = INLINE (\x -> I# {x})
-                       -- The unfolding guidance deems it to have size 2, and no arguments.
-                       -- So in an application (I# y) we must take the extra arg 'y' as
-                       -- evidence of an interesting context!
-                       
-                 small_enough = (size - discount) <= opt_UF_UseThreshold
-                 discount     = computeDiscount n_vals_wanted arg_discounts res_discount 
+                 -> True
+
+                 | otherwise
+                 -> some_benefit && small_enough
+
+                 where
+                   some_benefit = or arg_infos || really_interesting_cont || 
+                                  (not is_top && (once || (n_vals_wanted > 0 && enough_args)))
+               -- If it occurs more than once, there must be
+               -- something interesting about some argument, or the
+               -- result context, to make it worth inlining
+               --
+               -- If a function has a nested defn we also record
+               -- some-benefit, on the grounds that we are often able
+               -- to eliminate the binding, and hence the allocation,
+               -- for the function altogether; this is good for join
+               -- points.  But this only makes sense for *functions*;
+               -- inlining a constructor doesn't help allocation
+               -- unless the result is scrutinised.  UNLESS the
+               -- constructor occurs just once, albeit possibly in
+               -- multiple case branches.  Then inlining it doesn't
+               -- increase allocation, but it does increase the
+               -- chance that the constructor won't be allocated at
+               -- all in the branches that don't use it.
+
+                   enough_args           = n_val_args >= n_vals_wanted
+                   really_interesting_cont | n_val_args <  n_vals_wanted = False       -- Too few args
+                                           | n_val_args == n_vals_wanted = interesting_cont
+                                           | otherwise                   = True        -- Extra args
+                       -- really_interesting_cont tells if the result of the
+                       -- call is in an interesting context.
+
+                   small_enough = (size - discount) <= opt_UF_UseThreshold
+                   discount     = computeDiscount n_vals_wanted arg_discounts res_discount 
                                                  arg_infos really_interesting_cont
                                                  arg_infos really_interesting_cont
-
-                               
+               
      in    
      in    
-#ifdef DEBUG
-    if opt_D_dump_inlinings then
+    if dopt Opt_D_dump_inlinings dflags then
         pprTrace "Considering inlining"
         pprTrace "Considering inlining"
-                (ppr id <+> vcat [text "black listed" <+> ppr black_listed,
-                                  text "inline prag:" <+> ppr inline_prag,
+                (ppr id <+> vcat [text "active:" <+> ppr active_inline,
+                                  text "occ info:" <+> ppr occ,
                                    text "arg infos" <+> ppr arg_infos,
                                    text "interesting continuation" <+> ppr interesting_cont,
                                    text "arg infos" <+> ppr arg_infos,
                                    text "interesting continuation" <+> ppr interesting_cont,
-                                  text "is cheap" <+> ppr is_cheap,
+                                  text "is value:" <+> ppr is_value,
+                                  text "is cheap:" <+> ppr is_cheap,
                                    text "guidance" <+> ppr guidance,
                                    text "guidance" <+> ppr guidance,
-                                  text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO",
-                                  if yes_or_no then
-                                       text "Unfolding =" <+> pprCoreExpr unf_template
-                                  else empty])
+                                  text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"])
                   result
      else
                   result
      else
-#endif
      result
      }
  
      result
      }
  
@@ -586,18 +600,20 @@ computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Bool -> Int
  computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
         -- We multiple the raw discounts (args_discount and result_discount)
         -- ty opt_UnfoldingKeenessFactor because the former have to do with
  computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
         -- We multiple the raw discounts (args_discount and result_discount)
         -- ty opt_UnfoldingKeenessFactor because the former have to do with
-       -- *size* whereas the discounts imply that there's some extra 
-       -- *efficiency* to be gained (e.g. beta reductions, case reductions) 
+       --  *size* whereas the discounts imply that there's some extra 
+       --  *efficiency* to be gained (e.g. beta reductions, case reductions) 
         -- by inlining.
  
         -- we also discount 1 for each argument passed, because these will
         -- reduce with the lambdas in the function (we count 1 for a lambda
         -- in size_up).
         -- by inlining.
  
         -- we also discount 1 for each argument passed, because these will
         -- reduce with the lambdas in the function (we count 1 for a lambda
         -- in size_up).
-  = length (take n_vals_wanted arg_infos) +
+  = 1 +                        -- Discount of 1 because the result replaces the call
+                       -- so we count 1 for the function itself
+    length (take n_vals_wanted arg_infos) +
                         -- Discount of 1 for each arg supplied, because the 
                         -- result replaces the call
      round (opt_UF_KeenessFactor * 
                         -- Discount of 1 for each arg supplied, because the 
                         -- result replaces the call
      round (opt_UF_KeenessFactor * 
-          fromInt (arg_discount + result_discount))
+          fromIntegral (arg_discount + result_discount))
    where
      arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
  
    where
      arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
  
@@ -608,84 +624,3 @@ computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
      result_discount | result_used = res_discount       -- Over-applied, or case scrut
                     | otherwise   = 0
  \end{code}
      result_discount | result_used = res_discount       -- Over-applied, or case scrut
                     | otherwise   = 0
  \end{code}
-
-
-%************************************************************************
-%*                                                                     *
-\subsection{Black-listing}
-%*                                                                     *
-%************************************************************************
-
-Inlining is controlled by the "Inline phase" number, which is set
-by the per-simplification-pass '-finline-phase' flag.
-
-For optimisation we use phase 1,2 and nothing (i.e. no -finline-phase flag)
-in that order.  The meanings of these are determined by the @blackListed@ function
-here.
-
-\begin{code}
-blackListed :: IdSet           -- Used in transformation rules
-           -> Maybe Int        -- Inline phase
-           -> Id -> Bool       -- True <=> blacklisted
-       
--- The blackListed function sees whether a variable should *not* be 
--- inlined because of the inline phase we are in.  This is the sole
--- place that the inline phase number is looked at.
-
--- Phase 0: used for 'no imported inlinings please'
--- This prevents wrappers getting inlined which in turn is bad for full laziness
-blackListed rule_vars (Just 0)
-  = \v -> not (isLocallyDefined v)
-
--- Phase 1: don't inline any rule-y things or things with specialisations
-blackListed rule_vars (Just 1)
-  = \v -> let v_uniq = idUnique v
-         in v `elemVarSet` rule_vars
-         || not (isEmptyCoreRules (getIdSpecialisation v))
-         || v_uniq == runSTRepIdKey
-
--- Phase 2: allow build/augment to inline, and specialisations
-blackListed rule_vars (Just 2)
-  = \v -> let v_uniq = idUnique v
-         in (v `elemVarSet` rule_vars && not (v_uniq == buildIdKey || 
-                                              v_uniq == augmentIdKey))
-         || v_uniq == runSTRepIdKey
-
--- Otherwise just go for it
-blackListed rule_vars phase
-  = \v -> False
-\end{code}
-
-
-SLPJ 95/04: Why @runST@ must be inlined very late:
-\begin{verbatim}
-f x =
-  runST ( \ s -> let
-                   (a, s')  = newArray# 100 [] s
-                   (_, s'') = fill_in_array_or_something a x s'
-                 in
-                 freezeArray# a s'' )
-\end{verbatim}
-If we inline @runST@, we'll get:
-\begin{verbatim}
-f x = let
-       (a, s')  = newArray# 100 [] realWorld#{-NB-}
-       (_, s'') = fill_in_array_or_something a x s'
-      in
-      freezeArray# a s''
-\end{verbatim}
-And now the @newArray#@ binding can be floated to become a CAF, which
-is totally and utterly wrong:
-\begin{verbatim}
-f = let
-    (a, s')  = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
-    in
-    \ x ->
-       let (_, s'') = fill_in_array_or_something a x s' in
-       freezeArray# a s''
-\end{verbatim}
-All calls to @f@ will share a {\em single} array!  
-
-Yet we do want to inline runST sometime, so we can avoid
-needless code.  Solution: black list it until the last moment.
-