[project @ 2004-12-22 12:06:13 by simonpj]

[ghc-hetmet.git] / ghc / compiler / coreSyn / CoreUnfold.lhs
diff --git a/ghc/compiler/coreSyn/CoreUnfold.lhs b/ghc/compiler/coreSyn/CoreUnfold.lhs

index 44fe5a7..cc664f1 100644 (file)
--- a/ghc/compiler/coreSyn/CoreUnfold.lhs
+++ b/ghc/compiler/coreSyn/CoreUnfold.lhs
@@ -14,226 +14,202 @@ find, unsurprisingly, a Core expression.
  
  \begin{code}
  module CoreUnfold (
  
  \begin{code}
  module CoreUnfold (
-       Unfolding(..), UnfoldingGuidance, -- types
+       Unfolding, UnfoldingGuidance,   -- Abstract types
  
  
-       noUnfolding, mkUnfolding, getUnfoldingTemplate,
-       isEvaldUnfolding, hasUnfolding,
+       noUnfolding, mkTopUnfolding, mkUnfolding, mkCompulsoryUnfolding, seqUnfolding,
+       mkOtherCon, otherCons,
+       unfoldingTemplate, maybeUnfoldingTemplate,
+       isEvaldUnfolding, isValueUnfolding, isCheapUnfolding, isCompulsoryUnfolding,
+       hasUnfolding, hasSomeUnfolding, neverUnfold,
  
         couldBeSmallEnoughToInline, 
  
         couldBeSmallEnoughToInline, 
-       certainlySmallEnoughToInline, 
-       okToUnfoldInHiFile,
+       certainlyWillInline, 
  
  
-       calcUnfoldingGuidance,
-
-       callSiteInline, blackListed
+       callSiteInline
      ) where
  
  #include "HsVersions.h"
  
  import CmdLineOpts     ( opt_UF_CreationThreshold,
                           opt_UF_UseThreshold,
      ) where
  
  #include "HsVersions.h"
  
  import CmdLineOpts     ( opt_UF_CreationThreshold,
                           opt_UF_UseThreshold,
-                         opt_UF_ScrutConDiscount,
                           opt_UF_FunAppDiscount,
                           opt_UF_FunAppDiscount,
-                         opt_UF_PrimArgDiscount,
-                         opt_UF_KeenessFactor,
-                         opt_UF_CheapOp, opt_UF_DearOp, opt_UF_NoRepLit,
-                         opt_UnfoldCasms, opt_PprStyle_Debug,
-                         opt_D_dump_inlinings
+                         opt_UF_KeenessFactor,
+                         opt_UF_DearOp,
+                         DynFlags, DynFlag(..), dopt
                         )
  import CoreSyn
  import PprCore         ( pprCoreExpr )
                         )
  import CoreSyn
  import PprCore         ( pprCoreExpr )
-import CoreUtils       ( whnfOrBottom )
  import OccurAnal       ( occurAnalyseGlobalExpr )
  import OccurAnal       ( occurAnalyseGlobalExpr )
-import BinderInfo      ( )
-import CoreUtils       ( coreExprType, exprIsTrivial, mkFormSummary, 
-                         FormSummary(..) )
-import Id              ( Id, idType, idUnique, isId, 
-                         getIdSpecialisation, getInlinePragma, getIdUnfolding
+import CoreUtils       ( exprIsValue, exprIsCheap, exprIsTrivial )
+import Id              ( Id, idType, isId,
+                         idUnfolding, globalIdDetails
                         )
                         )
-import VarSet
-import Const           ( Con(..), isLitLitLit, isWHNFCon )
-import PrimOp          ( PrimOp(..), primOpIsDupable )
-import IdInfo          ( ArityInfo(..), InlinePragInfo(..), OccInfo(..) )
-import TyCon           ( tyConFamilySize )
-import Type            ( splitAlgTyConApp_maybe, splitFunTy_maybe )
-import Const           ( isNoRepLit )
-import Unique          ( Unique, buildIdKey, augmentIdKey, runSTRepIdKey )
-import Maybes          ( maybeToBool )
+import DataCon         ( isUnboxedTupleCon )
+import Literal         ( litSize )
+import PrimOp          ( primOpIsDupable, primOpOutOfLine )
+import IdInfo          ( OccInfo(..), GlobalIdDetails(..) )
+import Type            ( isUnLiftedType )
+import PrelNames       ( hasKey, buildIdKey, augmentIdKey )
  import Bag
  import Bag
-import Util            ( isIn, lengthExceeds )
+import FastTypes
  import Outputable
  import Outputable
+import Util
+
+#if __GLASGOW_HASKELL__ >= 404
+import GLAEXTS         ( Int# )
+#endif
  \end{code}
  
  \end{code}
  
+
  %************************************************************************
  %*                                                                     *
  %************************************************************************
  %*                                                                     *
-\subsection{@Unfolding@ and @UnfoldingGuidance@ types}
+\subsection{Making unfoldings}
  %*                                                                     *
  %************************************************************************
  
  \begin{code}
  %*                                                                     *
  %************************************************************************
  
  \begin{code}
-data Unfolding
-  = NoUnfolding
-
-  | OtherCon [Con]             -- It ain't one of these
-                               -- (OtherCon xs) also indicates that something has been evaluated
-                               -- and hence there's no point in re-evaluating it.
-                               -- OtherCon [] is used even for non-data-type values
-                               -- to indicated evaluated-ness.  Notably:
-                               --      data C = C !(Int -> Int)
-                               --      case x of { C f -> ... }
-                               -- Here, f gets an OtherCon [] unfolding.
-
-  | CoreUnfolding                      -- An unfolding with redundant cached information
-               FormSummary             -- Tells whether the template is a WHNF or bottom
-               UnfoldingGuidance       -- Tells about the *size* of the template.
-               CoreExpr                -- Template; binder-info is correct
+mkTopUnfolding expr = mkUnfolding True {- Top level -} expr
+
+mkUnfolding top_lvl expr
+  = CoreUnfolding (occurAnalyseGlobalExpr expr)
+                 top_lvl
+
+                 (exprIsValue expr)
+                       -- Already evaluated
+
+                 (exprIsCheap expr)
+                       -- OK to inline inside a lambda
+
+                 (calcUnfoldingGuidance opt_UF_CreationThreshold expr)
+       -- Sometimes during simplification, there's a large let-bound thing     
+       -- which has been substituted, and so is now dead; so 'expr' contains
+       -- two copies of the thing while the occurrence-analysed expression doesn't
+       -- Nevertheless, we don't occ-analyse before computing the size because the
+       -- size computation bales out after a while, whereas occurrence analysis does not.
+       --
+       -- This can occasionally mean that the guidance is very pessimistic;
+       -- it gets fixed up next round
+
+mkCompulsoryUnfolding expr     -- Used for things that absolutely must be unfolded
+  = CompulsoryUnfolding (occurAnalyseGlobalExpr expr)
  \end{code}
  
  \end{code}
  
-\begin{code}
-noUnfolding = NoUnfolding
-
-mkUnfolding expr
-  = let
-     -- strictness mangling (depends on there being no CSE)
-     ufg = calcUnfoldingGuidance opt_UF_CreationThreshold expr
-     occ = occurAnalyseGlobalExpr expr
-    in
-    CoreUnfolding (mkFormSummary expr) ufg occ
-
-getUnfoldingTemplate :: Unfolding -> CoreExpr
-getUnfoldingTemplate (CoreUnfolding _ _ expr) = expr
-getUnfoldingTemplate other = panic "getUnfoldingTemplate"
-
-isEvaldUnfolding :: Unfolding -> Bool
-isEvaldUnfolding (OtherCon _)                    = True
-isEvaldUnfolding (CoreUnfolding ValueForm _ expr) = True
-isEvaldUnfolding other                           = False
-
-hasUnfolding :: Unfolding -> Bool
-hasUnfolding NoUnfolding = False
-hasUnfolding other      = True
  
  
-data UnfoldingGuidance
-  = UnfoldNever
-  | UnfoldAlways               -- There is no "original" definition,
-                               -- so you'd better unfold.  Or: something
-                               -- so cheap to unfold (e.g., 1#) that
-                               -- you should do it absolutely always.
-
-  | UnfoldIfGoodArgs   Int     -- and "n" value args
-
-                       [Int]   -- Discount if the argument is evaluated.
-                               -- (i.e., a simplification will definitely
-                               -- be possible).  One elt of the list per *value* arg.
-
-                       Int     -- The "size" of the unfolding; to be elaborated
-                               -- later. ToDo
-
-                       Int     -- Scrutinee discount: the discount to substract if the thing is in
-                               -- a context (case (thing args) of ...),
-                               -- (where there are the right number of arguments.)
-\end{code}
+%************************************************************************
+%*                                                                     *
+\subsection{The UnfoldingGuidance type}
+%*                                                                     *
+%************************************************************************
  
  \begin{code}
  instance Outputable UnfoldingGuidance where
  
  \begin{code}
  instance Outputable UnfoldingGuidance where
-    ppr UnfoldAlways    = ptext SLIT("ALWAYS")
      ppr UnfoldNever    = ptext SLIT("NEVER")
      ppr (UnfoldIfGoodArgs v cs size discount)
      ppr UnfoldNever    = ptext SLIT("NEVER")
      ppr (UnfoldIfGoodArgs v cs size discount)
-      = hsep [ptext SLIT("IF_ARGS"), int v,
-              if null cs       -- always print *something*
-               then char 'X'
-               else hcat (map (text . show) cs),
+      = hsep [ ptext SLIT("IF_ARGS"), int v,
+              brackets (hsep (map int cs)),
                int size,
                int discount ]
  \end{code}
  
  
                int size,
                int discount ]
  \end{code}
  
  
-%************************************************************************
-%*                                                                     *
-\subsection[calcUnfoldingGuidance]{Calculate ``unfolding guidance'' for an expression}
-%*                                                                     *
-%************************************************************************
-
  \begin{code}
  calcUnfoldingGuidance
         :: Int                  -- bomb out if size gets bigger than this
         -> CoreExpr             -- expression to look at
         -> UnfoldingGuidance
  calcUnfoldingGuidance bOMB_OUT_SIZE expr
  \begin{code}
  calcUnfoldingGuidance
         :: Int                  -- bomb out if size gets bigger than this
         -> CoreExpr             -- expression to look at
         -> UnfoldingGuidance
  calcUnfoldingGuidance bOMB_OUT_SIZE expr
-  | exprIsTrivial expr         -- Often trivial expressions are never bound
-                               -- to an expression, but it can happen.  For
-                               -- example, the Id for a nullary constructor has
-                               -- a trivial expression as its unfolding, and
-                               -- we want to make sure that we always unfold it.
-  = UnfoldAlways
- 
-  | otherwise
-  = case collectBinders expr of { (binders, body) ->
+  = case collect_val_bndrs expr of { (inline, val_binders, body) ->
      let
      let
-       val_binders = filter isId binders
+       n_val_binders = length val_binders
+
+       max_inline_size = n_val_binders+2
+       -- The idea is that if there is an INLINE pragma (inline is True)
+       -- and there's a big body, we give a size of n_val_binders+2.  This
+       -- This is just enough to fail the no-size-increase test in callSiteInline,
+       --   so that INLINE things don't get inlined into entirely boring contexts,
+       --   but no more.
+
      in
      in
-    case (sizeExpr bOMB_OUT_SIZE val_binders body) of
+    case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_binders body) of
  
  
-      TooBig -> UnfoldNever
+      TooBig 
+       | not inline -> UnfoldNever
+               -- A big function with an INLINE pragma must
+               -- have an UnfoldIfGoodArgs guidance
+       | otherwise  -> UnfoldIfGoodArgs n_val_binders
+                                        (map (const 0) val_binders)
+                                        max_inline_size 0
  
        SizeIs size cased_args scrut_discount
         -> UnfoldIfGoodArgs
  
        SizeIs size cased_args scrut_discount
         -> UnfoldIfGoodArgs
-                       (length val_binders)
+                       n_val_binders
                         (map discount_for val_binders)
                         (map discount_for val_binders)
-                       (I# size)
-                       (I# scrut_discount)
+                       final_size
+                       (iBox scrut_discount)
         where        
         where        
-           discount_for b 
-               | num_cases == 0 = 0
-               | is_fun_ty      = num_cases * opt_UF_FunAppDiscount
-               | is_data_ty     = num_cases * tyConFamilySize tycon * opt_UF_ScrutConDiscount
-               | otherwise      = num_cases * opt_UF_PrimArgDiscount
-               where
-                 num_cases           = foldlBag (\n b' -> if b==b' then n+1 else n) 0 cased_args
-                                       -- Count occurrences of b in cased_args
-                 arg_ty              = idType b
-                 is_fun_ty           = maybeToBool (splitFunTy_maybe arg_ty)
-                 (is_data_ty, tycon) = case (splitAlgTyConApp_maybe (idType b)) of
-                                         Nothing       -> (False, panic "discount")
-                                         Just (tc,_,_) -> (True,  tc)
+           boxed_size    = iBox size
+
+           final_size | inline     = boxed_size `min` max_inline_size
+                      | otherwise  = boxed_size
+
+               -- Sometimes an INLINE thing is smaller than n_val_binders+2.
+               -- A particular case in point is a constructor, which has size 1.
+               -- We want to inline this regardless, hence the `min`
+
+           discount_for b = foldlBag (\acc (b',n) -> if b==b' then acc+n else acc) 
+                                     0 cased_args
         }
         }
+  where
+    collect_val_bndrs e = go False [] e
+       -- We need to be a bit careful about how we collect the
+       -- value binders.  In ptic, if we see 
+       --      __inline_me (\x y -> e)
+       -- We want to say "2 value binders".  Why?  So that 
+       -- we take account of information given for the arguments
+
+    go inline rev_vbs (Note InlineMe e)     = go True   rev_vbs     e
+    go inline rev_vbs (Lam b e) | isId b    = go inline (b:rev_vbs) e
+                               | otherwise = go inline rev_vbs     e
+    go inline rev_vbs e                            = (inline, reverse rev_vbs, e)
  \end{code}
  
  \begin{code}
  \end{code}
  
  \begin{code}
-sizeExpr :: Int            -- Bomb out if it gets bigger than this
+sizeExpr :: Int#           -- Bomb out if it gets bigger than this
          -> [Id]            -- Arguments; we're interested in which of these
                             -- get case'd
          -> CoreExpr
          -> ExprSize
  
          -> [Id]            -- Arguments; we're interested in which of these
                             -- get case'd
          -> CoreExpr
          -> ExprSize
  
-sizeExpr (I# bOMB_OUT_SIZE) args expr
+sizeExpr bOMB_OUT_SIZE top_args expr
    = size_up expr
    where
      size_up (Type t)         = sizeZero        -- Types cost nothing
      size_up (Var v)           = sizeOne
  
    = size_up expr
    where
      size_up (Type t)         = sizeZero        -- Types cost nothing
      size_up (Var v)           = sizeOne
  
-    size_up (Note InlineMe _) = sizeTwo                -- The idea is that this is one more
-                                               -- than the size of the "call" (i.e. 1)
-                                               -- We want to reply "no" to noSizeIncrease
-                                               -- for a bare reference (i.e. applied to no args) 
-                                               -- to an INLINE thing
+    size_up (Note InlineMe body) = sizeOne     -- Inline notes make it look very small
+       -- This can be important.  If you have an instance decl like this:
+       --      instance Foo a => Foo [a] where
+       --         {-# INLINE op1, op2 #-}
+       --         op1 = ...
+       --         op2 = ...
+       -- then we'll get a dfun which is a pair of two INLINE lambdas
  
  
-    size_up (Note _ body)     = size_up body   -- Notes cost nothing
+    size_up (Note _        body) = size_up body        -- Other notes cost nothing
  
      size_up (App fun (Type t)) = size_up fun
  
      size_up (App fun (Type t)) = size_up fun
-    size_up (App fun arg)      = size_up_app fun `addSize` size_up arg
+    size_up (App fun arg)      = size_up_app fun [arg]
  
  
-    size_up (Con con args) = foldr (addSize . size_up) 
-                                  (size_up_con con args)
-                                  args
+    size_up (Lit lit)         = sizeN (litSize lit)
  
  
-    size_up (Lam b e) | isId b    = size_up e `addSizeN` 1
+    size_up (Lam b e) | isId b    = lamScrutDiscount (size_up e `addSizeN` 1)
                       | otherwise = size_up e
  
      size_up (Let (NonRec binder rhs) body)
        = nukeScrutDiscount (size_up rhs)                `addSize`
         size_up body                            `addSizeN`
                       | otherwise = size_up e
  
      size_up (Let (NonRec binder rhs) body)
        = nukeScrutDiscount (size_up rhs)                `addSize`
         size_up body                            `addSizeN`
-       1       -- For the allocation
+       (if isUnLiftedType (idType binder) then 0 else 1)
+               -- For the allocation
+               -- If the binder has an unlifted type there is no allocation
  
      size_up (Let (Rec pairs) body)
        = nukeScrutDiscount rhs_size             `addSize`
  
      size_up (Let (Rec pairs) body)
        = nukeScrutDiscount rhs_size             `addSize`
@@ -242,91 +218,201 @@ sizeExpr (I# bOMB_OUT_SIZE) args expr
        where
         rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
  
        where
         rhs_size = foldr (addSize . size_up . snd) sizeZero pairs
  
-    size_up (Case scrut _ alts)
-      = nukeScrutDiscount (size_up scrut)              `addSize`
-       arg_discount scrut                              `addSize`
-       foldr (addSize . size_up_alt) sizeZero alts     `addSizeN`
-       case (splitAlgTyConApp_maybe (coreExprType scrut)) of
-               Nothing       -> 1
-               Just (tc,_,_) -> tyConFamilySize tc
+    size_up (Case (Var v) _ _ alts) 
+       | v `elem` top_args             -- We are scrutinising an argument variable
+       = 
+{-     I'm nuking this special case; BUT see the comment with case alternatives.
+
+       (a) It's too eager.  We don't want to inline a wrapper into a
+           context with no benefit.  
+           E.g.  \ x. f (x+x)          no point in inlining (+) here!
+
+       (b) It's ineffective. Once g's wrapper is inlined, its case-expressions 
+           aren't scrutinising arguments any more
+
+           case alts of
+
+               [alt] -> size_up_alt alt `addSize` SizeIs 0# (unitBag (v, 1)) 0#
+               -- We want to make wrapper-style evaluation look cheap, so that
+               -- when we inline a wrapper it doesn't make call site (much) bigger
+               -- Otherwise we get nasty phase ordering stuff: 
+               --      f x = g x x
+               --      h y = ...(f e)...
+               -- If we inline g's wrapper, f looks big, and doesn't get inlined
+               -- into h; if we inline f first, while it looks small, then g's 
+               -- wrapper will get inlined later anyway.  To avoid this nasty
+               -- ordering difference, we make (case a of (x,y) -> ...), 
+               -- *where a is one of the arguments* look free.
+
+               other -> 
+-}
+                        alts_size (foldr addSize sizeOne alt_sizes)    -- The 1 is for the scrutinee
+                                  (foldr1 maxSize alt_sizes)
+
+               -- Good to inline if an arg is scrutinised, because
+               -- that may eliminate allocation in the caller
+               -- And it eliminates the case itself
+
+       where
+         alt_sizes = map size_up_alt alts
+
+               -- alts_size tries to compute a good discount for
+               -- the case when we are scrutinising an argument variable
+         alts_size (SizeIs tot tot_disc tot_scrut)             -- Size of all alternatives
+                   (SizeIs max max_disc max_scrut)             -- Size of biggest alternative
+               = SizeIs tot (unitBag (v, iBox (_ILIT 1 +# tot -# max)) `unionBags` max_disc) max_scrut
+                       -- If the variable is known, we produce a discount that
+                       -- will take us back to 'max', the size of rh largest alternative
+                       -- The 1+ is a little discount for reduced allocation in the caller
+         alts_size tot_size _ = tot_size
+
+-- gaw 2004
+    size_up (Case e _ _ alts) = nukeScrutDiscount (size_up e) `addSize` 
+                                foldr (addSize . size_up_alt) sizeZero alts
+               -- We don't charge for the case itself
+               -- It's a strict thing, and the price of the call
+               -- is paid by scrut.  Also consider
+               --      case f x of DEFAULT -> e
+               -- This is just ';'!  Don't charge for it.
  
      ------------ 
  
      ------------ 
+    size_up_app (App fun arg) args   
+       | isTypeArg arg              = size_up_app fun args
+       | otherwise                  = size_up_app fun (arg:args)
+    size_up_app fun          args   = foldr (addSize . nukeScrutDiscount . size_up) 
+                                            (size_up_fun fun args)
+                                            args
+
         -- A function application with at least one value argument
         -- so if the function is an argument give it an arg-discount
         -- A function application with at least one value argument
         -- so if the function is an argument give it an arg-discount
-    size_up_app (App fun arg) = size_up_app fun  `addSize` size_up arg
-    size_up_app fun          = arg_discount fun `addSize` size_up fun
+       --
+       -- Also behave specially if the function is a build
+       --
+       -- Also if the function is a constant Id (constr or primop)
+       -- compute discounts specially
+    size_up_fun (Var fun) args
+      | fun `hasKey` buildIdKey   = buildSize
+      | fun `hasKey` augmentIdKey = augmentSize
+      | otherwise 
+      = case globalIdDetails fun of
+         DataConWorkId dc -> conSizeN dc (valArgCount args)
+
+         FCallId fc   -> sizeN opt_UF_DearOp
+         PrimOpId op  -> primOpSize op (valArgCount args)
+                         -- foldr addSize (primOpSize op) (map arg_discount args)
+                         -- At one time I tried giving an arg-discount if a primop 
+                         -- is applied to one of the function's arguments, but it's
+                         -- not good.  At the moment, any unlifted-type arg gets a
+                         -- 'True' for 'yes I'm evald', so we collect the discount even
+                         -- if we know nothing about it.  And just having it in a primop
+                         -- doesn't help at all if we don't know something more.
+
+         other        -> fun_discount fun `addSizeN` 
+                         (1 + length (filter (not . exprIsTrivial) args))
+                               -- The 1+ is for the function itself
+                               -- Add 1 for each non-trivial arg;
+                               -- the allocation cost, as in let(rec)
+                               -- Slight hack here: for constructors the args are almost always
+                               --      trivial; and for primops they are almost always prim typed
+                               --      We should really only count for non-prim-typed args in the
+                               --      general case, but that seems too much like hard work
+
+    size_up_fun other args = size_up other
  
      ------------ 
      size_up_alt (con, bndrs, rhs) = size_up rhs
  
      ------------ 
      size_up_alt (con, bndrs, rhs) = size_up rhs
-           -- Don't charge for args, so that wrappers look cheap
-
-    ------------
-    size_up_con (Literal lit) args | isNoRepLit lit = sizeN opt_UF_NoRepLit
-                                  | otherwise      = sizeOne
-
-    size_up_con (DataCon dc) args = conSizeN (valArgCount args)
-                            
-    size_up_con (PrimOp op) args = foldr addSize (sizeN op_cost) (map arg_discount args)
-               -- Give an arg-discount if a primop is applies to
-               -- one of the function's arguments
-      where
-       op_cost | primOpIsDupable op = opt_UF_CheapOp
-               | otherwise          = opt_UF_DearOp
+       -- Don't charge for args, so that wrappers look cheap
+       -- (See comments about wrappers with Case)
  
      ------------
         -- We want to record if we're case'ing, or applying, an argument
  
      ------------
         -- We want to record if we're case'ing, or applying, an argument
-    arg_discount (Var v) | v `is_elem` args = scrutArg v
-    arg_discount other                     = sizeZero
-
-    is_elem :: Id -> [Id] -> Bool
-    is_elem = isIn "size_up_scrut"
+    fun_discount v | v `elem` top_args = SizeIs 0# (unitBag (v, opt_UF_FunAppDiscount)) 0#
+    fun_discount other                = sizeZero
  
      ------------
         -- These addSize things have to be here because
         -- I don't want to give them bOMB_OUT_SIZE as an argument
  
  
      ------------
         -- These addSize things have to be here because
         -- I don't want to give them bOMB_OUT_SIZE as an argument
  
-    addSizeN TooBig          _ = TooBig
-    addSizeN (SizeIs n xs d) (I# m)
-      | n_tot -# d <# bOMB_OUT_SIZE = SizeIs n_tot xs d
-      | otherwise                  = TooBig
-      where
-       n_tot = n +# m
+    addSizeN TooBig          _  = TooBig
+    addSizeN (SizeIs n xs d) m         = mkSizeIs bOMB_OUT_SIZE (n +# iUnbox m) xs d
      
      
-    addSize TooBig _ = TooBig
-    addSize _ TooBig = TooBig
-    addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2)
-      | (n_tot -# d_tot) <# bOMB_OUT_SIZE = SizeIs n_tot xys d_tot
-      | otherwise                        = TooBig
-      where
-       n_tot = n1 +# n2
-       d_tot = d1 +# d2
-       xys   = xs `unionBags` ys
+    addSize TooBig           _                 = TooBig
+    addSize _                TooBig            = TooBig
+    addSize (SizeIs n1 xs d1) (SizeIs n2 ys d2) 
+       = mkSizeIs bOMB_OUT_SIZE (n1 +# n2) (xs `unionBags` ys) (d1 +# d2)
  \end{code}
  
  Code for manipulating sizes
  
  \begin{code}
  \end{code}
  
  Code for manipulating sizes
  
  \begin{code}
-
  data ExprSize = TooBig
  data ExprSize = TooBig
-             | SizeIs Int#     -- Size found
-                      (Bag Id) -- Arguments cased herein
-                      Int#     -- Size to subtract if result is scrutinised 
-                               -- by a case expression
-
-sizeZero       = SizeIs 0# emptyBag 0#
-sizeOne        = SizeIs 1# emptyBag 0#
-sizeTwo        = SizeIs 2# emptyBag 0#
-sizeN (I# n)   = SizeIs n  emptyBag 0#
-conSizeN (I# n) = SizeIs 1# emptyBag (n +# 1#)
-       -- Treat constructors as size 1, that unfoldAlways responsds 'False'
-       -- when asked about 'x' when x is bound to (C 3#).
-       -- This avoids gratuitous 'ticks' when x itself appears as an
-       -- atomic constructor argument.
+             | SizeIs FastInt          -- Size found
+                      (Bag (Id,Int))   -- Arguments cased herein, and discount for each such
+                      FastInt          -- Size to subtract if result is scrutinised 
+                                       -- by a case expression
+
+-- subtract the discount before deciding whether to bale out. eg. we
+-- want to inline a large constructor application into a selector:
+--     tup = (a_1, ..., a_99)
+--     x = case tup of ...
+--
+mkSizeIs max n xs d | (n -# d) ># max = TooBig
+                   | otherwise       = SizeIs n xs d
+ 
+maxSize TooBig         _                                 = TooBig
+maxSize _              TooBig                            = TooBig
+maxSize s1@(SizeIs n1 _ _) s2@(SizeIs n2 _ _) | n1 ># n2  = s1
+                                             | otherwise = s2
+
+sizeZero       = SizeIs (_ILIT 0)  emptyBag (_ILIT 0)
+sizeOne        = SizeIs (_ILIT 1)  emptyBag (_ILIT 0)
+sizeN n        = SizeIs (iUnbox n) emptyBag (_ILIT 0)
+conSizeN dc n   
+  | isUnboxedTupleCon dc = SizeIs (_ILIT 0) emptyBag (iUnbox n +# _ILIT 1)
+  | otherwise           = SizeIs (_ILIT 1) emptyBag (iUnbox n +# _ILIT 1)
+       -- Treat constructors as size 1; we are keen to expose them
+       -- (and we charge separately for their args).  We can't treat
+       -- them as size zero, else we find that (iBox x) has size 1,
+       -- which is the same as a lone variable; and hence 'v' will 
+       -- always be replaced by (iBox x), where v is bound to iBox x.
+       --
+       -- However, unboxed tuples count as size zero
+       -- I found occasions where we had 
+       --      f x y z = case op# x y z of { s -> (# s, () #) }
+       -- and f wasn't getting inlined
+
+primOpSize op n_args
+ | not (primOpIsDupable op) = sizeN opt_UF_DearOp
+ | not (primOpOutOfLine op) = sizeN (2 - n_args)
+       -- Be very keen to inline simple primops.
+       -- We give a discount of 1 for each arg so that (op# x y z) costs 2.
+       -- We can't make it cost 1, else we'll inline let v = (op# x y z) 
+       -- at every use of v, which is excessive.
+       --
+       -- A good example is:
+       --      let x = +# p q in C {x}
+       -- Even though x get's an occurrence of 'many', its RHS looks cheap,
+       -- and there's a good chance it'll get inlined back into C's RHS. Urgh!
+ | otherwise               = sizeOne
+
+buildSize = SizeIs (-2#) emptyBag 4#
+       -- We really want to inline applications of build
+       -- build t (\cn -> e) should cost only the cost of e (because build will be inlined later)
+       -- Indeed, we should add a result_discount becuause build is 
+       -- very like a constructor.  We don't bother to check that the
+       -- build is saturated (it usually is).  The "-2" discounts for the \c n, 
+       -- The "4" is rather arbitrary.
+
+augmentSize = SizeIs (-2#) emptyBag 4#
+       -- Ditto (augment t (\cn -> e) ys) should cost only the cost of
+       -- e plus ys. The -2 accounts for the \cn 
                                                 
                                                 
-scrutArg v     = SizeIs 0# (unitBag v) 0#
-
  nukeScrutDiscount (SizeIs n vs d) = SizeIs n vs 0#
  nukeScrutDiscount TooBig         = TooBig
  nukeScrutDiscount (SizeIs n vs d) = SizeIs n vs 0#
  nukeScrutDiscount TooBig         = TooBig
+
+-- When we return a lambda, give a discount if it's used (applied)
+lamScrutDiscount  (SizeIs n vs d) = case opt_UF_FunAppDiscount of { d -> SizeIs n vs (iUnbox d) }
+lamScrutDiscount TooBig                  = TooBig
  \end{code}
  
  
  \end{code}
  
  
@@ -365,49 +451,19 @@ use'' on the other side.  Can be overridden w/ flaggery.
  Just the same as smallEnoughToInline, except that it has no actual arguments.
  
  \begin{code}
  Just the same as smallEnoughToInline, except that it has no actual arguments.
  
  \begin{code}
-couldBeSmallEnoughToInline :: UnfoldingGuidance -> Bool
-couldBeSmallEnoughToInline UnfoldNever = False
-couldBeSmallEnoughToInline other       = True
-
-certainlySmallEnoughToInline :: UnfoldingGuidance -> Bool
-certainlySmallEnoughToInline UnfoldNever                  = False
-certainlySmallEnoughToInline UnfoldAlways                 = True
-certainlySmallEnoughToInline (UnfoldIfGoodArgs _ _ size _) = size <= opt_UF_UseThreshold
-\end{code}
-
-@okToUnfoldInHifile@ is used when emitting unfolding info into an interface
-file to determine whether an unfolding candidate really should be unfolded.
-The predicate is needed to prevent @_casm_@s (+ lit-lits) from being emitted
-into interface files. 
-
-The reason for inlining expressions containing _casm_s into interface files
-is that these fragments of C are likely to mention functions/#defines that
-will be out-of-scope when inlined into another module. This is not an
-unfixable problem for the user (just need to -#include the approp. header
-file), but turning it off seems to the simplest thing to do.
-
-\begin{code}
-okToUnfoldInHiFile :: CoreExpr -> Bool
-okToUnfoldInHiFile e = opt_UnfoldCasms || go e
- where
-    -- Race over an expression looking for CCalls..
-    go (Var _)                = True
-    go (Con (Literal lit) _)  = not (isLitLitLit lit)
-    go (Con (PrimOp op) args) = okToUnfoldPrimOp op && all go args
-    go (Con con args)         = True -- con args are always atomic
-    go (App fun arg)          = go fun && go arg
-    go (Lam _ body)           = go body
-    go (Let binds body)       = and (map go (body :rhssOfBind binds))
-    go (Case scrut bndr alts) = and (map go (scrut:rhssOfAlts alts))
-    go (Note _ body)          = go body
-    go (Type _)                      = True
-
-    -- ok to unfold a PrimOp as long as it's not a _casm_
-    okToUnfoldPrimOp (CCallOp _ is_casm _ _) = not is_casm
-    okToUnfoldPrimOp _                       = True
+couldBeSmallEnoughToInline :: Int -> CoreExpr -> Bool
+couldBeSmallEnoughToInline threshold rhs = case calcUnfoldingGuidance threshold rhs of
+                                               UnfoldNever -> False
+                                               other       -> True
+
+certainlyWillInline :: Unfolding -> Bool
+  -- Sees if the unfolding is pretty certain to inline 
+certainlyWillInline (CoreUnfolding _ _ _ is_cheap (UnfoldIfGoodArgs n_vals _ size _))
+  = is_cheap && size - (n_vals +1) <= opt_UF_UseThreshold
+certainlyWillInline other
+  = False
  \end{code}
  
  \end{code}
  
-
  %************************************************************************
  %*                                                                     *
  \subsection{callSiteInline}
  %************************************************************************
  %*                                                                     *
  \subsection{callSiteInline}
@@ -425,99 +481,135 @@ and occurs exactly once or
  If the thing is in WHNF, there's no danger of duplicating work, 
  so we can inline if it occurs once, or is small
  
  If the thing is in WHNF, there's no danger of duplicating work, 
  so we can inline if it occurs once, or is small
  
+NOTE: we don't want to inline top-level functions that always diverge.
+It just makes the code bigger.  Tt turns out that the convenient way to prevent
+them inlining is to give them a NOINLINE pragma, which we do in 
+StrictAnal.addStrictnessInfoToTopId
+
  \begin{code}
  \begin{code}
-callSiteInline :: Bool                 -- True <=> the Id is black listed
+callSiteInline :: DynFlags
+              -> Bool                  -- True <=> the Id can be inlined
                -> Bool                  -- 'inline' note at call site
                -> Bool                  -- 'inline' note at call site
+              -> OccInfo
                -> Id                    -- The Id
                -> Id                    -- The Id
-              -> [CoreExpr]            -- Arguments
+              -> [Bool]                -- One for each value arg; True if it is interesting
                -> Bool                  -- True <=> continuation is interesting
                -> Maybe CoreExpr        -- Unfolding, if any
  
  
                -> Bool                  -- True <=> continuation is interesting
                -> Maybe CoreExpr        -- Unfolding, if any
  
  
-callSiteInline black_listed inline_call id args interesting_cont
-  = case getIdUnfolding id of {
+callSiteInline dflags active_inline inline_call occ id arg_infos interesting_cont
+  = case idUnfolding id of {
         NoUnfolding -> Nothing ;
         NoUnfolding -> Nothing ;
-       OtherCon _  -> Nothing ;
-       CoreUnfolding form guidance unf_template ->
+       OtherCon cs -> Nothing ;
+
+       CompulsoryUnfolding unf_template -> Just unf_template ;
+               -- CompulsoryUnfolding => there is no top-level binding
+               -- for these things, so we must inline it.
+               -- Only a couple of primop-like things have 
+               -- compulsory unfoldings (see MkId.lhs).
+               -- We don't allow them to be inactive
+
+       CoreUnfolding unf_template is_top is_value is_cheap guidance ->
  
      let
         result | yes_or_no = Just unf_template
                | otherwise = Nothing
  
  
      let
         result | yes_or_no = Just unf_template
                | otherwise = Nothing
  
-       inline_prag = getInlinePragma id
-       arg_infos   = map interestingArg val_args
-       val_args    = filter isValArg args
-       whnf        = whnfOrBottom form
-
-       yes_or_no =
-           case inline_prag of
-               IAmDead           -> pprTrace "callSiteInline: dead" (ppr id) False
-               IMustNotBeINLINEd -> False
-               IAmALoopBreaker   -> False
-               IMustBeINLINEd    -> True       -- Overrides absolutely everything, including the black list
-               ICanSafelyBeINLINEd in_lam one_br -> consider in_lam    one_br
-               NoInlinePragInfo                  -> consider InsideLam False
-
-       consider in_lam one_branch 
-         | black_listed = False
+       n_val_args  = length arg_infos
+
+       yes_or_no 
+         | not active_inline = False
+         | otherwise = case occ of
+                               IAmDead              -> pprTrace "callSiteInline: dead" (ppr id) False
+                               IAmALoopBreaker      -> False
+                               OneOcc in_lam one_br -> (not in_lam || is_cheap) && consider_safe in_lam True  one_br
+                               NoOccInfo            -> is_cheap                 && consider_safe True   False False
+
+       consider_safe in_lam once once_in_one_branch
+               -- consider_safe decides whether it's a good idea to inline something,
+               -- given that there's no work-duplication issue (the caller checks that).
+               -- once_in_one_branch = True means there's a unique textual occurrence
           | inline_call  = True
           | inline_call  = True
-         | one_branch  -- Be very keen to inline something if this is its unique occurrence; that
-                       -- gives a good chance of eliminating the original binding for the thing.
-                       -- The only time we hold back is when substituting inside a lambda;
-                       -- then if the context is totally uninteresting (not applied, not scrutinised)
-                       -- there is no point in substituting because it might just increase allocation.
-         = case in_lam of
-               NotInsideLam -> True
-               InsideLam    -> whnf && (not (null args) || interesting_cont)
-
-         | otherwise   -- Occurs (textually) more than once, so look at its size
+
+         | once_in_one_branch
+               -- Be very keen to inline something if this is its unique occurrence:
+               --
+               --   a) Inlining gives a good chance of eliminating the original 
+               --      binding (and hence the allocation) for the thing.  
+               --      (Provided it's not a top level binding, in which case the 
+               --       allocation costs nothing.)
+               --
+               --   b) Inlining a function that is called only once exposes the 
+               --      body function to the call site.
+               --
+               -- The only time we hold back is when substituting inside a lambda;
+               -- then if the context is totally uninteresting (not applied, not scrutinised)
+               -- there is no point in substituting because it might just increase allocation,
+               -- by allocating the function itself many times
+               -- Note [Jan 2002]: this comment looks out of date.  The actual code
+               -- doesn't inline *ever* in an uninteresting context.  Why not?  I
+               -- think it's just because we don't want to inline top-level constants
+               -- into uninteresting contexts, lest we (for example) re-nest top-level
+               -- literal lists.
+               --
+               -- Note: there used to be a '&& not top_level' in the guard above,
+               --       but that stopped us inlining top-level functions used only once,
+               --       which is stupid
+         = WARN( not is_top && not in_lam, ppr id )
+                       -- If (not in_lam) && one_br then PreInlineUnconditionally
+                       -- should have caught it, shouldn't it?  Unless it's a top
+                       -- level thing.
+           notNull arg_infos || interesting_cont
+
+         | otherwise
           = case guidance of
           = case guidance of
-             UnfoldAlways -> True
-             UnfoldNever  -> False
+             UnfoldNever  -> False ;
               UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount
               UnfoldIfGoodArgs n_vals_wanted arg_discounts size res_discount
-               | enough_args && size <= (n_vals_wanted + 1)
-                       -- No size increase
+
+                 | enough_args && size <= (n_vals_wanted + 1)
+                       -- Inline unconditionally if there no size increase
                         -- Size of call is n_vals_wanted (+1 for the function)
                         -- Size of call is n_vals_wanted (+1 for the function)
-               -> case in_lam of
-                       NotInsideLam -> True
-                       InsideLam    -> whnf
-
-               | not (or arg_infos || really_interesting_cont)
-                       -- If it occurs more than once, there must be something interesting 
-                       -- about some argument, or the result, to make it worth inlining
-               -> False
-  
-               | otherwise
-               -> case in_lam of
-                       NotInsideLam -> small_enough
-                       InsideLam    -> whnf && small_enough
-
-               where
-                 n_args                  = length arg_infos
-                 enough_args             = n_args >= n_vals_wanted
-                 really_interesting_cont | n_args <  n_vals_wanted = False     -- Too few args
-                                         | n_args == n_vals_wanted = interesting_cont
-                                         | otherwise               = True      -- Extra args
-                       -- This rather elaborate defn for really_interesting_cont is important
-                       -- Consider an I# = INLINE (\x -> I# {x})
-                       -- The unfolding guidance deems it to have size 2, and no arguments.
-                       -- So in an application (I# y) we must take the extra arg 'y' as
-                       -- evidene of an interesting context!
-                       
-                 small_enough = (size - discount) <= opt_UF_UseThreshold
-                 discount     = computeDiscount n_vals_wanted arg_discounts res_discount 
+                 -> True
+
+                 | otherwise
+                 -> some_benefit && small_enough
+
+                 where
+                   some_benefit = or arg_infos || really_interesting_cont || 
+                                  (not is_top && (once || (n_vals_wanted > 0 && enough_args)))
+                       -- If it occurs more than once, there must be something interesting 
+                       -- about some argument, or the result context, to make it worth inlining
+                       --
+                       -- If a function has a nested defn we also record some-benefit,
+                       -- on the grounds that we are often able to eliminate the binding,
+                       -- and hence the allocation, for the function altogether; this is good
+                       -- for join points.  But this only makes sense for *functions*;
+                       -- inlining a constructor doesn't help allocation unless the result is
+                       -- scrutinised.  UNLESS the constructor occurs just once, albeit possibly
+                       -- in multiple case branches.  Then inlining it doesn't increase allocation,
+                       -- but it does increase the chance that the constructor won't be allocated at all
+                       -- in the branches that don't use it.
+           
+                   enough_args           = n_val_args >= n_vals_wanted
+                   really_interesting_cont | n_val_args <  n_vals_wanted = False       -- Too few args
+                                           | n_val_args == n_vals_wanted = interesting_cont
+                                           | otherwise                   = True        -- Extra args
+                       -- really_interesting_cont tells if the result of the
+                       -- call is in an interesting context.
+
+                   small_enough = (size - discount) <= opt_UF_UseThreshold
+                   discount     = computeDiscount n_vals_wanted arg_discounts res_discount 
                                                  arg_infos really_interesting_cont
                                                  arg_infos really_interesting_cont
-
-                               
+               
      in    
      in    
-#ifdef DEBUG
-    if opt_D_dump_inlinings then
+    if dopt Opt_D_dump_inlinings dflags then
         pprTrace "Considering inlining"
         pprTrace "Considering inlining"
-                (ppr id <+> vcat [text "black listed" <+> ppr black_listed,
-                                  text "inline prag:" <+> ppr inline_prag,
+                (ppr id <+> vcat [text "active:" <+> ppr active_inline,
+                                  text "occ info:" <+> ppr occ,
                                    text "arg infos" <+> ppr arg_infos,
                                    text "interesting continuation" <+> ppr interesting_cont,
                                    text "arg infos" <+> ppr arg_infos,
                                    text "interesting continuation" <+> ppr interesting_cont,
-                                  text "whnf" <+> ppr whnf,
+                                  text "is value:" <+> ppr is_value,
+                                  text "is cheap:" <+> ppr is_cheap,
                                    text "guidance" <+> ppr guidance,
                                    text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO",
                                    if yes_or_no then
                                    text "guidance" <+> ppr guidance,
                                    text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO",
                                    if yes_or_no then
@@ -525,21 +617,9 @@ callSiteInline black_listed inline_call id args interesting_cont
                                    else empty])
                   result
      else
                                    else empty])
                   result
      else
-#endif
      result
      }
  
      result
      }
  
--- An argument is interesting if it has *some* structure
--- We are here trying to avoid unfolding a function that
--- is applied only to variables that have no unfolding
--- (i.e. they are probably lambda bound): f x y z
--- There is little point in inlining f here.
-interestingArg (Type _)                 = False
-interestingArg (App fn (Type _)) = interestingArg fn
-interestingArg (Var v)          = hasUnfolding (getIdUnfolding v)
-interestingArg other            = True
-
-
  computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Bool -> Int
  computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
         -- We multiple the raw discounts (args_discount and result_discount)
  computeDiscount :: Int -> [Int] -> Int -> [Bool] -> Bool -> Int
  computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
         -- We multiple the raw discounts (args_discount and result_discount)
@@ -551,11 +631,13 @@ computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
         -- we also discount 1 for each argument passed, because these will
         -- reduce with the lambdas in the function (we count 1 for a lambda
         -- in size_up).
         -- we also discount 1 for each argument passed, because these will
         -- reduce with the lambdas in the function (we count 1 for a lambda
         -- in size_up).
-  = length (take n_vals_wanted arg_infos) +
+  = 1 +                        -- Discount of 1 because the result replaces the call
+                       -- so we count 1 for the function itself
+    length (take n_vals_wanted arg_infos) +
                         -- Discount of 1 for each arg supplied, because the 
                         -- result replaces the call
      round (opt_UF_KeenessFactor * 
                         -- Discount of 1 for each arg supplied, because the 
                         -- result replaces the call
      round (opt_UF_KeenessFactor * 
-          fromInt (arg_discount + result_discount))
+          fromIntegral (arg_discount + result_discount))
    where
      arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
  
    where
      arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos)
  
@@ -566,83 +648,3 @@ computeDiscount n_vals_wanted arg_discounts res_discount arg_infos result_used
      result_discount | result_used = res_discount       -- Over-applied, or case scrut
                     | otherwise   = 0
  \end{code}
      result_discount | result_used = res_discount       -- Over-applied, or case scrut
                     | otherwise   = 0
  \end{code}
-
-
-%************************************************************************
-%*                                                                     *
-\subsection{Black-listing}
-%*                                                                     *
-%************************************************************************
-
-Inlining is controlled by the "Inline phase" number, which is set
-by the per-simplification-pass '-finline-phase' flag.
-
-For optimisation we use phase 1,2 and nothing (i.e. no -finline-phase flag)
-in that order.  The meanings of these are determined by the @blackListed@ function
-here.
-
-\begin{code}
-blackListed :: IdSet           -- Used in transformation rules
-           -> Maybe Int        -- Inline phase
-           -> Id -> Bool       -- True <=> blacklisted
-       
--- The blackListed function sees whether a variable should *not* be 
--- inlined because of the inline phase we are in.  This is the sole
--- place that the inline phase number is looked at.
-
--- Phase 0: used for 'no inlinings please'
-blackListed rule_vars (Just 0)
-  = \v -> True
-
--- Phase 1: don't inline any rule-y things or things with specialisations
-blackListed rule_vars (Just 1)
-  = \v -> let v_uniq = idUnique v
-         in v `elemVarSet` rule_vars
-         || not (isEmptyCoreRules (getIdSpecialisation v))
-         || v_uniq == runSTRepIdKey
-
--- Phase 2: allow build/augment to inline, and specialisations
-blackListed rule_vars (Just 2)
-  = \v -> let v_uniq = idUnique v
-         in (v `elemVarSet` rule_vars && not (v_uniq == buildIdKey || 
-                                              v_uniq == augmentIdKey))
-         || v_uniq == runSTRepIdKey
-
--- Otherwise just go for it
-blackListed rule_vars phase
-  = \v -> False
-\end{code}
-
-
-SLPJ 95/04: Why @runST@ must be inlined very late:
-\begin{verbatim}
-f x =
-  runST ( \ s -> let
-                   (a, s')  = newArray# 100 [] s
-                   (_, s'') = fill_in_array_or_something a x s'
-                 in
-                 freezeArray# a s'' )
-\end{verbatim}
-If we inline @runST@, we'll get:
-\begin{verbatim}
-f x = let
-       (a, s')  = newArray# 100 [] realWorld#{-NB-}
-       (_, s'') = fill_in_array_or_something a x s'
-      in
-      freezeArray# a s''
-\end{verbatim}
-And now the @newArray#@ binding can be floated to become a CAF, which
-is totally and utterly wrong:
-\begin{verbatim}
-f = let
-    (a, s')  = newArray# 100 [] realWorld#{-NB-} -- YIKES!!!
-    in
-    \ x ->
-       let (_, s'') = fill_in_array_or_something a x s' in
-       freezeArray# a s''
-\end{verbatim}
-All calls to @f@ will share a {\em single} array!  
-
-Yet we do want to inline runST sometime, so we can avoid
-needless code.  Solution: black list it until the last moment.
-