[project @ 2005-10-17 11:10:36 by simonpj]

[ghc-hetmet.git] / ghc / compiler / simplCore / SimplUtils.lhs
diff --git a/ghc/compiler/simplCore/SimplUtils.lhs b/ghc/compiler/simplCore/SimplUtils.lhs

index 3ba53e0..0d9be52 100644 (file)
--- a/ghc/compiler/simplCore/SimplUtils.lhs
+++ b/ghc/compiler/simplCore/SimplUtils.lhs
@@ -23,35 +23,37 @@ module SimplUtils (
  #include "HsVersions.h"
  
  import SimplEnv
  #include "HsVersions.h"
  
  import SimplEnv
-import CmdLineOpts     ( SimplifierSwitch(..), SimplifierMode(..), opt_UF_UpdateInPlace,
-                         opt_SimplNoPreInlining, opt_RulesOff,
+import DynFlags                ( SimplifierSwitch(..), SimplifierMode(..),
                           DynFlag(..), dopt )
                           DynFlag(..), dopt )
+import StaticFlags     ( opt_UF_UpdateInPlace, opt_SimplNoPreInlining,
+                         opt_RulesOff )
+                         
  import CoreSyn
  import CoreFVs         ( exprFreeVars )
  import CoreSyn
  import CoreFVs         ( exprFreeVars )
-import CoreUtils       ( cheapEqExpr, exprType, exprIsTrivial,
+import CoreUtils       ( cheapEqExpr, exprType, exprIsTrivial, exprIsCheap,
                           etaExpand, exprEtaExpandArity, bindNonRec, mkCoerce2,
                           etaExpand, exprEtaExpandArity, bindNonRec, mkCoerce2,
-                         findDefault, exprOkForSpeculation, exprIsValue
+                         findDefault, exprOkForSpeculation, exprIsHNF
                         )
                         )
-import qualified Subst ( simplBndrs, simplBndr, simplLetId, simplLamBndr )
-import Id              ( Id, idType, idInfo, isDataConWorkId, idOccInfo,
+import Literal         ( mkStringLit )
+import CoreUnfold      ( smallEnoughToInline )
+import MkId            ( eRROR_ID )
+import Id              ( idType, isDataConWorkId, idOccInfo, isDictId, 
                           mkSysLocal, isDeadBinder, idNewDemandInfo, isExportedId,
                           idUnfolding, idNewStrictness, idInlinePragma,
                         )
  import NewDemand       ( isStrictDmd, isBotRes, splitStrictSig )
  import SimplMonad
                           mkSysLocal, isDeadBinder, idNewDemandInfo, isExportedId,
                           idUnfolding, idNewStrictness, idInlinePragma,
                         )
  import NewDemand       ( isStrictDmd, isBotRes, splitStrictSig )
  import SimplMonad
-import Type            ( Type, seqType, splitFunTys, dropForAlls, isStrictType,
+import Type            ( Type, splitFunTys, dropForAlls, isStrictType,
                           splitTyConApp_maybe, tyConAppArgs, mkTyVarTys
                         )
                           splitTyConApp_maybe, tyConAppArgs, mkTyVarTys
                         )
-import TcType          ( isDictTy )
  import Name            ( mkSysTvName )
  import Name            ( mkSysTvName )
-import OccName         ( EncodedFS )
  import TyCon           ( tyConDataCons_maybe, isAlgTyCon, isNewTyCon )
  import TyCon           ( tyConDataCons_maybe, isAlgTyCon, isNewTyCon )
-import DataCon         ( dataConRepArity, dataConTyVars, dataConArgTys, isVanillaDataCon )
+import DataCon         ( dataConRepArity, dataConTyVars, dataConInstArgTys, isVanillaDataCon )
  import Var             ( tyVarKind, mkTyVar )
  import VarSet
  import Var             ( tyVarKind, mkTyVar )
  import VarSet
-import BasicTypes      ( TopLevelFlag(..), isTopLevel, OccInfo(..), isLoopBreaker, isOneOcc,
+import BasicTypes      ( TopLevelFlag(..), isNotTopLevel, OccInfo(..), isLoopBreaker, isOneOcc,
                           Activation, isAlwaysActive, isActive )
                           Activation, isAlwaysActive, isActive )
-import Util            ( lengthExceeds, mapAccumL )
+import Util            ( lengthExceeds )
  import Outputable
  \end{code}
  
  import Outputable
  \end{code}
  
@@ -532,14 +534,28 @@ better.  Consider
         xN = eN[xN-1]
  
  We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc.
         xN = eN[xN-1]
  
  We may end up simplifying e1 N times, e2 N-1 times, e3 N-3 times etc.
+This can happen with cascades of functions too:
+
+       f1 = \x1.e1
+       f2 = \xs.e2[f1]
+       f3 = \xs.e3[f3]
+       ...etc...
+
+THE MAIN INVARIANT is this:
+
+       ----  preInlineUnconditionally invariant -----
+   IF preInlineUnconditionally chooses to inline x = <rhs>
+   THEN doing the inlining should not change the occurrence
+       info for the free vars of <rhs>
+       ----------------------------------------------
+
+For example, it's tempting to look at trivial binding like
+       x = y
+and inline it unconditionally.  But suppose x is used many times,
+but this is the unique occurrence of y.  Then inlining x would change
+y's occurrence info, which breaks the invariant.  It matters: y
+might have a BIG rhs, which will now be dup'd at every occurrenc of x.
  
  
-NB: we don't even look at the RHS to see if it's trivial
-We might have
-                       x = y
-where x is used many times, but this is the unique occurrence of y.
-We should NOT inline x at all its uses, because then we'd do the same
-for y -- aargh!  So we must base this pre-rhs-simplification decision
-solely on x's occurrences, not on its rhs.
  
  Evne RHSs labelled InlineMe aren't caught here, because there might be
  no benefit from inlining at the call site.
  
  Evne RHSs labelled InlineMe aren't caught here, because there might be
  no benefit from inlining at the call site.
@@ -564,10 +580,55 @@ Conclusion: inline top level things gaily until Phase 0 (the last
  phase), at which point don't.
  
  \begin{code}
  phase), at which point don't.
  
  \begin{code}
-preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> Bool
-preInlineUnconditionally env top_lvl bndr
-  | isTopLevel top_lvl, SimplPhase 0 <- phase = False
--- If we don't have this test, consider
+preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
+preInlineUnconditionally env top_lvl bndr rhs
+  | not active                    = False
+  | opt_SimplNoPreInlining = False
+  | otherwise = case idOccInfo bndr of
+                 IAmDead                    -> True    -- Happens in ((\x.1) v)
+                 OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
+                 other                      -> False
+  where
+    phase = getMode env
+    active = case phase of
+                  SimplGently  -> isAlwaysActive prag
+                  SimplPhase n -> isActive n prag
+    prag = idInlinePragma bndr
+
+    try_once in_lam int_cxt    -- There's one textual occurrence
+       | not in_lam = isNotTopLevel top_lvl || early_phase
+       | otherwise  = int_cxt && canInlineInLam rhs
+
+-- Be very careful before inlining inside a lambda, becuase (a) we must not 
+-- invalidate occurrence information, and (b) we want to avoid pushing a
+-- single allocation (here) into multiple allocations (inside lambda).  
+-- Inlining a *function* with a single *saturated* call would be ok, mind you.
+--     || (if is_cheap && not (canInlineInLam rhs) then pprTrace "preinline" (ppr bndr <+> ppr rhs) ok else ok)
+--     where 
+--             is_cheap = exprIsCheap rhs
+--             ok = is_cheap && int_cxt
+
+       --      int_cxt         The context isn't totally boring
+       -- E.g. let f = \ab.BIG in \y. map f xs
+       --      Don't want to substitute for f, because then we allocate
+       --      its closure every time the \y is called
+       -- But: let f = \ab.BIG in \y. map (f y) xs
+       --      Now we do want to substitute for f, even though it's not 
+       --      saturated, because we're going to allocate a closure for 
+       --      (f y) every time round the loop anyhow.
+
+       -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
+       -- so substituting rhs inside a lambda doesn't change the occ info.
+       -- Sadly, not quite the same as exprIsHNF.
+    canInlineInLam (Lit l)             = True
+    canInlineInLam (Lam b e)           = isRuntimeVar b || canInlineInLam e
+    canInlineInLam (Note _ e)          = canInlineInLam e
+    canInlineInLam _                   = False
+
+    early_phase = case phase of
+                       SimplPhase 0 -> False
+                       other        -> True
+-- If we don't have this early_phase test, consider
  --     x = length [1,2,3]
  -- The full laziness pass carefully floats all the cons cells to
  -- top level, and preInlineUnconditionally floats them all back in.
  --     x = length [1,2,3]
  -- The full laziness pass carefully floats all the cons cells to
  -- top level, and preInlineUnconditionally floats them all back in.
@@ -582,19 +643,6 @@ preInlineUnconditionally env top_lvl bndr
  -- top level things, but then we become more leery about inlining
  -- them.  
  
  -- top level things, but then we become more leery about inlining
  -- them.  
  
-  | not active                    = False
-  | opt_SimplNoPreInlining = False
-  | otherwise = case idOccInfo bndr of
-                 IAmDead            -> True    -- Happens in ((\x.1) v)
-                 OneOcc in_lam once -> not in_lam && once
-                       -- Not inside a lambda, one occurrence ==> safe!
-                 other              -> False
-  where
-    phase = getMode env
-    active = case phase of
-                  SimplGently  -> isAlwaysActive prag
-                  SimplPhase n -> isActive n prag
-    prag = idInlinePragma bndr
  \end{code}
  
  postInlineUnconditionally
  \end{code}
  
  postInlineUnconditionally
@@ -626,29 +674,54 @@ our new view that inlining is like a RULE, so I'm sticking to the 'active'
  story for now.
  
  \begin{code}
  story for now.
  
  \begin{code}
-postInlineUnconditionally :: SimplEnv -> OutId -> OccInfo -> OutExpr -> Bool
-postInlineUnconditionally env bndr occ_info rhs 
-  =  exprIsTrivial rhs
-  && active
-  && not (isLoopBreaker occ_info)
-  && not (isExportedId bndr)
-       -- We used to have (isOneOcc occ_info) instead of
-       -- not (isLoopBreaker occ_info) && not (isExportedId bndr)
-       -- That was because a rather fragile use of rules got confused
-       -- if you inlined even a binding f=g  e.g. We used to have
-       --      map = mapList
-       -- But now a more precise use of phases has eliminated this problem,
-       -- so the is_active test will do the job.  I think.
-       --
-       -- OLD COMMENT: (delete soon)
-       -- Indeed, you might suppose that
-       -- there is nothing wrong with substituting for a trivial RHS, even
-       -- if it occurs many times.  But consider
-       --      x = y
-       --      h = _inline_me_ (...x...)
-       -- Here we do *not* want to have x inlined, even though the RHS is
-       -- trivial, becuase the contract for an INLINE pragma is "no inlining".
-       -- This is important in the rules for the Prelude 
+postInlineUnconditionally :: SimplEnv -> TopLevelFlag -> OutId -> OccInfo -> OutExpr -> Unfolding -> Bool
+postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
+  | not active            = False
+  | isLoopBreaker occ_info = False
+  | isExportedId bndr      = False
+  | exprIsTrivial rhs     = True
+  | otherwise
+  = case occ_info of
+      OneOcc in_lam one_br int_cxt
+       ->     (one_br || smallEnoughToInline unfolding)        -- Small enough to dup
+                       -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
+                       --
+                       -- NB: Do we want to inline arbitrarily big things becuase
+                       -- one_br is True? that can lead to inline cascades.  But
+                       -- preInlineUnconditionlly has dealt with all the common cases
+                       -- so perhaps it's worth the risk. Here's an example
+                       --      let f = if b then Left (\x.BIG) else Right (\y.BIG)
+                       --      in \y. ....f....
+                       -- We can't preInlineUnconditionally because that woud invalidate
+                       -- the occ info for b.  Yet f is used just once, and duplicating
+                       -- the case work is fine (exprIsCheap).
+
+          &&  ((isNotTopLevel top_lvl && not in_lam) || 
+                       -- But outside a lambda, we want to be reasonably aggressive
+                       -- about inlining into multiple branches of case
+                       -- e.g. let x = <non-value> 
+                       --      in case y of { C1 -> ..x..; C2 -> ..x..; C3 -> ... } 
+                       -- Inlining can be a big win if C3 is the hot-spot, even if
+                       -- the uses in C1, C2 are not 'interesting'
+                       -- An example that gets worse if you add int_cxt here is 'clausify'
+
+               (isCheapUnfolding unfolding && int_cxt))
+                       -- isCheap => acceptable work duplication; in_lam may be true
+                       -- int_cxt to prevent us inlining inside a lambda without some 
+                       -- good reason.  See the notes on int_cxt in preInlineUnconditionally
+
+      other -> False
+       -- The point here is that for *non-values* that occur
+       -- outside a lambda, the call-site inliner won't have
+       -- a chance (becuase it doesn't know that the thing
+       -- only occurs once).   The pre-inliner won't have gotten
+       -- it either, if the thing occurs in more than one branch
+       -- So the main target is things like
+       --      let x = f y in
+       --      case v of
+       --         True  -> case x of ...
+       --         False -> case x of ...
+       -- I'm not sure how important this is in practice
    where
      active = case getMode env of
                    SimplGently  -> isAlwaysActive prag
    where
      active = case getMode env of
                    SimplGently  -> isAlwaysActive prag
@@ -769,9 +842,9 @@ tryEtaReduce bndrs body
  
      ok_fun fun =  exprIsTrivial fun
                && not (any (`elemVarSet` (exprFreeVars fun)) bndrs)
  
      ok_fun fun =  exprIsTrivial fun
                && not (any (`elemVarSet` (exprFreeVars fun)) bndrs)
-              && (exprIsValue fun || all ok_lam bndrs)
-    ok_lam v = isTyVar v || isDictTy (idType v)
-       -- The exprIsValue is because eta reduction is not 
+              && (exprIsHNF fun || all ok_lam bndrs)
+    ok_lam v = isTyVar v || isDictId v
+       -- The exprIsHNF is because eta reduction is not 
         -- valid in general:  \x. bot  /=  bot
         -- So we need to be sure that the "fun" is a value.
         --
         -- valid in general:  \x. bot  /=  bot
         -- So we need to be sure that the "fun" is a value.
         --
@@ -1045,7 +1118,7 @@ of the inner case y, which give us nowhere to go!
  
  \begin{code}
  prepareAlts :: OutExpr                 -- Scrutinee
  
  \begin{code}
  prepareAlts :: OutExpr                 -- Scrutinee
-           -> InId             -- Case binder
+           -> InId             -- Case binder (passed only to use in statistics)
             -> [InAlt]          -- Increasing order
             -> SimplM ([InAlt],         -- Better alternatives, still incresaing order
                         [AltCon])       -- These cases are handled
             -> [InAlt]          -- Increasing order
             -> SimplM ([InAlt],         -- Better alternatives, still incresaing order
                         [AltCon])       -- These cases are handled
@@ -1071,14 +1144,17 @@ prepareAlts scrut case_bndr alts
         -- Filter out the default, if it can't happen,
         -- or replace it with "proper" alternative if there
         -- is only one constructor left
         -- Filter out the default, if it can't happen,
         -- or replace it with "proper" alternative if there
         -- is only one constructor left
-    prepareDefault case_bndr handled_cons maybe_deflt  `thenSmpl` \ deflt_alt ->
+    prepareDefault scrut case_bndr handled_cons maybe_deflt    `thenSmpl` \ deflt_alt ->
  
      returnSmpl (mergeAlts better_alts deflt_alt, handled_cons)
         -- We need the mergeAlts in case the new default_alt 
         -- has turned into a constructor alternative.
  
  
      returnSmpl (mergeAlts better_alts deflt_alt, handled_cons)
         -- We need the mergeAlts in case the new default_alt 
         -- has turned into a constructor alternative.
  
-prepareDefault case_bndr handled_cons (Just rhs)
-  | Just (tycon, inst_tys) <- splitTyConApp_maybe (idType case_bndr),
+prepareDefault scrut case_bndr handled_cons (Just rhs)
+  | Just (tycon, inst_tys) <- splitTyConApp_maybe (exprType scrut),
+       -- Use exprType scrut here, rather than idType case_bndr, because
+       -- case_bndr is an InId, so exprType scrut may have more information
+       -- Test simpl013 is an example
      isAlgTyCon tycon,          -- It's a data type, tuple, or unboxed tuples.  
      not (isNewTyCon tycon),    -- We can have a newtype, if we are just doing an eval:
                                 --      case x of { DEFAULT -> e }
      isAlgTyCon tycon,          -- It's a data type, tuple, or unboxed tuples.  
      not (isNewTyCon tycon),    -- We can have a newtype, if we are just doing an eval:
                                 --      case x of { DEFAULT -> e }
@@ -1111,13 +1187,13 @@ prepareDefault case_bndr handled_cons (Just rhs)
    | otherwise
    = returnSmpl [(DEFAULT, [], rhs)]
  
    | otherwise
    = returnSmpl [(DEFAULT, [], rhs)]
  
-prepareDefault case_bndr handled_cons Nothing
+prepareDefault scrut case_bndr handled_cons Nothing
    = returnSmpl []
  
  mk_args missing_con inst_tys
    = mk_tv_bndrs missing_con inst_tys   `thenSmpl` \ (tv_bndrs, inst_tys') ->
      getUniquesSmpl                     `thenSmpl` \ id_uniqs ->
    = returnSmpl []
  
  mk_args missing_con inst_tys
    = mk_tv_bndrs missing_con inst_tys   `thenSmpl` \ (tv_bndrs, inst_tys') ->
      getUniquesSmpl                     `thenSmpl` \ id_uniqs ->
-    let arg_tys = dataConArgTys missing_con inst_tys'
+    let arg_tys = dataConInstArgTys missing_con inst_tys'
         arg_ids = zipWith (mkSysLocal FSLIT("a")) id_uniqs arg_tys
      in
      returnSmpl (tv_bndrs ++ arg_ids)
         arg_ids = zipWith (mkSysLocal FSLIT("a")) id_uniqs arg_tys
      in
      returnSmpl (tv_bndrs ++ arg_ids)
@@ -1417,11 +1493,14 @@ I don't really know how to improve this situation.
  --     0. Check for empty alternatives
  --------------------------------------------------
  
  --     0. Check for empty alternatives
  --------------------------------------------------
  
-#ifdef DEBUG
+-- This isn't strictly an error.  It's possible that the simplifer might "see"
+-- that an inner case has no accessible alternatives before it "sees" that the
+-- entire branch of an outer case is inaccessible.  So we simply
+-- put an error case here insteadd
  mkCase1 scrut case_bndr ty []
    = pprTrace "mkCase1: null alts" (ppr case_bndr <+> ppr scrut) $
  mkCase1 scrut case_bndr ty []
    = pprTrace "mkCase1: null alts" (ppr case_bndr <+> ppr scrut) $
-    returnSmpl scrut
-#endif
+    return (mkApps (Var eRROR_ID)
+                  [Type ty, Lit (mkStringLit "Impossible alternative")])
  
  --------------------------------------------------
  --     1. Eliminate the case altogether if poss
  
  --------------------------------------------------
  --     1. Eliminate the case altogether if poss
@@ -1444,7 +1523,7 @@ mkCase1 scrut case_bndr ty [(con,bndrs,rhs)]
                 --      x
                 -- This particular example shows up in default methods for
                 -- comparision operations (e.g. in (>=) for Int.Int32)
                 --      x
                 -- This particular example shows up in default methods for
                 -- comparision operations (e.g. in (>=) for Int.Int32)
-       || exprIsValue scrut                    -- It's already evaluated
+       || exprIsHNF scrut                      -- It's already evaluated
         || var_demanded_later scrut             -- It'll be demanded later
  
  --      || not opt_SimplPedanticBottoms)       -- Or we don't care!
         || var_demanded_later scrut             -- It'll be demanded later
  
  --      || not opt_SimplPedanticBottoms)       -- Or we don't care!