Improve exprIsCheap

[ghc-hetmet.git] / compiler / coreSyn / CoreUtils.lhs
diff --git a/compiler/coreSyn/CoreUtils.lhs b/compiler/coreSyn/CoreUtils.lhs

index e358be4..69f78fa 100644 (file)
--- a/compiler/coreSyn/CoreUtils.lhs
+++ b/compiler/coreSyn/CoreUtils.lhs
@@ -11,7 +11,7 @@ module CoreUtils (
         mkIfThenElse, mkAltExpr, mkPiType, mkPiTypes,
  
         -- Taking expressions apart
-       findDefault, findAlt, isDefaultAlt,
+       findDefault, findAlt, isDefaultAlt, mergeAlts,
  
         -- Properties of expressions
         exprType, coreAltType,
@@ -46,7 +46,6 @@ import Var            ( Var )
  import VarSet          ( unionVarSet )
  import VarEnv
  import Name            ( hashName )
-import Packages                ( HomeModules )
  #if mingw32_TARGET_OS
  import Packages                ( isDllName )
  #endif
@@ -58,7 +57,7 @@ import PrimOp         ( PrimOp(..), primOpOkForSpeculation, primOpIsCheap )
  import Id              ( Id, idType, globalIdDetails, idNewStrictness, 
                           mkWildId, idArity, idName, idUnfolding, idInfo,
                           isOneShotBndr, isStateHackType, isDataConWorkId_maybe, mkSysLocal,
-                         isDataConWorkId, isBottomingId
+                         isDataConWorkId, isBottomingId, isDictId
                         )
  import IdInfo          ( GlobalIdDetails(..), megaSeqIdInfo )
  import NewDemand       ( appIsBottom )
@@ -72,8 +71,10 @@ import TyCon         ( tyConArity )
  import TysWiredIn      ( boolTy, trueDataCon, falseDataCon )
  import CostCentre      ( CostCentre )
  import BasicTypes      ( Arity )
+import PackageConfig   ( PackageId )
  import Unique          ( Unique )
  import Outputable
+import DynFlags                ( DynFlags, DynFlag(Opt_DictsCheap), dopt )
  import TysPrim         ( alphaTy )     -- Debugging only
  import Util             ( equalLength, lengthAtLeast, foldl2 )
  \end{code}
@@ -165,12 +166,6 @@ mkNote (SCC cc)    expr               = mkSCC cc expr
  mkNote InlineMe expr              = mkInlineMe expr
  mkNote note     expr              = Note note expr
  #endif
-
--- Slide InlineCall in around the function
---     No longer necessary I think (SLPJ Apr 99)
--- mkNote InlineCall (App f a) = App (mkNote InlineCall f) a
--- mkNote InlineCall (Var v)   = Note InlineCall (Var v)
--- mkNote InlineCall expr      = expr
  \end{code}
  
  Drop trivial InlineMe's.  This is somewhat important, because if we have an unfolding
@@ -306,6 +301,18 @@ findAlt con alts
  isDefaultAlt :: CoreAlt -> Bool
  isDefaultAlt (DEFAULT, _, _) = True
  isDefaultAlt other          = False
+
+---------------------------------
+mergeAlts :: [CoreAlt] -> [CoreAlt] -> [CoreAlt]
+-- Merge preserving order; alternatives in the first arg
+-- shadow ones in the second
+mergeAlts [] as2 = as2
+mergeAlts as1 [] = as1
+mergeAlts (a1:as1) (a2:as2)
+  = case a1 `cmpAlt` a2 of
+       LT -> a1 : mergeAlts as1      (a2:as2)
+       EQ -> a1 : mergeAlts as1      as2       -- Discard a2
+       GT -> a2 : mergeAlts (a1:as1) as2
  \end{code}
  
  
@@ -411,14 +418,14 @@ because sharing will make sure it is only evaluated once.
  
  \begin{code}
  exprIsCheap :: CoreExpr -> Bool
-exprIsCheap (Lit lit)              = True
-exprIsCheap (Type _)               = True
-exprIsCheap (Var _)                = True
-exprIsCheap (Note InlineMe e)              = True
-exprIsCheap (Note _ e)             = exprIsCheap e
-exprIsCheap (Lam x e)               = isRuntimeVar x || exprIsCheap e
-exprIsCheap (Case e _ _ alts)       = exprIsCheap e && 
-                                   and [exprIsCheap rhs | (_,_,rhs) <- alts]
+exprIsCheap (Lit lit)        = True
+exprIsCheap (Type _)          = True
+exprIsCheap (Var _)           = True
+exprIsCheap (Note InlineMe e) = True
+exprIsCheap (Note _ e)        = exprIsCheap e
+exprIsCheap (Lam x e)         = isRuntimeVar x || exprIsCheap e
+exprIsCheap (Case e _ _ alts) = exprIsCheap e && 
+                               and [exprIsCheap rhs | (_,_,rhs) <- alts]
         -- Experimentally, treat (case x of ...) as cheap
         -- (and case __coerce x etc.)
         -- This improves arities of overloaded functions where
@@ -426,46 +433,54 @@ exprIsCheap (Case e _ _ alts)       = exprIsCheap e &&
  exprIsCheap (Let (NonRec x _) e)  
        | isUnLiftedType (idType x) = exprIsCheap e
        | otherwise                = False
-       -- strict lets always have cheap right hand sides, and
-       -- do no allocation.
+       -- strict lets always have cheap right hand sides,
+       -- and do no allocation.
  
-exprIsCheap other_expr 
-  = go other_expr 0 True
+exprIsCheap other_expr         -- Applications and variables
+  = go other_expr []
    where
-    go (Var f) n_args args_cheap 
-       = (idAppIsCheap f n_args && args_cheap)
-                       -- A constructor, cheap primop, or partial application
-
-         || idAppIsBottom f n_args 
+       -- Accumulate value arguments, then decide
+    go (App f a) val_args | isRuntimeArg a = go f (a:val_args)
+                         | otherwise      = go f val_args
+
+    go (Var f) [] = True       -- Just a type application of a variable
+                               -- (f t1 t2 t3) counts as WHNF
+    go (Var f) args
+       = case globalIdDetails f of
+               RecordSelId {} -> go_sel args
+               ClassOpId _    -> go_sel args
+               PrimOpId op    -> go_primop op args
+
+               DataConWorkId _ -> go_pap args
+               other | length args < idArity f -> go_pap args
+
+               other -> isBottomingId f
                         -- Application of a function which
                         -- always gives bottom; we treat this as cheap
                         -- because it certainly doesn't need to be shared!
         
-    go (App f a) n_args args_cheap 
-       | not (isRuntimeArg a) = go f n_args      args_cheap
-       | otherwise            = go f (n_args + 1) (exprIsCheap a && args_cheap)
-
-    go other   n_args args_cheap = False
-
-idAppIsCheap :: Id -> Int -> Bool
-idAppIsCheap id n_val_args 
-  | n_val_args == 0 = True     -- Just a type application of
-                               -- a variable (f t1 t2 t3)
-                               -- counts as WHNF
-  | otherwise 
-  = case globalIdDetails id of
-       DataConWorkId _ -> True
-       RecordSelId {}  -> n_val_args == 1      -- I'm experimenting with making record selection
-       ClassOpId _     -> n_val_args == 1      -- look cheap, so we will substitute it inside a
-                                               -- lambda.  Particularly for dictionary field selection.
-               -- BUT: Take care with (sel d x)!  The (sel d) might be cheap, but
-               --      there's no guarantee that (sel d x) will be too.  Hence (n_val_args == 1)
-
-       PrimOpId op   -> primOpIsCheap op       -- In principle we should worry about primops
-                                               -- that return a type variable, since the result
-                                               -- might be applied to something, but I'm not going
-                                               -- to bother to check the number of args
-       other         -> n_val_args < idArity id
+    go other args = False
+ 
+    --------------
+    go_pap args = all exprIsTrivial args
+       -- For constructor applications and primops, check that all
+       -- the args are trivial.  We don't want to treat as cheap, say,
+       --      (1:2:3:4:5:[])
+       -- We'll put up with one constructor application, but not dozens
+       
+    --------------
+    go_primop op args = primOpIsCheap op && all exprIsCheap args
+       -- In principle we should worry about primops
+       -- that return a type variable, since the result
+       -- might be applied to something, but I'm not going
+       -- to bother to check the number of args
+ 
+    --------------
+    go_sel [arg] = exprIsTrivial arg   -- I'm experimenting with making record selection
+    go_sel other = False               -- look cheap, so we will substitute it inside a
+                                       -- lambda.  Particularly for dictionary field selection.
+               -- BUT: Take care with (sel d x)!  The (sel d) might be cheap, but
+               --      there's no guarantee that (sel d x) will be too.  Hence (n_val_args == 1)
  \end{code}
  
  exprOkForSpeculation returns True of an expression that it is
@@ -481,7 +496,6 @@ It returns True iff
         soon, 
         without raising an exception,
         without causing a side effect (e.g. writing a mutable variable)
-
  E.G.
         let x = case y# +# 1# of { r# -> I# r# }
         in E
@@ -700,7 +714,7 @@ exprIsConApp_maybe expr = analyse (collectArgs expr)
  %************************************************************************
  
  \begin{code}
-exprEtaExpandArity :: CoreExpr -> Arity
+exprEtaExpandArity :: DynFlags -> CoreExpr -> Arity
  {- The Arity returned is the number of value args the 
     thing can be applied to without doing much work
  
@@ -780,7 +794,7 @@ decopose Int to a function type.   Hence the final case in eta_expand.
  -}
  
  
-exprEtaExpandArity e = arityDepth (arityType e)
+exprEtaExpandArity dflags e = arityDepth (arityType dflags e)
  
  -- A limited sort of function type
  data ArityType = AFun Bool ArityType   -- True <=> one-shot
@@ -796,17 +810,17 @@ andArityType ATop     at2           = ATop
  andArityType (AFun t1 at1)  (AFun t2 at2) = AFun (t1 && t2) (andArityType at1 at2)
  andArityType at1           at2           = andArityType at2 at1
  
-arityType :: CoreExpr -> ArityType
+arityType :: DynFlags -> CoreExpr -> ArityType
         -- (go1 e) = [b1,..,bn]
         -- means expression can be rewritten \x_b1 -> ... \x_bn -> body
         -- where bi is True <=> the lambda is one-shot
  
-arityType (Note n e) = arityType e
+arityType dflags (Note n e) = arityType dflags e
  --     Not needed any more: etaExpand is cleverer
---  | ok_note n = arityType e
+--  | ok_note n = arityType dflags e
  --  | otherwise = ATop
  
-arityType (Var v) 
+arityType dflags (Var v) 
    = mk (idArity v) (arg_tys (idType v))
    where
      mk :: Arity -> [Type] -> ArityType
@@ -816,8 +830,9 @@ arityType (Var v)
         --              False -> \(s:RealWorld) -> e
         -- where foo has arity 1.  Then we want the state hack to
         -- apply to foo too, so we can eta expand the case.
-    mk 0 tys | isBottomingId v  = ABot
-             | otherwise       = ATop
+    mk 0 tys | isBottomingId v                    = ABot
+             | (ty:tys) <- tys, isStateHackType ty = AFun True ATop
+            | otherwise                           = ATop
      mk n (ty:tys) = AFun (isStateHackType ty) (mk (n-1) tys)
      mk n []       = AFun False               (mk (n-1) [])
  
@@ -828,14 +843,15 @@ arityType (Var v)
         | otherwise                                = []
  
         -- Lambdas; increase arity
-arityType (Lam x e) | isId x    = AFun (isOneShotBndr x) (arityType e)
-                   | otherwise = arityType e
+arityType dflags (Lam x e)
+  | isId x    = AFun (isOneShotBndr x) (arityType dflags e)
+  | otherwise = arityType dflags e
  
         -- Applications; decrease arity
-arityType (App f (Type _)) = arityType f
-arityType (App f a)       = case arityType f of
-                               AFun one_shot xs | exprIsCheap a -> xs
-                               other                            -> ATop
+arityType dflags (App f (Type _)) = arityType dflags f
+arityType dflags (App f a)       = case arityType dflags f of
+                                       AFun one_shot xs | exprIsCheap a -> xs
+                                       other                            -> ATop
                                                            
         -- Case/Let; keep arity if either the expression is cheap
         -- or it's a 1-shot lambda
@@ -844,17 +860,40 @@ arityType (App f a)          = case arityType f of
         --  ===>
         --      f x y = case x of { (a,b) -> e }
         -- The difference is observable using 'seq'
-arityType (Case scrut _ _ alts) = case foldr1 andArityType [arityType rhs | (_,_,rhs) <- alts] of
-                                 xs@(AFun one_shot _) | one_shot -> xs
-                                 xs | exprIsCheap scrut          -> xs
-                                    | otherwise                  -> ATop
-
-arityType (Let b e) = case arityType e of
-                       xs@(AFun one_shot _) | one_shot                       -> xs
-                       xs                   | all exprIsCheap (rhssOfBind b) -> xs
-                                            | otherwise                      -> ATop
-
-arityType other = ATop
+arityType dflags (Case scrut _ _ alts)
+  = case foldr1 andArityType [arityType dflags rhs | (_,_,rhs) <- alts] of
+       xs | exprIsCheap scrut          -> xs
+       xs@(AFun one_shot _) | one_shot -> AFun True ATop
+       other                           -> ATop
+
+arityType dflags (Let b e) 
+  = case arityType dflags e of
+       xs                   | cheap_bind b -> xs
+       xs@(AFun one_shot _) | one_shot     -> AFun True ATop
+       other                               -> ATop
+  where
+    cheap_bind (NonRec b e) = is_cheap (b,e)
+    cheap_bind (Rec prs)    = all is_cheap prs
+    is_cheap (b,e) = (dopt Opt_DictsCheap dflags && isDictId b)
+                  || exprIsCheap e
+       -- If the experimental -fdicts-cheap flag is on, we eta-expand through
+       -- dictionary bindings.  This improves arities. Thereby, it also
+       -- means that full laziness is less prone to floating out the
+       -- application of a function to its dictionary arguments, which
+       -- can thereby lose opportunities for fusion.  Example:
+       --      foo :: Ord a => a -> ...
+       --      foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
+       --              -- So foo has arity 1
+       --
+       --      f = \x. foo dInt $ bar x
+       --
+       -- The (foo DInt) is floated out, and makes ineffective a RULE 
+       --      foo (bar x) = ...
+       --
+       -- One could go further and make exprIsCheap reply True to any
+       -- dictionary-typed expression, but that's more work.
+
+arityType dflags other = ATop
  
  {- NOT NEEDED ANY MORE: etaExpand is cleverer
  ok_note InlineMe = False
@@ -1094,7 +1133,6 @@ eq_alt env (c1,vs1,r1) (c2,vs2,r2) = c1==c2 && tcEqExprX (rnBndrs2 env vs1  vs2)
  
  eq_note env (SCC cc1)      (SCC cc2)      = cc1 == cc2
  eq_note env (Coerce t1 f1) (Coerce t2 f2) = tcEqTypeX env t1 t2 && tcEqTypeX env f1 f2
-eq_note env InlineCall     InlineCall     = True
  eq_note env (CoreNote s1)  (CoreNote s2)  = s1 == s2
  eq_note env other1            other2     = False
  \end{code}
@@ -1124,7 +1162,6 @@ exprSize (Type t)        = seqType t `seq` 1
  
  noteSize (SCC cc)       = cc `seq` 1
  noteSize (Coerce t1 t2) = seqType t1 `seq` seqType t2 `seq` 1
-noteSize InlineCall     = 1
  noteSize InlineMe       = 1
  noteSize (CoreNote s)   = s `seq` 1  -- hdaume: core annotations
  
@@ -1194,7 +1231,7 @@ If this happens we simply make the RHS into an updatable thunk,
  and 'exectute' it rather than allocating it statically.
  
  \begin{code}
-rhsIsStatic :: HomeModules -> CoreExpr -> Bool
+rhsIsStatic :: PackageId -> CoreExpr -> Bool
  -- This function is called only on *top-level* right-hand sides
  -- Returns True if the RHS can be allocated statically, with
  -- no thunks involved at all.
@@ -1255,7 +1292,7 @@ rhsIsStatic :: HomeModules -> CoreExpr -> Bool
  -- When opt_RuntimeTypes is on, we keep type lambdas and treat
  -- them as making the RHS re-entrant (non-updatable).
  
-rhsIsStatic hmods rhs = is_static False rhs
+rhsIsStatic this_pkg rhs = is_static False rhs
    where
    is_static :: Bool    -- True <=> in a constructor argument; must be atomic
           -> CoreExpr -> Bool
@@ -1282,7 +1319,7 @@ rhsIsStatic hmods rhs = is_static False rhs
     where
      go (Var f) n_val_args
  #if mingw32_TARGET_OS
-        | not (isDllName hmods (idName f))
+        | not (isDllName this_pkg (idName f))
  #endif
         =  saturated_data_con f n_val_args
         || (in_arg && n_val_args == 0)