Merge branch 'master' into ghc-new-co

[ghc-hetmet.git] / compiler / simplCore / SimplUtils.lhs
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs

index a2fe28d..976bb87 100644 (file)
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -36,6 +36,7 @@ import StaticFlags
  import CoreSyn
  import qualified CoreSubst
  import PprCore
+import DataCon ( dataConCannotMatch )
  import CoreFVs
  import CoreUtils
  import CoreArity
@@ -46,15 +47,15 @@ import Var
  import Demand
  import SimplMonad
  import Type    hiding( substTy )
-import Coercion ( coercionKind )
+import Coercion hiding( substCo )
  import TyCon
-import Unify   ( dataConCannotMatch )
  import VarSet
  import BasicTypes
  import Util
  import MonadUtils
  import Outputable
  import FastString
+import Pair
  
  import Data.List
  \end{code}
@@ -207,6 +208,7 @@ contIsDupable _                          = False
  contIsTrivial :: SimplCont -> Bool
  contIsTrivial (Stop {})                   = True
  contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
+contIsTrivial (ApplyTo _ (Coercion _) _ cont) = contIsTrivial cont
  contIsTrivial (CoerceIt _ cont)           = contIsTrivial cont
  contIsTrivial _                           = False
  
@@ -215,17 +217,19 @@ contResultType :: SimplEnv -> OutType -> SimplCont -> OutType
  contResultType env ty cont
    = go cont ty
    where
-    subst_ty se ty = substTy (se `setInScope` env) ty
+    subst_ty se ty = SimplEnv.substTy (se `setInScope` env) ty
+    subst_co se co = SimplEnv.substCo (se `setInScope` env) co
  
      go (Stop {})                      ty = ty
-    go (CoerceIt co cont)             _  = go cont (snd (coercionKind co))
+    go (CoerceIt co cont)             _  = go cont (pSnd (coercionKind co))
      go (StrictBind _ bs body se cont) _  = go cont (subst_ty se (exprType (mkLams bs body)))
      go (StrictArg ai _ cont)          _  = go cont (funResultTy (argInfoResultTy ai))
      go (Select _ _ alts se cont)      _  = go cont (subst_ty se (coreAltsType alts))
      go (ApplyTo _ arg se cont)        ty = go cont (apply_to_arg ty arg se)
  
-    apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
-    apply_to_arg ty _             _  = funResultTy ty
+    apply_to_arg ty (Type ty_arg)     se = applyTy ty (subst_ty se ty_arg)
+    apply_to_arg ty (Coercion co_arg) se = applyCo ty (subst_co se co_arg)
+    apply_to_arg ty _                 _  = funResultTy ty
  
  argInfoResultTy :: ArgInfo -> OutType
  argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args })
@@ -234,6 +238,7 @@ argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args })
  -------------------
  countValArgs :: SimplCont -> Int
  countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
+countValArgs (ApplyTo _ (Coercion _) _ cont) = countValArgs cont
  countValArgs (ApplyTo _ _        _ cont) = 1 + countValArgs cont
  countValArgs _                           = 0
  
@@ -467,12 +472,17 @@ CoreMonad
          sm_eta_expand :: Bool     -- Whether eta-expansion is enabled
  
  \begin{code}
-simplEnvForGHCi :: SimplEnv
-simplEnvForGHCi = mkSimplEnv $
-                  SimplMode { sm_names = ["GHCi"]
-                            , sm_phase = InitialPhase
-                            , sm_rules = True, sm_inline = False
-                            , sm_eta_expand = False, sm_case_case = True }
+simplEnvForGHCi :: DynFlags -> SimplEnv
+simplEnvForGHCi dflags
+  = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
+                           , sm_phase = InitialPhase
+                           , sm_rules = rules_on
+                           , sm_inline = False
+                           , sm_eta_expand = eta_expand_on
+                           , sm_case_case = True }
+  where
+    rules_on      = dopt Opt_EnableRewriteRules   dflags
+    eta_expand_on = dopt Opt_DoLambdaEtaExpansion dflags
     -- Do not do any inlining, in case we expose some unboxed
     -- tuple stuff that confuses the bytecode interpreter
  
@@ -480,9 +490,10 @@ updModeForInlineRules :: Activation -> SimplifierMode -> SimplifierMode
  -- See Note [Simplifying inside InlineRules]
  updModeForInlineRules inline_rule_act current_mode
    = current_mode { sm_phase = phaseFromActivation inline_rule_act
-                 , sm_rules = True
                   , sm_inline = True
                   , sm_eta_expand = False }
+                -- For sm_rules, just inherit; sm_rules might be "off"
+                -- becuase of -fno-enable-rewrite-rules
    where
      phaseFromActivation (ActiveAfter n) = Phase n
      phaseFromActivation _               = InitialPhase
@@ -856,7 +867,7 @@ a thing based on the form of its RHS; in particular if it has a
  trivial RHS.  If so, we can inline and discard the binding altogether.
  
  NB: a loop breaker has must_keep_binding = True and non-loop-breakers
-only have *forward* references Hence, it's safe to discard the binding
+only have *forward* references. Hence, it's safe to discard the binding
         
  NOTE: This isn't our last opportunity to inline.  We're at the binding
  site right now, and we'll get another opportunity when we get to the
@@ -891,8 +902,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
                                         -- because it might be referred to "earlier"
    | isExportedId bndr           = False
    | isStableUnfolding unfolding = False        -- Note [InlineRule and postInlineUnconditionally]
-  | exprIsTrivial rhs          = True
    | isTopLevel top_lvl          = False        -- Note [Top level and postInlineUnconditionally]
+  | exprIsTrivial rhs          = True
    | otherwise
    = case occ_info of
         -- The point of examining occ_info here is that for *non-values* 
@@ -959,14 +970,29 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
  
  Note [Top level and postInlineUnconditionally]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We don't do postInlineUnconditionally for top-level things (exept ones that
-are trivial):
-  * There is no point, because the main goal is to get rid of local
-    bindings used in multiple case branches.
+We don't do postInlineUnconditionally for top-level things (even for
+ones that are trivial):
+
    * Doing so will inline top-level error expressions that have been
      carefully floated out by FloatOut.  More generally, it might 
      replace static allocation with dynamic.
  
+  * Even for trivial expressions there's a problem.  Consider
+      {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
+      blah xs = reverse xs
+      ruggle = sort
+    In one simplifier pass we might fire the rule, getting 
+      blah xs = ruggle xs
+    but in *that* simplifier pass we must not do postInlineUnconditionally
+    on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
+
+    If the rhs is trivial it'll be inlined by callSiteInline, and then
+    the binding will be dead and discarded by the next use of OccurAnal
+
+  * There is less point, because the main goal is to get rid of local
+    bindings used in multiple case branches.  
+    
+
  Note [InlineRule and postInlineUnconditionally]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
@@ -1010,9 +1036,9 @@ mkLam _env bndrs body
        | not (any bad bndrs)
         -- Note [Casts and lambdas]
        = do { lam <- mkLam' dflags bndrs body
-          ; return (mkCoerce (mkPiTypes bndrs co) lam) }
+           ; return (mkCoerce (mkPiCos bndrs co) lam) }
        where
-       co_vars  = tyVarsOfType co
+        co_vars  = tyCoVarsOfCo co
         bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars      
  
      mkLam' dflags bndrs body@(Lam {})
@@ -1026,7 +1052,7 @@ mkLam _env bndrs body
        = do { tick (EtaReduction (head bndrs))
            ; return etad_lam }
  
-      | otherwise 
+      | otherwise
        = return (mkLams bndrs body)
  \end{code}
  
@@ -1085,7 +1111,8 @@ tryEtaExpand env bndr rhs
      try_expand dflags
        | sm_eta_expand (getMode env)      -- Provided eta-expansion is on
        , not (exprIsTrivial rhs)
-      , let new_arity = exprEtaExpandArity dflags rhs
+      , let dicts_cheap = dopt Opt_DictsCheap dflags
+            new_arity   = findArity dicts_cheap bndr rhs old_arity
        , new_arity > rhs_arity
        = do { tick (EtaExpansion bndr)
             ; return (new_arity, etaExpand new_arity rhs) }
@@ -1095,6 +1122,67 @@ tryEtaExpand env bndr rhs
      rhs_arity  = exprArity rhs
      old_arity  = idArity bndr
      _dmd_arity = length $ fst $ splitStrictSig $ idStrictness bndr
+
+findArity :: Bool -> Id -> CoreExpr -> Arity -> Arity
+-- This implements the fixpoint loop for arity analysis
+-- See Note [Arity analysis]
+findArity dicts_cheap bndr rhs old_arity
+  = go (exprEtaExpandArity (mk_cheap_fn dicts_cheap init_cheap_app) rhs)
+       -- We always call exprEtaExpandArity once, but usually 
+       -- that produces a result equal to old_arity, and then
+       -- we stop right away (since arities should not decrease)
+       -- Result: the common case is that there is just one iteration
+  where
+    go :: Arity -> Arity
+    go cur_arity
+      | cur_arity <= old_arity = cur_arity     
+      | new_arity == cur_arity = cur_arity
+      | otherwise = ASSERT( new_arity < cur_arity )
+                    pprTrace "Exciting arity" 
+                       (vcat [ ppr bndr <+> ppr cur_arity <+> ppr new_arity
+                             , ppr rhs])
+                    go new_arity
+      where
+        new_arity = exprEtaExpandArity (mk_cheap_fn dicts_cheap cheap_app) rhs
+      
+        cheap_app :: CheapAppFun
+        cheap_app fn n_val_args
+          | fn == bndr = n_val_args < cur_arity
+          | otherwise  = isCheapApp fn n_val_args
+
+    init_cheap_app :: CheapAppFun
+    init_cheap_app fn n_val_args
+      | fn == bndr = True
+      | otherwise  = isCheapApp fn n_val_args
+ 
+mk_cheap_fn :: Bool -> CheapAppFun -> CheapFun
+mk_cheap_fn dicts_cheap cheap_app
+  | not dicts_cheap
+  = \e _     -> exprIsCheap' cheap_app e
+  | otherwise
+  = \e mb_ty -> exprIsCheap' cheap_app e
+             || case mb_ty of
+                  Nothing -> False
+                  Just ty -> isDictLikeTy ty
+       -- If the experimental -fdicts-cheap flag is on, we eta-expand through
+       -- dictionary bindings.  This improves arities. Thereby, it also
+       -- means that full laziness is less prone to floating out the
+       -- application of a function to its dictionary arguments, which
+       -- can thereby lose opportunities for fusion.  Example:
+       --      foo :: Ord a => a -> ...
+       --      foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
+       --              -- So foo has arity 1
+       --
+       --      f = \x. foo dInt $ bar x
+       --
+       -- The (foo DInt) is floated out, and makes ineffective a RULE 
+       --      foo (bar x) = ...
+       --
+       -- One could go further and make exprIsCheap reply True to any
+       -- dictionary-typed expression, but that's more work.
+       -- 
+       -- See Note [Dictionary-like types] in TcType.lhs for why we use
+       -- isDictLikeTy here rather than isDictTy
  \end{code}
  
  Note [Eta-expanding at let bindings]
@@ -1120,6 +1208,33 @@ because then 'genMap' will inline, and it really shouldn't: at least
  as far as the programmer is concerned, it's not applied to two
  arguments!
  
+Note [Arity analysis]
+~~~~~~~~~~~~~~~~~~~~~
+The motivating example for arity analysis is this:
+ 
+  f = \x. let g = f (x+1) 
+          in \y. ...g...
+
+What arity does f have?  Really it should have arity 2, but a naive
+look at the RHS won't see that.  You need a fixpoint analysis which
+says it has arity "infinity" the first time round.
+
+This example happens a lot; it first showed up in Andy Gill's thesis,
+fifteen years ago!  It also shows up in the code for 'rnf' on lists
+in Trac #4138.
+
+The analysis is easy to achieve because exprEtaExpandArity takes an
+argument
+     type CheapFun = CoreExpr -> Maybe Type -> Bool
+used to decide if an expression is cheap enough to push inside a 
+lambda.  And exprIsCheap' in turn takes an argument
+     type CheapAppFun = Id -> Int -> Bool
+which tells when an application is cheap. This makes it easy to
+write the analysis loop.
+
+The analysis is cheap-and-cheerful because it doesn't deal with
+mutual recursion.  But the self-recursive case is the important one.
+
  
  %************************************************************************
  %*                                                                     *
@@ -1222,9 +1337,7 @@ abstractFloats main_tvs body_env body
            ; return (subst', (NonRec poly_id poly_rhs)) }
        where
         rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
-       tvs_here | any isCoVar main_tvs = main_tvs      -- Note [Abstract over coercions]
-                | otherwise 
-                = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyCoVar rhs')
+       tvs_here = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
         
                 -- Abstract only over the type variables free in the rhs
                 -- wrt which the new binding is abstracted.  But the naive
@@ -1436,9 +1549,8 @@ prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
         [con] ->        -- It matches exactly one constructor, so fill it in
                  do { tick (FillInCaseDefault case_bndr)
                      ; us <- getUniquesM
-                    ; let (ex_tvs, co_tvs, arg_ids) =
-                              dataConRepInstPat us con inst_tys
-                    ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
+                    ; let (ex_tvs, arg_ids) = dataConRepInstPat us con inst_tys
+                    ; return [(DataAlt con, ex_tvs ++ arg_ids, deflt_rhs)] }
  
         _ -> return [(DEFAULT, [], deflt_rhs)]