Merge remote branch 'origin/master'

[ghc-hetmet.git] / compiler / simplCore / SimplUtils.lhs
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs

index a2fe28d..7d5d764 100644 (file)
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -36,6 +36,7 @@ import StaticFlags
  import CoreSyn
  import qualified CoreSubst
  import PprCore
+import DataCon ( dataConCannotMatch )
  import CoreFVs
  import CoreUtils
  import CoreArity
@@ -46,15 +47,15 @@ import Var
  import Demand
  import SimplMonad
  import Type    hiding( substTy )
-import Coercion ( coercionKind )
+import Coercion hiding( substCo )
  import TyCon
-import Unify   ( dataConCannotMatch )
  import VarSet
  import BasicTypes
  import Util
  import MonadUtils
  import Outputable
  import FastString
+import Pair
  
  import Data.List
  \end{code}
@@ -98,6 +99,7 @@ data SimplCont
  
    | CoerceIt           -- C `cast` co
         OutCoercion             -- The coercion simplified
+                               -- Invariant: never an identity coercion
         SimplCont
  
    | ApplyTo            -- C arg
@@ -207,6 +209,7 @@ contIsDupable _                          = False
  contIsTrivial :: SimplCont -> Bool
  contIsTrivial (Stop {})                   = True
  contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
+contIsTrivial (ApplyTo _ (Coercion _) _ cont) = contIsTrivial cont
  contIsTrivial (CoerceIt _ cont)           = contIsTrivial cont
  contIsTrivial _                           = False
  
@@ -215,17 +218,19 @@ contResultType :: SimplEnv -> OutType -> SimplCont -> OutType
  contResultType env ty cont
    = go cont ty
    where
-    subst_ty se ty = substTy (se `setInScope` env) ty
+    subst_ty se ty = SimplEnv.substTy (se `setInScope` env) ty
+    subst_co se co = SimplEnv.substCo (se `setInScope` env) co
  
      go (Stop {})                      ty = ty
-    go (CoerceIt co cont)             _  = go cont (snd (coercionKind co))
+    go (CoerceIt co cont)             _  = go cont (pSnd (coercionKind co))
      go (StrictBind _ bs body se cont) _  = go cont (subst_ty se (exprType (mkLams bs body)))
      go (StrictArg ai _ cont)          _  = go cont (funResultTy (argInfoResultTy ai))
      go (Select _ _ alts se cont)      _  = go cont (subst_ty se (coreAltsType alts))
      go (ApplyTo _ arg se cont)        ty = go cont (apply_to_arg ty arg se)
  
-    apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
-    apply_to_arg ty _             _  = funResultTy ty
+    apply_to_arg ty (Type ty_arg)     se = applyTy ty (subst_ty se ty_arg)
+    apply_to_arg ty (Coercion co_arg) se = applyCo ty (subst_co se co_arg)
+    apply_to_arg ty _                 _  = funResultTy ty
  
  argInfoResultTy :: ArgInfo -> OutType
  argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args })
@@ -234,6 +239,7 @@ argInfoResultTy (ArgInfo { ai_fun = fun, ai_args = args })
  -------------------
  countValArgs :: SimplCont -> Int
  countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
+countValArgs (ApplyTo _ (Coercion _) _ cont) = countValArgs cont
  countValArgs (ApplyTo _ _        _ cont) = 1 + countValArgs cont
  countValArgs _                           = 0
  
@@ -467,12 +473,17 @@ CoreMonad
          sm_eta_expand :: Bool     -- Whether eta-expansion is enabled
  
  \begin{code}
-simplEnvForGHCi :: SimplEnv
-simplEnvForGHCi = mkSimplEnv $
-                  SimplMode { sm_names = ["GHCi"]
-                            , sm_phase = InitialPhase
-                            , sm_rules = True, sm_inline = False
-                            , sm_eta_expand = False, sm_case_case = True }
+simplEnvForGHCi :: DynFlags -> SimplEnv
+simplEnvForGHCi dflags
+  = mkSimplEnv $ SimplMode { sm_names = ["GHCi"]
+                           , sm_phase = InitialPhase
+                           , sm_rules = rules_on
+                           , sm_inline = False
+                           , sm_eta_expand = eta_expand_on
+                           , sm_case_case = True }
+  where
+    rules_on      = dopt Opt_EnableRewriteRules   dflags
+    eta_expand_on = dopt Opt_DoLambdaEtaExpansion dflags
     -- Do not do any inlining, in case we expose some unboxed
     -- tuple stuff that confuses the bytecode interpreter
  
@@ -480,9 +491,10 @@ updModeForInlineRules :: Activation -> SimplifierMode -> SimplifierMode
  -- See Note [Simplifying inside InlineRules]
  updModeForInlineRules inline_rule_act current_mode
    = current_mode { sm_phase = phaseFromActivation inline_rule_act
-                 , sm_rules = True
                   , sm_inline = True
                   , sm_eta_expand = False }
+                -- For sm_rules, just inherit; sm_rules might be "off"
+                -- becuase of -fno-enable-rewrite-rules
    where
      phaseFromActivation (ActiveAfter n) = Phase n
      phaseFromActivation _               = InitialPhase
@@ -777,6 +789,11 @@ Don't inline top-level Ids that are bottoming, even if they are used just
  once, because FloatOut has gone to some trouble to extract them out.
  Inlining them won't make the program run faster!
  
+Note [Do not inline CoVars unconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Coercion variables appear inside coercions, and have a separate
+substitution, so don't inline them via the IdSubst!
+
  \begin{code}
  preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
  preInlineUnconditionally env top_lvl bndr rhs
@@ -784,6 +801,7 @@ preInlineUnconditionally env top_lvl bndr rhs
    | isStableUnfolding (idUnfolding bndr)     = False    -- Note [InlineRule and preInlineUnconditionally]
    | isTopLevel top_lvl && isBottomingId bndr = False   -- Note [Top-level bottoming Ids]
    | opt_SimplNoPreInlining                   = False
+  | isCoVar bndr                             = False -- Note [Do not inline CoVars unconditionally]
    | otherwise = case idOccInfo bndr of
                   IAmDead                    -> True    -- Happens in ((\x.1) v)
                   OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
@@ -856,7 +874,7 @@ a thing based on the form of its RHS; in particular if it has a
  trivial RHS.  If so, we can inline and discard the binding altogether.
  
  NB: a loop breaker has must_keep_binding = True and non-loop-breakers
-only have *forward* references Hence, it's safe to discard the binding
+only have *forward* references. Hence, it's safe to discard the binding
         
  NOTE: This isn't our last opportunity to inline.  We're at the binding
  site right now, and we'll get another opportunity when we get to the
@@ -881,6 +899,7 @@ story for now.
  postInlineUnconditionally 
      :: SimplEnv -> TopLevelFlag
      -> OutId           -- The binder (an InId would be fine too)
+                               --            (*not* a CoVar)
      -> OccInfo                 -- From the InId
      -> OutExpr
      -> Unfolding
@@ -891,8 +910,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
                                         -- because it might be referred to "earlier"
    | isExportedId bndr           = False
    | isStableUnfolding unfolding = False        -- Note [InlineRule and postInlineUnconditionally]
-  | exprIsTrivial rhs          = True
    | isTopLevel top_lvl          = False        -- Note [Top level and postInlineUnconditionally]
+  | exprIsTrivial rhs          = True
    | otherwise
    = case occ_info of
         -- The point of examining occ_info here is that for *non-values* 
@@ -959,14 +978,29 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
  
  Note [Top level and postInlineUnconditionally]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We don't do postInlineUnconditionally for top-level things (exept ones that
-are trivial):
-  * There is no point, because the main goal is to get rid of local
-    bindings used in multiple case branches.
+We don't do postInlineUnconditionally for top-level things (even for
+ones that are trivial):
+
    * Doing so will inline top-level error expressions that have been
      carefully floated out by FloatOut.  More generally, it might 
      replace static allocation with dynamic.
  
+  * Even for trivial expressions there's a problem.  Consider
+      {-# RULE "foo" forall (xs::[T]). reverse xs = ruggle xs #-}
+      blah xs = reverse xs
+      ruggle = sort
+    In one simplifier pass we might fire the rule, getting 
+      blah xs = ruggle xs
+    but in *that* simplifier pass we must not do postInlineUnconditionally
+    on 'ruggle' because then we'll have an unbound occurrence of 'ruggle'
+
+    If the rhs is trivial it'll be inlined by callSiteInline, and then
+    the binding will be dead and discarded by the next use of OccurAnal
+
+  * There is less point, because the main goal is to get rid of local
+    bindings used in multiple case branches.  
+    
+
  Note [InlineRule and postInlineUnconditionally]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
@@ -1010,9 +1044,9 @@ mkLam _env bndrs body
        | not (any bad bndrs)
         -- Note [Casts and lambdas]
        = do { lam <- mkLam' dflags bndrs body
-          ; return (mkCoerce (mkPiTypes bndrs co) lam) }
+           ; return (mkCoerce (mkPiCos bndrs co) lam) }
        where
-       co_vars  = tyVarsOfType co
+        co_vars  = tyCoVarsOfCo co
         bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars      
  
      mkLam' dflags bndrs body@(Lam {})
@@ -1026,7 +1060,7 @@ mkLam _env bndrs body
        = do { tick (EtaReduction (head bndrs))
            ; return etad_lam }
  
-      | otherwise 
+      | otherwise
        = return (mkLams bndrs body)
  \end{code}
  
@@ -1085,7 +1119,8 @@ tryEtaExpand env bndr rhs
      try_expand dflags
        | sm_eta_expand (getMode env)      -- Provided eta-expansion is on
        , not (exprIsTrivial rhs)
-      , let new_arity = exprEtaExpandArity dflags rhs
+      , let dicts_cheap = dopt Opt_DictsCheap dflags
+            new_arity   = findArity dicts_cheap bndr rhs old_arity
        , new_arity > rhs_arity
        = do { tick (EtaExpansion bndr)
             ; return (new_arity, etaExpand new_arity rhs) }
@@ -1095,6 +1130,67 @@ tryEtaExpand env bndr rhs
      rhs_arity  = exprArity rhs
      old_arity  = idArity bndr
      _dmd_arity = length $ fst $ splitStrictSig $ idStrictness bndr
+
+findArity :: Bool -> Id -> CoreExpr -> Arity -> Arity
+-- This implements the fixpoint loop for arity analysis
+-- See Note [Arity analysis]
+findArity dicts_cheap bndr rhs old_arity
+  = go (exprEtaExpandArity (mk_cheap_fn dicts_cheap init_cheap_app) rhs)
+       -- We always call exprEtaExpandArity once, but usually 
+       -- that produces a result equal to old_arity, and then
+       -- we stop right away (since arities should not decrease)
+       -- Result: the common case is that there is just one iteration
+  where
+    go :: Arity -> Arity
+    go cur_arity
+      | cur_arity <= old_arity = cur_arity     
+      | new_arity == cur_arity = cur_arity
+      | otherwise = ASSERT( new_arity < cur_arity )
+                    pprTrace "Exciting arity" 
+                       (vcat [ ppr bndr <+> ppr cur_arity <+> ppr new_arity
+                             , ppr rhs])
+                    go new_arity
+      where
+        new_arity = exprEtaExpandArity (mk_cheap_fn dicts_cheap cheap_app) rhs
+      
+        cheap_app :: CheapAppFun
+        cheap_app fn n_val_args
+          | fn == bndr = n_val_args < cur_arity
+          | otherwise  = isCheapApp fn n_val_args
+
+    init_cheap_app :: CheapAppFun
+    init_cheap_app fn n_val_args
+      | fn == bndr = True
+      | otherwise  = isCheapApp fn n_val_args
+ 
+mk_cheap_fn :: Bool -> CheapAppFun -> CheapFun
+mk_cheap_fn dicts_cheap cheap_app
+  | not dicts_cheap
+  = \e _     -> exprIsCheap' cheap_app e
+  | otherwise
+  = \e mb_ty -> exprIsCheap' cheap_app e
+             || case mb_ty of
+                  Nothing -> False
+                  Just ty -> isDictLikeTy ty
+       -- If the experimental -fdicts-cheap flag is on, we eta-expand through
+       -- dictionary bindings.  This improves arities. Thereby, it also
+       -- means that full laziness is less prone to floating out the
+       -- application of a function to its dictionary arguments, which
+       -- can thereby lose opportunities for fusion.  Example:
+       --      foo :: Ord a => a -> ...
+       --      foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
+       --              -- So foo has arity 1
+       --
+       --      f = \x. foo dInt $ bar x
+       --
+       -- The (foo DInt) is floated out, and makes ineffective a RULE 
+       --      foo (bar x) = ...
+       --
+       -- One could go further and make exprIsCheap reply True to any
+       -- dictionary-typed expression, but that's more work.
+       -- 
+       -- See Note [Dictionary-like types] in TcType.lhs for why we use
+       -- isDictLikeTy here rather than isDictTy
  \end{code}
  
  Note [Eta-expanding at let bindings]
@@ -1120,6 +1216,33 @@ because then 'genMap' will inline, and it really shouldn't: at least
  as far as the programmer is concerned, it's not applied to two
  arguments!
  
+Note [Arity analysis]
+~~~~~~~~~~~~~~~~~~~~~
+The motivating example for arity analysis is this:
+ 
+  f = \x. let g = f (x+1) 
+          in \y. ...g...
+
+What arity does f have?  Really it should have arity 2, but a naive
+look at the RHS won't see that.  You need a fixpoint analysis which
+says it has arity "infinity" the first time round.
+
+This example happens a lot; it first showed up in Andy Gill's thesis,
+fifteen years ago!  It also shows up in the code for 'rnf' on lists
+in Trac #4138.
+
+The analysis is easy to achieve because exprEtaExpandArity takes an
+argument
+     type CheapFun = CoreExpr -> Maybe Type -> Bool
+used to decide if an expression is cheap enough to push inside a 
+lambda.  And exprIsCheap' in turn takes an argument
+     type CheapAppFun = Id -> Int -> Bool
+which tells when an application is cheap. This makes it easy to
+write the analysis loop.
+
+The analysis is cheap-and-cheerful because it doesn't deal with
+mutual recursion.  But the self-recursive case is the important one.
+
  
  %************************************************************************
  %*                                                                     *
@@ -1222,9 +1345,7 @@ abstractFloats main_tvs body_env body
            ; return (subst', (NonRec poly_id poly_rhs)) }
        where
         rhs' = CoreSubst.substExpr (text "abstract_floats2") subst rhs
-       tvs_here | any isCoVar main_tvs = main_tvs      -- Note [Abstract over coercions]
-                | otherwise 
-                = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyCoVar rhs')
+       tvs_here = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
         
                 -- Abstract only over the type variables free in the rhs
                 -- wrt which the new binding is abstracted.  But the naive
@@ -1436,9 +1557,8 @@ prepareDefault case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
         [con] ->        -- It matches exactly one constructor, so fill it in
                  do { tick (FillInCaseDefault case_bndr)
                      ; us <- getUniquesM
-                    ; let (ex_tvs, co_tvs, arg_ids) =
-                              dataConRepInstPat us con inst_tys
-                    ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
+                    ; let (ex_tvs, arg_ids) = dataConRepInstPat us con inst_tys
+                    ; return [(DataAlt con, ex_tvs ++ arg_ids, deflt_rhs)] }
  
         _ -> return [(DEFAULT, [], deflt_rhs)]