Keep Touch'd variables live through the back end

[ghc-hetmet.git] / compiler / simplCore / SimplUtils.lhs
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs

index e8714d4..c87e1fc 100644 (file)
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -4,27 +4,21 @@
  \section[SimplUtils]{The simplifier utilities}
  
  \begin{code}
-{-# OPTIONS -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
  module SimplUtils (
         -- Rebuilding
         mkLam, mkCase, prepareAlts, bindCaseBndr,
  
         -- Inlining,
         preInlineUnconditionally, postInlineUnconditionally, 
-       activeInline, activeRule, inlineMode,
+       activeInline, activeRule, 
+        simplEnvForGHCi, simplEnvForRules, updModeForInlineRules,
  
         -- The continuation type
-       SimplCont(..), DupFlag(..), LetRhsFlag(..), 
+       SimplCont(..), DupFlag(..), ArgInfo(..),
         contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, 
-       countValArgs, countArgs, splitInlineCont,
-       mkBoringStop, mkLazyArgStop, mkRhsStop, contIsRhsOrArg,
-       interestingCallContext, interestingArgContext,
+       countValArgs, countArgs, 
+       mkBoringStop, mkRhsStop, mkLazyArgStop, contIsRhsOrArg,
+       interestingCallContext, 
  
         interestingArg, mkArgInfo,
         
@@ -41,24 +35,25 @@ import qualified CoreSubst
  import PprCore
  import CoreFVs
  import CoreUtils
-import Literal 
+import CoreArity       ( etaExpand, exprEtaExpandArity )
  import CoreUnfold
-import MkId
  import Name
  import Id
  import Var     ( isCoVar )
  import NewDemand
  import SimplMonad
-import Type    ( Type, funArgTy, mkForAllTys, mkTyVarTys, 
-                 splitTyConApp_maybe, tyConAppArgs )
+import Type    hiding( substTy )
+import Coercion ( coercionKind )
  import TyCon
-import DataCon
  import Unify   ( dataConCannotMatch )
  import VarSet
  import BasicTypes
  import Util
+import MonadUtils
  import Outputable
-import List( nub )
+import FastString
+
+import Data.List
  \end{code}
  
  
@@ -91,15 +86,11 @@ Key points:
  \begin{code}
  data SimplCont 
    = Stop               -- An empty context, or hole, []     
-       OutType         -- Type of the result
-       LetRhsFlag
-       Bool            -- True <=> There is something interesting about
+       CallCtxt        -- True <=> There is something interesting about
                         --          the context, and hence the inliner
                         --          should be a bit keener (see interestingCallContext)
-                       -- Two cases:
-                       -- (a) This is the RHS of a thunk whose type suggests
-                       --     that update-in-place would be possible
-                       -- (b) This is an argument of a function that has RULES
+                       -- Specifically:
+                       --     This is an argument of a function that has RULES
                         --     Inlining the call might allow the rule to fire
  
    | CoerceIt           -- C `cast` co
@@ -123,84 +114,99 @@ data SimplCont
         SimplCont       
  
    | StrictArg          -- e C
-       OutExpr OutType         -- e and its type
-       (Bool,[Bool])           -- Whether the function at the head of e has rules,
-       SimplCont               --     plus strictness flags for further args
-
-data LetRhsFlag = AnArg                -- It's just an argument not a let RHS
-               | AnRhs         -- It's the RHS of a let (so please float lets out of big lambdas)
-
-instance Outputable LetRhsFlag where
-  ppr AnArg = ptext SLIT("arg")
-  ppr AnRhs = ptext SLIT("rhs")
+       OutExpr                 -- e; *always* of form (Var v `App1` e1 .. `App` en)
+       CallCtxt                -- Whether *this* argument position is interesting
+       ArgInfo                 -- Whether the function at the head of e has rules, etc
+       SimplCont               --     plus strictness flags for *further* args
+
+data ArgInfo 
+  = ArgInfo {
+       ai_rules :: Bool,       -- Function has rules (recursively)
+                               --      => be keener to inline in all args
+       ai_strs :: [Bool],      -- Strictness of arguments
+                               --   Usually infinite, but if it is finite it guarantees
+                               --   that the function diverges after being given
+                               --   that number of args
+       ai_discs :: [Int]       -- Discounts for arguments; non-zero => be keener to inline
+                               --   Always infinite
+    }
  
  instance Outputable SimplCont where
-  ppr (Stop ty is_rhs _)            = ptext SLIT("Stop") <> brackets (ppr is_rhs) <+> ppr ty
-  ppr (ApplyTo dup arg se cont)      = ((ptext SLIT("ApplyTo") <+> ppr dup <+> pprParendExpr arg)
+  ppr (Stop interesting)            = ptext (sLit "Stop") <> brackets (ppr interesting)
+  ppr (ApplyTo dup arg _ cont)       = ((ptext (sLit "ApplyTo") <+> ppr dup <+> pprParendExpr arg)
                                           {-  $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
-  ppr (StrictBind b _ _ _ cont)      = (ptext SLIT("StrictBind") <+> ppr b) $$ ppr cont
-  ppr (StrictArg f _ _ cont)         = (ptext SLIT("StrictArg") <+> ppr f) $$ ppr cont
-  ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$ 
+  ppr (StrictBind b _ _ _ cont)      = (ptext (sLit "StrictBind") <+> ppr b) $$ ppr cont
+  ppr (StrictArg f _ _ cont)         = (ptext (sLit "StrictArg") <+> ppr f) $$ ppr cont
+  ppr (Select dup bndr alts _ cont)  = (ptext (sLit "Select") <+> ppr dup <+> ppr bndr) $$ 
                                        (nest 4 (ppr alts)) $$ ppr cont 
-  ppr (CoerceIt co cont)            = (ptext SLIT("CoerceIt") <+> ppr co) $$ ppr cont
+  ppr (CoerceIt co cont)            = (ptext (sLit "CoerceIt") <+> ppr co) $$ ppr cont
  
  data DupFlag = OkToDup | NoDup
  
  instance Outputable DupFlag where
-  ppr OkToDup = ptext SLIT("ok")
-  ppr NoDup   = ptext SLIT("nodup")
+  ppr OkToDup = ptext (sLit "ok")
+  ppr NoDup   = ptext (sLit "nodup")
  
  
  
  -------------------
-mkBoringStop :: OutType -> SimplCont
-mkBoringStop ty = Stop ty AnArg False
+mkBoringStop :: SimplCont
+mkBoringStop = Stop BoringCtxt
  
-mkLazyArgStop :: OutType -> Bool -> SimplCont
-mkLazyArgStop ty has_rules = Stop ty AnArg (canUpdateInPlace ty || has_rules)
+mkRhsStop :: SimplCont -- See Note [RHS of lets] in CoreUnfold
+mkRhsStop = Stop (ArgCtxt False)
  
-mkRhsStop :: OutType -> SimplCont
-mkRhsStop ty = Stop ty AnRhs (canUpdateInPlace ty)
+mkLazyArgStop :: CallCtxt -> SimplCont
+mkLazyArgStop cci = Stop cci
  
  -------------------
-contIsRhsOrArg (Stop {})                = True
-contIsRhsOrArg (StrictBind {})          = True
-contIsRhsOrArg (StrictArg {})           = True
-contIsRhsOrArg other            = False
+contIsRhsOrArg :: SimplCont -> Bool
+contIsRhsOrArg (Stop {})       = True
+contIsRhsOrArg (StrictBind {}) = True
+contIsRhsOrArg (StrictArg {})  = True
+contIsRhsOrArg _               = False
  
  -------------------
  contIsDupable :: SimplCont -> Bool
-contIsDupable (Stop {})                 = True
+contIsDupable (Stop {})                  = True
  contIsDupable (ApplyTo  OkToDup _ _ _)   = True
  contIsDupable (Select   OkToDup _ _ _ _) = True
  contIsDupable (CoerceIt _ cont)          = contIsDupable cont
-contIsDupable other                     = False
+contIsDupable _                          = False
  
  -------------------
  contIsTrivial :: SimplCont -> Bool
-contIsTrivial (Stop {})                          = True
+contIsTrivial (Stop {})                   = True
  contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
-contIsTrivial (CoerceIt _ cont)          = contIsTrivial cont
-contIsTrivial other                      = False
+contIsTrivial (CoerceIt _ cont)           = contIsTrivial cont
+contIsTrivial _                           = False
  
  -------------------
-contResultType :: SimplCont -> OutType
-contResultType (Stop to_ty _ _)                 = to_ty
-contResultType (StrictArg _ _ _ cont)   = contResultType cont
-contResultType (StrictBind _ _ _ _ cont) = contResultType cont
-contResultType (ApplyTo _ _ _ cont)     = contResultType cont
-contResultType (CoerceIt _ cont)        = contResultType cont
-contResultType (Select _ _ _ _ cont)    = contResultType cont
+contResultType :: SimplEnv -> OutType -> SimplCont -> OutType
+contResultType env ty cont
+  = go cont ty
+  where
+    subst_ty se ty = substTy (se `setInScope` env) ty
+
+    go (Stop {})                      ty = ty
+    go (CoerceIt co cont)             _  = go cont (snd (coercionKind co))
+    go (StrictBind _ bs body se cont) _  = go cont (subst_ty se (exprType (mkLams bs body)))
+    go (StrictArg fn _ _ cont)        _  = go cont (funResultTy (exprType fn))
+    go (Select _ _ alts se cont)      _  = go cont (subst_ty se (coreAltsType alts))
+    go (ApplyTo _ arg se cont)        ty = go cont (apply_to_arg ty arg se)
+
+    apply_to_arg ty (Type ty_arg) se = applyTy ty (subst_ty se ty_arg)
+    apply_to_arg ty _             _  = funResultTy ty
  
  -------------------
  countValArgs :: SimplCont -> Int
-countValArgs (ApplyTo _ (Type ty) se cont) = countValArgs cont
-countValArgs (ApplyTo _ val_arg   se cont) = 1 + countValArgs cont
-countValArgs other                        = 0
+countValArgs (ApplyTo _ (Type _) _ cont) = countValArgs cont
+countValArgs (ApplyTo _ _        _ cont) = 1 + countValArgs cont
+countValArgs _                           = 0
  
  countArgs :: SimplCont -> Int
-countArgs (ApplyTo _ arg se cont) = 1 + countArgs cont
-countArgs other                          = 0
+countArgs (ApplyTo _ _ _ cont) = 1 + countArgs cont
+countArgs _                    = 0
  
  contArgs :: SimplCont -> ([OutExpr], SimplCont)
  -- Uses substitution to turn each arg into an OutExpr
@@ -213,65 +219,11 @@ dropArgs :: Int -> SimplCont -> SimplCont
  dropArgs 0 cont = cont
  dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
  dropArgs n other               = pprPanic "dropArgs" (ppr n <+> ppr other)
-
---------------------
-splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont)
--- Returns Nothing if the continuation should dissolve an InlineMe Note
--- Return Just (c1,c2) otherwise, 
---     where c1 is the continuation to put inside the InlineMe 
---     and   c2 outside
-
--- Example: (__inline_me__ (/\a. e)) ty
---     Here we want to do the beta-redex without dissolving the InlineMe
--- See test simpl017 (and Trac #1627) for a good example of why this is important
-
-splitInlineCont (ApplyTo dup (Type ty) se c)
-  | Just (c1, c2) <- splitInlineCont c                 = Just (ApplyTo dup (Type ty) se c1, c2)
-splitInlineCont cont@(Stop ty _ _)             = Just (mkBoringStop ty, cont)
-splitInlineCont cont@(StrictBind bndr _ _ se _) = Just (mkBoringStop (substTy se (idType bndr)), cont)
-splitInlineCont cont@(StrictArg _ fun_ty _ _)   = Just (mkBoringStop (funArgTy fun_ty), cont)
-splitInlineCont other                          = Nothing
-       -- NB: the calculation of the type for mkBoringStop is an annoying
-       --     duplication of the same calucation in mkDupableCont
  \end{code}
  
  
-\begin{code}
-interestingArg :: OutExpr -> Bool
-       -- An argument is interesting if it has *some* structure
-       -- We are here trying to avoid unfolding a function that
-       -- is applied only to variables that have no unfolding
-       -- (i.e. they are probably lambda bound): f x y z
-       -- There is little point in inlining f here.
-interestingArg (Var v)          = hasSomeUnfolding (idUnfolding v)
-                                       -- Was: isValueUnfolding (idUnfolding v')
-                                       -- But that seems over-pessimistic
-                                || isDataConWorkId v
-                                       -- This accounts for an argument like
-                                       -- () or [], which is definitely interesting
-interestingArg (Type _)                 = False
-interestingArg (App fn (Type _)) = interestingArg fn
-interestingArg (Note _ a)       = interestingArg a
-
--- Idea (from Sam B); I'm not sure if it's a good idea, so commented out for now
--- interestingArg expr | isUnLiftedType (exprType expr)
---        -- Unlifted args are only ever interesting if we know what they are
---  =                  case expr of
---                        Lit lit -> True
---                        _       -> False
-
-interestingArg other            = True
-       -- Consider     let x = 3 in f x
-       -- The substitution will contain (x -> ContEx 3), and we want to
-       -- to say that x is an interesting argument.
-       -- But consider also (\x. f x y) y
-       -- The substitution will contain (x -> ContEx y), and we want to say
-       -- that x is not interesting (assuming y has no unfolding)
-\end{code}
-
-
-Comment about interestingCallContext
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [Interesting call context]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  We want to avoid inlining an expression where there can't possibly be
  any gain, such as in an argument position.  Hence, if the continuation
  is interesting (eg. a case scrutinee, application etc.) then we
@@ -303,62 +255,30 @@ applies when x is bound to a lambda expression.  Hence
  contIsInteresting looks for case expressions with just a single
  default case.
  
+
  \begin{code}
-interestingCallContext :: Bool                 -- False <=> no args at all
-                      -> Bool          -- False <=> no value args
-                      -> SimplCont -> Bool
-       -- The "lone-variable" case is important.  I spent ages
-       -- messing about with unsatisfactory varaints, but this is nice.
-       -- The idea is that if a variable appear all alone
-       --      as an arg of lazy fn, or rhs    Stop
-       --      as scrutinee of a case          Select
-       --      as arg of a strict fn           ArgOf
-       -- then we should not inline it (unless there is some other reason,
-       -- e.g. is is the sole occurrence).  We achieve this by making
-       -- interestingCallContext return False for a lone variable.
-       --
-       -- Why?  At least in the case-scrutinee situation, turning
-       --      let x = (a,b) in case x of y -> ...
-       -- into
-       --      let x = (a,b) in case (a,b) of y -> ...
-       -- and thence to 
-       --      let x = (a,b) in let y = (a,b) in ...
-       -- is bad if the binding for x will remain.
-       --
-       -- Another example: I discovered that strings
-       -- were getting inlined straight back into applications of 'error'
-       -- because the latter is strict.
-       --      s = "foo"
-       --      f = \x -> ...(error s)...
-
-       -- Fundamentally such contexts should not ecourage inlining because
-       -- the context can ``see'' the unfolding of the variable (e.g. case or a RULE)
-       -- so there's no gain.
-       --
-       -- However, even a type application or coercion isn't a lone variable.
-       -- Consider
-       --      case $fMonadST @ RealWorld of { :DMonad a b c -> c }
-       -- We had better inline that sucker!  The case won't see through it.
-       --
-       -- For now, I'm treating treating a variable applied to types 
-       -- in a *lazy* context "lone". The motivating example was
-       --      f = /\a. \x. BIG
-       --      g = /\a. \y.  h (f a)
-       -- There's no advantage in inlining f here, and perhaps
-       -- a significant disadvantage.  Hence some_val_args in the Stop case
-
-interestingCallContext some_args some_val_args cont
+interestingCallContext :: SimplCont -> CallCtxt
+-- See Note [Interesting call context]
+interestingCallContext cont
    = interesting cont
    where
-    interesting (Select {})              = some_args
-    interesting (ApplyTo {})             = True        -- Can happen if we have (coerce t (f x)) y
-                                               -- Perhaps True is a bit over-keen, but I've
-                                               -- seen (coerce f) x, where f has an INLINE prag,
-                                               -- So we have to give some motivaiton for inlining it
-    interesting (StrictArg {})          = some_val_args
-    interesting (StrictBind {})                 = some_val_args        -- ??
-    interesting (Stop ty _ interesting)  = some_val_args && interesting
-    interesting (CoerceIt _ cont)        = interesting cont
+    interesting (Select _ bndr _ _ _)
+       | isDeadBinder bndr = CaseCtxt
+       | otherwise         = ArgCtxt False     -- If the binder is used, this
+                                               -- is like a strict let
+                                               -- See Note [RHS of lets] in CoreUnfold
+               
+    interesting (ApplyTo _ arg _ cont)
+       | isTypeArg arg = interesting cont
+       | otherwise     = ValAppCtxt    -- Can happen if we have (f Int |> co) y
+                                       -- If f has an INLINE prag we need to give it some
+                                       -- motivation to inline. See Note [Cast then apply]
+                                       -- in CoreUnfold
+
+    interesting (StrictArg _ cci _ _)  = cci
+    interesting (StrictBind {})                = BoringCtxt
+    interesting (Stop cci)             = cci
+    interesting (CoerceIt _ cont)      = interesting cont
         -- If this call is the arg of a strict function, the context
         -- is a bit interesting.  If we inline here, we may get useful
         -- evaluation information to avoid repeated evals: e.g.
@@ -377,23 +297,32 @@ interestingCallContext some_args some_val_args cont
  
  -------------------
  mkArgInfo :: Id
+         -> [CoreRule] -- Rules for function
           -> Int        -- Number of value args
-         -> SimplCont  -- Context of the cal
-         -> (Bool, [Bool])     -- Arg info
--- The arg info consists of
---  * A Bool indicating if the function has rules (recursively)
---  * A [Bool] indicating strictness for each arg
--- The [Bool] is usually infinite, but if it is finite it 
--- guarantees that the function diverges after being given
--- that number of args
-
-mkArgInfo fun n_val_args call_cont
-  = (interestingArgContext fun call_cont, fun_stricts)
+         -> SimplCont  -- Context of the call
+         -> ArgInfo
+
+mkArgInfo fun rules n_val_args call_cont
+  | n_val_args < idArity fun           -- Note [Unsaturated functions]
+  = ArgInfo { ai_rules = False
+           , ai_strs = vanilla_stricts 
+           , ai_discs = vanilla_discounts }
+  | otherwise
+  = ArgInfo { ai_rules = interestingArgContext rules call_cont
+           , ai_strs  = add_type_str (idType fun) arg_stricts
+           , ai_discs = arg_discounts }
    where
-    vanilla_stricts, fun_stricts :: [Bool]
+    vanilla_discounts, arg_discounts :: [Int]
+    vanilla_discounts = repeat 0
+    arg_discounts = case idUnfolding fun of
+                       CoreUnfolding {uf_guidance = UnfoldIfGoodArgs {ug_args = discounts}}
+                             -> discounts ++ vanilla_discounts
+                       _     -> vanilla_discounts
+
+    vanilla_stricts, arg_stricts :: [Bool]
      vanilla_stricts  = repeat False
  
-    fun_stricts
+    arg_stricts
        = case splitStrictSig (idNewStrictness fun) of
           (demands, result_info)
                 | not (demands `lengthExceeds` n_val_args)
@@ -408,10 +337,39 @@ mkArgInfo fun n_val_args call_cont
                         map isStrictDmd demands         -- Finite => result is bottom
                    else
                         map isStrictDmd demands ++ vanilla_stricts
+              | otherwise
+              -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun) 
+                               <+> ppr n_val_args <+> ppr demands ) 
+                  vanilla_stricts      -- Not enough args, or no strictness
+
+    add_type_str :: Type -> [Bool] -> [Bool]
+    -- If the function arg types are strict, record that in the 'strictness bits'
+    -- No need to instantiate because unboxed types (which dominate the strict
+    -- types) can't instantiate type variables.
+    -- add_type_str is done repeatedly (for each call); might be better 
+    -- once-for-all in the function
+    -- But beware primops/datacons with no strictness
+    add_type_str _ [] = []
+    add_type_str fun_ty strs           -- Look through foralls
+       | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty       -- Includes coercions
+       = add_type_str fun_ty' strs
+    add_type_str fun_ty (str:strs)     -- Add strict-type info
+       | Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty
+       = (str || isStrictType arg_ty) : add_type_str fun_ty' strs
+    add_type_str _ strs
+       = strs
+
+{- Note [Unsaturated functions]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider (test eyeball/inline4)
+       x = a:as
+       y = f x
+where f has arity 2.  Then we do not want to inline 'x', because
+it'll just be floated out again.  Even if f has lots of discounts
+on its first argument -- it must be saturated for these to kick in
+-}
  
-         other -> vanilla_stricts      -- Not enough args, or no strictness
-
-interestingArgContext :: Id -> SimplCont -> Bool
+interestingArgContext :: [CoreRule] -> SimplCont -> Bool
  -- If the argument has form (f x y), where x,y are boring,
  -- and f is marked INLINE, then we don't want to inline f.
  -- But if the context of the argument is
@@ -419,44 +377,30 @@ interestingArgContext :: Id -> SimplCont -> Bool
  -- where g has rules, then we *do* want to inline f, in case it
  -- exposes a rule that might fire.  Similarly, if the context is
  --     h (g (f x x))
--- where h has rules, then we do want to inline f.
--- The interesting_arg_ctxt flag makes this happen; if it's
+-- where h has rules, then we do want to inline f; hence the
+-- call_cont argument to interestingArgContext
+--
+-- The ai-rules flag makes this happen; if it's
  -- set, the inliner gets just enough keener to inline f 
  -- regardless of how boring f's arguments are, if it's marked INLINE
  --
  -- The alternative would be to *always* inline an INLINE function,
  -- regardless of how boring its context is; but that seems overkill
  -- For example, it'd mean that wrapper functions were always inlined
-interestingArgContext fn cont
-  = idHasRules fn || go cont
+interestingArgContext rules call_cont
+  = notNull rules || enclosing_fn_has_rules
    where
-    go (Select {})           = False
-    go (ApplyTo {})          = False
-    go (StrictArg {})        = True
-    go (StrictBind {})       = False   -- ??
-    go (CoerceIt _ c)        = go c
-    go (Stop _ _ interesting) = interesting
+    enclosing_fn_has_rules = go call_cont
  
--------------------
-canUpdateInPlace :: Type -> Bool
--- Consider   let x = <wurble> in ...
--- If <wurble> returns an explicit constructor, we might be able
--- to do update in place.  So we treat even a thunk RHS context
--- as interesting if update in place is possible.  We approximate
--- this by seeing if the type has a single constructor with a
--- small arity.  But arity zero isn't good -- we share the single copy
--- for that case, so no point in sharing.
-
-canUpdateInPlace ty 
-  | not opt_UF_UpdateInPlace = False
-  | otherwise
-  = case splitTyConApp_maybe ty of 
-       Nothing         -> False 
-       Just (tycon, _) -> case tyConDataCons_maybe tycon of
-                               Just [dc]  -> arity == 1 || arity == 2
-                                          where
-                                             arity = dataConRepArity dc
-                               other -> False
+    go (Select {})          = False
+    go (ApplyTo {})         = False
+    go (StrictArg _ cci _ _) = interesting cci
+    go (StrictBind {})      = False    -- ??
+    go (CoerceIt _ c)       = go c
+    go (Stop cci)            = interesting cci
+
+    interesting (ArgCtxt rules) = rules
+    interesting _               = False
  \end{code}
  
  
@@ -467,18 +411,61 @@ canUpdateInPlace ty
  %*                                                                     *
  %************************************************************************
  
-Inlining is controlled partly by the SimplifierMode switch.  This has two
-settings:
+\begin{code}
+simplEnvForGHCi :: SimplEnv
+simplEnvForGHCi = mkSimplEnv allOffSwitchChecker $
+                  SimplGently { sm_rules = False, sm_inline = False }
+   -- Do not do any inlining, in case we expose some unboxed
+   -- tuple stuff that confuses the bytecode interpreter
+
+simplEnvForRules :: SimplEnv
+simplEnvForRules = mkSimplEnv allOffSwitchChecker $
+                   SimplGently { sm_rules = True, sm_inline = False }
+
+updModeForInlineRules :: SimplifierMode -> SimplifierMode
+updModeForInlineRules mode
+  = case mode of      
+      SimplGently {} -> mode   -- Don't modify mode if we already gentle
+      SimplPhase  {} -> SimplGently { sm_rules = True, sm_inline = True }
+       -- Simplify as much as possible, subject to the usual "gentle" rules
+\end{code}
  
+Inlining is controlled partly by the SimplifierMode switch.  This has two
+settings
+       
         SimplGently     (a) Simplifying before specialiser/full laziness
-                       (b) Simplifiying inside INLINE pragma
+                       (b) Simplifiying inside InlineRules
                         (c) Simplifying the LHS of a rule
                         (d) Simplifying a GHCi expression or Template 
                                 Haskell splice
  
-       SimplPhase n    Used at all other times
-
-The key thing about SimplGently is that it does no call-site inlining.
+       SimplPhase n _   Used at all other times
+
+Note [Gentle mode]
+~~~~~~~~~~~~~~~~~~
+Gentle mode has a separate boolean flag to control
+       a) inlining (sm_inline flag)
+       b) rules    (sm_rules  flag)
+A key invariant about Gentle mode is that it is treated as the EARLIEST
+phase.  Something is inlined if the sm_inline flag is on AND the thing
+is inlinable in the earliest phase.  This is important. Example
+
+  {-# INLINE [~1] g #-}
+  g = ...
+  
+  {-# INLINE f #-}
+  f x = g (g x)
+
+If we were to inline g into f's inlining, then an importing module would
+never be able to do
+       f e --> g (g e) ---> RULE fires
+because the InlineRule for f has had g inlined into it.
+
+On the other hand, it is bad not to do ANY inlining into an
+InlineRule, because then recursive knots in instance declarations
+don't get unravelled.
+
+However, *sometimes* SimplGently must do no call-site inlining at all.
  Before full laziness we must be careful not to inline wrappers,
  because doing so inhibits floating
      e.g. ...(case f x of ...)...
@@ -492,17 +479,24 @@ running it, we don't want to use -O2.  Indeed, we don't want to inline
  anything, because the byte-code interpreter might get confused about 
  unboxed tuples and suchlike.
  
-INLINE pragmas
-~~~~~~~~~~~~~~
-SimplGently is also used as the mode to simplify inside an InlineMe note.
+Note [RULEs enabled in SimplGently]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+RULES are enabled when doing "gentle" simplification.  Two reasons:
  
-\begin{code}
-inlineMode :: SimplifierMode
-inlineMode = SimplGently
-\end{code}
+  * We really want the class-op cancellation to happen:
+        op (df d1 d2) --> $cop3 d1 d2
+    because this breaks the mutual recursion between 'op' and 'df'
  
-It really is important to switch off inlinings inside such
-expressions.  Consider the following example 
+  * I wanted the RULE
+        lift String ===> ...
+    to work in Template Haskell when simplifying
+    splices, so we get simpler code for literal strings
+
+Note [Simplifying gently inside InlineRules]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We don't do much simplification inside InlineRules (which come from
+INLINE pragmas).  It really is important to switch off inlinings
+inside such expressions.  Consider the following example
  
         let f = \pq -> BIG
         in
@@ -511,16 +505,14 @@ expressions.  Consider the following example
         in ...g...g...g...g...g...
  
  Now, if that's the ONLY occurrence of f, it will be inlined inside g,
-and thence copied multiple times when g is inlined.
-
+and thence copied multiple times when g is inlined.  
  
-This function may be inlinined in other modules, so we
-don't want to remove (by inlining) calls to functions that have
-specialisations, or that may have transformation rules in an importing
-scope.
+This function may be inlinined in other modules, so we don't want to
+remove (by inlining) calls to functions that have specialisations, or
+that may have transformation rules in an importing scope.
  
  E.g.   {-# INLINE f #-}
-               f x = ...g...
+       f x = ...g...
  
  and suppose that g is strict *and* has specialisations.  If we inline
  g's wrapper, we deny f the chance of getting the specialised version
@@ -538,15 +530,14 @@ continuation.  That's why the keep_inline predicate returns True for
  ArgOf continuations.  It shouldn't do any harm not to dissolve the
  inline-me note under these circumstances.
  
-Note that the result is that we do very little simplification
-inside an InlineMe.  
+Although we do very little simplification inside an InlineRule,
+the RHS is simplified as normal.  For example:
  
         all xs = foldr (&&) True xs
         any p = all . map p  {-# INLINE any #-}
  
-Problem: any won't get deforested, and so if it's exported and the
-importer doesn't use the inlining, (eg passes it as an arg) then we
-won't get deforestation at all.  We havn't solved this problem yet!
+The RHS of 'any' will get optimised and deforested; but the InlineRule
+will still mention the original RHS.
  
  
  preInlineUnconditionally
@@ -591,7 +582,7 @@ y's occurrence info, which breaks the invariant.  It matters: y
  might have a BIG rhs, which will now be dup'd at every occurrenc of x.
  
  
-Evne RHSs labelled InlineMe aren't caught here, because there might be
+Even RHSs labelled InlineMe aren't caught here, because there might be
  no benefit from inlining at the call site.
  
  [Sept 01] Don't unconditionally inline a top-level thing, because that
@@ -613,6 +604,18 @@ seems a bit fragile.
  Conclusion: inline top level things gaily until Phase 0 (the last
  phase), at which point don't.
  
+Note [pre/postInlineUnconditionally in gentle mode]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in gentle mode we want to do preInlineUnconditionally.  The
+reason is that too little clean-up happens if you don't inline
+use-once things.  Also a bit of inlining is *good* for full laziness;
+it can expose constant sub-expressions.  Example in
+spectral/mandel/Mandel.hs, where the mandelset function gets a useful
+let-float if you inline windowToViewport
+
+However, as usual for Gentle mode, do not inline things that are
+inactive in the intial stages.  See Note [Gentle mode].
+
  \begin{code}
  preInlineUnconditionally :: SimplEnv -> TopLevelFlag -> InId -> InExpr -> Bool
  preInlineUnconditionally env top_lvl bndr rhs
@@ -621,13 +624,14 @@ preInlineUnconditionally env top_lvl bndr rhs
    | otherwise = case idOccInfo bndr of
                   IAmDead                    -> True    -- Happens in ((\x.1) v)
                   OneOcc in_lam True int_cxt -> try_once in_lam int_cxt
-                 other                      -> False
+                 _                          -> False
    where
      phase = getMode env
      active = case phase of
-                  SimplGently  -> isAlwaysActive prag
-                  SimplPhase n -> isActive n prag
-    prag = idInlinePragma bndr
+                  SimplGently {} -> isEarlyActive act
+                       -- See Note [pre/postInlineUnconditionally in gentle mode]
+                  SimplPhase n _ -> isActive n act
+    act = idInlineActivation bndr
  
      try_once in_lam int_cxt    -- There's one textual occurrence
         | not in_lam = isNotTopLevel top_lvl || early_phase
@@ -654,14 +658,14 @@ preInlineUnconditionally env top_lvl bndr rhs
         -- canInlineInLam => free vars of rhs are (Once in_lam) or Many,
         -- so substituting rhs inside a lambda doesn't change the occ info.
         -- Sadly, not quite the same as exprIsHNF.
-    canInlineInLam (Lit l)             = True
+    canInlineInLam (Lit _)             = True
      canInlineInLam (Lam b e)           = isRuntimeVar b || canInlineInLam e
      canInlineInLam (Note _ e)          = canInlineInLam e
      canInlineInLam _                   = False
  
      early_phase = case phase of
-                       SimplPhase 0 -> False
-                       other        -> True
+                       SimplPhase 0 _ -> False
+                       _              -> True
  -- If we don't have this early_phase test, consider
  --     x = length [1,2,3]
  -- The full laziness pass carefully floats all the cons cells to
@@ -710,16 +714,17 @@ story for now.
  \begin{code}
  postInlineUnconditionally 
      :: SimplEnv -> TopLevelFlag
-    -> InId            -- The binder (an OutId would be fine too)
+    -> OutId           -- The binder (an InId would be fine too)
      -> OccInfo                 -- From the InId
      -> OutExpr
      -> Unfolding
      -> Bool
  postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
    | not active            = False
-  | isLoopBreaker occ_info = False     -- If it's a loop-breaker of any kind, dont' inline
+  | isLoopBreaker occ_info = False     -- If it's a loop-breaker of any kind, don't inline
                                         -- because it might be referred to "earlier"
    | isExportedId bndr      = False
+  | isInlineRule unfolding = False     -- Note [InlineRule and postInlineUnconditionally]
    | exprIsTrivial rhs     = True
    | otherwise
    = case occ_info of
@@ -734,7 +739,7 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
         --         True  -> case x of ...
         --         False -> case x of ...
         -- I'm not sure how important this is in practice
-      OneOcc in_lam one_br int_cxt     -- OneOcc => no code-duplication issue
+      OneOcc in_lam _one_br int_cxt    -- OneOcc => no code-duplication issue
         ->     smallEnoughToInline unfolding    -- Small enough to dup
                         -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
                         --
@@ -765,37 +770,37 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
                         -- Here x isn't mentioned in the RHS, so we don't want to
                         -- create the (dead) let-binding  let x = (a,b) in ...
  
-      other -> False
+      _ -> False
  
  -- Here's an example that we don't handle well:
  --     let f = if b then Left (\x.BIG) else Right (\y.BIG)
  --     in \y. ....case f of {...} ....
  -- Here f is used just once, and duplicating the case work is fine (exprIsCheap).
  -- But
--- * We can't preInlineUnconditionally because that woud invalidate
---   the occ info for b.  
--- * We can't postInlineUnconditionally because the RHS is big, and
---   that risks exponential behaviour
--- * We can't call-site inline, because the rhs is big
+--  - We can't preInlineUnconditionally because that woud invalidate
+--    the occ info for b.
+--  - We can't postInlineUnconditionally because the RHS is big, and
+--    that risks exponential behaviour
+--  - We can't call-site inline, because the rhs is big
  -- Alas!
  
    where
      active = case getMode env of
-                  SimplGently  -> isAlwaysActive prag
-                  SimplPhase n -> isActive n prag
-    prag = idInlinePragma bndr
+                  SimplGently {} -> isEarlyActive act
+                       -- See Note [pre/postInlineUnconditionally in gentle mode]
+                  SimplPhase n _ -> isActive n act
+    act = idInlineActivation bndr
  
  activeInline :: SimplEnv -> OutId -> Bool
  activeInline env id
+  | isNonRuleLoopBreaker (idOccInfo id)          -- Things with an INLINE pragma may have 
+                                         -- an unfolding *and* be a loop breaker
+  = False                                -- (maybe the knot is not yet untied)
+  | otherwise
    = case getMode env of
-      SimplGently -> False
-       -- No inlining at all when doing gentle stuff,
-       -- except for local things that occur once
-       -- The reason is that too little clean-up happens if you 
-       -- don't inline use-once things.   Also a bit of inlining is *good* for
-       -- full laziness; it can expose constant sub-expressions.
-       -- Example in spectral/mandel/Mandel.hs, where the mandelset 
-       -- function gets a useful let-float if you inline windowToViewport
+      SimplGently { sm_inline = inlining_on } 
+         -> inlining_on && isEarlyActive act
+       -- See Note [Gentle mode]
  
         -- NB: we used to have a second exception, for data con wrappers.
         -- On the grounds that we use gentle mode for rule LHSs, and 
@@ -804,26 +809,40 @@ activeInline env id
         -- and they are now constructed as Compulsory unfoldings (in MkId)
         -- so they'll happen anyway.
  
-      SimplPhase n -> isActive n prag
+      SimplPhase n _ -> isActive n act
    where
-    prag = idInlinePragma id
+    act = idInlineActivation id
  
  activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
  -- Nothing => No rules at all
  activeRule dflags env
-  | not (dopt Opt_RewriteRules dflags)
+  | not (dopt Opt_EnableRewriteRules dflags)
    = Nothing    -- Rewriting is off
    | otherwise
    = case getMode env of
-       SimplGently  -> Just isAlwaysActive
-                       -- Used to be Nothing (no rules in gentle mode)
-                       -- Main motivation for changing is that I wanted
-                       --      lift String ===> ...
-                       -- to work in Template Haskell when simplifying
-                       -- splices, so we get simpler code for literal strings
-       SimplPhase n -> Just (isActive n)
+      SimplGently { sm_rules = rules_on } 
+        | rules_on  -> Just isEarlyActive      -- Note [RULEs enabled in SimplGently]
+        | otherwise -> Nothing
+      SimplPhase n _ -> Just (isActive n)
  \end{code}
  
+Note [InlineRule and postInlineUnconditionally]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Do not do postInlineUnconditionally if the Id has an InlineRule, otherwise
+we lose the unfolding.  Example
+
+     -- f has InlineRule with rhs (e |> co)
+     --   where 'e' is big
+     f = e |> co
+
+Then there's a danger we'll optimise to
+
+     f' = e
+     f = f' |> co
+
+and now postInlineUnconditionally, losing the InlineRule on f.  Now f'
+won't inline because 'e' is too big.
+
  
  %************************************************************************
  %*                                                                     *
@@ -832,24 +851,26 @@ activeRule dflags env
  %************************************************************************
  
  \begin{code}
-mkLam :: [OutBndr] -> OutExpr -> SimplM OutExpr
+mkLam :: SimplEnv -> [OutBndr] -> OutExpr -> SimplM OutExpr
  -- mkLam tries three things
  --     a) eta reduction, if that gives a trivial expression
  --     b) eta expansion [only if there are some value lambdas]
  
-mkLam [] body 
+mkLam _b [] body 
    = return body
-mkLam bndrs body
+mkLam env bndrs body
    = do { dflags <- getDOptsSmpl
         ; mkLam' dflags bndrs body }
    where
      mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
-    mkLam' dflags bndrs (Cast body@(Lam _ _) co)
+    mkLam' dflags bndrs (Cast body co)
+      | not (any bad bndrs)
         -- Note [Casts and lambdas]
-      = do { lam <- mkLam' dflags (bndrs ++ bndrs') body'
+      = do { lam <- mkLam' dflags bndrs body
            ; return (mkCoerce (mkPiTypes bndrs co) lam) }
-      where    
-       (bndrs',body') = collectBinders body
+      where
+       co_vars  = tyVarsOfType co
+       bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars      
  
      mkLam' dflags bndrs body
        | dopt Opt_DoEtaReduction dflags,
@@ -858,12 +879,14 @@ mkLam bndrs body
            ; return etad_lam }
  
        | dopt Opt_DoLambdaEtaExpansion dflags,
-       any isRuntimeVar bndrs
-      = do { body' <- tryEtaExpansion dflags body
+        not (inGentleMode env),              -- In gentle mode don't eta-expansion
+       any isRuntimeVar bndrs        -- because it can clutter up the code
+                                     -- with casts etc that may not be removed
+      = do { let body' = tryEtaExpansion dflags body
            ; return (mkLams bndrs body') }
     
        | otherwise 
-      = returnSmpl (mkLams bndrs body)
+      = return (mkLams bndrs body)
  \end{code}
  
  Note [Casts and lambdas]
@@ -877,9 +900,26 @@ So this equation in mkLam' floats the g1 out, thus:
         (\x. e `cast` g1)  -->  (\x.e) `cast` (tx -> g1)
  where x:tx.
  
-In general, this floats casts outside lambdas, where (I hope) they might meet
-and cancel with some other cast.
-
+In general, this floats casts outside lambdas, where (I hope) they
+might meet and cancel with some other cast:
+       \x. e `cast` co   ===>   (\x. e) `cast` (tx -> co)
+       /\a. e `cast` co  ===>   (/\a. e) `cast` (/\a. co)
+       /\g. e `cast` co  ===>   (/\g. e) `cast` (/\g. co)
+                         (if not (g `in` co))
+
+Notice that it works regardless of 'e'.  Originally it worked only
+if 'e' was itself a lambda, but in some cases that resulted in 
+fruitless iteration in the simplifier.  A good example was when
+compiling Text.ParserCombinators.ReadPrec, where we had a definition 
+like   (\x. Get `cast` g)
+where Get is a constructor with nonzero arity.  Then mkLam eta-expanded
+the Get, and the next iteration eta-reduced it, and then eta-expanded 
+it again.
+
+Note also the side condition for the case of coercion binders.
+It does not make sense to transform
+       /\g. e `cast` g  ==>  (/\g.e) `cast` (/\g.g)
+because the latter is not well-kinded.
  
  --     c) floating lets out through big lambdas 
  --             [only if all tyvar lambdas, and only if this lambda
@@ -892,55 +932,111 @@ and cancel with some other cast.
                         -- if this is indeed a right-hand side; otherwise
                         -- we end up floating the thing out, only for float-in
                         -- to float it right back in again!
- = tryRhsTyLam env bndrs body          `thenSmpl` \ (floats, body') ->
-   returnSmpl (floats, mkLams bndrs body')
+ = do (floats, body') <- tryRhsTyLam env bndrs body
+      return (floats, mkLams bndrs body')
  -}
  
  
  %************************************************************************
  %*                                                                     *
-\subsection{Eta expansion and reduction}
+               Eta reduction
  %*                                                                     *
  %************************************************************************
  
-We try for eta reduction here, but *only* if we get all the 
-way to an exprIsTrivial expression.    
-We don't want to remove extra lambdas unless we are going 
-to avoid allocating this thing altogether
+Note [Eta reduction conditions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We try for eta reduction here, but *only* if we get all the way to an
+trivial expression.  We don't want to remove extra lambdas unless we
+are going to avoid allocating this thing altogether.
+
+There are some particularly delicate points here:
+
+* Eta reduction is not valid in general:  
+       \x. bot  /=  bot
+  This matters, partly for old-fashioned correctness reasons but,
+  worse, getting it wrong can yield a seg fault. Consider
+       f = \x.f x
+       h y = case (case y of { True -> f `seq` True; False -> False }) of
+               True -> ...; False -> ...
+
+  If we (unsoundly) eta-reduce f to get f=f, the strictness analyser
+  says f=bottom, and replaces the (f `seq` True) with just
+  (f `cast` unsafe-co).  BUT, as thing stand, 'f' got arity 1, and it
+  *keeps* arity 1 (perhaps also wrongly).  So CorePrep eta-expands 
+  the definition again, so that it does not termninate after all.
+  Result: seg-fault because the boolean case actually gets a function value.
+  See Trac #1947.
+
+  So it's important to to the right thing.
+
+* Note [Arity care]: we need to be careful if we just look at f's
+  arity. Currently (Dec07), f's arity is visible in its own RHS (see
+  Note [Arity robustness] in SimplEnv) so we must *not* trust the
+  arity when checking that 'f' is a value.  Otherwise we will
+  eta-reduce
+      f = \x. f x
+  to
+      f = f
+  Which might change a terminiating program (think (f `seq` e)) to a 
+  non-terminating one.  So we check for being a loop breaker first.
+
+  However for GlobalIds we can look at the arity; and for primops we
+  must, since they have no unfolding.  
+
+* Regardless of whether 'f' is a value, we always want to 
+  reduce (/\a -> f a) to f
+  This came up in a RULE: foldr (build (/\a -> g a))
+  did not match          foldr (build (/\b -> ...something complex...))
+  The type checker can insert these eta-expanded versions,
+  with both type and dictionary lambdas; hence the slightly 
+  ad-hoc isDictId
+
+* Never *reduce* arity. For example
+      f = \xy. g x y
+  Then if h has arity 1 we don't want to eta-reduce because then
+  f's arity would decrease, and that is bad
+
+These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
+Alas.
  
  \begin{code}
  tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr
  tryEtaReduce bndrs body 
-       -- We don't use CoreUtils.etaReduce, because we can be more
-       -- efficient here:
-       --  (a) we already have the binders
-       --  (b) we can do the triviality test before computing the free vars
    = go (reverse bndrs) body
    where
+    incoming_arity = count isId bndrs
+
      go (b : bs) (App fun arg) | ok_arg b arg = go bs fun       -- Loop round
      go []       fun           | ok_fun fun   = Just fun                -- Success!
      go _        _                           = Nothing          -- Failure!
  
-    ok_fun fun =  exprIsTrivial fun
-              && not (any (`elemVarSet` (exprFreeVars fun)) bndrs)
-              && (exprIsHNF fun || all ok_lam bndrs)
+       -- Note [Eta reduction conditions]
+    ok_fun (App fun (Type ty)) 
+       | not (any (`elemVarSet` tyVarsOfType ty) bndrs)
+       =  ok_fun fun
+    ok_fun (Var fun_id)
+       =  not (fun_id `elem` bndrs)
+       && (ok_fun_id fun_id || all ok_lam bndrs)
+    ok_fun _fun = False
+
+    ok_fun_id fun = fun_arity fun >= incoming_arity
+
+    fun_arity fun            -- See Note [Arity care]
+       | isLocalId fun && isLoopBreaker (idOccInfo fun) = 0
+       | otherwise = idArity fun             
+
      ok_lam v = isTyVar v || isDictId v
-       -- The exprIsHNF is because eta reduction is not 
-       -- valid in general:  \x. bot  /=  bot
-       -- So we need to be sure that the "fun" is a value.
-       --
-       -- However, we always want to reduce (/\a -> f a) to f
-       -- This came up in a RULE: foldr (build (/\a -> g a))
-       --      did not match      foldr (build (/\b -> ...something complex...))
-       -- The type checker can insert these eta-expanded versions,
-       -- with both type and dictionary lambdas; hence the slightly 
-       -- ad-hoc isDictTy
  
      ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
  \end{code}
  
  
-       Try eta expansion for RHSs
+%************************************************************************
+%*                                                                     *
+               Eta expansion
+%*                                                                     *
+%************************************************************************
+
  
  We go for:
     f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
@@ -955,17 +1051,26 @@ where (in both cases)
         * N is a NORMAL FORM (i.e. no redexes anywhere)
           wanting a suitable number of extra args.
  
+The biggest reason for doing this is for cases like
+
+       f = \x -> case x of
+                   True  -> \y -> e1
+                   False -> \y -> e2
+
+Here we want to get the lambdas together.  A good exmaple is the nofib
+program fibheaps, which gets 25% more allocation if you don't do this
+eta-expansion.
+
  We may have to sandwich some coerces between the lambdas
  to make the types work.   exprEtaExpandArity looks through coerces
  when computing arity; and etaExpand adds the coerces as necessary when
  actually computing the expansion.
  
  \begin{code}
-tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr
+tryEtaExpansion :: DynFlags -> OutExpr -> OutExpr
  -- There is at least one runtime binder in the binders
  tryEtaExpansion dflags body
-  = getUniquesSmpl                     `thenSmpl` \ us ->
-    returnSmpl (etaExpand fun_arity us body (exprType body))
+  = etaExpand fun_arity body
    where
      fun_arity = exprEtaExpandArity dflags body
  \end{code}
@@ -984,7 +1089,7 @@ Consider this:
  We'd like to float this to 
         y1 = /\a. e1
         y2 = /\a. e2
-       x = /\a. C (y1 a) (y2 a)
+       x  = /\a. C (y1 a) (y2 a)
  for the usual reasons: we want to inline x rather vigorously.
  
  You may think that this kind of thing is rare.  But in some programs it is
@@ -1057,7 +1162,7 @@ it is guarded by the doFloatFromRhs call in simplLazyBind.
  abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
  abstractFloats main_tvs body_env body
    = ASSERT( notNull body_floats )
-    do { (subst, float_binds) <- mapAccumLSmpl abstract empty_subst body_floats
+    do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
         ; return (float_binds, CoreSubst.substExpr subst body) }
    where
      main_tv_set = mkVarSet main_tvs
@@ -1093,7 +1198,7 @@ abstractFloats main_tvs body_env body
                 -- gives rise to problems.   SLPJ June 98
  
      abstract subst (Rec prs)
-       = do { (poly_ids, poly_apps) <- mapAndUnzipSmpl (mk_poly tvs_here) ids
+       = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
             ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
                   poly_rhss = [mkLams tvs_here (CoreSubst.substExpr subst' rhs) | rhs <- rhss]
             ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
@@ -1115,10 +1220,11 @@ abstractFloats main_tvs body_env body
          tvs_here = main_tvs
  
      mk_poly tvs_here var
-      = do { uniq <- getUniqueSmpl
+      = do { uniq <- getUniqueM
            ; let  poly_name = setNameUnique (idName var) uniq           -- Keep same name
                   poly_ty   = mkForAllTys tvs_here (idType var) -- But new type of course
-                 poly_id   = mkLocalId poly_name poly_ty 
+                 poly_id   = transferPolyIdInfo var tvs_here $ -- Note [transferPolyIdInfo] in Id.lhs
+                             mkLocalId poly_name poly_ty 
            ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
                 -- In the olden days, it was crucial to copy the occInfo of the original var, 
                 -- because we were looking at occurrence-analysed but as yet unsimplified code!
@@ -1231,8 +1337,8 @@ have to check that r doesn't mention the variables bound by the
  pattern in each alternative, so the binder-info is rather useful.
  
  \begin{code}
-prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
-prepareAlts scrut case_bndr' alts
+prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
+prepareAlts env scrut case_bndr' alts
    = do { dflags <- getDOptsSmpl
         ; alts <- combineIdenticalAlts case_bndr' alts
  
@@ -1243,7 +1349,7 @@ prepareAlts scrut case_bndr' alts
                 --   EITHER by the context, 
                 --   OR by a non-DEFAULT branch in this case expression.
  
-       ; default_alts <- prepareDefault dflags scrut case_bndr' mb_tc_app 
+       ; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app 
                                          imposs_deflt_cons maybe_deflt
  
         ; let trimmed_alts = filterOut impossible_alt alts_wo_default
@@ -1260,12 +1366,12 @@ prepareAlts scrut case_bndr' alts
  
      imposs_cons = case scrut of
                     Var v -> otherCons (idUnfolding v)
-                   other -> []
+                   _     -> []
  
      impossible_alt :: CoreAlt -> Bool
      impossible_alt (con, _, _) | con `elem` imposs_cons = True
      impossible_alt (DataAlt con, _, _) = dataConCannotMatch inst_tys con
-    impossible_alt alt                = False
+    impossible_alt _                   = False
  
  
  --------------------------------------------------
@@ -1273,7 +1379,7 @@ prepareAlts scrut case_bndr' alts
  --------------------------------------------------
  combineIdenticalAlts :: OutId -> [InAlt] -> SimplM [InAlt]
  
-combineIdenticalAlts case_bndr alts@((con1,bndrs1,rhs1) : con_alts)
+combineIdenticalAlts case_bndr ((_con1,bndrs1,rhs1) : con_alts)
    | all isDeadBinder bndrs1,                   -- Remember the default 
      length filtered_alts < length con_alts     -- alternative comes first
         -- Also Note [Dead binders]
@@ -1281,15 +1387,15 @@ combineIdenticalAlts case_bndr alts@((con1,bndrs1,rhs1) : con_alts)
         ; return ((DEFAULT, [], rhs1) : filtered_alts) }
    where
      filtered_alts       = filter keep con_alts
-    keep (con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
+    keep (_con,bndrs,rhs) = not (all isDeadBinder bndrs && rhs `cheapEqExpr` rhs1)
  
-combineIdenticalAlts case_bndr alts = return alts
+combineIdenticalAlts _ alts = return alts
  
  -------------------------------------------------------------------------
  --                     Prepare the default alternative
  -------------------------------------------------------------------------
  prepareDefault :: DynFlags
-              -> OutExpr       -- Scrutinee
+              -> SimplEnv
                -> OutId         -- Case binder; need just for its type. Note that as an
                                 --   OutId, it has maximum information; this is important.
                                 --   Test simpl013 is an example
@@ -1301,10 +1407,16 @@ prepareDefault :: DynFlags
                                         -- And becuase case-merging can cause many to show up
  
  -------        Merge nested cases ----------
-prepareDefault dflags scrut outer_bndr bndr_ty imposs_cons (Just deflt_rhs)
+prepareDefault dflags env outer_bndr _bndr_ty imposs_cons (Just deflt_rhs)
    | dopt Opt_CaseMerge dflags
-  , Case (Var scrut_var) inner_bndr _ inner_alts <- deflt_rhs
-  , scruting_same_var scrut_var
+  , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
+  , DoneId inner_scrut_var' <- substId env inner_scrut_var
+       -- Remember, inner_scrut_var is an InId, but outer_bndr is an OutId
+  , inner_scrut_var' == outer_bndr
+       -- NB: the substId means that if the outer scrutinee was a 
+       --     variable, and inner scrutinee is the same variable, 
+       --     then inner_scrut_var' will be outer_bndr
+       --     via the magic of simplCaseBinder
    = do { tick (CaseMerge outer_bndr)
  
         ; let munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
@@ -1324,17 +1436,10 @@ prepareDefault dflags scrut outer_bndr bndr_ty imposs_cons (Just deflt_rhs)
         -- mkCase applied to them, so they won't have a case in their default
         -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
         -- in munge_rhs may put a case into the DEFAULT branch!
-  where
-       -- We are scrutinising the same variable if it's
-       -- the outer case-binder, or if the outer case scrutinises a variable
-       -- (and it's the same).  Testing both allows us not to replace the
-       -- outer scrut-var with the outer case-binder (Simplify.simplCaseBinder).
-    scruting_same_var = case scrut of
-                         Var outer_scrut -> \ v -> v == outer_bndr || v == outer_scrut
-                         other           -> \ v -> v == outer_bndr
+
  
  --------- Fill in known constructor -----------
-prepareDefault dflags scrut case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
+prepareDefault _ _ case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
    |    -- This branch handles the case where we are 
         -- scrutinisng an algebraic data type
      isAlgTyCon tycon           -- It's a data type, tuple, or unboxed tuples.  
@@ -1360,18 +1465,23 @@ prepareDefault dflags scrut case_bndr (Just (tycon, inst_tys)) imposs_cons (Just
  
         [con] ->        -- It matches exactly one constructor, so fill it in
                  do { tick (FillInCaseDefault case_bndr)
-                    ; us <- getUniquesSmpl
+                    ; us <- getUniquesM
                      ; let (ex_tvs, co_tvs, arg_ids) =
                                dataConRepInstPat us con inst_tys
                      ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
  
-       two_or_more -> return [(DEFAULT, [], deflt_rhs)]
+       _ -> return [(DEFAULT, [], deflt_rhs)]
+
+  | debugIsOn, isAlgTyCon tycon, not (isOpenTyCon tycon), null (tyConDataCons tycon)
+       -- This can legitimately happen for type families, so don't report that
+  = pprTrace "prepareDefault" (ppr case_bndr <+> ppr tycon)
+        $ return [(DEFAULT, [], deflt_rhs)]
  
  --------- Catch-all cases -----------
-prepareDefault dflags scrut case_bndr bndr_ty imposs_cons (Just deflt_rhs)
+prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons (Just deflt_rhs)
    = return [(DEFAULT, [], deflt_rhs)]
  
-prepareDefault dflags scrut case_bndr bndr_ty imposs_cons Nothing
+prepareDefault _dflags _env _case_bndr _bndr_ty _imposs_cons Nothing
    = return []  -- No default branch
  \end{code}
  
@@ -1393,32 +1503,17 @@ mkCase tries these things
  
  
  \begin{code}
-mkCase :: OutExpr -> OutId -> OutType
-       -> [OutAlt]             -- Increasing order
+mkCase :: OutExpr -> OutId -> [OutAlt] -- Increasing order
         -> SimplM OutExpr
  
  --------------------------------------------------
---     1. Check for empty alternatives
---------------------------------------------------
-
--- This isn't strictly an error.  It's possible that the simplifer might "see"
--- that an inner case has no accessible alternatives before it "sees" that the
--- entire branch of an outer case is inaccessible.  So we simply
--- put an error case here insteadd
-mkCase scrut case_bndr ty []
-  = pprTrace "mkCase: null alts" (ppr case_bndr <+> ppr scrut) $
-    return (mkApps (Var rUNTIME_ERROR_ID)
-                  [Type ty, Lit (mkStringLit "Impossible alternative")])
-
-
---------------------------------------------------
  --     2. Identity case
  --------------------------------------------------
  
-mkCase scrut case_bndr ty alts -- Identity case
+mkCase scrut case_bndr alts    -- Identity case
    | all identity_alt alts
-  = tick (CaseIdentity case_bndr)              `thenSmpl_`
-    returnSmpl (re_cast scrut)
+  = do tick (CaseIdentity case_bndr)
+       return (re_cast scrut)
    where
      identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
  
@@ -1426,7 +1521,7 @@ mkCase scrut case_bndr ty alts    -- Identity case
      check_eq (LitAlt lit') _    (Lit lit) = lit == lit'
      check_eq (DataAlt con) args rhs       = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
                                          || rhs `cheapEqExpr` Var case_bndr
-    check_eq con args rhs = False
+    check_eq _ _ _ = False
  
      arg_tys = map Type (tyConAppArgs (idType case_bndr))
  
@@ -1444,14 +1539,14 @@ mkCase scrut case_bndr ty alts  -- Identity case
  
      re_cast scrut = case head alts of
                         (_,_,Cast _ co) -> Cast scrut co
-                       other           -> scrut
+                       _               -> scrut
  
  
  
  --------------------------------------------------
  --     Catch-all
  --------------------------------------------------
-mkCase scrut bndr ty alts = returnSmpl (Case scrut bndr ty alts)
+mkCase scrut bndr alts = return (Case scrut bndr (coreAltsType alts) alts)
  \end{code}
  
  
@@ -1460,7 +1555,8 @@ its dead, because it often is, and occasionally these mkCase transformations
  cascade rather nicely.
  
  \begin{code}
+bindCaseBndr :: Id -> CoreExpr -> CoreExpr -> CoreExpr
  bindCaseBndr bndr rhs body
    | isDeadBinder bndr = body
-  | otherwise        = bindNonRec bndr rhs body
+  | otherwise         = bindNonRec bndr rhs body
  \end{code}