Whitespace only

[ghc-hetmet.git] / compiler / simplCore / SimplUtils.lhs
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs

index 1ff6f8f..060d346 100644 (file)
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -4,6 +4,13 @@
  \section[SimplUtils]{The simplifier utilities}
  
  \begin{code}
+{-# OPTIONS -w #-}
+-- The above warning supression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
  module SimplUtils (
         -- Rebuilding
         mkLam, mkCase, prepareAlts, bindCaseBndr,
@@ -13,13 +20,15 @@ module SimplUtils (
         activeInline, activeRule, inlineMode,
  
         -- The continuation type
-       SimplCont(..), DupFlag(..), LetRhsFlag(..), 
+       SimplCont(..), DupFlag(..), ArgInfo(..),
         contIsDupable, contResultType, contIsTrivial, contArgs, dropArgs, 
-       countValArgs, countArgs,
+       countValArgs, countArgs, splitInlineCont,
         mkBoringStop, mkLazyArgStop, mkRhsStop, contIsRhsOrArg,
         interestingCallContext, interestingArgContext,
  
-       interestingArg, mkArgInfo
+       interestingArg, mkArgInfo,
+       
+       abstractFloats
      ) where
  
  #include "HsVersions.h"
@@ -28,23 +37,28 @@ import SimplEnv
  import DynFlags
  import StaticFlags
  import CoreSyn
+import qualified CoreSubst
  import PprCore
  import CoreFVs
  import CoreUtils
  import Literal 
  import CoreUnfold
  import MkId
+import Name
  import Id
+import Var     ( isCoVar )
  import NewDemand
  import SimplMonad
-import Type
+import Type    hiding( substTy )
  import TyCon
  import DataCon
-import TcGadt  ( dataConCanMatch )
+import Unify   ( dataConCannotMatch )
  import VarSet
  import BasicTypes
  import Util
+import MonadUtils
  import Outputable
+
  import List( nub )
  \end{code}
  
@@ -79,14 +93,11 @@ Key points:
  data SimplCont 
    = Stop               -- An empty context, or hole, []     
         OutType         -- Type of the result
-       LetRhsFlag
-       Bool            -- True <=> There is something interesting about
+       CallCtxt        -- True <=> There is something interesting about
                         --          the context, and hence the inliner
                         --          should be a bit keener (see interestingCallContext)
-                       -- Two cases:
-                       -- (a) This is the RHS of a thunk whose type suggests
-                       --     that update-in-place would be possible
-                       -- (b) This is an argument of a function that has RULES
+                       -- Specifically:
+                       --     This is an argument of a function that has RULES
                         --     Inlining the call might allow the rule to fire
  
    | CoerceIt           -- C `cast` co
@@ -111,24 +122,30 @@ data SimplCont
  
    | StrictArg          -- e C
         OutExpr OutType         -- e and its type
-       (Bool,[Bool])           -- Whether the function at the head of e has rules,
-       SimplCont               --     plus strictness flags for further args
-
-data LetRhsFlag = AnArg                -- It's just an argument not a let RHS
-               | AnRhs         -- It's the RHS of a let (so please float lets out of big lambdas)
-
-instance Outputable LetRhsFlag where
-  ppr AnArg = ptext SLIT("arg")
-  ppr AnRhs = ptext SLIT("rhs")
+       CallCtxt                -- Whether *this* argument position is interesting
+       ArgInfo                 -- Whether the function at the head of e has rules, etc
+       SimplCont               --     plus strictness flags for *further* args
+
+data ArgInfo 
+  = ArgInfo {
+       ai_rules :: Bool,       -- Function has rules (recursively)
+                               --      => be keener to inline in all args
+       ai_strs :: [Bool],      -- Strictness of arguments
+                               --   Usually infinite, but if it is finite it guarantees
+                               --   that the function diverges after being given
+                               --   that number of args
+       ai_discs :: [Int]       -- Discounts for arguments; non-zero => be keener to inline
+                               --   Always infinite
+    }
  
  instance Outputable SimplCont where
-  ppr (Stop ty is_rhs _)            = ptext SLIT("Stop") <> brackets (ppr is_rhs) <+> ppr ty
-  ppr (ApplyTo dup arg se cont)      = ((ptext SLIT("ApplyTo") <+> ppr dup <+> pprParendExpr arg) $$ 
-                                         nest 2 (pprSimplEnv se)) $$ ppr cont
+  ppr (Stop ty _)                   = ptext SLIT("Stop") <+> ppr ty
+  ppr (ApplyTo dup arg se cont)      = ((ptext SLIT("ApplyTo") <+> ppr dup <+> pprParendExpr arg)
+                                         {-  $$ nest 2 (pprSimplEnv se) -}) $$ ppr cont
    ppr (StrictBind b _ _ _ cont)      = (ptext SLIT("StrictBind") <+> ppr b) $$ ppr cont
-  ppr (StrictArg f _ _ cont)         = (ptext SLIT("StrictArg") <+> ppr f) $$ ppr cont
+  ppr (StrictArg f _ _ _ cont)       = (ptext SLIT("StrictArg") <+> ppr f) $$ ppr cont
    ppr (Select dup bndr alts se cont) = (ptext SLIT("Select") <+> ppr dup <+> ppr bndr) $$ 
-                                      (nest 4 (ppr alts $$ pprSimplEnv se)) $$ ppr cont
+                                      (nest 4 (ppr alts)) $$ ppr cont 
    ppr (CoerceIt co cont)            = (ptext SLIT("CoerceIt") <+> ppr co) $$ ppr cont
  
  data DupFlag = OkToDup | NoDup
@@ -141,22 +158,23 @@ instance Outputable DupFlag where
  
  -------------------
  mkBoringStop :: OutType -> SimplCont
-mkBoringStop ty = Stop ty AnArg False
+mkBoringStop ty = Stop ty BoringCtxt
  
-mkLazyArgStop :: OutType -> Bool -> SimplCont
-mkLazyArgStop ty has_rules = Stop ty AnArg (canUpdateInPlace ty || has_rules)
+mkLazyArgStop :: OutType -> CallCtxt -> SimplCont
+mkLazyArgStop ty cci = Stop ty cci
  
  mkRhsStop :: OutType -> SimplCont
-mkRhsStop ty = Stop ty AnRhs (canUpdateInPlace ty)
+mkRhsStop ty = Stop ty BoringCtxt
  
-contIsRhsOrArg (Stop _ _ _)    = True
-contIsRhsOrArg (StrictBind {}) = True
-contIsRhsOrArg (StrictArg {})  = True
-contIsRhsOrArg other          = False
+-------------------
+contIsRhsOrArg (Stop {})                = True
+contIsRhsOrArg (StrictBind {})          = True
+contIsRhsOrArg (StrictArg {})           = True
+contIsRhsOrArg other            = False
  
  -------------------
  contIsDupable :: SimplCont -> Bool
-contIsDupable (Stop _ _ _)                      = True
+contIsDupable (Stop {})                 = True
  contIsDupable (ApplyTo  OkToDup _ _ _)   = True
  contIsDupable (Select   OkToDup _ _ _ _) = True
  contIsDupable (CoerceIt _ cont)          = contIsDupable cont
@@ -164,15 +182,15 @@ contIsDupable other                        = False
  
  -------------------
  contIsTrivial :: SimplCont -> Bool
-contIsTrivial (Stop _ _ _)               = True
+contIsTrivial (Stop {})                          = True
  contIsTrivial (ApplyTo _ (Type _) _ cont) = contIsTrivial cont
  contIsTrivial (CoerceIt _ cont)          = contIsTrivial cont
  contIsTrivial other                      = False
  
  -------------------
  contResultType :: SimplCont -> OutType
-contResultType (Stop to_ty _ _)                 = to_ty
-contResultType (StrictArg _ _ _ cont)   = contResultType cont
+contResultType (Stop to_ty _)           = to_ty
+contResultType (StrictArg _ _ _ _ cont)  = contResultType cont
  contResultType (StrictBind _ _ _ _ cont) = contResultType cont
  contResultType (ApplyTo _ _ _ cont)     = contResultType cont
  contResultType (CoerceIt _ cont)        = contResultType cont
@@ -199,6 +217,26 @@ dropArgs :: Int -> SimplCont -> SimplCont
  dropArgs 0 cont = cont
  dropArgs n (ApplyTo _ _ _ cont) = dropArgs (n-1) cont
  dropArgs n other               = pprPanic "dropArgs" (ppr n <+> ppr other)
+
+--------------------
+splitInlineCont :: SimplCont -> Maybe (SimplCont, SimplCont)
+-- Returns Nothing if the continuation should dissolve an InlineMe Note
+-- Return Just (c1,c2) otherwise, 
+--     where c1 is the continuation to put inside the InlineMe 
+--     and   c2 outside
+
+-- Example: (__inline_me__ (/\a. e)) ty
+--     Here we want to do the beta-redex without dissolving the InlineMe
+-- See test simpl017 (and Trac #1627) for a good example of why this is important
+
+splitInlineCont (ApplyTo dup (Type ty) se c)
+  | Just (c1, c2) <- splitInlineCont c                 = Just (ApplyTo dup (Type ty) se c1, c2)
+splitInlineCont cont@(Stop ty _)               = Just (mkBoringStop ty, cont)
+splitInlineCont cont@(StrictBind bndr _ _ se _) = Just (mkBoringStop (substTy se (idType bndr)), cont)
+splitInlineCont cont@(StrictArg _ fun_ty _ _ _) = Just (mkBoringStop (funArgTy fun_ty), cont)
+splitInlineCont other                          = Nothing
+       -- NB: the calculation of the type for mkBoringStop is an annoying
+       --     duplication of the same calucation in mkDupableCont
  \end{code}
  
  
@@ -269,62 +307,28 @@ applies when x is bound to a lambda expression.  Hence
  contIsInteresting looks for case expressions with just a single
  default case.
  
+
  \begin{code}
-interestingCallContext :: Bool                 -- False <=> no args at all
-                      -> Bool          -- False <=> no value args
-                      -> SimplCont -> Bool
-       -- The "lone-variable" case is important.  I spent ages
-       -- messing about with unsatisfactory varaints, but this is nice.
-       -- The idea is that if a variable appear all alone
-       --      as an arg of lazy fn, or rhs    Stop
-       --      as scrutinee of a case          Select
-       --      as arg of a strict fn           ArgOf
-       -- then we should not inline it (unless there is some other reason,
-       -- e.g. is is the sole occurrence).  We achieve this by making
-       -- interestingCallContext return False for a lone variable.
-       --
-       -- Why?  At least in the case-scrutinee situation, turning
-       --      let x = (a,b) in case x of y -> ...
-       -- into
-       --      let x = (a,b) in case (a,b) of y -> ...
-       -- and thence to 
-       --      let x = (a,b) in let y = (a,b) in ...
-       -- is bad if the binding for x will remain.
-       --
-       -- Another example: I discovered that strings
-       -- were getting inlined straight back into applications of 'error'
-       -- because the latter is strict.
-       --      s = "foo"
-       --      f = \x -> ...(error s)...
-
-       -- Fundamentally such contexts should not ecourage inlining because
-       -- the context can ``see'' the unfolding of the variable (e.g. case or a RULE)
-       -- so there's no gain.
-       --
-       -- However, even a type application or coercion isn't a lone variable.
-       -- Consider
-       --      case $fMonadST @ RealWorld of { :DMonad a b c -> c }
-       -- We had better inline that sucker!  The case won't see through it.
-       --
-       -- For now, I'm treating treating a variable applied to types 
-       -- in a *lazy* context "lone". The motivating example was
-       --      f = /\a. \x. BIG
-       --      g = /\a. \y.  h (f a)
-       -- There's no advantage in inlining f here, and perhaps
-       -- a significant disadvantage.  Hence some_val_args in the Stop case
-
-interestingCallContext some_args some_val_args cont
+interestingCallContext :: SimplCont -> CallCtxt
+interestingCallContext cont
    = interesting cont
    where
-    interesting (Select {})              = some_args
-    interesting (ApplyTo {})             = True        -- Can happen if we have (coerce t (f x)) y
-                                               -- Perhaps True is a bit over-keen, but I've
-                                               -- seen (coerce f) x, where f has an INLINE prag,
-                                               -- So we have to give some motivaiton for inlining it
-    interesting (StrictArg {})          = some_val_args
-    interesting (StrictBind {})                 = some_val_args        -- ??
-    interesting (Stop ty _ interesting)  = some_val_args && interesting
-    interesting (CoerceIt _ cont)        = interesting cont
+    interestingCtxt = ArgCtxt False 2  -- Give *some* incentive!
+
+    interesting (Select _ bndr _ _ _)
+       | isDeadBinder bndr       = CaseCtxt
+       | otherwise               = interestingCtxt
+               
+    interesting (ApplyTo {})      = interestingCtxt
+                               -- Can happen if we have (coerce t (f x)) y
+                               -- Perhaps interestingCtxt is a bit over-keen, but I've
+                               -- seen (coerce f) x, where f has an INLINE prag,
+                               -- So we have to give some motivation for inlining it
+
+    interesting (StrictArg _ _ cci _ _)        = cci
+    interesting (StrictBind {})                = BoringCtxt
+    interesting (Stop ty cci)          = cci
+    interesting (CoerceIt _ cont)      = interesting cont
         -- If this call is the arg of a strict function, the context
         -- is a bit interesting.  If we inline here, we may get useful
         -- evaluation information to avoid repeated evals: e.g.
@@ -345,21 +349,24 @@ interestingCallContext some_args some_val_args cont
  mkArgInfo :: Id
           -> Int        -- Number of value args
           -> SimplCont  -- Context of the cal
-         -> (Bool, [Bool])     -- Arg info
--- The arg info consists of
---  * A Bool indicating if the function has rules (recursively)
---  * A [Bool] indicating strictness for each arg
--- The [Bool] is usually infinite, but if it is finite it 
--- guarantees that the function diverges after being given
--- that number of args
+         -> ArgInfo
  
  mkArgInfo fun n_val_args call_cont
-  = (interestingArgContext fun call_cont, fun_stricts)
+  = ArgInfo { ai_rules = interestingArgContext fun call_cont
+           , ai_strs  = arg_stricts
+           , ai_discs = arg_discounts }
    where
-    vanilla_stricts, fun_stricts :: [Bool]
+    vanilla_discounts, arg_discounts :: [Int]
+    vanilla_discounts = repeat 0
+    arg_discounts = case idUnfolding fun of
+                       CoreUnfolding _ _ _ _ (UnfoldIfGoodArgs _ discounts _ _)
+                             -> discounts ++ vanilla_discounts
+                       other -> vanilla_discounts
+
+    vanilla_stricts, arg_stricts :: [Bool]
      vanilla_stricts  = repeat False
  
-    fun_stricts
+    arg_stricts
        = case splitStrictSig (idNewStrictness fun) of
           (demands, result_info)
                 | not (demands `lengthExceeds` n_val_args)
@@ -385,7 +392,9 @@ interestingArgContext :: Id -> SimplCont -> Bool
  -- where g has rules, then we *do* want to inline f, in case it
  -- exposes a rule that might fire.  Similarly, if the context is
  --     h (g (f x x))
--- where h has rules, then we do want to inline f.
+-- where h has rules, then we do want to inline f; hence the
+-- call_cont argument to interestingArgContext
+--
  -- The interesting_arg_ctxt flag makes this happen; if it's
  -- set, the inliner gets just enough keener to inline f 
  -- regardless of how boring f's arguments are, if it's marked INLINE
@@ -393,36 +402,18 @@ interestingArgContext :: Id -> SimplCont -> Bool
  -- The alternative would be to *always* inline an INLINE function,
  -- regardless of how boring its context is; but that seems overkill
  -- For example, it'd mean that wrapper functions were always inlined
-interestingArgContext fn cont
-  = idHasRules fn || go cont
+interestingArgContext fn call_cont
+  = idHasRules fn || go call_cont
    where
-    go (Select {})           = False
-    go (ApplyTo {})          = False
-    go (StrictArg {})        = True
-    go (StrictBind {})       = False   -- ??
-    go (CoerceIt _ c)        = go c
-    go (Stop _ _ interesting) = interesting
-
--------------------
-canUpdateInPlace :: Type -> Bool
--- Consider   let x = <wurble> in ...
--- If <wurble> returns an explicit constructor, we might be able
--- to do update in place.  So we treat even a thunk RHS context
--- as interesting if update in place is possible.  We approximate
--- this by seeing if the type has a single constructor with a
--- small arity.  But arity zero isn't good -- we share the single copy
--- for that case, so no point in sharing.
-
-canUpdateInPlace ty 
-  | not opt_UF_UpdateInPlace = False
-  | otherwise
-  = case splitTyConApp_maybe ty of 
-       Nothing         -> False 
-       Just (tycon, _) -> case tyConDataCons_maybe tycon of
-                               Just [dc]  -> arity == 1 || arity == 2
-                                          where
-                                             arity = dataConRepArity dc
-                               other -> False
+    go (Select {})            = False
+    go (ApplyTo {})           = False
+    go (StrictArg _ _ cci _ _) = interesting cci
+    go (StrictBind {})        = False  -- ??
+    go (CoerceIt _ c)         = go c
+    go (Stop _ cci)            = interesting cci
+
+    interesting (ArgCtxt rules _) = rules
+    interesting other            = False
  \end{code}
  
  
@@ -442,7 +433,7 @@ settings:
                         (d) Simplifying a GHCi expression or Template 
                                 Haskell splice
  
-       SimplPhase n    Used at all other times
+       SimplPhase n _   Used at all other times
  
  The key thing about SimplGently is that it does no call-site inlining.
  Before full laziness we must be careful not to inline wrappers,
@@ -557,7 +548,7 @@ y's occurrence info, which breaks the invariant.  It matters: y
  might have a BIG rhs, which will now be dup'd at every occurrenc of x.
  
  
-Evne RHSs labelled InlineMe aren't caught here, because there might be
+Even RHSs labelled InlineMe aren't caught here, because there might be
  no benefit from inlining at the call site.
  
  [Sept 01] Don't unconditionally inline a top-level thing, because that
@@ -591,8 +582,8 @@ preInlineUnconditionally env top_lvl bndr rhs
    where
      phase = getMode env
      active = case phase of
-                  SimplGently  -> isAlwaysActive prag
-                  SimplPhase n -> isActive n prag
+                  SimplGently    -> isAlwaysActive prag
+                  SimplPhase n _ -> isActive n prag
      prag = idInlinePragma bndr
  
      try_once in_lam int_cxt    -- There's one textual occurrence
@@ -626,8 +617,8 @@ preInlineUnconditionally env top_lvl bndr rhs
      canInlineInLam _                   = False
  
      early_phase = case phase of
-                       SimplPhase 0 -> False
-                       other        -> True
+                       SimplPhase 0 _ -> False
+                       other          -> True
  -- If we don't have this early_phase test, consider
  --     x = length [1,2,3]
  -- The full laziness pass carefully floats all the cons cells to
@@ -747,8 +738,8 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs unfolding
  
    where
      active = case getMode env of
-                  SimplGently  -> isAlwaysActive prag
-                  SimplPhase n -> isActive n prag
+                  SimplGently    -> isAlwaysActive prag
+                  SimplPhase n _ -> isActive n prag
      prag = idInlinePragma bndr
  
  activeInline :: SimplEnv -> OutId -> Bool
@@ -770,23 +761,24 @@ activeInline env id
         -- and they are now constructed as Compulsory unfoldings (in MkId)
         -- so they'll happen anyway.
  
-      SimplPhase n -> isActive n prag
+      SimplPhase n _ -> isActive n prag
    where
      prag = idInlinePragma id
  
-activeRule :: SimplEnv -> Maybe (Activation -> Bool)
+activeRule :: DynFlags -> SimplEnv -> Maybe (Activation -> Bool)
  -- Nothing => No rules at all
-activeRule env
-  | opt_RulesOff = Nothing
+activeRule dflags env
+  | not (dopt Opt_RewriteRules dflags)
+  = Nothing    -- Rewriting is off
    | otherwise
    = case getMode env of
-       SimplGently  -> Just isAlwaysActive
+       SimplGently    -> Just isAlwaysActive
                         -- Used to be Nothing (no rules in gentle mode)
                         -- Main motivation for changing is that I wanted
                         --      lift String ===> ...
                         -- to work in Template Haskell when simplifying
                         -- splices, so we get simpler code for literal strings
-       SimplPhase n -> Just (isActive n)
+       SimplPhase n _ -> Just (isActive n)
  \end{code}
  
  
@@ -802,17 +794,21 @@ mkLam :: [OutBndr] -> OutExpr -> SimplM OutExpr
  --     a) eta reduction, if that gives a trivial expression
  --     b) eta expansion [only if there are some value lambdas]
  
+mkLam [] body 
+  = return body
  mkLam bndrs body
    = do { dflags <- getDOptsSmpl
         ; mkLam' dflags bndrs body }
    where
      mkLam' :: DynFlags -> [OutBndr] -> OutExpr -> SimplM OutExpr
-    mkLam' dflags bndrs (Cast body@(Lam _ _) co)
+    mkLam' dflags bndrs (Cast body co)
+      | not (any bad bndrs)
         -- Note [Casts and lambdas]
-      = do { lam <- mkLam' dflags (bndrs ++ bndrs') body'
+      = do { lam <- mkLam' dflags bndrs body
            ; return (mkCoerce (mkPiTypes bndrs co) lam) }
-      where    
-       (bndrs',body') = collectBinders body
+      where
+       co_vars  = tyVarsOfType co
+       bad bndr = isCoVar bndr && bndr `elemVarSet` co_vars      
  
      mkLam' dflags bndrs body
        | dopt Opt_DoEtaReduction dflags,
@@ -826,7 +822,7 @@ mkLam bndrs body
            ; return (mkLams bndrs body') }
     
        | otherwise 
-      = returnSmpl (mkLams bndrs body)
+      = return (mkLams bndrs body)
  \end{code}
  
  Note [Casts and lambdas]
@@ -840,9 +836,26 @@ So this equation in mkLam' floats the g1 out, thus:
         (\x. e `cast` g1)  -->  (\x.e) `cast` (tx -> g1)
  where x:tx.
  
-In general, this floats casts outside lambdas, where (I hope) they might meet
-and cancel with some other cast.
-
+In general, this floats casts outside lambdas, where (I hope) they
+might meet and cancel with some other cast:
+       \x. e `cast` co   ===>   (\x. e) `cast` (tx -> co)
+       /\a. e `cast` co  ===>   (/\a. e) `cast` (/\a. co)
+       /\g. e `cast` co  ===>   (/\g. e) `cast` (/\g. co)
+                         (if not (g `in` co))
+
+Notice that it works regardless of 'e'.  Originally it worked only
+if 'e' was itself a lambda, but in some cases that resulted in 
+fruitless iteration in the simplifier.  A good example was when
+compiling Text.ParserCombinators.ReadPrec, where we had a definition 
+like   (\x. Get `cast` g)
+where Get is a constructor with nonzero arity.  Then mkLam eta-expanded
+the Get, and the next iteration eta-reduced it, and then eta-expanded 
+it again.
+
+Note also the side condition for the case of coercion binders.
+It does not make sense to transform
+       /\g. e `cast` g  ==>  (/\g.e) `cast` (/\g.g)
+because the latter is not well-kinded.
  
  --     c) floating lets out through big lambdas 
  --             [only if all tyvar lambdas, and only if this lambda
@@ -855,55 +868,97 @@ and cancel with some other cast.
                         -- if this is indeed a right-hand side; otherwise
                         -- we end up floating the thing out, only for float-in
                         -- to float it right back in again!
- = tryRhsTyLam env bndrs body          `thenSmpl` \ (floats, body') ->
-   returnSmpl (floats, mkLams bndrs body')
+ = do (floats, body') <- tryRhsTyLam env bndrs body
+      return (floats, mkLams bndrs body')
  -}
  
  
  %************************************************************************
  %*                                                                     *
-\subsection{Eta expansion and reduction}
+               Eta reduction
  %*                                                                     *
  %************************************************************************
  
-We try for eta reduction here, but *only* if we get all the 
-way to an exprIsTrivial expression.    
-We don't want to remove extra lambdas unless we are going 
-to avoid allocating this thing altogether
+Note [Eta reduction conditions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We try for eta reduction here, but *only* if we get all the way to an
+trivial expression.  We don't want to remove extra lambdas unless we
+are going to avoid allocating this thing altogether.
+
+There are some particularly delicate points here:
+
+* Eta reduction is not valid in general:  
+       \x. bot  /=  bot
+  This matters, partly for old-fashioned correctness reasons but,
+  worse, getting it wrong can yield a seg fault. Consider
+       f = \x.f x
+       h y = case (case y of { True -> f `seq` True; False -> False }) of
+               True -> ...; False -> ...
+
+  If we (unsoundly) eta-reduce f to get f=f, the strictness analyser
+  says f=bottom, and replaces the (f `seq` True) with just
+  (f `cast` unsafe-co).  BUT, as thing stand, 'f' got arity 1, and it
+  *keeps* arity 1 (perhaps also wrongly).  So CorePrep eta-expands 
+  the definition again, so that it does not termninate after all.
+  Result: seg-fault because the boolean case actually gets a function value.
+  See Trac #1947.
+
+  So it's important to to the right thing.
+
+* We need to be careful if we just look at f's arity. Currently (Dec07),
+  f's arity is visible in its own RHS (see Note [Arity robustness] in 
+  SimplEnv) so we must *not* trust the arity when checking that 'f' is
+  a value.  Instead, look at the unfolding. 
+
+  However for GlobalIds we can look at the arity; and for primops we
+  must, since they have no unfolding.  
+
+* Regardless of whether 'f' is a vlaue, we always want to 
+  reduce (/\a -> f a) to f
+  This came up in a RULE: foldr (build (/\a -> g a))
+  did not match           foldr (build (/\b -> ...something complex...))
+  The type checker can insert these eta-expanded versions,
+  with both type and dictionary lambdas; hence the slightly 
+  ad-hoc isDictId
+
+These delicacies are why we don't use exprIsTrivial and exprIsHNF here.
+Alas.
  
  \begin{code}
  tryEtaReduce :: [OutBndr] -> OutExpr -> Maybe OutExpr
  tryEtaReduce bndrs body 
-       -- We don't use CoreUtils.etaReduce, because we can be more
-       -- efficient here:
-       --  (a) we already have the binders
-       --  (b) we can do the triviality test before computing the free vars
    = go (reverse bndrs) body
    where
      go (b : bs) (App fun arg) | ok_arg b arg = go bs fun       -- Loop round
      go []       fun           | ok_fun fun   = Just fun                -- Success!
      go _        _                           = Nothing          -- Failure!
  
-    ok_fun fun =  exprIsTrivial fun
-              && not (any (`elemVarSet` (exprFreeVars fun)) bndrs)
-              && (exprIsHNF fun || all ok_lam bndrs)
+       -- Note [Eta reduction conditions]
+    ok_fun (App fun (Type ty)) 
+       | not (any (`elemVarSet` tyVarsOfType ty) bndrs)
+       =  ok_fun fun
+    ok_fun (Var fun_id)
+       =  not (fun_id `elem` bndrs)
+       && (ok_fun_id fun_id || all ok_lam bndrs)
+    ok_fun _fun = False
+
+    ok_fun_id fun
+       | isLocalId fun       = isEvaldUnfolding (idUnfolding fun)
+       | isDataConWorkId fun = True
+       | isGlobalId fun      = idArity fun > 0
+
      ok_lam v = isTyVar v || isDictId v
-       -- The exprIsHNF is because eta reduction is not 
-       -- valid in general:  \x. bot  /=  bot
-       -- So we need to be sure that the "fun" is a value.
-       --
-       -- However, we always want to reduce (/\a -> f a) to f
-       -- This came up in a RULE: foldr (build (/\a -> g a))
-       --      did not match      foldr (build (/\b -> ...something complex...))
-       -- The type checker can insert these eta-expanded versions,
-       -- with both type and dictionary lambdas; hence the slightly 
-       -- ad-hoc isDictTy
  
      ok_arg b arg = varToCoreExpr b `cheapEqExpr` arg
  \end{code}
  
  
-       Try eta expansion for RHSs
+%************************************************************************
+%*                                                                     *
+               Eta expansion
+%*                                                                     *
+%************************************************************************
+
  
  We go for:
     f = \x1..xn -> N  ==>   f = \x1..xn y1..ym -> N y1..ym
@@ -918,6 +973,16 @@ where (in both cases)
         * N is a NORMAL FORM (i.e. no redexes anywhere)
           wanting a suitable number of extra args.
  
+The biggest reason for doing this is for cases like
+
+       f = \x -> case x of
+                   True  -> \y -> e1
+                   False -> \y -> e2
+
+Here we want to get the lambdas together.  A good exmaple is the nofib
+program fibheaps, which gets 25% more allocation if you don't do this
+eta-expansion.
+
  We may have to sandwich some coerces between the lambdas
  to make the types work.   exprEtaExpandArity looks through coerces
  when computing arity; and etaExpand adds the coerces as necessary when
@@ -926,9 +991,9 @@ actually computing the expansion.
  \begin{code}
  tryEtaExpansion :: DynFlags -> OutExpr -> SimplM OutExpr
  -- There is at least one runtime binder in the binders
-tryEtaExpansion dflags body
-  = getUniquesSmpl                     `thenSmpl` \ us ->
-    returnSmpl (etaExpand fun_arity us body (exprType body))
+tryEtaExpansion dflags body = do
+    us <- getUniquesM
+    return (etaExpand fun_arity us body (exprType body))
    where
      fun_arity = exprEtaExpandArity dflags body
  \end{code}
@@ -940,8 +1005,35 @@ tryEtaExpansion dflags body
  %*                                                                     *
  %************************************************************************
  
-tryRhsTyLam tries this transformation, when the big lambda appears as
-the RHS of a let(rec) binding:
+Note [Floating and type abstraction]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this:
+       x = /\a. C e1 e2
+We'd like to float this to 
+       y1 = /\a. e1
+       y2 = /\a. e2
+       x = /\a. C (y1 a) (y2 a)
+for the usual reasons: we want to inline x rather vigorously.
+
+You may think that this kind of thing is rare.  But in some programs it is
+common.  For example, if you do closure conversion you might get:
+
+       data a :-> b = forall e. (e -> a -> b) :$ e
+
+       f_cc :: forall a. a :-> a
+       f_cc = /\a. (\e. id a) :$ ()
+
+Now we really want to inline that f_cc thing so that the
+construction of the closure goes away. 
+
+So I have elaborated simplLazyBind to understand right-hand sides that look
+like
+       /\ a1..an. body
+
+and treat them specially. The real work is done in SimplUtils.abstractFloats,
+but there is quite a bit of plumbing in simplLazyBind as well.
+
+The same transformation is good when there are lets in the body:
  
         /\abc -> let(rec) x = e in b
     ==>
@@ -963,25 +1055,6 @@ let-floating.
  This optimisation is CRUCIAL in eliminating the junk introduced by
  desugaring mutually recursive definitions.  Don't eliminate it lightly!
  
-So far as the implementation is concerned:
-
-       Invariant: go F e = /\tvs -> F e
-       
-       Equalities:
-               go F (Let x=e in b)
-               = Let x' = /\tvs -> F e 
-                 in 
-                 go G b
-               where
-                   G = F . Let x = x' tvs
-       
-               go F (Letrec xi=ei in b)
-               = Letrec {xi' = /\tvs -> G ei} 
-                 in
-                 go G b
-               where
-                 G = F . Let {xi = xi' tvs}
-
  [May 1999]  If we do this transformation *regardless* then we can
  end up with some pretty silly stuff.  For example, 
  
@@ -1003,43 +1076,34 @@ and is of the form
  If we abstract this wrt the tyvar we then can't do the case inline
  as we would normally do.
  
+That's why the whole transformation is part of the same process that
+floats let-bindings and constructor arguments out of RHSs.  In particular,
+it is guarded by the doFloatFromRhs call in simplLazyBind.
  
-\begin{code}
-{-     Trying to do this in full laziness
-
-tryRhsTyLam :: SimplEnv -> [OutTyVar] -> OutExpr -> SimplM FloatsWithExpr
--- Call ensures that all the binders are type variables
-
-tryRhsTyLam env tyvars body            -- Only does something if there's a let
-  |  not (all isTyVar tyvars)
-  || not (worth_it body)               -- inside a type lambda, 
-  = returnSmpl (emptyFloats env, body) -- and a WHNF inside that
-
-  | otherwise
-  = go env (\x -> x) body
  
+\begin{code}
+abstractFloats :: [OutTyVar] -> SimplEnv -> OutExpr -> SimplM ([OutBind], OutExpr)
+abstractFloats main_tvs body_env body
+  = ASSERT( notNull body_floats )
+    do { (subst, float_binds) <- mapAccumLM abstract empty_subst body_floats
+       ; return (float_binds, CoreSubst.substExpr subst body) }
    where
-    worth_it e@(Let _ _) = whnf_in_middle e
-    worth_it e          = False
-
-    whnf_in_middle (Let (NonRec x rhs) e) | isUnLiftedType (idType x) = False
-    whnf_in_middle (Let _ e) = whnf_in_middle e
-    whnf_in_middle e        = exprIsCheap e
-
-    main_tyvar_set = mkVarSet tyvars
-
-    go env fn (Let bind@(NonRec var rhs) body)
-      | exprIsTrivial rhs
-      = go env (fn . Let bind) body
-
-    go env fn (Let (NonRec var rhs) body)
-      = mk_poly tyvars_here var                                                        `thenSmpl` \ (var', rhs') ->
-       addAuxiliaryBind env (NonRec var' (mkLams tyvars_here (fn rhs)))        $ \ env -> 
-       go env (fn . Let (mk_silly_bind var rhs')) body
-
+    main_tv_set = mkVarSet main_tvs
+    body_floats = getFloats body_env
+    empty_subst = CoreSubst.mkEmptySubst (seInScope body_env)
+
+    abstract :: CoreSubst.Subst -> OutBind -> SimplM (CoreSubst.Subst, OutBind)
+    abstract subst (NonRec id rhs)
+      = do { (poly_id, poly_app) <- mk_poly tvs_here id
+          ; let poly_rhs = mkLams tvs_here rhs'
+                subst'   = CoreSubst.extendIdSubst subst id poly_app
+          ; return (subst', (NonRec poly_id poly_rhs)) }
        where
-
-       tyvars_here = varSetElems (main_tyvar_set `intersectVarSet` exprSomeFreeVars isTyVar rhs)
+       rhs' = CoreSubst.substExpr subst rhs
+       tvs_here | any isCoVar main_tvs = main_tvs      -- Note [Abstract over coercions]
+                | otherwise 
+                = varSetElems (main_tv_set `intersectVarSet` exprSomeFreeVars isTyVar rhs')
+       
                 -- Abstract only over the type variables free in the rhs
                 -- wrt which the new binding is abstracted.  But the naive
                 -- approach of abstract wrt the tyvars free in the Id's type
@@ -1056,28 +1120,34 @@ tryRhsTyLam env tyvars body             -- Only does something if there's a let
                 -- abstracting wrt *all* the tyvars.  We'll see if that
                 -- gives rise to problems.   SLPJ June 98
  
-    go env fn (Let (Rec prs) body)
-       = mapAndUnzipSmpl (mk_poly tyvars_here) vars    `thenSmpl` \ (vars', rhss') ->
-        let
-           gn body = fn (foldr Let body (zipWith mk_silly_bind vars rhss'))
-           pairs   = vars' `zip` [mkLams tyvars_here (gn rhs) | rhs <- rhss]
-        in
-        addAuxiliaryBind env (Rec pairs)               $ \ env ->
-        go env gn body 
+    abstract subst (Rec prs)
+       = do { (poly_ids, poly_apps) <- mapAndUnzipM (mk_poly tvs_here) ids
+           ; let subst' = CoreSubst.extendSubstList subst (ids `zip` poly_apps)
+                 poly_rhss = [mkLams tvs_here (CoreSubst.substExpr subst' rhs) | rhs <- rhss]
+           ; return (subst', Rec (poly_ids `zip` poly_rhss)) }
         where
-        (vars,rhss) = unzip prs
-        tyvars_here = varSetElems (main_tyvar_set `intersectVarSet` exprsSomeFreeVars isTyVar (map snd prs))
-               -- See notes with tyvars_here above
-
-    go env fn body = returnSmpl (emptyFloats env, fn body)
-
-    mk_poly tyvars_here var
-      = getUniqueSmpl          `thenSmpl` \ uniq ->
-       let
-           poly_name = setNameUnique (idName var) uniq         -- Keep same name
-           poly_ty   = mkForAllTys tyvars_here (idType var)    -- But new type of course
-           poly_id   = mkLocalId poly_name poly_ty 
-
+        (ids,rhss) = unzip prs
+               -- For a recursive group, it's a bit of a pain to work out the minimal
+               -- set of tyvars over which to abstract:
+               --      /\ a b c.  let x = ...a... in
+               --                 letrec { p = ...x...q...
+               --                          q = .....p...b... } in
+               --                 ...
+               -- Since 'x' is abstracted over 'a', the {p,q} group must be abstracted
+               -- over 'a' (because x is replaced by (poly_x a)) as well as 'b'.  
+               -- Since it's a pain, we just use the whole set, which is always safe
+               -- 
+               -- If you ever want to be more selective, remember this bizarre case too:
+               --      x::a = x
+               -- Here, we must abstract 'x' over 'a'.
+        tvs_here = main_tvs
+
+    mk_poly tvs_here var
+      = do { uniq <- getUniqueM
+          ; let  poly_name = setNameUnique (idName var) uniq           -- Keep same name
+                 poly_ty   = mkForAllTys tvs_here (idType var) -- But new type of course
+                 poly_id   = mkLocalId poly_name poly_ty 
+          ; return (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tvs_here)) }
                 -- In the olden days, it was crucial to copy the occInfo of the original var, 
                 -- because we were looking at occurrence-analysed but as yet unsimplified code!
                 -- In particular, we mustn't lose the loop breakers.  BUT NOW we are looking
@@ -1090,10 +1160,17 @@ tryRhsTyLam env tyvars body             -- Only does something if there's a let
                 -- where x* has an INLINE prag on it.  Now, once x* is inlined,
                 -- the occurrences of x' will be just the occurrences originally
                 -- pinned on x.
-       in
-       returnSmpl (poly_id, mkTyApps (Var poly_id) (mkTyVarTys tyvars_here))
+\end{code}
+
+Note [Abstract over coercions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If a coercion variable (g :: a ~ Int) is free in the RHS, then so is the
+type variable a.  Rather than sort this mess out, we simply bale out and abstract
+wrt all the type variables if any of them are coercion variables.
+
+
+Historical note: if you use let-bindings instead of a substitution, beware of this:
  
-    mk_silly_bind var rhs = NonRec var (Note InlineMe rhs)
                 -- Suppose we start with:
                 --
                 --      x = /\ a -> let g = G in E
@@ -1113,8 +1190,6 @@ tryRhsTyLam env tyvars body               -- Only does something if there's a let
                 -- Solution: put an INLINE note on g's RHS, so that poly_g seems
                 --           to appear many times.  (NB: mkInlineMe eliminates
                 --           such notes on trivial RHSs, so do it manually.)
--}
-\end{code}
  
  %************************************************************************
  %*                                                                     *
@@ -1184,28 +1259,27 @@ have to check that r doesn't mention the variables bound by the
  pattern in each alternative, so the binder-info is rather useful.
  
  \begin{code}
-prepareAlts :: OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
-prepareAlts scrut case_bndr' alts
+prepareAlts :: SimplEnv -> OutExpr -> OutId -> [InAlt] -> SimplM ([AltCon], [InAlt])
+prepareAlts env scrut case_bndr' alts
    = do { dflags <- getDOptsSmpl
         ; alts <- combineIdenticalAlts case_bndr' alts
  
         ; let (alts_wo_default, maybe_deflt) = findDefault alts
               alt_cons = [con | (con,_,_) <- alts_wo_default]
               imposs_deflt_cons = nub (imposs_cons ++ alt_cons)
-               -- "imposs_deflt_cons" are handled either by the context, 
-               -- OR by a branch in this case expression.
-               -- Don't include DEFAULT!!
+               -- "imposs_deflt_cons" are handled 
+               --   EITHER by the context, 
+               --   OR by a non-DEFAULT branch in this case expression.
  
-       ; default_alts <- prepareDefault dflags scrut case_bndr' mb_tc_app 
+       ; default_alts <- prepareDefault dflags env case_bndr' mb_tc_app 
                                          imposs_deflt_cons maybe_deflt
  
-       ; let trimmed_alts = filter possible_alt alts_wo_default
-             merged_alts = mergeAlts default_alts trimmed_alts
+       ; let trimmed_alts = filterOut impossible_alt alts_wo_default
+             merged_alts = mergeAlts trimmed_alts default_alts
                 -- We need the mergeAlts in case the new default_alt 
                 -- has turned into a constructor alternative.
                 -- The merge keeps the inner DEFAULT at the front, if there is one
-               -- and eliminates any inner_alts that are shadowed by the outer_alts
-
+               -- and interleaves the alternatives in the right order
  
         ; return (imposs_deflt_cons, merged_alts) }
    where
@@ -1216,10 +1290,10 @@ prepareAlts scrut case_bndr' alts
                     Var v -> otherCons (idUnfolding v)
                     other -> []
  
-    possible_alt :: CoreAlt -> Bool
-    possible_alt (con, _, _) | con `elem` imposs_cons = False
-    possible_alt (DataAlt con, _, _) = dataConCanMatch inst_tys con
-    possible_alt alt               = True
+    impossible_alt :: CoreAlt -> Bool
+    impossible_alt (con, _, _) | con `elem` imposs_cons = True
+    impossible_alt (DataAlt con, _, _) = dataConCannotMatch inst_tys con
+    impossible_alt alt                = False
  
  
  --------------------------------------------------
@@ -1243,7 +1317,7 @@ combineIdenticalAlts case_bndr alts = return alts
  --                     Prepare the default alternative
  -------------------------------------------------------------------------
  prepareDefault :: DynFlags
-              -> OutExpr       -- Scrutinee
+              -> SimplEnv
                -> OutId         -- Case binder; need just for its type. Note that as an
                                 --   OutId, it has maximum information; this is important.
                                 --   Test simpl013 is an example
@@ -1255,30 +1329,39 @@ prepareDefault :: DynFlags
                                         -- And becuase case-merging can cause many to show up
  
  -------        Merge nested cases ----------
-prepareDefault dflags scrut outer_bndr bndr_ty imposs_cons (Just deflt_rhs)
+prepareDefault dflags env outer_bndr bndr_ty imposs_cons (Just deflt_rhs)
    | dopt Opt_CaseMerge dflags
-  , Case (Var scrut_var) inner_bndr _ inner_alts <- deflt_rhs
-  , scruting_same_var scrut_var
+  , Case (Var inner_scrut_var) inner_bndr _ inner_alts <- deflt_rhs
+  , DoneId inner_scrut_var' <- substId env inner_scrut_var
+       -- Remember, inner_scrut_var is an InId, but outer_bndr is an OutId
+  , inner_scrut_var' == outer_bndr
+       -- NB: the substId means that if the outer scrutinee was a 
+       --     variable, and inner scrutinee is the same variable, 
+       --     then inner_scrut_var' will be outer_bndr
+       --     via the magic of simplCaseBinder
    = do { tick (CaseMerge outer_bndr)
  
         ; let munge_rhs rhs = bindCaseBndr inner_bndr (Var outer_bndr) rhs
-       ; return [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts] }
+       ; return [(con, args, munge_rhs rhs) | (con, args, rhs) <- inner_alts,
+                                              not (con `elem` imposs_cons) ]
+               -- NB: filter out any imposs_cons.  Example:
+               --      case x of 
+               --        A -> e1
+               --        DEFAULT -> case x of 
+               --                      A -> e2
+               --                      B -> e3
+               -- When we merge, we must ensure that e1 takes 
+               -- precedence over e2 as the value for A!  
+       }
         -- Warning: don't call prepareAlts recursively!
         -- Firstly, there's no point, because inner alts have already had
         -- mkCase applied to them, so they won't have a case in their default
         -- Secondly, if you do, you get an infinite loop, because the bindCaseBndr
         -- in munge_rhs may put a case into the DEFAULT branch!
-  where
-       -- We are scrutinising the same variable if it's
-       -- the outer case-binder, or if the outer case scrutinises a variable
-       -- (and it's the same).  Testing both allows us not to replace the
-       -- outer scrut-var with the outer case-binder (Simplify.simplCaseBinder).
-    scruting_same_var = case scrut of
-                         Var outer_scrut -> \ v -> v == outer_bndr || v == outer_scrut
-                         other           -> \ v -> v == outer_bndr
+
  
  --------- Fill in known constructor -----------
-prepareDefault dflags scrut case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
+prepareDefault dflags env case_bndr (Just (tycon, inst_tys)) imposs_cons (Just deflt_rhs)
    |    -- This branch handles the case where we are 
         -- scrutinisng an algebraic data type
      isAlgTyCon tycon           -- It's a data type, tuple, or unboxed tuples.  
@@ -1297,15 +1380,14 @@ prepareDefault dflags scrut case_bndr (Just (tycon, inst_tys)) imposs_cons (Just
                                 -- which would be quite legitmate.  But it's a really obscure corner, and
                                 -- not worth wasting code on.
    , let imposs_data_cons = [con | DataAlt con <- imposs_cons]  -- We now know it's a data type 
-       is_possible con  = not (con `elem` imposs_data_cons)
-                          && dataConCanMatch inst_tys con
-  = case filter is_possible all_cons of
+       impossible con  = con `elem` imposs_data_cons || dataConCannotMatch inst_tys con
+  = case filterOut impossible all_cons of
         []    -> return []      -- Eliminate the default alternative
                                 -- altogether if it can't match
  
         [con] ->        -- It matches exactly one constructor, so fill it in
                  do { tick (FillInCaseDefault case_bndr)
-                    ; us <- getUniquesSmpl
+                    ; us <- getUniquesM
                      ; let (ex_tvs, co_tvs, arg_ids) =
                                dataConRepInstPat us con inst_tys
                      ; return [(DataAlt con, ex_tvs ++ co_tvs ++ arg_ids, deflt_rhs)] }
@@ -1313,10 +1395,10 @@ prepareDefault dflags scrut case_bndr (Just (tycon, inst_tys)) imposs_cons (Just
         two_or_more -> return [(DEFAULT, [], deflt_rhs)]
  
  --------- Catch-all cases -----------
-prepareDefault dflags scrut case_bndr bndr_ty imposs_cons (Just deflt_rhs)
+prepareDefault dflags env case_bndr bndr_ty imposs_cons (Just deflt_rhs)
    = return [(DEFAULT, [], deflt_rhs)]
  
-prepareDefault dflags scrut case_bndr bndr_ty imposs_cons Nothing
+prepareDefault dflags env case_bndr bndr_ty imposs_cons Nothing
    = return []  -- No default branch
  \end{code}
  
@@ -1352,7 +1434,7 @@ mkCase :: OutExpr -> OutId -> OutType
  -- put an error case here insteadd
  mkCase scrut case_bndr ty []
    = pprTrace "mkCase: null alts" (ppr case_bndr <+> ppr scrut) $
-    return (mkApps (Var eRROR_ID)
+    return (mkApps (Var rUNTIME_ERROR_ID)
                    [Type ty, Lit (mkStringLit "Impossible alternative")])
  
  
@@ -1362,8 +1444,8 @@ mkCase scrut case_bndr ty []
  
  mkCase scrut case_bndr ty alts -- Identity case
    | all identity_alt alts
-  = tick (CaseIdentity case_bndr)              `thenSmpl_`
-    returnSmpl (re_cast scrut)
+  = do tick (CaseIdentity case_bndr)
+       return (re_cast scrut)
    where
      identity_alt (con, args, rhs) = check_eq con args (de_cast rhs)
  
@@ -1396,7 +1478,7 @@ mkCase scrut case_bndr ty alts    -- Identity case
  --------------------------------------------------
  --     Catch-all
  --------------------------------------------------
-mkCase scrut bndr ty alts = returnSmpl (Case scrut bndr ty alts)
+mkCase scrut bndr ty alts = return (Case scrut bndr ty alts)
  \end{code}