Treat the (~) type constructor a bit specially

[ghc-hetmet.git] / compiler / specialise / SpecConstr.lhs
diff --git a/compiler/specialise/SpecConstr.lhs b/compiler/specialise/SpecConstr.lhs

index e5583e1..5fc0226 100644 (file)
--- a/compiler/specialise/SpecConstr.lhs
+++ b/compiler/specialise/SpecConstr.lhs
@@ -1,43 +1,72 @@
+ToDo [Nov 2010]
+~~~~~~~~~~~~~~~
+1. Use a library type rather than an annotation for ForceSpecConstr
+2. Nuke NoSpecConstr
+
  %
  % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
  %
  \section[SpecConstr]{Specialise over constructors}
  
  \begin{code}
  %
  % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
  %
  \section[SpecConstr]{Specialise over constructors}
  
  \begin{code}
+-- The above warning suppression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and fix
+-- any warnings in the module. See
+--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
+-- for details
+
  module SpecConstr(
  module SpecConstr(
-       specConstrProgram       
+       specConstrProgram
+#ifdef GHCI
+        , SpecConstrAnnotation(..)
+#endif
      ) where
  
  #include "HsVersions.h"
  
  import CoreSyn
      ) where
  
  #include "HsVersions.h"
  
  import CoreSyn
-import CoreLint                ( showPass, endPass )
-import CoreUtils       ( exprType, mkPiTypes )
+import CoreSubst
+import CoreUtils
+import CoreUnfold      ( couldBeSmallEnoughToInline )
  import CoreFVs                 ( exprsFreeVars )
  import CoreFVs                 ( exprsFreeVars )
-import CoreTidy                ( tidyRules )
-import PprCore         ( pprRules )
+import CoreMonad
+import HscTypes         ( ModGuts(..) )
  import WwLib           ( mkWorkerArgs )
  import WwLib           ( mkWorkerArgs )
-import DataCon         ( dataConRepArity, dataConUnivTyVars )
-import Type            ( Type, tyConAppArgs )
-import Rules           ( matchN )
-import Id              ( Id, idName, idType, isDataConWorkId_maybe, 
-                         mkUserLocal, mkSysLocal, idUnfolding, isLocalId )
-import Var             ( Var )
+import DataCon
+import Coercion                hiding( substTy, substCo )
+import Rules
+import Type            hiding ( substTy )
+import Id
+import MkCore          ( mkImpossibleExpr )
+import Var
  import VarEnv
  import VarSet
  import VarEnv
  import VarSet
-import Name            ( nameOccName, nameSrcLoc )
-import Rules           ( addIdSpecialisations, mkLocalRule, rulesOfBinds )
-import OccName         ( mkSpecOcc )
-import ErrUtils                ( dumpIfSet_dyn )
-import DynFlags                ( DynFlags, DynFlag(..) )
-import BasicTypes      ( Activation(..) )
-import Maybes          ( orElse, catMaybes, isJust )
-import Util            ( zipWithEqual, lengthAtLeast, notNull )
-import List            ( nubBy, partition )
+import Name
+import BasicTypes
+import DynFlags                ( DynFlags(..) )
+import StaticFlags     ( opt_PprStyle_Debug )
+import Maybes          ( orElse, catMaybes, isJust, isNothing )
+import Demand
+import DmdAnal         ( both )
+import Serialized       ( deserializeWithData )
+import Util
+import Pair
  import UniqSupply
  import Outputable
  import FastString
  import UniqFM
  import UniqSupply
  import Outputable
  import FastString
  import UniqFM
+import MonadUtils
+import Control.Monad   ( zipWithM )
+import Data.List
+
+
+-- See Note [SpecConstrAnnotation]
+#ifndef GHCI
+type SpecConstrAnnotation = ()
+#else
+import TyCon            ( TyCon )
+import GHC.Exts( SpecConstrAnnotation(..) )
+#endif
  \end{code}
  
  -----------------------------------------------------
  \end{code}
  
  -----------------------------------------------------
@@ -114,7 +143,7 @@ This happens if
  
  Hence the "OR" part of Note [Good arguments] below.
  
  
  Hence the "OR" part of Note [Good arguments] below.
  
-ALTERNATIVE: pass both boxed and unboxed versions.  This no longer saves
+ALTERNATIVE 2: pass both boxed and unboxed versions.  This no longer saves
  allocation, but does perhaps save evals. In the RULE we'd have
  something like
  
  allocation, but does perhaps save evals. In the RULE we'd have
  something like
  
@@ -124,6 +153,25 @@ If at the call site the (I# x) was an unfolding, then we'd have to
  rely on CSE to eliminate the duplicate allocation.... This alternative
  doesn't look attractive enough to pursue.
  
  rely on CSE to eliminate the duplicate allocation.... This alternative
  doesn't look attractive enough to pursue.
  
+ALTERNATIVE 3: ignore the reboxing problem.  The trouble is that 
+the conservative reboxing story prevents many useful functions from being
+specialised.  Example:
+       foo :: Maybe Int -> Int -> Int
+       foo   (Just m) 0 = 0
+       foo x@(Just m) n = foo x (n-m)
+Here the use of 'x' will clearly not require boxing in the specialised function.
+
+The strictness analyser has the same problem, in fact.  Example:
+       f p@(a,b) = ...
+If we pass just 'a' and 'b' to the worker, it might need to rebox the
+pair to create (a,b).  A more sophisticated analysis might figure out
+precisely the cases in which this could happen, but the strictness
+analyser does no such analysis; it just passes 'a' and 'b', and hopes
+for the best.
+
+So my current choice is to make SpecConstr similarly aggressive, and
+ignore the bad potential of reboxing.
+
  
  Note [Good arguments]
  ~~~~~~~~~~~~~~~~~~~~~
  
  Note [Good arguments]
  ~~~~~~~~~~~~~~~~~~~~~
@@ -298,6 +346,180 @@ may avoid allocating it altogether.  Just like for constructors.
  
  Looks cool, but probably rare...but it might be easy to implement.
  
  
  Looks cool, but probably rare...but it might be easy to implement.
  
+
+Note [SpecConstr for casts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider 
+    data family T a :: *
+    data instance T Int = T Int
+
+    foo n = ...
+       where
+         go (T 0) = 0
+         go (T n) = go (T (n-1))
+
+The recursive call ends up looking like 
+       go (T (I# ...) `cast` g)
+So we want to spot the constructor application inside the cast.
+That's why we have the Cast case in argToPat
+
+Note [Local recursive groups]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For a *local* recursive group, we can see all the calls to the
+function, so we seed the specialisation loop from the calls in the
+body, not from the calls in the RHS.  Consider:
+
+  bar m n = foo n (n,n) (n,n) (n,n) (n,n)
+   where
+     foo n p q r s
+       | n == 0    = m
+       | n > 3000  = case p of { (p1,p2) -> foo (n-1) (p2,p1) q r s }
+       | n > 2000  = case q of { (q1,q2) -> foo (n-1) p (q2,q1) r s }
+       | n > 1000  = case r of { (r1,r2) -> foo (n-1) p q (r2,r1) s }
+       | otherwise = case s of { (s1,s2) -> foo (n-1) p q r (s2,s1) }
+
+If we start with the RHSs of 'foo', we get lots and lots of specialisations,
+most of which are not needed.  But if we start with the (single) call
+in the rhs of 'bar' we get exactly one fully-specialised copy, and all
+the recursive calls go to this fully-specialised copy. Indeed, the original
+function is later collected as dead code.  This is very important in 
+specialising the loops arising from stream fusion, for example in NDP where
+we were getting literally hundreds of (mostly unused) specialisations of
+a local function.
+
+In a case like the above we end up never calling the original un-specialised
+function.  (Although we still leave its code around just in case.)
+
+However, if we find any boring calls in the body, including *unsaturated*
+ones, such as
+      letrec foo x y = ....foo...
+      in map foo xs
+then we will end up calling the un-specialised function, so then we *should*
+use the calls in the un-specialised RHS as seeds.  We call these "boring
+call patterns", and callsToPats reports if it finds any of these.
+
+
+Note [Do not specialise diverging functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Specialising a function that just diverges is a waste of code.
+Furthermore, it broke GHC (simpl014) thus:
+   {-# STR Sb #-}
+   f = \x. case x of (a,b) -> f x
+If we specialise f we get
+   f = \x. case x of (a,b) -> fspec a b
+But fspec doesn't have decent strictness info.  As it happened,
+(f x) :: IO t, so the state hack applied and we eta expanded fspec,
+and hence f.  But now f's strictness is less than its arity, which
+breaks an invariant.
+
+Note [SpecConstrAnnotation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+SpecConstrAnnotation is defined in GHC.Exts, and is only guaranteed to
+be available in stage 2 (well, until the bootstrap compiler can be
+guaranteed to have it)
+
+So we define it to be () in stage1 (ie when GHCI is undefined), and
+'#ifdef' out the code that uses it.
+
+See also Note [Forcing specialisation]
+
+Note [Forcing specialisation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+With stream fusion and in other similar cases, we want to fully specialise
+some (but not necessarily all!) loops regardless of their size and the
+number of specialisations. We allow a library to specify this by annotating
+a type with ForceSpecConstr and then adding a parameter of that type to the
+loop. Here is a (simplified) example from the vector library:
+
+  data SPEC = SPEC | SPEC2
+  {-# ANN type SPEC ForceSpecConstr #-}
+
+  foldl :: (a -> b -> a) -> a -> Stream b -> a
+  {-# INLINE foldl #-}
+  foldl f z (Stream step s _) = foldl_loop SPEC z s
+    where
+      foldl_loop !sPEC z s = case step s of
+                              Yield x s' -> foldl_loop sPEC (f z x) s'
+                              Skip s'    -> foldl_loop sPEC z s'
+                              Done       -> z
+
+SpecConstr will spot the SPEC parameter and always fully specialise
+foldl_loop. Note that
+
+  * We have to prevent the SPEC argument from being removed by
+    w/w which is why (a) SPEC is a sum type, and (b) we have to seq on
+    the SPEC argument.
+
+  * And lastly, the SPEC argument is ultimately eliminated by
+    SpecConstr itself so there is no runtime overhead.
+
+This is all quite ugly; we ought to come up with a better design.
+
+ForceSpecConstr arguments are spotted in scExpr' and scTopBind which then set
+sc_force to True when calling specLoop. This flag does three things:
+  * Ignore specConstrThreshold, to specialise functions of arbitrary size
+        (see scTopBind)
+  * Ignore specConstrCount, to make arbitrary numbers of specialisations
+        (see specialise)
+  * Specialise even for arguments that are not scrutinised in the loop
+        (see argToPat; Trac #4488)
+
+This flag is inherited for nested non-recursive bindings (which are likely to
+be join points and hence should be fully specialised) but reset for nested
+recursive bindings.
+
+What alternatives did I consider? Annotating the loop itself doesn't
+work because (a) it is local and (b) it will be w/w'ed and having
+w/w propagate annotations somehow doesn't seem like a good idea. The
+types of the loop arguments really seem to be the most persistent
+thing.
+
+Annotating the types that make up the loop state doesn't work,
+either, because (a) it would prevent us from using types like Either
+or tuples here, (b) we don't want to restrict the set of types that
+can be used in Stream states and (c) some types are fixed by the user
+(e.g., the accumulator here) but we still want to specialise as much
+as possible.
+
+ForceSpecConstr is done by way of an annotation:
+  data SPEC = SPEC | SPEC2
+  {-# ANN type SPEC ForceSpecConstr #-}
+But SPEC is the *only* type so annotated, so it'd be better to
+use a particular library type.
+
+Alternatives to ForceSpecConstr
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Instead of giving the loop an extra argument of type SPEC, we
+also considered *wrapping* arguments in SPEC, thus
+  data SPEC a = SPEC a | SPEC2
+
+  loop = \arg -> case arg of
+                     SPEC state ->
+                        case state of (x,y) -> ... loop (SPEC (x',y')) ...
+                     SPEC2 -> error ...
+The idea is that a SPEC argument says "specialise this argument
+regardless of whether the function case-analyses it".  But this
+doesn't work well:
+  * SPEC must still be a sum type, else the strictness analyser
+    eliminates it
+  * But that means that 'loop' won't be strict in its real payload
+This loss of strictness in turn screws up specialisation, because
+we may end up with calls like
+   loop (SPEC (case z of (p,q) -> (q,p)))
+Without the SPEC, if 'loop' was strict, the case would move out
+and we'd see loop applied to a pair. But if 'loop' isn't strict
+this doesn't look like a specialisable call.
+
+Note [NoSpecConstr]
+~~~~~~~~~~~~~~~~~~~
+The ignoreDataCon stuff allows you to say
+    {-# ANN type T NoSpecConstr #-}
+to mean "don't specialise on arguments of this type".  It was added
+before we had ForceSpecConstr.  Lacking ForceSpecConstr we specialised
+regardless of size; and then we needed a way to turn that *off*.  Now
+that we have ForceSpecConstr, this NoSpecConstr is probably redundant.
+(Used only for PArray.)
+
  -----------------------------------------------------
                 Stuff not yet handled
  -----------------------------------------------------
  -----------------------------------------------------
                 Stuff not yet handled
  -----------------------------------------------------
@@ -373,8 +595,6 @@ But perhaps the first one isn't good.  After all, we know that tpl_B2 is
  a T (I# x) really, because T is strict and Int has one constructor.  (We can't
  unbox the strict fields, becuase T is polymorphic!)
  
  a T (I# x) really, because T is strict and Int has one constructor.  (We can't
  unbox the strict fields, becuase T is polymorphic!)
  
-
-
  %************************************************************************
  %*                                                                     *
  \subsection{Top level wrapper stuff}
  %************************************************************************
  %*                                                                     *
  \subsection{Top level wrapper stuff}
@@ -382,24 +602,19 @@ unbox the strict fields, becuase T is polymorphic!)
  %************************************************************************
  
  \begin{code}
  %************************************************************************
  
  \begin{code}
-specConstrProgram :: DynFlags -> UniqSupply -> [CoreBind] -> IO [CoreBind]
-specConstrProgram dflags us binds
+specConstrProgram :: ModGuts -> CoreM ModGuts
+specConstrProgram guts
    = do
    = do
-       showPass dflags "SpecConstr"
-
-       let (binds', _) = initUs us (go emptyScEnv binds)
-
-       endPass dflags "SpecConstr" Opt_D_dump_spec binds'
-
-       dumpIfSet_dyn dflags Opt_D_dump_rules "Top-level specialisations"
-                 (pprRules (tidyRules emptyTidyEnv (rulesOfBinds binds')))
-
-       return binds'
+      dflags <- getDynFlags
+      us     <- getUniqueSupplyM
+      annos  <- getFirstAnnotations deserializeWithData guts
+      let binds' = fst $ initUs us (go (initScEnv dflags annos) (mg_binds guts))
+      return (guts { mg_binds = binds' })
    where
    where
-    go env []          = returnUs []
-    go env (bind:binds) = scBind env bind      `thenUs` \ (env', _, bind') ->
-                         go env' binds         `thenUs` \ binds' ->
-                         returnUs (bind' : binds')
+    go _   []          = return []
+    go env (bind:binds) = do (env', bind') <- scTopBind env bind
+                             binds' <- go env' binds
+                             return (bind' : binds')
  \end{code}
  
  
  \end{code}
  
  
@@ -410,24 +625,58 @@ specConstrProgram dflags us binds
  %************************************************************************
  
  \begin{code}
  %************************************************************************
  
  \begin{code}
-data ScEnv = SCE { scope :: InScopeEnv,
-                       -- Binds all non-top-level variables in scope
+data ScEnv = SCE { sc_size  :: Maybe Int,      -- Size threshold
+                  sc_count :: Maybe Int,       -- Max # of specialisations for any one fn
+                                               -- See Note [Avoiding exponential blowup]
+                   sc_force :: Bool,            -- Force specialisation?
+                                                -- See Note [Forcing specialisation]
  
  
-                  cons  :: ConstrEnv
+                  sc_subst :: Subst,           -- Current substitution
+                                               -- Maps InIds to OutExprs
+
+                  sc_how_bound :: HowBoundEnv,
+                       -- Binds interesting non-top-level variables
+                       -- Domain is OutVars (*after* applying the substitution)
+
+                  sc_vals  :: ValueEnv,
+                       -- Domain is OutIds (*after* applying the substitution)
+                       -- Used even for top-level bindings (but not imported ones)
+
+                   sc_annotations :: UniqFM SpecConstrAnnotation
              }
  
              }
  
-type InScopeEnv = VarEnv HowBound
+---------------------
+-- As we go, we apply a substitution (sc_subst) to the current term
+type InExpr = CoreExpr         -- _Before_ applying the subst
+type InVar  = Var
  
  
-type ConstrEnv = IdEnv ConValue
-data ConValue  = CV AltCon [CoreArg]
-       -- Variables known to be bound to a constructor
-       -- in a particular case alternative
+type OutExpr = CoreExpr                -- _After_ applying the subst
+type OutId   = Id
+type OutVar  = Var
  
  
+---------------------
+type HowBoundEnv = VarEnv HowBound     -- Domain is OutVars
+
+---------------------
+type ValueEnv = IdEnv Value            -- Domain is OutIds
+data Value    = ConVal AltCon [CoreArg]        -- _Saturated_ constructors
+                                       --   The AltCon is never DEFAULT
+             | LambdaVal               -- Inlinable lambdas or PAPs
  
  
-instance Outputable ConValue where
-   ppr (CV con args) = ppr con <+> interpp'SP args
+instance Outputable Value where
+   ppr (ConVal con args) = ppr con <+> interpp'SP args
+   ppr LambdaVal        = ptext (sLit "<Lambda>")
  
  
-emptyScEnv = SCE { scope = emptyVarEnv, cons = emptyVarEnv }
+---------------------
+initScEnv :: DynFlags -> UniqFM SpecConstrAnnotation -> ScEnv
+initScEnv dflags anns
+  = SCE { sc_size = specConstrThreshold dflags,
+         sc_count = specConstrCount dflags,
+          sc_force = False,
+         sc_subst = emptySubst, 
+         sc_how_bound = emptyVarEnv, 
+         sc_vals = emptyVarEnv,
+          sc_annotations = anns }
  
  data HowBound = RecFun -- These are the recursive functions for which 
                         -- we seek interesting call patterns
  
  data HowBound = RecFun -- These are the recursive functions for which 
                         -- we seek interesting call patterns
@@ -435,69 +684,199 @@ data HowBound = RecFun   -- These are the recursive functions for which
               | RecArg  -- These are those functions' arguments, or their sub-components; 
                         -- we gather occurrence information for these
  
               | RecArg  -- These are those functions' arguments, or their sub-components; 
                         -- we gather occurrence information for these
  
-             | Other   -- We track all others so we know what's in scope
-                       -- This is used in spec_one to check what needs to be
-                       -- passed as a parameter and what is in scope at the 
-                       -- function definition site
-
  instance Outputable HowBound where
    ppr RecFun = text "RecFun"
    ppr RecArg = text "RecArg"
  instance Outputable HowBound where
    ppr RecFun = text "RecFun"
    ppr RecArg = text "RecArg"
-  ppr Other = text "Other"
-
-lookupScopeEnv env v = lookupVarEnv (scope env) v
-
-extendBndrs env bndrs = env { scope = extendVarEnvList (scope env) [(b,Other) | b <- bndrs] }
-extendBndr  env bndr  = env { scope = extendVarEnv (scope env) bndr Other }
-
-    -- When we encounter
-    -- case scrut of b
-    --     C x y -> ...
-    -- we want to bind b, and perhaps scrut too, to (C x y)
-extendCaseBndrs :: ScEnv -> Id -> CoreExpr -> AltCon -> [Var] -> ScEnv
-extendCaseBndrs env case_bndr scrut con alt_bndrs
-  = case con of
-       DEFAULT    -> env1
-       LitAlt lit -> extendCons env1 scrut case_bndr (CV con [])
-       DataAlt dc -> extend_data_con dc
+
+scForce :: ScEnv -> Bool -> ScEnv
+scForce env b = env { sc_force = b }
+
+lookupHowBound :: ScEnv -> Id -> Maybe HowBound
+lookupHowBound env id = lookupVarEnv (sc_how_bound env) id
+
+scSubstId :: ScEnv -> Id -> CoreExpr
+scSubstId env v = lookupIdSubst (text "scSubstId") (sc_subst env) v
+
+scSubstTy :: ScEnv -> Type -> Type
+scSubstTy env ty = substTy (sc_subst env) ty
+
+scSubstCo :: ScEnv -> Coercion -> Coercion
+scSubstCo env co = substCo (sc_subst env) co
+
+zapScSubst :: ScEnv -> ScEnv
+zapScSubst env = env { sc_subst = zapSubstEnv (sc_subst env) }
+
+extendScInScope :: ScEnv -> [Var] -> ScEnv
+       -- Bring the quantified variables into scope
+extendScInScope env qvars = env { sc_subst = extendInScopeList (sc_subst env) qvars }
+
+       -- Extend the substitution
+extendScSubst :: ScEnv -> Var -> OutExpr -> ScEnv
+extendScSubst env var expr = env { sc_subst = extendSubst (sc_subst env) var expr }
+
+extendScSubstList :: ScEnv -> [(Var,OutExpr)] -> ScEnv
+extendScSubstList env prs = env { sc_subst = extendSubstList (sc_subst env) prs }
+
+extendHowBound :: ScEnv -> [Var] -> HowBound -> ScEnv
+extendHowBound env bndrs how_bound
+  = env { sc_how_bound = extendVarEnvList (sc_how_bound env)
+                           [(bndr,how_bound) | bndr <- bndrs] }
+
+extendBndrsWith :: HowBound -> ScEnv -> [Var] -> (ScEnv, [Var])
+extendBndrsWith how_bound env bndrs 
+  = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndrs')
    where
    where
-    cur_scope = scope env
-    env1 = env { scope = extendVarEnvList cur_scope 
-                               [(b,how_bound) | b <- case_bndr:alt_bndrs] }
-
-       -- Record RecArg for the components iff the scrutinee is RecArg
-       --      [This comment looks plain wrong to me, so I'm ignoring it
-       --           "Also forget if the scrutinee is a RecArg, because we're
-       --           now in the branch of a case, and we don't want to
-       --           record a non-scrutinee use of v if we have
-       --              case v of { (a,b) -> ...(f v)... }" ]
-    how_bound = case scrut of
-                 Var v -> lookupVarEnv cur_scope v `orElse` Other
-                 other -> Other
-
-    extend_data_con data_con = 
-      extendCons env1 scrut case_bndr (CV con vanilla_args)
-       where
-           vanilla_args = map Type (tyConAppArgs (idType case_bndr)) ++
-                          varsToCoreExprs alt_bndrs
-
-extendCons :: ScEnv -> CoreExpr -> Id -> ConValue -> ScEnv
-extendCons env scrut case_bndr val
-  = case scrut of
-       Var v -> env { cons = extendVarEnv cons1 v val }
-       other -> env { cons = cons1 }
+    (subst', bndrs') = substBndrs (sc_subst env) bndrs
+    hb_env' = sc_how_bound env `extendVarEnvList` 
+                   [(bndr,how_bound) | bndr <- bndrs']
+
+extendBndrWith :: HowBound -> ScEnv -> Var -> (ScEnv, Var)
+extendBndrWith how_bound env bndr 
+  = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndr')
    where
    where
-    cons1 = extendVarEnv (cons env) case_bndr val
-
-    -- When we encounter a recursive function binding
-    -- f = \x y -> ...
-    -- we want to extend the scope env with bindings 
-    -- that record that f is a RecFn and x,y are RecArgs
-extendRecBndr env fn bndrs
-  =  env { scope = scope env `extendVarEnvList` 
-                  ((fn,RecFun): [(bndr,RecArg) | bndr <- bndrs]) }
+    (subst', bndr') = substBndr (sc_subst env) bndr
+    hb_env' = extendVarEnv (sc_how_bound env) bndr' how_bound
+
+extendRecBndrs :: ScEnv -> [Var] -> (ScEnv, [Var])
+extendRecBndrs env bndrs  = (env { sc_subst = subst' }, bndrs')
+                     where
+                       (subst', bndrs') = substRecBndrs (sc_subst env) bndrs
+
+extendBndr :: ScEnv -> Var -> (ScEnv, Var)
+extendBndr  env bndr  = (env { sc_subst = subst' }, bndr')
+                     where
+                       (subst', bndr') = substBndr (sc_subst env) bndr
+
+extendValEnv :: ScEnv -> Id -> Maybe Value -> ScEnv
+extendValEnv env _  Nothing   = env
+extendValEnv env id (Just cv) = env { sc_vals = extendVarEnv (sc_vals env) id cv }
+
+extendCaseBndrs :: ScEnv -> OutExpr -> OutId -> AltCon -> [Var] -> (ScEnv, [Var])
+-- When we encounter
+--     case scrut of b
+--         C x y -> ...
+-- we want to bind b (and scrut too, when it is a variable) to (C x y)
+-- NB1: Extends only the sc_vals part of the envt
+-- NB2: Kill the dead-ness info on the pattern binders x,y, since
+--      they are potentially made alive by the [b -> C x y] binding
+extendCaseBndrs env scrut case_bndr con alt_bndrs
+   = (env2, alt_bndrs')
+ where
+   live_case_bndr = not (isDeadBinder case_bndr)
+   env1 | Var v <- scrut = extendValEnv env v cval
+        | otherwise      = env -- See Note [Add scrutinee to ValueEnv too]
+   env2 | live_case_bndr = extendValEnv env1 case_bndr cval
+        | otherwise      = env1
+
+   alt_bndrs' | case scrut of { Var {} -> True; _ -> live_case_bndr }
+              = map zap alt_bndrs
+              | otherwise
+              = alt_bndrs
+
+   cval = case con of
+               DEFAULT    -> Nothing
+               LitAlt {}  -> Just (ConVal con [])
+               DataAlt {} -> Just (ConVal con vanilla_args)
+                     where
+                       vanilla_args = map Type (tyConAppArgs (idType case_bndr)) ++
+                                      varsToCoreExprs alt_bndrs
+
+   zap v | isTyVar v = v               -- See NB2 above
+         | otherwise = zapIdOccInfo v
+
+
+decreaseSpecCount :: ScEnv -> Int -> ScEnv
+-- See Note [Avoiding exponential blowup]
+decreaseSpecCount env n_specs 
+  = env { sc_count = case sc_count env of
+                       Nothing -> Nothing
+                       Just n  -> Just (n `div` (n_specs + 1)) }
+       -- The "+1" takes account of the original function; 
+       -- See Note [Avoiding exponential blowup]
+
+---------------------------------------------------
+-- See Note [SpecConstrAnnotation]
+ignoreType    :: ScEnv -> Type   -> Bool
+ignoreDataCon  :: ScEnv -> DataCon -> Bool
+forceSpecBndr :: ScEnv -> Var    -> Bool
+#ifndef GHCI
+ignoreType    _ _ = False
+ignoreDataCon  _ _ = False
+forceSpecBndr _ _ = False
+
+#else /* GHCI */
+
+ignoreDataCon env dc = ignoreTyCon env (dataConTyCon dc)
+
+ignoreType env ty
+  = case splitTyConApp_maybe ty of
+      Just (tycon, _) -> ignoreTyCon env tycon
+      _               -> False
+
+ignoreTyCon :: ScEnv -> TyCon -> Bool
+ignoreTyCon env tycon
+  = lookupUFM (sc_annotations env) tycon == Just NoSpecConstr
+
+forceSpecBndr env var = forceSpecFunTy env . snd . splitForAllTys . varType $ var
+
+forceSpecFunTy :: ScEnv -> Type -> Bool
+forceSpecFunTy env = any (forceSpecArgTy env) . fst . splitFunTys
+
+forceSpecArgTy :: ScEnv -> Type -> Bool
+forceSpecArgTy env ty
+  | Just ty' <- coreView ty = forceSpecArgTy env ty'
+
+forceSpecArgTy env ty
+  | Just (tycon, tys) <- splitTyConApp_maybe ty
+  , tycon /= funTyCon
+      = lookupUFM (sc_annotations env) tycon == Just ForceSpecConstr
+        || any (forceSpecArgTy env) tys
+
+forceSpecArgTy _ _ = False
+#endif /* GHCI */
  \end{code}
  
  \end{code}
  
+Note [Add scrutinee to ValueEnv too]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this:
+   case x of y
+     (a,b) -> case b of c
+                I# v -> ...(f y)...
+By the time we get to the call (f y), the ValueEnv
+will have a binding for y, and for c
+    y -> (a,b)
+    c -> I# v
+BUT that's not enough!  Looking at the call (f y) we
+see that y is the pair (a,b), but we also need to know what 'b' is.
+So in extendCaseBndrs we must *also* add the binding 
+   b -> I# v
+else we lose a useful specialisation for f.  This is necessary even
+though the simplifier has systematically replaced uses of 'x' with 'y'
+and 'b' with 'c' in the code.  The use of 'b' in the ValueEnv came
+from outside the case.  See Trac #4908 for the live example.
+
+Note [Avoiding exponential blowup]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The sc_count field of the ScEnv says how many times we are prepared to
+duplicate a single function.  But we must take care with recursive
+specialisations.  Consider
+
+       let $j1 = let $j2 = let $j3 = ...
+                            in 
+                            ...$j3...
+                  in 
+                  ...$j2...
+        in 
+        ...$j1...
+
+If we specialise $j1 then in each specialisation (as well as the original)
+we can specialise $j2, and similarly $j3.  Even if we make just *one*
+specialisation of each, because we also have the original we'll get 2^n
+copies of $j3, which is not good.
+
+So when recursively specialising we divide the sc_count by the number of
+copies we are making at this level, including the original.
+
  
  %************************************************************************
  %*                                                                     *
  
  %************************************************************************
  %*                                                                     *
@@ -508,48 +887,51 @@ extendRecBndr env fn bndrs
  \begin{code}
  data ScUsage
     = SCU {
  \begin{code}
  data ScUsage
     = SCU {
-       calls :: !(IdEnv ([Call])),     -- Calls
+       scu_calls :: CallEnv,           -- Calls
                                         -- The functions are a subset of the 
                                         --      RecFuns in the ScEnv
  
                                         -- The functions are a subset of the 
                                         --      RecFuns in the ScEnv
  
-       occs :: !(IdEnv ArgOcc)         -- Information on argument occurrences
-     }                                 -- The variables are a subset of the 
-                                       --      RecArg in the ScEnv
+       scu_occs :: !(IdEnv ArgOcc)     -- Information on argument occurrences
+     }                                 -- The domain is OutIds
  
  
-type Call = (ConstrEnv, [CoreArg])
+type CallEnv = IdEnv [Call]
+type Call = (ValueEnv, [CoreArg])
         -- The arguments of the call, together with the
         -- env giving the constructor bindings at the call site
  
         -- The arguments of the call, together with the
         -- env giving the constructor bindings at the call site
  
-nullUsage = SCU { calls = emptyVarEnv, occs = emptyVarEnv }
+nullUsage :: ScUsage
+nullUsage = SCU { scu_calls = emptyVarEnv, scu_occs = emptyVarEnv }
  
  
-combineUsage u1 u2 = SCU { calls = plusVarEnv_C (++) (calls u1) (calls u2),
-                          occs  = plusVarEnv_C combineOcc (occs u1) (occs u2) }
+combineCalls :: CallEnv -> CallEnv -> CallEnv
+combineCalls = plusVarEnv_C (++)
  
  
+combineUsage :: ScUsage -> ScUsage -> ScUsage
+combineUsage u1 u2 = SCU { scu_calls = combineCalls (scu_calls u1) (scu_calls u2),
+                          scu_occs  = plusVarEnv_C combineOcc (scu_occs u1) (scu_occs u2) }
+
+combineUsages :: [ScUsage] -> ScUsage
  combineUsages [] = nullUsage
  combineUsages us = foldr1 combineUsage us
  
  combineUsages [] = nullUsage
  combineUsages us = foldr1 combineUsage us
  
-lookupOcc :: ScUsage -> Var -> (ScUsage, ArgOcc)
-lookupOcc (SCU { calls = sc_calls, occs = sc_occs }) bndr
-  = (SCU {calls = sc_calls, occs = delVarEnv sc_occs bndr},
-     lookupVarEnv sc_occs bndr `orElse` NoOcc)
-
-lookupOccs :: ScUsage -> [Var] -> (ScUsage, [ArgOcc])
-lookupOccs (SCU { calls = sc_calls, occs = sc_occs }) bndrs
-  = (SCU {calls = sc_calls, occs = delVarEnvList sc_occs bndrs},
+lookupOccs :: ScUsage -> [OutVar] -> (ScUsage, [ArgOcc])
+lookupOccs (SCU { scu_calls = sc_calls, scu_occs = sc_occs }) bndrs
+  = (SCU {scu_calls = sc_calls, scu_occs = delVarEnvList sc_occs bndrs},
       [lookupVarEnv sc_occs b `orElse` NoOcc | b <- bndrs])
  
  data ArgOcc = NoOcc    -- Doesn't occur at all; or a type argument
             | UnkOcc    -- Used in some unknown way
  
       [lookupVarEnv sc_occs b `orElse` NoOcc | b <- bndrs])
  
  data ArgOcc = NoOcc    -- Doesn't occur at all; or a type argument
             | UnkOcc    -- Used in some unknown way
  
-           | ScrutOcc (UniqFM [ArgOcc])        -- See Note [ScrutOcc]
+           | ScrutOcc  -- See Note [ScrutOcc]
+                 (DataConEnv [ArgOcc])   -- How the sub-components are used
  
  
-           | BothOcc   -- Definitely taken apart, *and* perhaps used in some other way
+type DataConEnv a = UniqFM a    -- Keyed by DataCon
  
  
-{-     Note  [ScrutOcc]
+{- Note  [ScrutOcc]
+~~~~~~~~~~~~~~~~~~~
+An occurrence of ScrutOcc indicates that the thing, or a `cast` version of the thing,
+is *only* taken apart or applied.
  
  
-An occurrence of ScrutOcc indicates that the thing is *only* taken apart or applied.
-
-  Functions, litersl: ScrutOcc emptyUFM
+  Functions, literal: ScrutOcc emptyUFM
    Data constructors:  ScrutOcc subs,
  
  where (subs :: UniqFM [ArgOcc]) gives usage of the *pattern-bound* components,
    Data constructors:  ScrutOcc subs,
  
  where (subs :: UniqFM [ArgOcc]) gives usage of the *pattern-bound* components,
@@ -563,32 +945,40 @@ A pattern binds b, x::a, y::b, z::b->a, but not 'a'!
  -}
  
  instance Outputable ArgOcc where
  -}
  
  instance Outputable ArgOcc where
-  ppr (ScrutOcc xs) = ptext SLIT("scrut-occ") <> parens (ppr xs)
-  ppr UnkOcc       = ptext SLIT("unk-occ")
-  ppr BothOcc      = ptext SLIT("both-occ")
-  ppr NoOcc                = ptext SLIT("no-occ")
-
+  ppr (ScrutOcc xs) = ptext (sLit "scrut-occ") <> ppr xs
+  ppr UnkOcc       = ptext (sLit "unk-occ")
+  ppr NoOcc                = ptext (sLit "no-occ")
+
+evalScrutOcc :: ArgOcc
+evalScrutOcc = ScrutOcc emptyUFM
+
+-- Experimentally, this version of combineOcc makes ScrutOcc "win", so
+-- that if the thing is scrutinised anywhere then we get to see that
+-- in the overall result, even if it's also used in a boxed way.
+-- This might be too aggressive; see Note [Reboxing] Alternative 3
+combineOcc :: ArgOcc -> ArgOcc -> ArgOcc
  combineOcc NoOcc        occ           = occ
  combineOcc occ                  NoOcc         = occ
  combineOcc (ScrutOcc xs) (ScrutOcc ys) = ScrutOcc (plusUFM_C combineOccs xs ys)
  combineOcc NoOcc        occ           = occ
  combineOcc occ                  NoOcc         = occ
  combineOcc (ScrutOcc xs) (ScrutOcc ys) = ScrutOcc (plusUFM_C combineOccs xs ys)
+combineOcc UnkOcc        (ScrutOcc ys) = ScrutOcc ys
+combineOcc (ScrutOcc xs) UnkOcc               = ScrutOcc xs
  combineOcc UnkOcc        UnkOcc        = UnkOcc
  combineOcc UnkOcc        UnkOcc        = UnkOcc
-combineOcc _       _                  = BothOcc
  
  combineOccs :: [ArgOcc] -> [ArgOcc] -> [ArgOcc]
  combineOccs xs ys = zipWithEqual "combineOccs" combineOcc xs ys
  
  
  combineOccs :: [ArgOcc] -> [ArgOcc] -> [ArgOcc]
  combineOccs xs ys = zipWithEqual "combineOccs" combineOcc xs ys
  
-conArgOccs :: ArgOcc -> AltCon -> [ArgOcc]
--- Find usage of components of data con; returns [UnkOcc...] if unknown
--- See Note [ScrutOcc] for the extra UnkOccs in the vanilla datacon case
-
-conArgOccs (ScrutOcc fm) (DataAlt dc) 
-  | Just pat_arg_occs <- lookupUFM fm dc
-  = [UnkOcc | tv <- dataConUnivTyVars dc] ++ pat_arg_occs
-
-conArgOccs other con = repeat UnkOcc
+setScrutOcc :: ScEnv -> ScUsage -> OutExpr -> ArgOcc -> ScUsage
+-- _Overwrite_ the occurrence info for the scrutinee, if the scrutinee
+-- is a variable, and an interesting variable
+setScrutOcc env usg (Cast e _) occ = setScrutOcc env usg e occ
+setScrutOcc env usg (Note _ e) occ = setScrutOcc env usg e occ
+setScrutOcc env usg (Var v)    occ
+  | Just RecArg <- lookupHowBound env v = usg { scu_occs = extendVarEnv (scu_occs usg) v occ }
+  | otherwise                          = usg
+setScrutOcc _env usg _other _occ       -- Catch-all
+  = usg        
  \end{code}
  
  \end{code}
  
-
  %************************************************************************
  %*                                                                     *
  \subsection{The main recursive function}
  %************************************************************************
  %*                                                                     *
  \subsection{The main recursive function}
@@ -599,175 +989,367 @@ The main recursive function gathers up usage information, and
  creates specialised versions of functions.
  
  \begin{code}
  creates specialised versions of functions.
  
  \begin{code}
-scExpr :: ScEnv -> CoreExpr -> UniqSM (ScUsage, CoreExpr)
+scExpr, scExpr' :: ScEnv -> CoreExpr -> UniqSM (ScUsage, CoreExpr)
         -- The unique supply is needed when we invent
         -- a new name for the specialised function and its args
  
         -- The unique supply is needed when we invent
         -- a new name for the specialised function and its args
  
-scExpr env e@(Type t) = returnUs (nullUsage, e)
-scExpr env e@(Lit l)  = returnUs (nullUsage, e)
-scExpr env e@(Var v)  = returnUs (varUsage env v UnkOcc, e)
-scExpr env (Note n e) = scExpr env e   `thenUs` \ (usg,e') ->
-                       returnUs (usg, Note n e')
-scExpr env (Cast e co)= scExpr env e   `thenUs` \ (usg,e') ->
-                        returnUs (usg, Cast e' co)
-scExpr env (Lam b e)  = scExpr (extendBndr env b) e    `thenUs` \ (usg,e') ->
-                       returnUs (usg, Lam b e')
-
-scExpr env (Case scrut b ty alts) 
-  = do { (alt_usgs, alt_occs, alts') <- mapAndUnzip3Us sc_alt alts
-       ; let (alt_usg, b_occ) = lookupOcc (combineUsages alt_usgs) b
-             scrut_occ = foldr combineOcc b_occ alt_occs
-               -- The combined usage of the scrutinee is given
-               -- by scrut_occ, which is passed to scScrut, which
-               -- in turn treats a bare-variable scrutinee specially
-       ; (scrut_usg, scrut') <- scScrut env scrut scrut_occ
-       ; return (alt_usg `combineUsage` scrut_usg,
-                 Case scrut' b ty alts') }
+scExpr env e = scExpr' env e
+
+
+scExpr' env (Var v)     = case scSubstId env v of
+                           Var v' -> return (mkVarUsage env v' [], Var v')
+                           e'     -> scExpr (zapScSubst env) e'
+
+scExpr' env (Type t)    = return (nullUsage, Type (scSubstTy env t))
+scExpr' env (Coercion c) = return (nullUsage, Coercion (scSubstCo env c))
+scExpr' _   e@(Lit {})  = return (nullUsage, e)
+scExpr' env (Note n e)  = do (usg,e') <- scExpr env e
+                             return (usg, Note n e')
+scExpr' env (Cast e co) = do (usg, e') <- scExpr env e
+                             return (usg, Cast e' (scSubstCo env co))
+scExpr' env e@(App _ _) = scApp env (collectArgs e)
+scExpr' env (Lam b e)   = do let (env', b') = extendBndr env b
+                             (usg, e') <- scExpr env' e
+                             return (usg, Lam b' e')
+
+scExpr' env (Case scrut b ty alts) 
+  = do { (scrut_usg, scrut') <- scExpr env scrut
+       ; case isValue (sc_vals env) scrut' of
+               Just (ConVal con args) -> sc_con_app con args scrut'
+               _other                 -> sc_vanilla scrut_usg scrut'
+       }
    where
    where
-    sc_alt (con,bs,rhs)
-      = do { let env1 = extendCaseBndrs env b scrut con bs
-          ; (usg,rhs') <- scExpr env1 rhs
-          ; let (usg', arg_occs) = lookupOccs usg bs
+    sc_con_app con args scrut'         -- Known constructor; simplify
+       = do { let (_, bs, rhs) = findAlt con alts
+                                 `orElse` (DEFAULT, [], mkImpossibleExpr (coreAltsType alts))
+                  alt_env'  = extendScSubstList env ((b,scrut') : bs `zip` trimConArgs con args)
+            ; scExpr alt_env' rhs }
+                               
+    sc_vanilla scrut_usg scrut'        -- Normal case
+     = do { let (alt_env,b') = extendBndrWith RecArg env b
+                       -- Record RecArg for the components
+
+         ; (alt_usgs, alt_occs, alts')
+               <- mapAndUnzip3M (sc_alt alt_env scrut' b') alts
+
+         ; let scrut_occ  = foldr1 combineOcc alt_occs -- Never empty
+               scrut_usg' = setScrutOcc env scrut_usg scrut' scrut_occ
+               -- The combined usage of the scrutinee is given
+               -- by scrut_occ, which is passed to setScrutOcc, which
+               -- in turn treats a bare-variable scrutinee specially
+
+         ; return (foldr combineUsage scrut_usg' alt_usgs,
+                   Case scrut' b' (scSubstTy env ty) alts') }
+
+    sc_alt env scrut' b' (con,bs,rhs)
+      = do { let (env1, bs1) = extendBndrsWith RecArg env bs
+                (env2, bs2) = extendCaseBndrs env1 scrut' b' con bs1
+          ; (usg, rhs') <- scExpr env2 rhs
+          ; let (usg', b_occ:arg_occs) = lookupOccs usg (b':bs2)
                  scrut_occ = case con of
                                 DataAlt dc -> ScrutOcc (unitUFM dc arg_occs)
                  scrut_occ = case con of
                                 DataAlt dc -> ScrutOcc (unitUFM dc arg_occs)
-                               other      -> ScrutOcc emptyUFM
-          ; return (usg', scrut_occ, (con,bs,rhs')) }
-
-scExpr env (Let bind body)
-  = scBind env bind    `thenUs` \ (env', bind_usg, bind') ->
-    scExpr env' body   `thenUs` \ (body_usg, body') ->
-    returnUs (bind_usg `combineUsage` body_usg, Let bind' body')
-
-scExpr env e@(App _ _) 
-  = do { let (fn, args) = collectArgs e
-       ; (fn_usg, fn') <- scScrut env fn (ScrutOcc emptyUFM)
-       -- Process the function too.   It's almost always a variable,
-       -- but not always.  In particular, if this pass follows float-in,
-       -- which it may, we can get 
-       --      (let f = ...f... in f) arg1 arg2
-       -- We use scScrut to record the fact that the function is called
-       -- Perhpas we should check that it has at least one value arg, 
-       -- but currently we don't bother
-
-       ; (arg_usgs, args') <- mapAndUnzipUs (scExpr env) args
-       ; let call_usg = case fn of
-                          Var f | Just RecFun <- lookupScopeEnv env f
-                                -> SCU { calls = unitVarEnv f [(cons env, args)], 
-                                         occs  = emptyVarEnv }
-                          other -> nullUsage
-       ; return (combineUsages arg_usgs `combineUsage` fn_usg 
-                                        `combineUsage` call_usg,
-                 mkApps fn' args') }
+                               _          -> ScrutOcc emptyUFM
+          ; return (usg', b_occ `combineOcc` scrut_occ, (con, bs2, rhs')) }
+
+scExpr' env (Let (NonRec bndr rhs) body)
+  | isTyVar bndr       -- Type-lets may be created by doBeta
+  = scExpr' (extendScSubst env bndr rhs) body
+
+  | otherwise  
+  = do { let (body_env, bndr') = extendBndr env bndr
+       ; (rhs_usg, rhs_info) <- scRecRhs env (bndr',rhs)
+
+       ; let body_env2 = extendHowBound body_env [bndr'] RecFun
+                                  -- Note [Local let bindings]
+             RI _ rhs' _ _ _ = rhs_info
+              body_env3 = extendValEnv body_env2 bndr' (isValue (sc_vals env) rhs')
+
+       ; (body_usg, body') <- scExpr body_env3 body
+
+          -- NB: For non-recursive bindings we inherit sc_force flag from
+          -- the parent function (see Note [Forcing specialisation])
+       ; (spec_usg, specs) <- specialise env
+                                          (scu_calls body_usg) 
+                                         rhs_info
+                                          (SI [] 0 (Just rhs_usg))
+
+       ; return (body_usg { scu_calls = scu_calls body_usg `delVarEnv` bndr' } 
+                   `combineUsage` rhs_usg `combineUsage` spec_usg,
+                 mkLets [NonRec b r | (b,r) <- specInfoBinds rhs_info specs] body')
+       }
+
+
+-- A *local* recursive group: see Note [Local recursive groups]
+scExpr' env (Let (Rec prs) body)
+  = do { let (bndrs,rhss) = unzip prs
+             (rhs_env1,bndrs') = extendRecBndrs env bndrs
+             rhs_env2 = extendHowBound rhs_env1 bndrs' RecFun
+              force_spec = any (forceSpecBndr env) bndrs'
+                -- Note [Forcing specialisation]
+
+       ; (rhs_usgs, rhs_infos) <- mapAndUnzipM (scRecRhs rhs_env2) (bndrs' `zip` rhss)
+       ; (body_usg, body')     <- scExpr rhs_env2 body
+
+       -- NB: start specLoop from body_usg
+       ; (spec_usg, specs) <- specLoop (scForce rhs_env2 force_spec)
+                                        (scu_calls body_usg) rhs_infos nullUsage
+                                       [SI [] 0 (Just usg) | usg <- rhs_usgs]
+               -- Do not unconditionally generate specialisations from rhs_usgs  
+               -- Instead use them only if we find an unspecialised call
+               -- See Note [Local recursive groups]
+
+       ; let rhs_usg = combineUsages rhs_usgs
+             all_usg = spec_usg `combineUsage` rhs_usg `combineUsage` body_usg 
+              bind'   = Rec (concat (zipWith specInfoBinds rhs_infos specs))
+
+       ; return (all_usg { scu_calls = scu_calls all_usg `delVarEnvList` bndrs' },
+                 Let bind' body') }
+\end{code}
  
  
+Note [Local let bindings]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+It is not uncommon to find this
  
  
-----------------------
-scScrut :: ScEnv -> CoreExpr -> ArgOcc -> UniqSM (ScUsage, CoreExpr)
--- Used for the scrutinee of a case, 
--- or the function of an application
-scScrut env e@(Var v) occ = returnUs (varUsage env v occ, e)
-scScrut env e        occ = scExpr env e
+   let $j = \x. <blah> in ...$j True...$j True...
+
+Here $j is an arbitrary let-bound function, but it often comes up for
+join points.  We might like to specialise $j for its call patterns.
+Notice the difference from a letrec, where we look for call patterns
+in the *RHS* of the function.  Here we look for call patterns in the
+*body* of the let.
+
+At one point I predicated this on the RHS mentioning the outer
+recursive function, but that's not essential and might even be
+harmful.  I'm not sure.
  
  
  
  
+\begin{code}
+scApp :: ScEnv -> (InExpr, [InExpr]) -> UniqSM (ScUsage, CoreExpr)
+
+scApp env (Var fn, args)       -- Function is a variable
+  = ASSERT( not (null args) )
+    do { args_w_usgs <- mapM (scExpr env) args
+       ; let (arg_usgs, args') = unzip args_w_usgs
+             arg_usg = combineUsages arg_usgs
+       ; case scSubstId env fn of
+           fn'@(Lam {}) -> scExpr (zapScSubst env) (doBeta fn' args')
+                       -- Do beta-reduction and try again
+
+           Var fn' -> return (arg_usg `combineUsage` mkVarUsage env fn' args',
+                               mkApps (Var fn') args')
+
+           other_fn' -> return (arg_usg, mkApps other_fn' args') }
+               -- NB: doing this ignores any usage info from the substituted
+               --     function, but I don't think that matters.  If it does
+               --     we can fix it.
+  where
+    doBeta :: OutExpr -> [OutExpr] -> OutExpr
+    -- ToDo: adjust for System IF
+    doBeta (Lam bndr body) (arg : args) = Let (NonRec bndr arg) (doBeta body args)
+    doBeta fn             args         = mkApps fn args
+
+-- The function is almost always a variable, but not always.  
+-- In particular, if this pass follows float-in,
+-- which it may, we can get 
+--     (let f = ...f... in f) arg1 arg2
+scApp env (other_fn, args)
+  = do         { (fn_usg,   fn')   <- scExpr env other_fn
+       ; (arg_usgs, args') <- mapAndUnzipM (scExpr env) args
+       ; return (combineUsages arg_usgs `combineUsage` fn_usg, mkApps fn' args') }
+
  ----------------------
  ----------------------
-scBind :: ScEnv -> CoreBind -> UniqSM (ScEnv, ScUsage, CoreBind)
-scBind env (Rec [(fn,rhs)])
-  | notNull val_bndrs
-  = scExpr env_fn_body body            `thenUs` \ (usg, body') ->
-    specialise env fn bndrs body' usg  `thenUs` \ (rules, spec_prs) ->
-       -- Note body': the specialised copies should be based on the 
-       --             optimised version of the body, in case there were
-       --             nested functions inside.
-    let
-       SCU { calls = calls, occs = occs } = usg
-    in
-    returnUs (extendBndr env fn,       -- For the body of the letrec, just
-                                       -- extend the env with Other to record 
-                                       -- that it's in scope; no funny RecFun business
-             SCU { calls = calls `delVarEnv` fn, occs = occs `delVarEnvList` val_bndrs},
-             Rec ((fn `addIdSpecialisations` rules, mkLams bndrs body') : spec_prs))
+mkVarUsage :: ScEnv -> Id -> [CoreExpr] -> ScUsage
+mkVarUsage env fn args
+  = case lookupHowBound env fn of
+       Just RecFun -> SCU { scu_calls = unitVarEnv fn [(sc_vals env, args)]
+                          , scu_occs  = emptyVarEnv }
+       Just RecArg -> SCU { scu_calls = emptyVarEnv
+                          , scu_occs  = unitVarEnv fn arg_occ }
+        Nothing     -> nullUsage
    where
    where
-    (bndrs,body) = collectBinders rhs
-    val_bndrs    = filter isId bndrs
-    env_fn_body         = extendRecBndr env fn bndrs
+    -- I rather think we could use UnkOcc all the time
+    arg_occ | null args = UnkOcc
+            | otherwise = evalScrutOcc
  
  
-scBind env (Rec prs)
-  = mapAndUnzipUs do_one prs   `thenUs` \ (usgs, prs') ->
-    returnUs (extendBndrs env (map fst prs), combineUsages usgs, Rec prs')
+----------------------
+scTopBind :: ScEnv -> CoreBind -> UniqSM (ScEnv, CoreBind)
+scTopBind env (Rec prs)
+  | Just threshold <- sc_size env
+  , not force_spec
+  , not (all (couldBeSmallEnoughToInline threshold) rhss)
+               -- No specialisation
+  = do { let (rhs_env,bndrs') = extendRecBndrs env bndrs
+       ; (_, rhss') <- mapAndUnzipM (scExpr rhs_env) rhss
+       ; return (rhs_env, Rec (bndrs' `zip` rhss')) }
+  | otherwise  -- Do specialisation
+  = do { let (rhs_env1,bndrs') = extendRecBndrs env bndrs
+             rhs_env2 = extendHowBound rhs_env1 bndrs' RecFun
+
+       ; (rhs_usgs, rhs_infos) <- mapAndUnzipM (scRecRhs rhs_env2) (bndrs' `zip` rhss)
+       ; let rhs_usg = combineUsages rhs_usgs
+
+       ; (_, specs) <- specLoop (scForce rhs_env2 force_spec)
+                                 (scu_calls rhs_usg) rhs_infos nullUsage
+                                [SI [] 0 Nothing | _ <- bndrs]
+
+       ; return (rhs_env1,  -- For the body of the letrec, delete the RecFun business
+                 Rec (concat (zipWith specInfoBinds rhs_infos specs))) }
    where
    where
-    do_one (bndr,rhs) = scExpr env rhs `thenUs` \ (usg, rhs') ->
-                       returnUs (usg, (bndr,rhs'))
+    (bndrs,rhss) = unzip prs
+    force_spec = any (forceSpecBndr env) bndrs
+      -- Note [Forcing specialisation]
+
+scTopBind env (NonRec bndr rhs)
+  = do { (_, rhs') <- scExpr env rhs
+       ; let (env1, bndr') = extendBndr env bndr
+             env2 = extendValEnv env1 bndr' (isValue (sc_vals env) rhs')
+       ; return (env2, NonRec bndr' rhs') }
  
  
-scBind env (NonRec bndr rhs)
-  = scExpr env rhs     `thenUs` \ (usg, rhs') ->
-    returnUs (extendBndr env bndr, usg, NonRec bndr rhs')
+----------------------
+scRecRhs :: ScEnv -> (OutId, InExpr) -> UniqSM (ScUsage, RhsInfo)
+scRecRhs env (bndr,rhs)
+  = do { let (arg_bndrs,body) = collectBinders rhs
+             (body_env, arg_bndrs') = extendBndrsWith RecArg env arg_bndrs
+       ; (body_usg, body') <- scExpr body_env body
+       ; let (rhs_usg, arg_occs) = lookupOccs body_usg arg_bndrs'
+       ; return (rhs_usg, RI bndr (mkLams arg_bndrs' body')
+                                   arg_bndrs body arg_occs) }
+               -- The arg_occs says how the visible,
+               -- lambda-bound binders of the RHS are used
+               -- (including the TyVar binders)
+               -- Two pats are the same if they match both ways
  
  ----------------------
  
  ----------------------
-varUsage env v use 
-  | Just RecArg <- lookupScopeEnv env v = SCU { calls = emptyVarEnv, 
-                                               occs = unitVarEnv v use }
-  | otherwise                          = nullUsage
+specInfoBinds :: RhsInfo -> SpecInfo -> [(Id,CoreExpr)]
+specInfoBinds (RI fn new_rhs _ _ _) (SI specs _ _)
+  = [(id,rhs) | OS _ _ id rhs <- specs] ++ 
+             -- First the specialised bindings
+
+    [(fn `addIdSpecialisations` rules, new_rhs)]
+             -- And now the original binding
+  where
+    rules = [r | OS _ r _ _ <- specs]
  \end{code}
  
  
  %************************************************************************
  %*                                                                     *
  \end{code}
  
  
  %************************************************************************
  %*                                                                     *
-\subsection{The specialiser}
+               The specialiser itself
  %*                                                                     *
  %************************************************************************
  
  \begin{code}
  %*                                                                     *
  %************************************************************************
  
  \begin{code}
-specialise :: ScEnv
-          -> Id                        -- Functionn
-          -> [CoreBndr] -> CoreExpr    -- Its RHS
-          -> ScUsage                   -- Info on usage
-          -> UniqSM ([CoreRule],       -- Rules
-                     [(Id,CoreExpr)])  -- Bindings
-
-specialise env fn bndrs body body_usg
-  = do { let (_, bndr_occs) = lookupOccs body_usg bndrs
-
-       ; mb_calls <- mapM (callToPats (scope env) bndr_occs)
-                          (lookupVarEnv (calls body_usg) fn `orElse` [])
-
-       ; let good_calls :: [([Var], [CoreArg])]
-             good_calls = catMaybes mb_calls
-             in_scope = mkInScopeSet $ unionVarSets $
-                        [ exprsFreeVars pats `delVarSetList` vs 
-                        | (vs,pats) <- good_calls ]
-             uniq_calls = nubBy (same_call in_scope) good_calls
-       ; mapAndUnzipUs (spec_one env fn (mkLams bndrs body)) 
-                       (uniq_calls `zip` [1..]) }
-  where
-       -- Two calls are the same if they match both ways
-    same_call in_scope (vs1,as1)(vs2,as2)
-        =  isJust (matchN in_scope vs1 as1 as2)
-        && isJust (matchN in_scope vs2 as2 as1)
-
-callToPats :: InScopeEnv -> [ArgOcc] -> Call
-          -> UniqSM (Maybe ([Var], [CoreExpr]))
-       -- The VarSet is the variables to quantify over in the rule
-       -- The [CoreExpr] are the argument patterns for the rule
-callToPats in_scope bndr_occs (con_env, args)
-  | length args < length bndr_occs     -- Check saturated
-  = return Nothing
+data RhsInfo = RI OutId                -- The binder
+                  OutExpr              -- The new RHS
+                 [InVar] InExpr        -- The *original* RHS (\xs.body)
+                                       --   Note [Specialise original body]
+                  [ArgOcc]             -- Info on how the xs occur in body
+
+data SpecInfo = SI [OneSpec]           -- The specialisations we have generated
+
+                  Int                  -- Length of specs; used for numbering them
+
+                  (Maybe ScUsage)      -- Just cs  => we have not yet used the calls
+                                       --             in the *original* RHS as
+                                       --             seeds for new specialisations;
+                                       --             if you decide to do so, here is the
+                                       --             RHS usage (which has not yet been
+                                       --             unleashed)
+                                       -- Nothing => we have already used them
+                                       -- See Note [Local recursive groups]
+
+       -- One specialisation: Rule plus definition
+data OneSpec  = OS CallPat             -- Call pattern that generated this specialisation
+                  CoreRule             -- Rule connecting original id with the specialisation
+                  OutId OutExpr        -- Spec id + its rhs
+
+
+specLoop :: ScEnv
+        -> CallEnv                             -- Calls to specialise on in this round
+        -> [RhsInfo]
+        -> ScUsage -> [SpecInfo]               -- One per binder; accumulating parameter
+        -> UniqSM (ScUsage, [SpecInfo])        -- ...ditto...
+
+specLoop env all_calls rhs_infos usg_so_far specs_so_far
+  = do { specs_w_usg <- zipWithM (specialise env all_calls) rhs_infos specs_so_far
+       ; let (new_usg_s, all_specs) = unzip specs_w_usg
+             new_usg   = combineUsages new_usg_s
+             new_calls = scu_calls new_usg
+             all_usg   = usg_so_far `combineUsage` new_usg
+       ; if isEmptyVarEnv new_calls then        -- This round produced no new calls: stop
+               return (all_usg, all_specs) 
+         else                                   -- Go round again, seeded by the new calls
+               specLoop env new_calls rhs_infos all_usg all_specs }
+
+specialise 
+   :: ScEnv
+   -> CallEnv                          -- Info on calls
+   -> RhsInfo
+   -> SpecInfo                         -- Original RHS plus patterns dealt with
+   -> UniqSM (ScUsage, SpecInfo)       -- New specialised versions and their usage
+
+-- Note: this only generates *specialised* bindings
+-- The original binding is added by specInfoBinds
+--
+-- Note: the RHS specialised here is the *original* one (arg_bndrs, body),
+-- not the optimised RHS that is also stored in the RhsInfo (and ignored
+-- here); see Note [Specialise original body].
+
+specialise env bind_calls (RI fn _ arg_bndrs body arg_occs) 
+               spec_info@(SI specs spec_count mb_unspec)
+  | not (isBottomingId fn)      -- Note [Do not specialise diverging functions]
+  , not (isNeverActive (idInlineActivation fn))        -- See Note [Transfer activation]
+  , notNull arg_bndrs          -- Only specialise functions
+  , Just all_calls <- lookupVarEnv bind_calls fn
+  = do { (boring_call, pats) <- callsToPats env specs arg_occs all_calls
+--     ; pprTrace "specialise" (vcat [ ppr fn <+> text "with" <+> int (length pats) <+> text "good patterns"
+--                                      , text "arg_occs" <+> ppr arg_occs
+--                                   , text "calls" <+> ppr all_calls
+--                                   , text "good pats" <+> ppr pats])  $
+--       return ()
+
+               -- Bale out if too many specialisations
+       ; let n_pats      = length pats
+              spec_count' = n_pats + spec_count
+       ; case sc_count env of
+           Just max | not (sc_force env) && spec_count' > max
+               -> pprTrace "SpecConstr" msg $  
+                   return (nullUsage, spec_info)
+               where
+                  msg = vcat [ sep [ ptext (sLit "Function") <+> quotes (ppr fn)
+                                   , nest 2 (ptext (sLit "has") <+> 
+                                              speakNOf spec_count' (ptext (sLit "call pattern")) <> comma <+>
+                                              ptext (sLit "but the limit is") <+> int max) ]
+                             , ptext (sLit "Use -fspec-constr-count=n to set the bound")
+                             , extra ]
+                  extra | not opt_PprStyle_Debug = ptext (sLit "Use -dppr-debug to see specialisations")
+                        | otherwise = ptext (sLit "Specialisations:") <+> ppr (pats ++ [p | OS p _ _ _ <- specs])
+
+           _normal_case -> do {
+
+          let spec_env = decreaseSpecCount env n_pats
+       ; (spec_usgs, new_specs) <- mapAndUnzipM (spec_one spec_env fn arg_bndrs body)
+                                                (pats `zip` [spec_count..])
+               -- See Note [Specialise original body]
+
+       ; let spec_usg = combineUsages spec_usgs
+             (new_usg, mb_unspec')
+                 = case mb_unspec of
+                     Just rhs_usg | boring_call -> (spec_usg `combineUsage` rhs_usg, Nothing)
+                     _                          -> (spec_usg,                      mb_unspec)
+           
+       ; return (new_usg, SI (new_specs ++ specs) spec_count' mb_unspec') } }
    | otherwise
    | otherwise
-  = do { prs <- argsToPats in_scope con_env (args `zip` bndr_occs)
-       ; let (good_pats, pats) = unzip prs
-             pat_fvs = varSetElems (exprsFreeVars pats)
-             qvars   = filter (not . (`elemVarEnv` in_scope)) pat_fvs
-               -- Quantify over variables that are not in sccpe
-               -- See Note [Shadowing] at the top
-               
-       ; if or good_pats 
-         then return (Just (qvars, pats))
-         else return Nothing }
+  = return (nullUsage, spec_info)              -- The boring case
+
  
  ---------------------
  spec_one :: ScEnv
  
  ---------------------
  spec_one :: ScEnv
-        -> Id                                  -- Function
-        -> CoreExpr                            -- Rhs of the original function
-        -> (([Var], [CoreArg]), Int)
-        -> UniqSM (CoreRule, (Id,CoreExpr))    -- Rule and binding
+        -> OutId       -- Function
+        -> [InVar]     -- Lambda-binders of RHS; should match patterns
+        -> InExpr      -- Body of the original function
+        -> (CallPat, Int)
+        -> UniqSM (ScUsage, OneSpec)   -- Rule and binding
  
  -- spec_one creates a specialised copy of the function, together
  -- with a rule for using it.  I'm very proud of how short this
  
  -- spec_one creates a specialised copy of the function, together
  -- with a rule for using it.  I'm very proud of how short this
@@ -781,7 +1363,8 @@ spec_one :: ScEnv
           [c::*, v::(b,c) are presumably bound by the (...) part]
    ==>
       f_spec = /\ b c \ v::(b,c) hw::[(a,(b,c))] ->
           [c::*, v::(b,c) are presumably bound by the (...) part]
    ==>
       f_spec = /\ b c \ v::(b,c) hw::[(a,(b,c))] ->
-                 (...entire RHS of f...) (b,c) ((:) (a,(b,c)) (x,v) hw)
+                 (...entire body of f...) [b -> (b,c), 
+                                           y -> ((:) (a,(b,c)) (x,v) hw)]
    
       RULE:  forall b::* c::*,          -- Note, *not* forall a, x
                    v::(b,c),
    
       RULE:  forall b::* c::*,          -- Note, *not* forall a, x
                    v::(b,c),
@@ -790,44 +1373,111 @@ spec_one :: ScEnv
             f (b,c) ((:) (a,(b,c)) (x,v) hw) = f_spec b c v hw
  -}
  
             f (b,c) ((:) (a,(b,c)) (x,v) hw) = f_spec b c v hw
  -}
  
-spec_one env fn rhs ((vars_to_bind, pats), rule_number)
-  = getUniqueUs                `thenUs` \ spec_uniq ->
-    let 
-       fn_name      = idName fn
-       fn_loc       = nameSrcLoc fn_name
-       spec_occ     = mkSpecOcc (nameOccName fn_name)
+spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number)
+  = do { spec_uniq <- getUniqueUs
+        ; let spec_env = extendScSubstList (extendScInScope env qvars)
+                                          (arg_bndrs `zip` pats)
+             fn_name    = idName fn
+             fn_loc     = nameSrcSpan fn_name
+             spec_occ   = mkSpecOcc (nameOccName fn_name)
+             rule_name  = mkFastString ("SC:" ++ showSDoc (ppr fn <> int rule_number))
+             spec_name  = mkInternalName spec_uniq spec_occ fn_loc
+--     ; pprTrace "{spec_one" (ppr (sc_count env) <+> ppr fn <+> ppr pats <+> text "-->" <+> ppr spec_name) $ 
+--       return ()
+
+       -- Specialise the body
+       ; (spec_usg, spec_body) <- scExpr spec_env body
+
+--     ; pprTrace "done spec_one}" (ppr fn) $ 
+--       return ()
+
+               -- And build the results
+       ; let spec_id = mkLocalId spec_name (mkPiTypes spec_lam_args body_ty) 
+                            `setIdStrictness` spec_str         -- See Note [Transfer strictness]
+                            `setIdArity` count isId spec_lam_args
+             spec_str   = calcSpecStrictness fn spec_lam_args pats
+             (spec_lam_args, spec_call_args) = mkWorkerArgs qvars body_ty
+               -- Usual w/w hack to avoid generating 
+               -- a spec_rhs of unlifted type and no args
+
+              spec_rhs   = mkLams spec_lam_args spec_body
+             body_ty    = exprType spec_body
+             rule_rhs   = mkVarApps (Var spec_id) spec_call_args
+              inline_act = idInlineActivation fn
+             rule       = mkRule True {- Auto -} True {- Local -}
+                                  rule_name inline_act fn_name qvars pats rule_rhs
+                          -- See Note [Transfer activation]
+       ; return (spec_usg, OS call_pat rule spec_id spec_rhs) }
+
+calcSpecStrictness :: Id                    -- The original function
+                   -> [Var] -> [CoreExpr]    -- Call pattern
+                  -> StrictSig              -- Strictness of specialised thing
+-- See Note [Transfer strictness]
+calcSpecStrictness fn qvars pats
+  = StrictSig (mkTopDmdType spec_dmds TopRes)
+  where
+    spec_dmds = [ lookupVarEnv dmd_env qv `orElse` lazyDmd | qv <- qvars, isId qv ]
+    StrictSig (DmdType _ dmds _) = idStrictness fn
+
+    dmd_env = go emptyVarEnv dmds pats
+
+    go env ds (Type {} : pats) = go env ds pats
+    go env ds (Coercion {} : pats) = go env ds pats
+    go env (d:ds) (pat : pats) = go (go_one env d pat) ds pats
+    go env _      _            = env
+
+    go_one env d   (Var v) = extendVarEnv_C both env v d
+    go_one env (Box d)   e = go_one env d e
+    go_one env (Eval (Prod ds)) e 
+          | (Var _, args) <- collectArgs e = go env ds args
+    go_one env _         _ = env
  
  
-               -- Put the type variables first; the type of a term
-               -- variable may mention a type variable
-       (tvs, ids)   = partition isTyVar vars_to_bind
-       bndrs        = tvs ++ ids
-       spec_body    = mkApps rhs pats
-       body_ty      = exprType spec_body
-       
-       (spec_lam_args, spec_call_args) = mkWorkerArgs bndrs body_ty
-               -- Usual w/w hack to avoid generating 
-               -- a spec_rhs of unlifted type and no args
-       
-       rule_name = mkFastString ("SC:" ++ showSDoc (ppr fn <> int rule_number))
-       spec_rhs  = mkLams spec_lam_args spec_body
-       spec_id   = mkUserLocal spec_occ spec_uniq (mkPiTypes spec_lam_args body_ty) fn_loc
-       rule_rhs  = mkVarApps (Var spec_id) spec_call_args
-       rule      = mkLocalRule rule_name specConstrActivation fn_name bndrs pats rule_rhs
-    in
-    returnUs (rule, (spec_id, spec_rhs))
-
--- In which phase should the specialise-constructor rules be active?
--- Originally I made them always-active, but Manuel found that
--- this defeated some clever user-written rules.  So Plan B
--- is to make them active only in Phase 0; after all, currently,
--- the specConstr transformation is only run after the simplifier
--- has reached Phase 0.  In general one would want it to be 
--- flag-controllable, but for now I'm leaving it baked in
---                                     [SLPJ Oct 01]
-specConstrActivation :: Activation
-specConstrActivation = ActiveAfter 0   -- Baked in; see comments above
  \end{code}
  
  \end{code}
  
+Note [Specialise original body]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The RhsInfo for a binding keeps the *original* body of the binding.  We
+must specialise that, *not* the result of applying specExpr to the RHS
+(which is also kept in RhsInfo). Otherwise we end up specialising a
+specialised RHS, and that can lead directly to exponential behaviour.
+
+Note [Transfer activation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+  This note is for SpecConstr, but exactly the same thing
+  happens in the overloading specialiser; see
+  Note [Auto-specialisation and RULES] in Specialise.
+
+In which phase should the specialise-constructor rules be active?
+Originally I made them always-active, but Manuel found that this
+defeated some clever user-written rules.  Then I made them active only
+in Phase 0; after all, currently, the specConstr transformation is
+only run after the simplifier has reached Phase 0, but that meant
+that specialisations didn't fire inside wrappers; see test
+simplCore/should_compile/spec-inline.
+
+So now I just use the inline-activation of the parent Id, as the
+activation for the specialisation RULE, just like the main specialiser.
+
+This in turn means there is no point in specialising NOINLINE things,
+so we test for that.
+
+Note [Transfer strictness]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+We must transfer strictness information from the original function to
+the specialised one.  Suppose, for example
+
+  f has strictness     SS
+        and a RULE     f (a:as) b = f_spec a as b
+
+Now we want f_spec to have strictness  LLS, otherwise we'll use call-by-need
+when calling f_spec instead of call-by-value.  And that can result in 
+unbounded worsening in space (cf the classic foldl vs foldl')
+
+See Trac #3437 for a good example.
+
+The function calcSpecStrictness performs the calculation.
+
+
  %************************************************************************
  %*                                                                     *
  \subsection{Argument analysis}
  %************************************************************************
  %*                                                                     *
  \subsection{Argument analysis}
@@ -839,20 +1489,66 @@ they are constructor applications.
  
  
  \begin{code}
  
  
  \begin{code}
+type CallPat = ([Var], [CoreExpr])     -- Quantified variables and arguments
+
+callsToPats :: ScEnv -> [OneSpec] -> [ArgOcc] -> [Call] -> UniqSM (Bool, [CallPat])
+       -- Result has no duplicate patterns, 
+       -- nor ones mentioned in done_pats
+       -- Bool indicates that there was at least one boring pattern
+callsToPats env done_specs bndr_occs calls
+  = do { mb_pats <- mapM (callToPats env bndr_occs) calls
+
+       ; let good_pats :: [CallPat]
+             good_pats = catMaybes mb_pats
+             done_pats = [p | OS p _ _ _ <- done_specs] 
+             is_done p = any (samePat p) done_pats
+
+       ; return (any isNothing mb_pats, 
+                 filterOut is_done (nubBy samePat good_pats)) }
+
+callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
+       -- The [Var] is the variables to quantify over in the rule
+       --      Type variables come first, since they may scope 
+       --      over the following term variables
+       -- The [CoreExpr] are the argument patterns for the rule
+callToPats env bndr_occs (con_env, args)
+  | length args < length bndr_occs     -- Check saturated
+  = return Nothing
+  | otherwise
+  = do { let in_scope = substInScope (sc_subst env)
+       ; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs
+       ; let pat_fvs = varSetElems (exprsFreeVars pats)
+             qvars   = filterOut (`elemInScopeSet` in_scope) pat_fvs
+               -- Quantify over variables that are not in scope
+               -- at the call site
+               -- See Note [Shadowing] at the top
+               
+             (tvs, ids) = partition isTyVar qvars
+             qvars'     = tvs ++ ids
+               -- Put the type variables first; the type of a term
+               -- variable may mention a type variable
+
+       ; -- pprTrace "callToPats"  (ppr args $$ ppr prs $$ ppr bndr_occs) $
+         if interesting
+         then return (Just (qvars', pats))
+         else return Nothing }
+
      -- argToPat takes an actual argument, and returns an abstracted
      -- version, consisting of just the "constructor skeleton" of the
      -- argument, with non-constructor sub-expression replaced by new
      -- placeholder variables.  For example:
      --    C a (D (f x) (g y))  ==>  C p1 (D p2 p3)
  
      -- argToPat takes an actual argument, and returns an abstracted
      -- version, consisting of just the "constructor skeleton" of the
      -- argument, with non-constructor sub-expression replaced by new
      -- placeholder variables.  For example:
      --    C a (D (f x) (g y))  ==>  C p1 (D p2 p3)
  
-argToPat :: InScopeEnv                 -- What's in scope at the fn defn site
-        -> ConstrEnv                   -- ConstrEnv at the call site
+argToPat :: ScEnv
+         -> InScopeSet                 -- What's in scope at the fn defn site
+        -> ValueEnv                    -- ValueEnv at the call site
          -> CoreArg                     -- A call arg (or component thereof)
          -> ArgOcc
          -> UniqSM (Bool, CoreArg)
          -> CoreArg                     -- A call arg (or component thereof)
          -> ArgOcc
          -> UniqSM (Bool, CoreArg)
+
  -- Returns (interesting, pat), 
  -- where pat is the pattern derived from the argument
  -- Returns (interesting, pat), 
  -- where pat is the pattern derived from the argument
---           intersting=True if the pattern is non-trivial (not a variable or type)
+--           interesting=True if the pattern is non-trivial (not a variable or type)
  -- E.g.                x:xs         --> (True, x:xs)
  --             f xs         --> (False, w)        where w is a fresh wildcard
  --             (f xs, 'c')  --> (True, (w, 'c'))  where w is a fresh wildcard
  -- E.g.                x:xs         --> (True, x:xs)
  --             f xs         --> (False, w)        where w is a fresh wildcard
  --             (f xs, 'c')  --> (True, (w, 'c'))  where w is a fresh wildcard
@@ -860,29 +1556,57 @@ argToPat :: InScopeEnv                   -- What's in scope at the fn defn site
  --             lvl7         --> (True, lvl7)      if lvl7 is bound 
  --                                                somewhere further out
  
  --             lvl7         --> (True, lvl7)      if lvl7 is bound 
  --                                                somewhere further out
  
-argToPat in_scope con_env arg@(Type ty) arg_occ
+argToPat _env _in_scope _val_env arg@(Type {}) _arg_occ
+  = return (False, arg)
+    
+argToPat _env _in_scope _val_env arg@(Coercion {}) _arg_occ
    = return (False, arg)
  
    = return (False, arg)
  
-argToPat in_scope con_env (Var v) arg_occ
-  | not (isLocalId v) || v `elemVarEnv` in_scope
-  =    -- The recursive call passes a variable that 
-       -- is in scope at the function definition site
-       -- It's worth specialising on this if
-       --      (a) it's used in an interesting way in the body
-       --      (b) we know what its value is
-    if    (case arg_occ of { UnkOcc -> False; other -> True }) -- (a)
-       && isValueUnfolding (idUnfolding v)                     -- (b)
-    then return (True, Var v)
-    else wildCardPat (idType v)
-
-argToPat in_scope con_env (Let _ arg) arg_occ
-  = argToPat in_scope con_env arg arg_occ
+argToPat env in_scope val_env (Note _ arg) arg_occ
+  = argToPat env in_scope val_env arg arg_occ
+       -- Note [Notes in call patterns]
+       -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+       -- Ignore Notes.  In particular, we want to ignore any InlineMe notes
+       -- Perhaps we should not ignore profiling notes, but I'm going to
+       -- ride roughshod over them all for now.
+       --- See Note [Notes in RULE matching] in Rules
+
+argToPat env in_scope val_env (Let _ arg) arg_occ
+  = argToPat env in_scope val_env arg arg_occ
+       -- See Note [Matching lets] in Rule.lhs
         -- Look through let expressions
         -- Look through let expressions
-       -- e.g.         f (let v = rhs in \y -> ...v...)
-       -- Here we can specialise for f (\y -> ...)
+       -- e.g.         f (let v = rhs in (v,w))
+       -- Here we can specialise for f (v,w)
         -- because the rule-matcher will look through the let.
  
         -- because the rule-matcher will look through the let.
  
-argToPat in_scope con_env arg arg_occ
+{- Disabled; see Note [Matching cases] in Rule.lhs
+argToPat env in_scope val_env (Case scrut _ _ [(_, _, rhs)]) arg_occ
+  | exprOkForSpeculation scrut -- See Note [Matching cases] in Rule.lhs
+  = argToPat env in_scope val_env rhs arg_occ
+-}
+
+argToPat env in_scope val_env (Cast arg co) arg_occ
+  | isReflCo co     -- Substitution in the SpecConstr itself
+                    -- can lead to identity coercions
+  = argToPat env in_scope val_env arg arg_occ
+  | not (ignoreType env ty2)
+  = do { (interesting, arg') <- argToPat env in_scope val_env arg arg_occ
+       ; if not interesting then 
+               wildCardPat ty2
+         else do
+       { -- Make a wild-card pattern for the coercion
+         uniq <- getUniqueUs
+       ; let co_name = mkSysTvName uniq (fsLit "sg")
+             co_var = mkCoVar co_name (mkCoType ty1 ty2)
+       ; return (interesting, Cast arg' (mkCoVarCo co_var)) } }
+  where
+    Pair ty1 ty2 = coercionKind co
+
+    
+
+{-     Disabling lambda specialisation for now
+       It's fragile, and the spec_loop can be infinite
+argToPat in_scope val_env arg arg_occ
    | is_value_lam arg
    = return (True, arg)
    where
    | is_value_lam arg
    = return (True, arg)
    where
@@ -890,75 +1614,154 @@ argToPat in_scope con_env arg arg_occ
         | isId v = True         -- it is inside a type lambda
         | otherwise = is_value_lam e
      is_value_lam other = False
         | isId v = True         -- it is inside a type lambda
         | otherwise = is_value_lam e
      is_value_lam other = False
+-}
  
  
-argToPat in_scope con_env arg arg_occ
-  | Just (CV dc args) <- is_con_app_maybe con_env arg
-  , case arg_occ of
-       ScrutOcc _ -> True              -- Used only by case scrutinee
-       BothOcc    -> case arg of       -- Used by case scrut
-                       App {} -> True  -- ...and elsewhere...
-                       other  -> False
-       other      -> False     -- No point; the arg is not decomposed
-  = do { args' <- argsToPats in_scope con_env (args `zip` conArgOccs arg_occ dc)
-       ; return (True, mk_con_app dc (map snd args')) }
-
-argToPat in_scope con_env (Var v) arg_occ
-  =    -- A variable bound inside the function. 
-       -- Don't make a wild-card, because we may usefully share
-       --      e.g.  f a = let x = ... in f (x,x)
-       -- NB: this case follows the lambda and con-app cases!!
-    return (False, Var v)
-
--- The default case: make a wild-card
-argToPat in_scope con_env arg arg_occ = wildCardPat (exprType arg)
+  -- Check for a constructor application
+  -- NB: this *precedes* the Var case, so that we catch nullary constrs
+argToPat env in_scope val_env arg arg_occ
+  | Just (ConVal (DataAlt dc) args) <- isValue val_env arg
+  , not (ignoreDataCon env dc)        -- See Note [NoSpecConstr]
+  , Just arg_occs <- mb_scrut dc
+  = do { let (ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) args
+        ; (_, args') <- argsToPats env in_scope val_env rest_args arg_occs
+       ; return (True, 
+                  mkConApp dc (ty_args ++ args')) }
+  where
+    mb_scrut dc = case arg_occ of
+                   ScrutOcc bs 
+                           | Just occs <- lookupUFM bs dc
+                                          -> Just (occs)  -- See Note [Reboxing]
+                    _other | sc_force env -> Just (repeat UnkOcc)
+                           | otherwise    -> Nothing
+
+  -- Check if the argument is a variable that 
+  --   (a) is used in an interesting way in the body
+  --   (b) we know what its value is
+  -- In that case it counts as "interesting"
+argToPat env in_scope val_env (Var v) arg_occ
+  | sc_force env || case arg_occ of { UnkOcc -> False; _other -> True }, -- (a)
+    is_value,                                                            -- (b)
+    not (ignoreType env (varType v))
+  = return (True, Var v)
+  where
+    is_value 
+       | isLocalId v = v `elemInScopeSet` in_scope 
+                       && isJust (lookupVarEnv val_env v)
+               -- Local variables have values in val_env
+       | otherwise   = isValueUnfolding (idUnfolding v)
+               -- Imports have unfoldings
+
+--     I'm really not sure what this comment means
+--     And by not wild-carding we tend to get forall'd 
+--     variables that are in scope, which in turn can
+--     expose the weakness in let-matching
+--     See Note [Matching lets] in Rules
+
+  -- Check for a variable bound inside the function. 
+  -- Don't make a wild-card, because we may usefully share
+  --   e.g.  f a = let x = ... in f (x,x)
+  -- NB: this case follows the lambda and con-app cases!!
+-- argToPat _in_scope _val_env (Var v) _arg_occ
+--   = return (False, Var v)
+       -- SLPJ : disabling this to avoid proliferation of versions
+       -- also works badly when thinking about seeding the loop
+       -- from the body of the let
+       --       f x y = letrec g z = ... in g (x,y)
+       -- We don't want to specialise for that *particular* x,y
+
+  -- The default case: make a wild-card
+argToPat _env _in_scope _val_env arg _arg_occ
+  = wildCardPat (exprType arg)
  
  wildCardPat :: Type -> UniqSM (Bool, CoreArg)
  
  wildCardPat :: Type -> UniqSM (Bool, CoreArg)
-wildCardPat ty = do { uniq <- getUniqueUs
-                   ; let id = mkSysLocal FSLIT("sc") uniq ty
-                   ; return (False, Var id) }
-
-argsToPats :: InScopeEnv -> ConstrEnv
-          -> [(CoreArg, ArgOcc)]
-          -> UniqSM [(Bool, CoreArg)]
-argsToPats in_scope con_env args
-  = mapUs do_one args
-  where
-    do_one (arg,occ) = argToPat in_scope con_env arg occ
+wildCardPat ty
+  = do { uniq <- getUniqueUs
+       ; let id = mkSysLocal (fsLit "sc") uniq ty
+       ; return (False, Var id) }
+
+argsToPats :: ScEnv -> InScopeSet -> ValueEnv
+          -> [CoreArg] -> [ArgOcc]  -- Should be same length
+          -> UniqSM (Bool, [CoreArg])
+argsToPats env in_scope val_env args occs
+  = do { stuff <- zipWithM (argToPat env in_scope val_env) args occs
+       ; let (interesting_s, args') = unzip stuff
+       ; return (or interesting_s, args') }
  \end{code}
  
  
  \begin{code}
  \end{code}
  
  
  \begin{code}
-is_con_app_maybe :: ConstrEnv -> CoreExpr -> Maybe ConValue
-is_con_app_maybe env (Var v)
-  = case lookupVarEnv env v of
-       Just stuff -> Just stuff
-               -- You might think we could look in the idUnfolding here
+isValue :: ValueEnv -> CoreExpr -> Maybe Value
+isValue _env (Lit lit)
+  = Just (ConVal (LitAlt lit) [])
+
+isValue env (Var v)
+  | Just stuff <- lookupVarEnv env v
+  = Just stuff -- You might think we could look in the idUnfolding here
                 -- but that doesn't take account of which branch of a 
                 -- case we are in, which is the whole point
  
                 -- but that doesn't take account of which branch of a 
                 -- case we are in, which is the whole point
  
-       Nothing | isCheapUnfolding unf
-               -> is_con_app_maybe env (unfoldingTemplate unf)
-               where
-                 unf = idUnfolding v
-               -- However we do want to consult the unfolding 
-               -- as well, for let-bound constructors!
+  | not (isLocalId v) && isCheapUnfolding unf
+  = isValue env (unfoldingTemplate unf)
+  where
+    unf = idUnfolding v
+       -- However we do want to consult the unfolding 
+       -- as well, for let-bound constructors!
+
+isValue env (Lam b e)
+  | isTyVar b = case isValue env e of
+                 Just _  -> Just LambdaVal
+                 Nothing -> Nothing
+  | otherwise = Just LambdaVal
+
+isValue _env expr      -- Maybe it's a constructor application
+  | (Var fun, args) <- collectArgs expr
+  = case isDataConWorkId_maybe fun of
  
  
-       other  -> Nothing
+       Just con | args `lengthAtLeast` dataConRepArity con 
+               -- Check saturated; might be > because the 
+               --                  arity excludes type args
+               -> Just (ConVal (DataAlt con) args)
  
  
-is_con_app_maybe env (Lit lit)
-  = Just (CV (LitAlt lit) [])
+       _other | valArgCount args < idArity fun
+               -- Under-applied function
+              -> Just LambdaVal        -- Partial application
  
  
-is_con_app_maybe env expr
-  = case collectArgs expr of
-       (Var fun, args) | Just con <- isDataConWorkId_maybe fun,
-                         args `lengthAtLeast` dataConRepArity con
-               -- Might be > because the arity excludes type args
-                       -> Just (CV (DataAlt con) args)
+       _other -> Nothing
  
  
-       other -> Nothing
+isValue _env _expr = Nothing
  
  
-mk_con_app :: AltCon -> [CoreArg] -> CoreExpr
-mk_con_app (LitAlt lit)  []   = Lit lit
-mk_con_app (DataAlt con) args = mkConApp con args
-mk_con_app other args = panic "SpecConstr.mk_con_app"
+samePat :: CallPat -> CallPat -> Bool
+samePat (vs1, as1) (vs2, as2)
+  = all2 same as1 as2
+  where
+    same (Var v1) (Var v2) 
+       | v1 `elem` vs1 = v2 `elem` vs2
+       | v2 `elem` vs2 = False
+       | otherwise     = v1 == v2
+
+    same (Lit l1)    (Lit l2)    = l1==l2
+    same (App f1 a1) (App f2 a2) = same f1 f2 && same a1 a2
+
+    same (Type {}) (Type {}) = True    -- Note [Ignore type differences]
+    same (Coercion {}) (Coercion {}) = True
+    same (Note _ e1) e2        = same e1 e2    -- Ignore casts and notes
+    same (Cast e1 _) e2        = same e1 e2
+    same e1 (Note _ e2) = same e1 e2
+    same e1 (Cast e2 _) = same e1 e2
+
+    same e1 e2 = WARN( bad e1 || bad e2, ppr e1 $$ ppr e2) 
+                False  -- Let, lambda, case should not occur
+    bad (Case {}) = True
+    bad (Let {})  = True
+    bad (Lam {})  = True
+    bad _other   = False
  \end{code}
  \end{code}
+
+Note [Ignore type differences]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We do not want to generate specialisations where the call patterns
+differ only in their type arguments!  Not only is it utterly useless,
+but it also means that (with polymorphic recursion) we can generate
+an infinite number of specialisations. Example is Data.Sequence.adjustTree, 
+I think.
+