X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fspecialise%2FSpecConstr.lhs;h=f214f0cae8d2e1d10f9bdf3fb17c3203407a7a3a;hp=03dd3f55dcbd1ba0ae485c3d86536aff84292265;hb=e95ee1f718c6915c478005aad8af81705357d6ab;hpb=426d9b8565b66eeb7d27725e8823784769cfca48 diff --git a/compiler/specialise/SpecConstr.lhs b/compiler/specialise/SpecConstr.lhs index 03dd3f5..f214f0c 100644 --- a/compiler/specialise/SpecConstr.lhs +++ b/compiler/specialise/SpecConstr.lhs @@ -4,40 +4,54 @@ \section[SpecConstr]{Specialise over constructors} \begin{code} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + module SpecConstr( - specConstrProgram + specConstrProgram, SpecConstrAnnotation(..) ) where #include "HsVersions.h" import CoreSyn -import CoreLint ( showPass, endPass ) -import CoreUtils ( exprType, tcEqExpr, mkPiTypes ) +import CoreSubst +import CoreUtils +import CoreUnfold ( couldBeSmallEnoughToInline ) import CoreFVs ( exprsFreeVars ) -import CoreSubst ( Subst, mkSubst, substExpr ) -import CoreTidy ( tidyRules ) -import PprCore ( pprRules ) +import CoreMonad +import HscTypes ( ModGuts(..) ) import WwLib ( mkWorkerArgs ) -import DataCon ( dataConRepArity, isVanillaDataCon ) -import Type ( tyConAppArgs, tyVarsOfTypes ) -import Unify ( coreRefineTys ) -import Id ( Id, idName, idType, isDataConWorkId_maybe, - mkUserLocal, mkSysLocal, idUnfolding ) -import Var ( Var ) +import DataCon ( dataConTyCon, dataConRepArity, dataConUnivTyVars ) +import TyCon ( TyCon ) +import Literal ( literalType ) +import Coercion +import Rules +import Type hiding( substTy ) +import Id +import MkCore ( mkImpossibleExpr ) +import Var import VarEnv import VarSet -import Name ( nameOccName, nameSrcLoc ) -import Rules ( addIdSpecialisations, mkLocalRule, rulesOfBinds ) -import OccName ( mkSpecOcc ) -import ErrUtils ( dumpIfSet_dyn ) -import DynFlags ( DynFlags, DynFlag(..) ) -import BasicTypes ( Activation(..) ) -import Maybes ( orElse ) -import Util ( mapAccumL, lengthAtLeast, notNull ) -import List ( nubBy, partition ) +import Name +import BasicTypes +import DynFlags ( DynFlags(..) ) +import StaticFlags ( opt_PprStyle_Debug ) +import Maybes ( orElse, catMaybes, isJust, isNothing ) +import Demand +import DmdAnal ( both ) +import Serialized ( deserializeWithData ) +import Util import UniqSupply import Outputable import FastString +import UniqFM +import MonadUtils +import Control.Monad ( zipWithM ) +import Data.List +import Data.Data ( Data, Typeable ) \end{code} ----------------------------------------------------- @@ -93,9 +107,55 @@ In Core, by the time we've w/wd (f is strict in i) we get At the call to f, we see that the argument, n is know to be (I# n#), and n is evaluated elsewhere in the body of f, so we can play the same -trick as above. However we don't want to do that if the boxed version -of n is needed (else we'd avoid the eval but pay more for re-boxing n). -So in this case we want that the *only* uses of n are in case statements. +trick as above. + + +Note [Reboxing] +~~~~~~~~~~~~~~~ +We must be careful not to allocate the same constructor twice. Consider + f p = (...(case p of (a,b) -> e)...p..., + ...let t = (r,s) in ...t...(f t)...) +At the recursive call to f, we can see that t is a pair. But we do NOT want +to make a specialised copy: + f' a b = let p = (a,b) in (..., ...) 
+because now t is allocated by the caller, then r and s are passed to the +recursive call, which allocates the (r,s) pair again. + +This happens if + (a) the argument p is used in other than a case-scrutinsation way. + (b) the argument to the call is not a 'fresh' tuple; you have to + look into its unfolding to see that it's a tuple + +Hence the "OR" part of Note [Good arguments] below. + +ALTERNATIVE 2: pass both boxed and unboxed versions. This no longer saves +allocation, but does perhaps save evals. In the RULE we'd have +something like + + f (I# x#) = f' (I# x#) x# + +If at the call site the (I# x) was an unfolding, then we'd have to +rely on CSE to eliminate the duplicate allocation.... This alternative +doesn't look attractive enough to pursue. + +ALTERNATIVE 3: ignore the reboxing problem. The trouble is that +the conservative reboxing story prevents many useful functions from being +specialised. Example: + foo :: Maybe Int -> Int -> Int + foo (Just m) 0 = 0 + foo x@(Just m) n = foo x (n-m) +Here the use of 'x' will clearly not require boxing in the specialised function. + +The strictness analyser has the same problem, in fact. Example: + f p@(a,b) = ... +If we pass just 'a' and 'b' to the worker, it might need to rebox the +pair to create (a,b). A more sophisticated analysis might figure out +precisely the cases in which this could happen, but the strictness +analyser does no such analysis; it just passes 'a' and 'b', and hopes +for the best. + +So my current choice is to make SpecConstr similarly aggressive, and +ignore the bad potential of reboxing. Note [Good arguments] @@ -121,7 +181,7 @@ So we look for That same parameter is scrutinised by a case somewhere in the RHS of the function AND - Those are the only uses of the parameter + Those are the only uses of the parameter (see Note [Reboxing]) What to abstract over @@ -192,6 +252,171 @@ is to run deShadowBinds before running SpecConstr, but instead we run the simplifier. That gives the simplest possible program for SpecConstr to chew on; and it virtually guarantees no shadowing. +Note [Specialising for constant parameters] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This one is about specialising on a *constant* (but not necessarily +constructor) argument + + foo :: Int -> (Int -> Int) -> Int + foo 0 f = 0 + foo m f = foo (f m) (+1) + +It produces + + lvl_rmV :: GHC.Base.Int -> GHC.Base.Int + lvl_rmV = + \ (ds_dlk :: GHC.Base.Int) -> + case ds_dlk of wild_alH { GHC.Base.I# x_alG -> + GHC.Base.I# (GHC.Prim.+# x_alG 1) + + T.$wfoo :: GHC.Prim.Int# -> (GHC.Base.Int -> GHC.Base.Int) -> + GHC.Prim.Int# + T.$wfoo = + \ (ww_sme :: GHC.Prim.Int#) (w_smg :: GHC.Base.Int -> GHC.Base.Int) -> + case ww_sme of ds_Xlw { + __DEFAULT -> + case w_smg (GHC.Base.I# ds_Xlw) of w1_Xmo { GHC.Base.I# ww1_Xmz -> + T.$wfoo ww1_Xmz lvl_rmV + }; + 0 -> 0 + } + +The recursive call has lvl_rmV as its argument, so we could create a specialised copy +with that argument baked in; that is, not passed at all. Now it can perhaps be inlined. + +When is this worth it? Call the constant 'lvl' +- If 'lvl' has an unfolding that is a constructor, see if the corresponding + parameter is scrutinised anywhere in the body. + +- If 'lvl' has an unfolding that is a inlinable function, see if the corresponding + parameter is applied (...to enough arguments...?) + + Also do this is if the function has RULES? 
+ +Also + +Note [Specialising for lambda parameters] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + foo :: Int -> (Int -> Int) -> Int + foo 0 f = 0 + foo m f = foo (f m) (\n -> n-m) + +This is subtly different from the previous one in that we get an +explicit lambda as the argument: + + T.$wfoo :: GHC.Prim.Int# -> (GHC.Base.Int -> GHC.Base.Int) -> + GHC.Prim.Int# + T.$wfoo = + \ (ww_sm8 :: GHC.Prim.Int#) (w_sma :: GHC.Base.Int -> GHC.Base.Int) -> + case ww_sm8 of ds_Xlr { + __DEFAULT -> + case w_sma (GHC.Base.I# ds_Xlr) of w1_Xmf { GHC.Base.I# ww1_Xmq -> + T.$wfoo + ww1_Xmq + (\ (n_ad3 :: GHC.Base.Int) -> + case n_ad3 of wild_alB { GHC.Base.I# x_alA -> + GHC.Base.I# (GHC.Prim.-# x_alA ds_Xlr) + }) + }; + 0 -> 0 + } + +I wonder if SpecConstr couldn't be extended to handle this? After all, +lambda is a sort of constructor for functions and perhaps it already +has most of the necessary machinery? + +Furthermore, there's an immediate win, because you don't need to allocate the lamda +at the call site; and if perchance it's called in the recursive call, then you +may avoid allocating it altogether. Just like for constructors. + +Looks cool, but probably rare...but it might be easy to implement. + + +Note [SpecConstr for casts] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + data family T a :: * + data instance T Int = T Int + + foo n = ... + where + go (T 0) = 0 + go (T n) = go (T (n-1)) + +The recursive call ends up looking like + go (T (I# ...) `cast` g) +So we want to spot the construtor application inside the cast. +That's why we have the Cast case in argToPat + +Note [Local recursive groups] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For a *local* recursive group, we can see all the calls to the +function, so we seed the specialisation loop from the calls in the +body, not from the calls in the RHS. Consider: + + bar m n = foo n (n,n) (n,n) (n,n) (n,n) + where + foo n p q r s + | n == 0 = m + | n > 3000 = case p of { (p1,p2) -> foo (n-1) (p2,p1) q r s } + | n > 2000 = case q of { (q1,q2) -> foo (n-1) p (q2,q1) r s } + | n > 1000 = case r of { (r1,r2) -> foo (n-1) p q (r2,r1) s } + | otherwise = case s of { (s1,s2) -> foo (n-1) p q r (s2,s1) } + +If we start with the RHSs of 'foo', we get lots and lots of specialisations, +most of which are not needed. But if we start with the (single) call +in the rhs of 'bar' we get exactly one fully-specialised copy, and all +the recursive calls go to this fully-specialised copy. Indeed, the original +function is later collected as dead code. This is very important in +specialising the loops arising from stream fusion, for example in NDP where +we were getting literally hundreds of (mostly unused) specialisations of +a local function. + +Note [Do not specialise diverging functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Specialising a function that just diverges is a waste of code. +Furthermore, it broke GHC (simpl014) thus: + {-# STR Sb #-} + f = \x. case x of (a,b) -> f x +If we specialise f we get + f = \x. case x of (a,b) -> fspec a b +But fspec doesn't have decent strictnes info. As it happened, +(f x) :: IO t, so the state hack applied and we eta expanded fspec, +and hence f. But now f's strictness is less than its arity, which +breaks an invariant. + +Note [Forcing specialisation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +With stream fusion and in other similar cases, we want to fully specialise +some (but not necessarily all!) loops regardless of their size and the +number of specialisations. 
We allow a library to specify this by annotating +a type with ForceSpecConstr and then adding a parameter of that type to the +loop. Here is a (simplified) example from the vector library: + + data SPEC = SPEC | SPEC2 + {-# ANN type SPEC ForceSpecConstr #-} + + foldl :: (a -> b -> a) -> a -> Stream b -> a + {-# INLINE foldl #-} + foldl f z (Stream step s _) = foldl_loop SPEC z s + where + foldl_loop SPEC z s = case step s of + Yield x s' -> foldl_loop SPEC (f z x) s' + Skip -> foldl_loop SPEC z s' + Done -> z + +SpecConstr will spot the SPEC parameter and always fully specialise +foldl_loop. Note that we can't just annotate foldl_loop since it isn't a +top-level function but even if we could, inlining etc. could easily drop the +annotation. We also have to prevent the SPEC argument from being removed by +w/w which is why SPEC is a sum type. This is all quite ugly; we ought to come +up with a better design. + +ForceSpecConstr arguments are spotted in scExpr' and scTopBinds which then set +force_spec to True when calling specLoop. This flag makes specLoop and +specialise ignore specConstrCount and specConstrThreshold when deciding +whether to specialise a function. + ----------------------------------------------------- Stuff not yet handled ----------------------------------------------------- @@ -264,70 +489,22 @@ We get two specialisations: = Foo.$s$wfoo y_aFp sc_sGC ; But perhaps the first one isn't good. After all, we know that tpl_B2 is -a T (I# x) really! - +a T (I# x) really, because T is strict and Int has one constructor. (We can't +unbox the strict fields, becuase T is polymorphic!) -Example 3 -~~~~~~~~~ -This one is about specialising on a *lambda* argument - - foo :: Int -> (Int -> Int) -> Int - foo 0 f = 0 - foo m f = foo (f m) (+1) - -It produces - - lvl_rmV :: GHC.Base.Int -> GHC.Base.Int - lvl_rmV = - \ (ds_dlk :: GHC.Base.Int) -> - case ds_dlk of wild_alH { GHC.Base.I# x_alG -> - GHC.Base.I# (GHC.Prim.+# x_alG 1) - - T.$wfoo :: GHC.Prim.Int# -> (GHC.Base.Int -> GHC.Base.Int) -> - GHC.Prim.Int# - T.$wfoo = - \ (ww_sme :: GHC.Prim.Int#) (w_smg :: GHC.Base.Int -> GHC.Base.Int) -> - case ww_sme of ds_Xlw { - __DEFAULT -> - case w_smg (GHC.Base.I# ds_Xlw) of w1_Xmo { GHC.Base.I# ww1_Xmz -> - T.$wfoo ww1_Xmz lvl_rmV - }; - 0 -> 0 - } - -Of course, it would be much nicer if the optimiser specialised $wfoo for -when lvl_rmV is passed as the second argument and then inlined it. - -Example 4 -~~~~~~~~~ - foo :: Int -> (Int -> Int) -> Int - foo 0 f = 0 - foo m f = foo (f m) (\n -> n-m) - -This is subtly different from the previous one in that we get an -explicit lambda as the argument: - - T.$wfoo :: GHC.Prim.Int# -> (GHC.Base.Int -> GHC.Base.Int) -> - GHC.Prim.Int# - T.$wfoo = - \ (ww_sm8 :: GHC.Prim.Int#) (w_sma :: GHC.Base.Int -> GHC.Base.Int) -> - case ww_sm8 of ds_Xlr { - __DEFAULT -> - case w_sma (GHC.Base.I# ds_Xlr) of w1_Xmf { GHC.Base.I# ww1_Xmq -> - T.$wfoo - ww1_Xmq - (\ (n_ad3 :: GHC.Base.Int) -> - case n_ad3 of wild_alB { GHC.Base.I# x_alA -> - GHC.Base.I# (GHC.Prim.-# x_alA ds_Xlr) - }) - }; - 0 -> 0 - } +%************************************************************************ +%* * +\subsection{Annotations} +%* * +%************************************************************************ -I wonder if SpecConstr couldn't be extended to handle this? After all, -lambda is a sort of constructor for functions and perhaps it already -has most of the necessary machinery? +Annotating a type with NoSpecConstr will make SpecConstr not specialise +for arguments of that type. 
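+For example (a minimal sketch mirroring the ForceSpecConstr example above; the
+type T here is hypothetical):
+
+  data T = ...
+  {-# ANN type T NoSpecConstr #-}
+
+SpecConstr will then leave arguments of type T alone, even if they are
+scrutinised in a loop body.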
+\begin{code} +data SpecConstrAnnotation = NoSpecConstr | ForceSpecConstr + deriving( Data, Typeable, Eq ) +\end{code} %************************************************************************ %* * @@ -336,24 +513,19 @@ has most of the necessary machinery? %************************************************************************ \begin{code} -specConstrProgram :: DynFlags -> UniqSupply -> [CoreBind] -> IO [CoreBind] -specConstrProgram dflags us binds +specConstrProgram :: ModGuts -> CoreM ModGuts +specConstrProgram guts = do - showPass dflags "SpecConstr" - - let (binds', _) = initUs us (go emptyScEnv binds) - - endPass dflags "SpecConstr" Opt_D_dump_spec binds' - - dumpIfSet_dyn dflags Opt_D_dump_rules "Top-level specialisations" - (pprRules (tidyRules emptyTidyEnv (rulesOfBinds binds'))) - - return binds' + dflags <- getDynFlags + us <- getUniqueSupplyM + annos <- getFirstAnnotations deserializeWithData guts + let binds' = fst $ initUs us (go (initScEnv dflags annos) (mg_binds guts)) + return (guts { mg_binds = binds' }) where - go env [] = returnUs [] - go env (bind:binds) = scBind env bind `thenUs` \ (env', _, bind') -> - go env' binds `thenUs` \ binds' -> - returnUs (bind' : binds') + go _ [] = return [] + go env (bind:binds) = do (env', bind') <- scTopBind env bind + binds' <- go env' binds + return (bind' : binds') \end{code} @@ -364,111 +536,218 @@ specConstrProgram dflags us binds %************************************************************************ \begin{code} -data ScEnv = SCE { scope :: VarEnv HowBound, - -- Binds all non-top-level variables in scope +data ScEnv = SCE { sc_size :: Maybe Int, -- Size threshold + sc_count :: Maybe Int, -- Max # of specialisations for any one fn + -- See Note [Avoiding exponential blowup] + + sc_subst :: Subst, -- Current substitution + -- Maps InIds to OutExprs - cons :: ConstrEnv + sc_how_bound :: HowBoundEnv, + -- Binds interesting non-top-level variables + -- Domain is OutVars (*after* applying the substitution) + + sc_vals :: ValueEnv, + -- Domain is OutIds (*after* applying the substitution) + -- Used even for top-level bindings (but not imported ones) + + sc_annotations :: UniqFM SpecConstrAnnotation } -type ConstrEnv = IdEnv ConValue -data ConValue = CV AltCon [CoreArg] - -- Variables known to be bound to a constructor - -- in a particular case alternative +--------------------- +-- As we go, we apply a substitution (sc_subst) to the current term +type InExpr = CoreExpr -- _Before_ applying the subst +type InVar = Var +type OutExpr = CoreExpr -- _After_ applying the subst +type OutId = Id +type OutVar = Var -instance Outputable ConValue where - ppr (CV con args) = ppr con <+> interpp'SP args +--------------------- +type HowBoundEnv = VarEnv HowBound -- Domain is OutVars -refineConstrEnv :: Subst -> ConstrEnv -> ConstrEnv --- The substitution is a type substitution only -refineConstrEnv subst env = mapVarEnv refine_con_value env - where - refine_con_value (CV con args) = CV con (map (substExpr subst) args) +--------------------- +type ValueEnv = IdEnv Value -- Domain is OutIds +data Value = ConVal AltCon [CoreArg] -- _Saturated_ constructors + | LambdaVal -- Inlinable lambdas or PAPs -emptyScEnv = SCE { scope = emptyVarEnv, cons = emptyVarEnv } +instance Outputable Value where + ppr (ConVal con args) = ppr con <+> interpp'SP args + ppr LambdaVal = ptext (sLit "") -data HowBound = RecFun -- These are the recursive functions for which - -- we seek interesting call patterns +--------------------- +initScEnv :: DynFlags -> UniqFM 
SpecConstrAnnotation -> ScEnv +initScEnv dflags anns + = SCE { sc_size = specConstrThreshold dflags, + sc_count = specConstrCount dflags, + sc_subst = emptySubst, + sc_how_bound = emptyVarEnv, + sc_vals = emptyVarEnv, + sc_annotations = anns } - | RecArg -- These are those functions' arguments; we are - -- interested to see if those arguments are scrutinised +data HowBound = RecFun -- These are the recursive functions for which + -- we seek interesting call patterns - | Other -- We track all others so we know what's in scope - -- This is used in spec_one to check what needs to be - -- passed as a parameter and what is in scope at the - -- function definition site + | RecArg -- These are those functions' arguments, or their sub-components; + -- we gather occurrence information for these instance Outputable HowBound where ppr RecFun = text "RecFun" ppr RecArg = text "RecArg" - ppr Other = text "Other" -lookupScopeEnv env v = lookupVarEnv (scope env) v +lookupHowBound :: ScEnv -> Id -> Maybe HowBound +lookupHowBound env id = lookupVarEnv (sc_how_bound env) id -extendBndrs env bndrs = env { scope = extendVarEnvList (scope env) [(b,Other) | b <- bndrs] } -extendBndr env bndr = env { scope = extendVarEnv (scope env) bndr Other } +scSubstId :: ScEnv -> Id -> CoreExpr +scSubstId env v = lookupIdSubst (text "scSubstId") (sc_subst env) v - -- When we encounter - -- case scrut of b - -- C x y -> ... - -- we want to bind b, and perhaps scrut too, to (C x y) -extendCaseBndrs :: ScEnv -> Id -> CoreExpr -> AltCon -> [Var] -> ScEnv -extendCaseBndrs env case_bndr scrut DEFAULT alt_bndrs - = extendBndrs env (case_bndr : alt_bndrs) +scSubstTy :: ScEnv -> Type -> Type +scSubstTy env ty = substTy (sc_subst env) ty -extendCaseBndrs env case_bndr scrut con@(LitAlt lit) alt_bndrs - = ASSERT( null alt_bndrs ) extendAlt env case_bndr scrut (CV con []) [] +zapScSubst :: ScEnv -> ScEnv +zapScSubst env = env { sc_subst = zapSubstEnv (sc_subst env) } -extendCaseBndrs env case_bndr scrut con@(DataAlt data_con) alt_bndrs - | isVanillaDataCon data_con - = extendAlt env case_bndr scrut (CV con vanilla_args) alt_bndrs - - | otherwise -- GADT - = extendAlt env1 case_bndr scrut (CV con gadt_args) alt_bndrs +extendScInScope :: ScEnv -> [Var] -> ScEnv + -- Bring the quantified variables into scope +extendScInScope env qvars = env { sc_subst = extendInScopeList (sc_subst env) qvars } + + -- Extend the substitution +extendScSubst :: ScEnv -> Var -> OutExpr -> ScEnv +extendScSubst env var expr = env { sc_subst = extendSubst (sc_subst env) var expr } + +extendScSubstList :: ScEnv -> [(Var,OutExpr)] -> ScEnv +extendScSubstList env prs = env { sc_subst = extendSubstList (sc_subst env) prs } + +extendHowBound :: ScEnv -> [Var] -> HowBound -> ScEnv +extendHowBound env bndrs how_bound + = env { sc_how_bound = extendVarEnvList (sc_how_bound env) + [(bndr,how_bound) | bndr <- bndrs] } + +extendBndrsWith :: HowBound -> ScEnv -> [Var] -> (ScEnv, [Var]) +extendBndrsWith how_bound env bndrs + = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndrs') + where + (subst', bndrs') = substBndrs (sc_subst env) bndrs + hb_env' = sc_how_bound env `extendVarEnvList` + [(bndr,how_bound) | bndr <- bndrs'] + +extendBndrWith :: HowBound -> ScEnv -> Var -> (ScEnv, Var) +extendBndrWith how_bound env bndr + = (env { sc_subst = subst', sc_how_bound = hb_env' }, bndr') where - vanilla_args = map Type (tyConAppArgs (idType case_bndr)) ++ - map varToCoreExpr alt_bndrs - - gadt_args = map (substExpr subst . 
varToCoreExpr) alt_bndrs - -- This call generates some bogus warnings from substExpr, - -- because it's inconvenient to put all the Ids in scope - -- Will be fixed when we move to FC - - (alt_tvs, _) = span isTyVar alt_bndrs - Just (tv_subst, is_local) = coreRefineTys data_con alt_tvs (idType case_bndr) - subst = mkSubst in_scope tv_subst emptyVarEnv -- No Id substitition - in_scope = mkInScopeSet (tyVarsOfTypes (varEnvElts tv_subst)) - - env1 | is_local = env - | otherwise = env { cons = refineConstrEnv subst (cons env) } - - - -extendAlt :: ScEnv -> Id -> CoreExpr -> ConValue -> [Var] -> ScEnv -extendAlt env case_bndr scrut val alt_bndrs - = let - env1 = SCE { scope = extendVarEnvList (scope env) [(b,Other) | b <- case_bndr : alt_bndrs], - cons = extendVarEnv (cons env) case_bndr val } - in - case scrut of - Var v -> -- Bind the scrutinee in the ConstrEnv if it's a variable - -- Also forget if the scrutinee is a RecArg, because we're - -- now in the branch of a case, and we don't want to - -- record a non-scrutinee use of v if we have - -- case v of { (a,b) -> ...(f v)... } - SCE { scope = extendVarEnv (scope env1) v Other, - cons = extendVarEnv (cons env1) v val } - other -> env1 - - -- When we encounter a recursive function binding - -- f = \x y -> ... - -- we want to extend the scope env with bindings - -- that record that f is a RecFn and x,y are RecArgs -extendRecBndr env fn bndrs - = env { scope = scope env `extendVarEnvList` - ((fn,RecFun): [(bndr,RecArg) | bndr <- bndrs]) } + (subst', bndr') = substBndr (sc_subst env) bndr + hb_env' = extendVarEnv (sc_how_bound env) bndr' how_bound + +extendRecBndrs :: ScEnv -> [Var] -> (ScEnv, [Var]) +extendRecBndrs env bndrs = (env { sc_subst = subst' }, bndrs') + where + (subst', bndrs') = substRecBndrs (sc_subst env) bndrs + +extendBndr :: ScEnv -> Var -> (ScEnv, Var) +extendBndr env bndr = (env { sc_subst = subst' }, bndr') + where + (subst', bndr') = substBndr (sc_subst env) bndr + +extendValEnv :: ScEnv -> Id -> Maybe Value -> ScEnv +extendValEnv env _ Nothing = env +extendValEnv env id (Just cv) = env { sc_vals = extendVarEnv (sc_vals env) id cv } + +extendCaseBndrs :: ScEnv -> Id -> AltCon -> [Var] -> (ScEnv, [Var]) +-- When we encounter +-- case scrut of b +-- C x y -> ... 
+-- we want to bind b, to (C x y) +-- NB1: Extends only the sc_vals part of the envt +-- NB2: Kill the dead-ness info on the pattern binders x,y, since +-- they are potentially made alive by the [b -> C x y] binding +extendCaseBndrs env case_bndr con alt_bndrs + | isDeadBinder case_bndr + = (env, alt_bndrs) + | otherwise + = (env1, map zap alt_bndrs) + -- NB: We used to bind v too, if scrut = (Var v); but + -- the simplifer has already done this so it seems + -- redundant to do so here + -- case scrut of + -- Var v -> extendValEnv env1 v cval + -- _other -> env1 + where + zap v | isTyCoVar v = v -- See NB2 above + | otherwise = zapIdOccInfo v + env1 = extendValEnv env case_bndr cval + cval = case con of + DEFAULT -> Nothing + LitAlt {} -> Just (ConVal con []) + DataAlt {} -> Just (ConVal con vanilla_args) + where + vanilla_args = map Type (tyConAppArgs (idType case_bndr)) ++ + varsToCoreExprs alt_bndrs + +ignoreTyCon :: ScEnv -> TyCon -> Bool +ignoreTyCon env tycon + = lookupUFM (sc_annotations env) tycon == Just NoSpecConstr + +ignoreType :: ScEnv -> Type -> Bool +ignoreType env ty + = case splitTyConApp_maybe ty of + Just (tycon, _) -> ignoreTyCon env tycon + _ -> False + +ignoreAltCon :: ScEnv -> AltCon -> Bool +ignoreAltCon env (DataAlt dc) = ignoreTyCon env (dataConTyCon dc) +ignoreAltCon env (LitAlt lit) = ignoreType env (literalType lit) +ignoreAltCon _ DEFAULT = True + +forceSpecBndr :: ScEnv -> Var -> Bool +forceSpecBndr env var = forceSpecFunTy env . snd . splitForAllTys . varType $ var + +forceSpecFunTy :: ScEnv -> Type -> Bool +forceSpecFunTy env = any (forceSpecArgTy env) . fst . splitFunTys + +forceSpecArgTy :: ScEnv -> Type -> Bool +forceSpecArgTy env ty + | Just ty' <- coreView ty = forceSpecArgTy env ty' + +forceSpecArgTy env ty + | Just (tycon, tys) <- splitTyConApp_maybe ty + , tycon /= funTyCon + = lookupUFM (sc_annotations env) tycon == Just ForceSpecConstr + || any (forceSpecArgTy env) tys + +forceSpecArgTy _ _ = False + +decreaseSpecCount :: ScEnv -> Int -> ScEnv +-- See Note [Avoiding exponential blowup] +decreaseSpecCount env n_specs + = env { sc_count = case sc_count env of + Nothing -> Nothing + Just n -> Just (n `div` (n_specs + 1)) } + -- The "+1" takes account of the original function; + -- See Note [Avoiding exponential blowup] \end{code} +Note [Avoiding exponential blowup] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The sc_count field of the ScEnv says how many times we are prepared to +duplicate a single function. But we must take care with recursive +specialiations. Consider + + let $j1 = let $j2 = let $j3 = ... + in + ...$j3... + in + ...$j2... + in + ...$j1... + +If we specialise $j1 then in each specialisation (as well as the original) +we can specialise $j2, and similarly $j3. Even if we make just *one* +specialisation of each, becuase we also have the original we'll get 2^n +copies of $j3, which is not good. + +So when recursively specialising we divide the sc_count by the number of +copies we are making at this level, including the original. 
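+For example (an illustrative sketch of the arithmetic, not a real trace): with
+-fspec-constr-count=9, making two specialisations of $j1 leaves three copies of
+$j2's definition (the original plus the two specialisations), and each copy now
+carries sc_count = 9 `div` (2+1) = 3. So at most 3*3 = 9 specialisations of $j2
+are made in total, rather than the count doubling (or worse) at every level of
+nesting.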
+ %************************************************************************ %* * @@ -479,40 +758,110 @@ extendRecBndr env fn bndrs \begin{code} data ScUsage = SCU { - calls :: !(IdEnv ([Call])), -- Calls + scu_calls :: CallEnv, -- Calls -- The functions are a subset of the -- RecFuns in the ScEnv - occs :: !(IdEnv ArgOcc) -- Information on argument occurrences - } -- The variables are a subset of the - -- RecArg in the ScEnv + scu_occs :: !(IdEnv ArgOcc) -- Information on argument occurrences + } -- The domain is OutIds -type Call = (ConstrEnv, [CoreArg]) +type CallEnv = IdEnv [Call] +type Call = (ValueEnv, [CoreArg]) -- The arguments of the call, together with the -- env giving the constructor bindings at the call site -nullUsage = SCU { calls = emptyVarEnv, occs = emptyVarEnv } +nullUsage :: ScUsage +nullUsage = SCU { scu_calls = emptyVarEnv, scu_occs = emptyVarEnv } -combineUsage u1 u2 = SCU { calls = plusVarEnv_C (++) (calls u1) (calls u2), - occs = plusVarEnv_C combineOcc (occs u1) (occs u2) } +combineCalls :: CallEnv -> CallEnv -> CallEnv +combineCalls = plusVarEnv_C (++) +combineUsage :: ScUsage -> ScUsage -> ScUsage +combineUsage u1 u2 = SCU { scu_calls = combineCalls (scu_calls u1) (scu_calls u2), + scu_occs = plusVarEnv_C combineOcc (scu_occs u1) (scu_occs u2) } + +combineUsages :: [ScUsage] -> ScUsage combineUsages [] = nullUsage combineUsages us = foldr1 combineUsage us -data ArgOcc = CaseScrut - | OtherOcc - | Both +lookupOcc :: ScUsage -> OutVar -> (ScUsage, ArgOcc) +lookupOcc (SCU { scu_calls = sc_calls, scu_occs = sc_occs }) bndr + = (SCU {scu_calls = sc_calls, scu_occs = delVarEnv sc_occs bndr}, + lookupVarEnv sc_occs bndr `orElse` NoOcc) -instance Outputable ArgOcc where - ppr CaseScrut = ptext SLIT("case-scrut") - ppr OtherOcc = ptext SLIT("other-occ") - ppr Both = ptext SLIT("case-scrut and other") +lookupOccs :: ScUsage -> [OutVar] -> (ScUsage, [ArgOcc]) +lookupOccs (SCU { scu_calls = sc_calls, scu_occs = sc_occs }) bndrs + = (SCU {scu_calls = sc_calls, scu_occs = delVarEnvList sc_occs bndrs}, + [lookupVarEnv sc_occs b `orElse` NoOcc | b <- bndrs]) -combineOcc CaseScrut CaseScrut = CaseScrut -combineOcc OtherOcc OtherOcc = OtherOcc -combineOcc _ _ = Both -\end{code} +data ArgOcc = NoOcc -- Doesn't occur at all; or a type argument + | UnkOcc -- Used in some unknown way + | ScrutOcc (UniqFM [ArgOcc]) -- See Note [ScrutOcc] + + | BothOcc -- Definitely taken apart, *and* perhaps used in some other way + +{- Note [ScrutOcc] + +An occurrence of ScrutOcc indicates that the thing, or a `cast` version of the thing, +is *only* taken apart or applied. + + Functions, literal: ScrutOcc emptyUFM + Data constructors: ScrutOcc subs, + +where (subs :: UniqFM [ArgOcc]) gives usage of the *pattern-bound* components, +The domain of the UniqFM is the Unique of the data constructor + +The [ArgOcc] is the occurrences of the *pattern-bound* components +of the data structure. E.g. + data T a = forall b. MkT a b (b->a) +A pattern binds b, x::a, y::b, z::b->a, but not 'a'! 
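+For example (a rough sketch): in
+    case t of MkT x y z -> ....(z x)....
+where y is unused, 't' would get an occurrence something like
+    ScrutOcc (unitUFM MkT [NoOcc, UnkOcc, NoOcc, ScrutOcc emptyUFM])
+recording that the existential b and the unused y contribute nothing, that x is
+used in some unknown way, and that z is itself applied.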
+ +-} + +instance Outputable ArgOcc where + ppr (ScrutOcc xs) = ptext (sLit "scrut-occ") <> ppr xs + ppr UnkOcc = ptext (sLit "unk-occ") + ppr BothOcc = ptext (sLit "both-occ") + ppr NoOcc = ptext (sLit "no-occ") + +-- Experimentally, this vesion of combineOcc makes ScrutOcc "win", so +-- that if the thing is scrutinised anywhere then we get to see that +-- in the overall result, even if it's also used in a boxed way +-- This might be too agressive; see Note [Reboxing] Alternative 3 +combineOcc :: ArgOcc -> ArgOcc -> ArgOcc +combineOcc NoOcc occ = occ +combineOcc occ NoOcc = occ +combineOcc (ScrutOcc xs) (ScrutOcc ys) = ScrutOcc (plusUFM_C combineOccs xs ys) +combineOcc _occ (ScrutOcc ys) = ScrutOcc ys +combineOcc (ScrutOcc xs) _occ = ScrutOcc xs +combineOcc UnkOcc UnkOcc = UnkOcc +combineOcc _ _ = BothOcc + +combineOccs :: [ArgOcc] -> [ArgOcc] -> [ArgOcc] +combineOccs xs ys = zipWithEqual "combineOccs" combineOcc xs ys + +setScrutOcc :: ScEnv -> ScUsage -> OutExpr -> ArgOcc -> ScUsage +-- _Overwrite_ the occurrence info for the scrutinee, if the scrutinee +-- is a variable, and an interesting variable +setScrutOcc env usg (Cast e _) occ = setScrutOcc env usg e occ +setScrutOcc env usg (Note _ e) occ = setScrutOcc env usg e occ +setScrutOcc env usg (Var v) occ + | Just RecArg <- lookupHowBound env v = usg { scu_occs = extendVarEnv (scu_occs usg) v occ } + | otherwise = usg +setScrutOcc _env usg _other _occ -- Catch-all + = usg + +conArgOccs :: ArgOcc -> AltCon -> [ArgOcc] +-- Find usage of components of data con; returns [UnkOcc...] if unknown +-- See Note [ScrutOcc] for the extra UnkOccs in the vanilla datacon case + +conArgOccs (ScrutOcc fm) (DataAlt dc) + | Just pat_arg_occs <- lookupUFM fm dc + = [UnkOcc | _ <- dataConUnivTyVars dc] ++ pat_arg_occs + +conArgOccs _other _con = repeat UnkOcc +\end{code} %************************************************************************ %* * @@ -524,155 +873,358 @@ The main recursive function gathers up usage information, and creates specialised versions of functions. 
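+As a small illustration (a hedged sketch, not from a real dump): for a local
+recursive function
+    f = \xs. case xs of { [] -> 0; (y:ys) -> f ys }
+where f is a RecFun and xs a RecArg, processing the body yields a ScUsage whose
+scu_calls maps f to the single call (sc_vals, [ys]) seen in the cons branch,
+and whose scu_occs records that xs is scrutinised (a ScrutOcc).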
\begin{code} -scExpr :: ScEnv -> CoreExpr -> UniqSM (ScUsage, CoreExpr) +scExpr, scExpr' :: ScEnv -> CoreExpr -> UniqSM (ScUsage, CoreExpr) -- The unique supply is needed when we invent -- a new name for the specialised function and its args -scExpr env e@(Type t) = returnUs (nullUsage, e) -scExpr env e@(Lit l) = returnUs (nullUsage, e) -scExpr env e@(Var v) = returnUs (varUsage env v OtherOcc, e) -scExpr env (Note n e) = scExpr env e `thenUs` \ (usg,e') -> - returnUs (usg, Note n e') -scExpr env (Lam b e) = scExpr (extendBndr env b) e `thenUs` \ (usg,e') -> - returnUs (usg, Lam b e') - -scExpr env (Case scrut b ty alts) - = sc_scrut scrut `thenUs` \ (scrut_usg, scrut') -> - mapAndUnzipUs sc_alt alts `thenUs` \ (alts_usgs, alts') -> - returnUs (combineUsages alts_usgs `combineUsage` scrut_usg, - Case scrut' b ty alts') +scExpr env e = scExpr' env e + + +scExpr' env (Var v) = case scSubstId env v of + Var v' -> return (varUsage env v' UnkOcc, Var v') + e' -> scExpr (zapScSubst env) e' + +scExpr' env (Type t) = return (nullUsage, Type (scSubstTy env t)) +scExpr' _ e@(Lit {}) = return (nullUsage, e) +scExpr' env (Note n e) = do (usg,e') <- scExpr env e + return (usg, Note n e') +scExpr' env (Cast e co) = do (usg, e') <- scExpr env e + return (usg, Cast e' (scSubstTy env co)) +scExpr' env e@(App _ _) = scApp env (collectArgs e) +scExpr' env (Lam b e) = do let (env', b') = extendBndr env b + (usg, e') <- scExpr env' e + return (usg, Lam b' e') + +scExpr' env (Case scrut b ty alts) + = do { (scrut_usg, scrut') <- scExpr env scrut + ; case isValue (sc_vals env) scrut' of + Just (ConVal con args) -> sc_con_app con args scrut' + _other -> sc_vanilla scrut_usg scrut' + } where - sc_scrut e@(Var v) = returnUs (varUsage env v CaseScrut, e) - sc_scrut e = scExpr env e - - sc_alt (con,bs,rhs) = scExpr env1 rhs `thenUs` \ (usg,rhs') -> - returnUs (usg, (con,bs,rhs')) - where - env1 = extendCaseBndrs env b scrut con bs - -scExpr env (Let bind body) - = scBind env bind `thenUs` \ (env', bind_usg, bind') -> - scExpr env' body `thenUs` \ (body_usg, body') -> - returnUs (bind_usg `combineUsage` body_usg, Let bind' body') - -scExpr env e@(App _ _) - = let - (fn, args) = collectArgs e - in - mapAndUnzipUs (scExpr env) (fn:args) `thenUs` \ (usgs, (fn':args')) -> - -- Process the function too. It's almost always a variable, - -- but not always. In particular, if this pass follows float-in, - -- which it may, we can get - -- (let f = ...f... 
in f) arg1 arg2 - let - call_usg = case fn of - Var f | Just RecFun <- lookupScopeEnv env f - -> SCU { calls = unitVarEnv f [(cons env, args)], - occs = emptyVarEnv } - other -> nullUsage - in - returnUs (combineUsages usgs `combineUsage` call_usg, mkApps fn' args') + sc_con_app con args scrut' -- Known constructor; simplify + = do { let (_, bs, rhs) = findAlt con alts + `orElse` (DEFAULT, [], mkImpossibleExpr (coreAltsType alts)) + alt_env' = extendScSubstList env ((b,scrut') : bs `zip` trimConArgs con args) + ; scExpr alt_env' rhs } + + sc_vanilla scrut_usg scrut' -- Normal case + = do { let (alt_env,b') = extendBndrWith RecArg env b + -- Record RecArg for the components + + ; (alt_usgs, alt_occs, alts') + <- mapAndUnzip3M (sc_alt alt_env scrut' b') alts + + ; let (alt_usg, b_occ) = lookupOcc (combineUsages alt_usgs) b' + scrut_occ = foldr combineOcc b_occ alt_occs + scrut_usg' = setScrutOcc env scrut_usg scrut' scrut_occ + -- The combined usage of the scrutinee is given + -- by scrut_occ, which is passed to scScrut, which + -- in turn treats a bare-variable scrutinee specially + + ; return (alt_usg `combineUsage` scrut_usg', + Case scrut' b' (scSubstTy env ty) alts') } + + sc_alt env _scrut' b' (con,bs,rhs) + = do { let (env1, bs1) = extendBndrsWith RecArg env bs + (env2, bs2) = extendCaseBndrs env1 b' con bs1 + ; (usg,rhs') <- scExpr env2 rhs + ; let (usg', arg_occs) = lookupOccs usg bs2 + scrut_occ = case con of + DataAlt dc -> ScrutOcc (unitUFM dc arg_occs) + _ -> ScrutOcc emptyUFM + ; return (usg', scrut_occ, (con, bs2, rhs')) } + +scExpr' env (Let (NonRec bndr rhs) body) + | isTyCoVar bndr -- Type-lets may be created by doBeta + = scExpr' (extendScSubst env bndr rhs) body + + | otherwise -- Note [Local let bindings] + = do { let (body_env, bndr') = extendBndr env bndr + body_env2 = extendHowBound body_env [bndr'] RecFun + ; (body_usg, body') <- scExpr body_env2 body + + ; (rhs_usg, rhs_info) <- scRecRhs env (bndr',rhs) + + -- NB: We don't use the ForceSpecConstr mechanism (see + -- Note [Forcing specialisation]) for non-recursive bindings + -- at the moment. I'm not sure if this is the right thing to do. + ; let force_spec = False + ; (spec_usg, specs) <- specialise env force_spec + (scu_calls body_usg) + rhs_info + (SI [] 0 (Just rhs_usg)) + + ; return (body_usg { scu_calls = scu_calls body_usg `delVarEnv` bndr' } + `combineUsage` spec_usg, + mkLets [NonRec b r | (b,r) <- specInfoBinds rhs_info specs] body') + } + + +-- A *local* recursive group: see Note [Local recursive groups] +scExpr' env (Let (Rec prs) body) + = do { let (bndrs,rhss) = unzip prs + (rhs_env1,bndrs') = extendRecBndrs env bndrs + rhs_env2 = extendHowBound rhs_env1 bndrs' RecFun + force_spec = any (forceSpecBndr env) bndrs' + -- Note [Forcing specialisation] + + ; (rhs_usgs, rhs_infos) <- mapAndUnzipM (scRecRhs rhs_env2) (bndrs' `zip` rhss) + ; (body_usg, body') <- scExpr rhs_env2 body + + -- NB: start specLoop from body_usg + ; (spec_usg, specs) <- specLoop rhs_env2 force_spec + (scu_calls body_usg) rhs_infos nullUsage + [SI [] 0 (Just usg) | usg <- rhs_usgs] + -- Do not unconditionally use rhs_usgs. 
+ -- Instead use them only if we find an unspecialised call + -- See Note [Local recursive groups] + + ; let all_usg = spec_usg `combineUsage` body_usg + bind' = Rec (concat (zipWith specInfoBinds rhs_infos specs)) + + ; return (all_usg { scu_calls = scu_calls all_usg `delVarEnvList` bndrs' }, + Let bind' body') } +\end{code} +Note [Local let bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~ +It is not uncommon to find this ----------------------- -scBind :: ScEnv -> CoreBind -> UniqSM (ScEnv, ScUsage, CoreBind) -scBind env (Rec [(fn,rhs)]) - | notNull val_bndrs - = scExpr env_fn_body body `thenUs` \ (usg, body') -> - specialise env fn bndrs body' usg `thenUs` \ (rules, spec_prs) -> - -- Note body': the specialised copies should be based on the - -- optimised version of the body, in case there were - -- nested functions inside. - let - SCU { calls = calls, occs = occs } = usg - in - returnUs (extendBndr env fn, -- For the body of the letrec, just - -- extend the env with Other to record - -- that it's in scope; no funny RecFun business - SCU { calls = calls `delVarEnv` fn, occs = occs `delVarEnvList` val_bndrs}, - Rec ((fn `addIdSpecialisations` rules, mkLams bndrs body') : spec_prs)) + let $j = \x. in ...$j True...$j True... + +Here $j is an arbitrary let-bound function, but it often comes up for +join points. We might like to specialise $j for its call patterns. +Notice the difference from a letrec, where we look for call patterns +in the *RHS* of the function. Here we look for call patterns in the +*body* of the let. + +At one point I predicated this on the RHS mentioning the outer +recursive function, but that's not essential and might even be +harmful. I'm not sure. + + +\begin{code} +scApp :: ScEnv -> (InExpr, [InExpr]) -> UniqSM (ScUsage, CoreExpr) + +scApp env (Var fn, args) -- Function is a variable + = ASSERT( not (null args) ) + do { args_w_usgs <- mapM (scExpr env) args + ; let (arg_usgs, args') = unzip args_w_usgs + arg_usg = combineUsages arg_usgs + ; case scSubstId env fn of + fn'@(Lam {}) -> scExpr (zapScSubst env) (doBeta fn' args') + -- Do beta-reduction and try again + + Var fn' -> return (arg_usg `combineUsage` fn_usg, mkApps (Var fn') args') + where + fn_usg = case lookupHowBound env fn' of + Just RecFun -> SCU { scu_calls = unitVarEnv fn' [(sc_vals env, args')], + scu_occs = emptyVarEnv } + Just RecArg -> SCU { scu_calls = emptyVarEnv, + scu_occs = unitVarEnv fn' (ScrutOcc emptyUFM) } + Nothing -> nullUsage + + + other_fn' -> return (arg_usg, mkApps other_fn' args') } + -- NB: doing this ignores any usage info from the substituted + -- function, but I don't think that matters. If it does + -- we can fix it. where - (bndrs,body) = collectBinders rhs - val_bndrs = filter isId bndrs - env_fn_body = extendRecBndr env fn bndrs + doBeta :: OutExpr -> [OutExpr] -> OutExpr + -- ToDo: adjust for System IF + doBeta (Lam bndr body) (arg : args) = Let (NonRec bndr arg) (doBeta body args) + doBeta fn args = mkApps fn args + +-- The function is almost always a variable, but not always. +-- In particular, if this pass follows float-in, +-- which it may, we can get +-- (let f = ...f... 
in f) arg1 arg2 +scApp env (other_fn, args) + = do { (fn_usg, fn') <- scExpr env other_fn + ; (arg_usgs, args') <- mapAndUnzipM (scExpr env) args + ; return (combineUsages arg_usgs `combineUsage` fn_usg, mkApps fn' args') } -scBind env (Rec prs) - = mapAndUnzipUs do_one prs `thenUs` \ (usgs, prs') -> - returnUs (extendBndrs env (map fst prs), combineUsages usgs, Rec prs') +---------------------- +scTopBind :: ScEnv -> CoreBind -> UniqSM (ScEnv, CoreBind) +scTopBind env (Rec prs) + | Just threshold <- sc_size env + , not force_spec + , not (all (couldBeSmallEnoughToInline threshold) rhss) + -- No specialisation + = do { let (rhs_env,bndrs') = extendRecBndrs env bndrs + ; (_, rhss') <- mapAndUnzipM (scExpr rhs_env) rhss + ; return (rhs_env, Rec (bndrs' `zip` rhss')) } + | otherwise -- Do specialisation + = do { let (rhs_env1,bndrs') = extendRecBndrs env bndrs + rhs_env2 = extendHowBound rhs_env1 bndrs' RecFun + + ; (rhs_usgs, rhs_infos) <- mapAndUnzipM (scRecRhs rhs_env2) (bndrs' `zip` rhss) + ; let rhs_usg = combineUsages rhs_usgs + + ; (_, specs) <- specLoop rhs_env2 force_spec + (scu_calls rhs_usg) rhs_infos nullUsage + [SI [] 0 Nothing | _ <- bndrs] + + ; return (rhs_env1, -- For the body of the letrec, delete the RecFun business + Rec (concat (zipWith specInfoBinds rhs_infos specs))) } where - do_one (bndr,rhs) = scExpr env rhs `thenUs` \ (usg, rhs') -> - returnUs (usg, (bndr,rhs')) + (bndrs,rhss) = unzip prs + force_spec = any (forceSpecBndr env) bndrs + -- Note [Forcing specialisation] + +scTopBind env (NonRec bndr rhs) + = do { (_, rhs') <- scExpr env rhs + ; let (env1, bndr') = extendBndr env bndr + env2 = extendValEnv env1 bndr' (isValue (sc_vals env) rhs') + ; return (env2, NonRec bndr' rhs') } -scBind env (NonRec bndr rhs) - = scExpr env rhs `thenUs` \ (usg, rhs') -> - returnUs (extendBndr env bndr, usg, NonRec bndr rhs') +---------------------- +scRecRhs :: ScEnv -> (OutId, InExpr) -> UniqSM (ScUsage, RhsInfo) +scRecRhs env (bndr,rhs) + = do { let (arg_bndrs,body) = collectBinders rhs + (body_env, arg_bndrs') = extendBndrsWith RecArg env arg_bndrs + ; (body_usg, body') <- scExpr body_env body + ; let (rhs_usg, arg_occs) = lookupOccs body_usg arg_bndrs' + ; return (rhs_usg, RI bndr (mkLams arg_bndrs' body') + arg_bndrs body arg_occs) } + -- The arg_occs says how the visible, + -- lambda-bound binders of the RHS are used + -- (including the TyVar binders) + -- Two pats are the same if they match both ways + +---------------------- +specInfoBinds :: RhsInfo -> SpecInfo -> [(Id,CoreExpr)] +specInfoBinds (RI fn new_rhs _ _ _) (SI specs _ _) + = [(id,rhs) | OS _ _ id rhs <- specs] ++ + [(fn `addIdSpecialisations` rules, new_rhs)] + where + rules = [r | OS _ r _ _ <- specs] ---------------------- +varUsage :: ScEnv -> OutVar -> ArgOcc -> ScUsage varUsage env v use - | Just RecArg <- lookupScopeEnv env v = SCU { calls = emptyVarEnv, - occs = unitVarEnv v use } + | Just RecArg <- lookupHowBound env v = SCU { scu_calls = emptyVarEnv + , scu_occs = unitVarEnv v use } | otherwise = nullUsage \end{code} %************************************************************************ %* * -\subsection{The specialiser} + The specialiser itself %* * %************************************************************************ \begin{code} -specialise :: ScEnv - -> Id -- Functionn - -> [CoreBndr] -> CoreExpr -- Its RHS - -> ScUsage -- Info on usage - -> UniqSM ([CoreRule], -- Rules - [(Id,CoreExpr)]) -- Bindings - -specialise env fn bndrs body (SCU {calls=calls, occs=occs}) - = getUs `thenUs` \ us -> - let 
- all_calls = lookupVarEnv calls fn `orElse` [] - - good_calls :: [[CoreArg]] - good_calls = [ pats - | (con_env, call_args) <- all_calls, - call_args `lengthAtLeast` n_bndrs, -- App is saturated - let call = bndrs `zip` call_args, - any (good_arg con_env occs) call, -- At least one arg is a constr app - let (_, pats) = argsToPats con_env us call_args - ] - in - mapAndUnzipUs (spec_one env fn (mkLams bndrs body)) - (nubBy same_call good_calls `zip` [1..]) - where - n_bndrs = length bndrs - same_call as1 as2 = and (zipWith tcEqExpr as1 as2) +data RhsInfo = RI OutId -- The binder + OutExpr -- The new RHS + [InVar] InExpr -- The *original* RHS (\xs.body) + -- Note [Specialise original body] + [ArgOcc] -- Info on how the xs occur in body + +data SpecInfo = SI [OneSpec] -- The specialisations we have generated + + Int -- Length of specs; used for numbering them + + (Maybe ScUsage) -- Nothing => we have generated specialisations + -- from calls in the *original* RHS + -- Just cs => we haven't, and this is the usage + -- of the original RHS + -- See Note [Local recursive groups] + + -- One specialisation: Rule plus definition +data OneSpec = OS CallPat -- Call pattern that generated this specialisation + CoreRule -- Rule connecting original id with the specialisation + OutId OutExpr -- Spec id + its rhs + + +specLoop :: ScEnv + -> Bool -- force specialisation? + -- Note [Forcing specialisation] + -> CallEnv + -> [RhsInfo] + -> ScUsage -> [SpecInfo] -- One per binder; acccumulating parameter + -> UniqSM (ScUsage, [SpecInfo]) -- ...ditto... +specLoop env force_spec all_calls rhs_infos usg_so_far specs_so_far + = do { specs_w_usg <- zipWithM (specialise env force_spec all_calls) rhs_infos specs_so_far + ; let (new_usg_s, all_specs) = unzip specs_w_usg + new_usg = combineUsages new_usg_s + new_calls = scu_calls new_usg + all_usg = usg_so_far `combineUsage` new_usg + ; if isEmptyVarEnv new_calls then + return (all_usg, all_specs) + else + specLoop env force_spec new_calls rhs_infos all_usg all_specs } + +specialise + :: ScEnv + -> Bool -- force specialisation? + -- Note [Forcing specialisation] + -> CallEnv -- Info on calls + -> RhsInfo + -> SpecInfo -- Original RHS plus patterns dealt with + -> UniqSM (ScUsage, SpecInfo) -- New specialised versions and their usage + +-- Note: the rhs here is the optimised version of the original rhs +-- So when we make a specialised copy of the RHS, we're starting +-- from an RHS whose nested functions have been optimised already. 
+ +specialise env force_spec bind_calls (RI fn _ arg_bndrs body arg_occs) + spec_info@(SI specs spec_count mb_unspec) + | not (isBottomingId fn) -- Note [Do not specialise diverging functions] + , not (isNeverActive (idInlineActivation fn)) -- See Note [Transfer activation] + , notNull arg_bndrs -- Only specialise functions + , Just all_calls <- lookupVarEnv bind_calls fn + = do { (boring_call, pats) <- callsToPats env specs arg_occs all_calls +-- ; pprTrace "specialise" (vcat [ ppr fn <+> text "with" <+> int (length pats) <+> text "good patterns" +-- , text "arg_occs" <+> ppr arg_occs +-- , text "calls" <+> ppr all_calls +-- , text "good pats" <+> ppr pats]) $ +-- return () + + -- Bale out if too many specialisations + ; let n_pats = length pats + spec_count' = n_pats + spec_count + ; case sc_count env of + Just max | not force_spec && spec_count' > max + -> pprTrace "SpecConstr" msg $ + return (nullUsage, spec_info) + where + msg = vcat [ sep [ ptext (sLit "Function") <+> quotes (ppr fn) + , nest 2 (ptext (sLit "has") <+> + speakNOf spec_count' (ptext (sLit "call pattern")) <> comma <+> + ptext (sLit "but the limit is") <+> int max) ] + , ptext (sLit "Use -fspec-constr-count=n to set the bound") + , extra ] + extra | not opt_PprStyle_Debug = ptext (sLit "Use -dppr-debug to see specialisations") + | otherwise = ptext (sLit "Specialisations:") <+> ppr (pats ++ [p | OS p _ _ _ <- specs]) + + _normal_case -> do { + + let spec_env = decreaseSpecCount env n_pats + ; (spec_usgs, new_specs) <- mapAndUnzipM (spec_one spec_env fn arg_bndrs body) + (pats `zip` [spec_count..]) + -- See Note [Specialise original body] + + ; let spec_usg = combineUsages spec_usgs + (new_usg, mb_unspec') + = case mb_unspec of + Just rhs_usg | boring_call -> (spec_usg `combineUsage` rhs_usg, Nothing) + _ -> (spec_usg, mb_unspec) + + ; return (new_usg, SI (new_specs ++ specs) spec_count' mb_unspec') } } + | otherwise + = return (nullUsage, spec_info) -- The boring case ---------------------- -good_arg :: ConstrEnv -> IdEnv ArgOcc -> (CoreBndr, CoreArg) -> Bool --- See Note [Good arguments] above -good_arg con_env arg_occs (bndr, arg) - = case is_con_app_maybe con_env arg of - Just _ -> bndr_usg_ok arg_occs bndr arg - other -> False - -bndr_usg_ok :: IdEnv ArgOcc -> Var -> CoreArg -> Bool -bndr_usg_ok arg_occs bndr arg - = case lookupVarEnv arg_occs bndr of - Just CaseScrut -> True -- Used only by case scrutiny - Just Both -> case arg of -- Used by case and elsewhere - App _ _ -> True -- so the arg should be an explicit con app - other -> False - other -> False -- Not used, or used wonkily - --------------------- spec_one :: ScEnv - -> Id -- Function - -> CoreExpr -- Rhs of the original function - -> ([CoreArg], Int) - -> UniqSM (CoreRule, (Id,CoreExpr)) -- Rule and binding + -> OutId -- Function + -> [InVar] -- Lambda-binders of RHS; should match patterns + -> InExpr -- Body of the original function + -> (CallPat, Int) + -> UniqSM (ScUsage, OneSpec) -- Rule and binding -- spec_one creates a specialised copy of the function, together -- with a rule for using it. I'm very proud of how short this @@ -686,7 +1238,8 @@ spec_one :: ScEnv [c::*, v::(b,c) are presumably bound by the (...) part] ==> f_spec = /\ b c \ v::(b,c) hw::[(a,(b,c))] -> - (...entire RHS of f...) (b,c) ((:) (a,(b,c)) (x,v) hw) + (...entire body of f...) 
[b -> (b,c), + y -> ((:) (a,(b,c)) (x,v) hw)] RULE: forall b::* c::*, -- Note, *not* forall a, x v::(b,c), @@ -695,48 +1248,109 @@ spec_one :: ScEnv f (b,c) ((:) (a,(b,c)) (x,v) hw) = f_spec b c v hw -} -spec_one env fn rhs (pats, rule_number) - = getUniqueUs `thenUs` \ spec_uniq -> - let - fn_name = idName fn - fn_loc = nameSrcLoc fn_name - spec_occ = mkSpecOcc (nameOccName fn_name) - pat_fvs = varSetElems (exprsFreeVars pats) - vars_to_bind = filter not_avail pat_fvs - -- See Note [Shadowing] at the top +spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number) + = do { spec_uniq <- getUniqueUs + ; let spec_env = extendScSubstList (extendScInScope env qvars) + (arg_bndrs `zip` pats) + fn_name = idName fn + fn_loc = nameSrcSpan fn_name + spec_occ = mkSpecOcc (nameOccName fn_name) + rule_name = mkFastString ("SC:" ++ showSDoc (ppr fn <> int rule_number)) + spec_name = mkInternalName spec_uniq spec_occ fn_loc +-- ; pprTrace "{spec_one" (ppr (sc_count env) <+> ppr fn <+> ppr pats <+> text "-->" <+> ppr spec_name) $ +-- return () + + -- Specialise the body + ; (spec_usg, spec_body) <- scExpr spec_env body + +-- ; pprTrace "done spec_one}" (ppr fn) $ +-- return () + + -- And build the results + ; let spec_id = mkLocalId spec_name (mkPiTypes spec_lam_args body_ty) + `setIdStrictness` spec_str -- See Note [Transfer strictness] + `setIdArity` count isId spec_lam_args + spec_str = calcSpecStrictness fn spec_lam_args pats + (spec_lam_args, spec_call_args) = mkWorkerArgs qvars body_ty + -- Usual w/w hack to avoid generating + -- a spec_rhs of unlifted type and no args + + spec_rhs = mkLams spec_lam_args spec_body + body_ty = exprType spec_body + rule_rhs = mkVarApps (Var spec_id) spec_call_args + inline_act = idInlineActivation fn + rule = mkLocalRule rule_name inline_act fn_name qvars pats rule_rhs + -- See Note [Transfer activation] + ; return (spec_usg, OS call_pat rule spec_id spec_rhs) } + +calcSpecStrictness :: Id -- The original function + -> [Var] -> [CoreExpr] -- Call pattern + -> StrictSig -- Strictness of specialised thing +-- See Note [Transfer strictness] +calcSpecStrictness fn qvars pats + = StrictSig (mkTopDmdType spec_dmds TopRes) + where + spec_dmds = [ lookupVarEnv dmd_env qv `orElse` lazyDmd | qv <- qvars, isId qv ] + StrictSig (DmdType _ dmds _) = idStrictness fn + + dmd_env = go emptyVarEnv dmds pats + + go env ds (Type {} : pats) = go env ds pats + go env (d:ds) (pat : pats) = go (go_one env d pat) ds pats + go env _ _ = env + + go_one env d (Var v) = extendVarEnv_C both env v d + go_one env (Box d) e = go_one env d e + go_one env (Eval (Prod ds)) e + | (Var _, args) <- collectArgs e = go env ds args + go_one env _ _ = env - not_avail v = not (v `elemVarEnv` scope env) - -- Put the type variables first; the type of a term - -- variable may mention a type variable - (tvs, ids) = partition isTyVar vars_to_bind - bndrs = tvs ++ ids - spec_body = mkApps rhs pats - body_ty = exprType spec_body - - (spec_lam_args, spec_call_args) = mkWorkerArgs bndrs body_ty - -- Usual w/w hack to avoid generating - -- a spec_rhs of unlifted type and no args - - rule_name = mkFastString ("SC:" ++ showSDoc (ppr fn <> int rule_number)) - spec_rhs = mkLams spec_lam_args spec_body - spec_id = mkUserLocal spec_occ spec_uniq (mkPiTypes spec_lam_args body_ty) fn_loc - rule_rhs = mkVarApps (Var spec_id) spec_call_args - rule = mkLocalRule rule_name specConstrActivation fn_name bndrs pats rule_rhs - in - returnUs (rule, (spec_id, spec_rhs)) - --- In which phase should the specialise-constructor 
rules be active? --- Originally I made them always-active, but Manuel found that --- this defeated some clever user-written rules. So Plan B --- is to make them active only in Phase 0; after all, currently, --- the specConstr transformation is only run after the simplifier --- has reached Phase 0. In general one would want it to be --- flag-controllable, but for now I'm leaving it baked in --- [SLPJ Oct 01] -specConstrActivation :: Activation -specConstrActivation = ActiveAfter 0 -- Baked in; see comments above \end{code} +Note [Specialise original body] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The RhsInfo for a binding keeps the *original* body of the binding. We +must specialise that, *not* the result of applying specExpr to the RHS +(which is also kept in RhsInfo). Otherwise we end up specialising a +specialised RHS, and that can lead directly to exponential behaviour. + +Note [Transfer activation] +~~~~~~~~~~~~~~~~~~~~~~~~~~ + This note is for SpecConstr, but exactly the same thing + happens in the overloading specialiser; see + Note [Auto-specialisation and RULES] in Specialise. + +In which phase should the specialise-constructor rules be active? +Originally I made them always-active, but Manuel found that this +defeated some clever user-written rules. Then I made them active only +in Phase 0; after all, currently, the specConstr transformation is +only run after the simplifier has reached Phase 0, but that meant +that specialisations didn't fire inside wrappers; see test +simplCore/should_compile/spec-inline. + +So now I just use the inline-activation of the parent Id, as the +activation for the specialiation RULE, just like the main specialiser; + +This in turn means there is no point in specialising NOINLINE things, +so we test for that. + +Note [Transfer strictness] +~~~~~~~~~~~~~~~~~~~~~~~~~~ +We must transfer strictness information from the original function to +the specialised one. Suppose, for example + + f has strictness SS + and a RULE f (a:as) b = f_spec a as b + +Now we want f_spec to have strictess LLS, otherwise we'll use call-by-need +when calling f_spec instead of call-by-value. And that can result in +unbounded worsening in space (cf the classic foldl vs foldl') + +See Trac #3437 for a good example. + +The function calcSpecStrictness performs the calculation. + + %************************************************************************ %* * \subsection{Argument analysis} @@ -746,68 +1360,276 @@ specConstrActivation = ActiveAfter 0 -- Baked in; see comments above This code deals with analysing call-site arguments to see whether they are constructor applications. 
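+For example (an illustrative sketch): a recursive call
+    f (x:xs) n
+whose first parameter is scrutinised in f's body gives rise to a call pattern
+roughly of the form
+    ([sc1, sc2, sc3], [sc1 : sc2, sc3])
+The cons constructor is kept as a skeleton, while its components and the
+uninteresting second argument are replaced by fresh wild-card variables (the
+"sc" locals made by wildCardPat), which become the variables quantified over
+in the RULE.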
+
 \begin{code}
+type CallPat = ([Var], [CoreExpr])     -- Quantified variables and arguments
+
+
+callsToPats :: ScEnv -> [OneSpec] -> [ArgOcc] -> [Call] -> UniqSM (Bool, [CallPat])
+       -- Result has no duplicate patterns,
+       -- nor ones mentioned in done_pats
+       -- Bool indicates that there was at least one boring pattern
+callsToPats env done_specs bndr_occs calls
+  = do  { mb_pats <- mapM (callToPats env bndr_occs) calls
+
+        ; let good_pats :: [([Var], [CoreArg])]
+              good_pats = catMaybes mb_pats
+              done_pats = [p | OS p _ _ _ <- done_specs]
+              is_done p = any (samePat p) done_pats
+
+        ; return (any isNothing mb_pats,
+                  filterOut is_done (nubBy samePat good_pats)) }
+
+callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
+       -- The [Var] is the variables to quantify over in the rule
+       --     Type variables come first, since they may scope
+       --     over the following term variables
+       -- The [CoreExpr] are the argument patterns for the rule
+callToPats env bndr_occs (con_env, args)
+  | length args < length bndr_occs     -- Check saturated
+  = return Nothing
+  | otherwise
+  = do  { let in_scope = substInScope (sc_subst env)
+        ; prs <- argsToPats env in_scope con_env (args `zip` bndr_occs)
+        ; let (interesting_s, pats) = unzip prs
+              pat_fvs = varSetElems (exprsFreeVars pats)
+              qvars   = filterOut (`elemInScopeSet` in_scope) pat_fvs
+                        -- Quantify over variables that are not in scope
+                        -- at the call site
+                        -- See Note [Shadowing] at the top
+
+              (tvs, ids) = partition isTyCoVar qvars
+              qvars'     = tvs ++ ids
+                        -- Put the type variables first; the type of a term
+                        -- variable may mention a type variable
+
+        ; -- pprTrace "callToPats" (ppr args $$ ppr prs $$ ppr bndr_occs) $
+          if or interesting_s
+          then return (Just (qvars', pats))
+          else return Nothing }
+
    -- argToPat takes an actual argument, and returns an abstracted
    -- version, consisting of just the "constructor skeleton" of the
    -- argument, with non-constructor sub-expressions replaced by new
    -- placeholder variables.  For example:
    --    C a (D (f x) (g y))  ==>  C p1 (D p2 p3)
-argToPat :: ConstrEnv -> UniqSupply -> CoreArg -> (UniqSupply, CoreExpr)
-argToPat env us (Type ty)
-  = (us, Type ty)
+argToPat :: ScEnv
+         -> InScopeSet              -- What's in scope at the fn defn site
+         -> ValueEnv                -- ValueEnv at the call site
+         -> CoreArg                 -- A call arg (or component thereof)
+         -> ArgOcc
+         -> UniqSM (Bool, CoreArg)
+-- Returns (interesting, pat),
+-- where pat is the pattern derived from the argument
+--    interesting=True if the pattern is non-trivial (not a variable or type)
+-- E.g.   x:xs        --> (True, x:xs)
+--        f xs        --> (False, w)        where w is a fresh wildcard
+--        (f xs, 'c') --> (True, (w, 'c'))  where w is a fresh wildcard
+--        \x. x+y     --> (True, \x. x+y)
+--        lvl7        --> (True, lvl7)      if lvl7 is bound
+--                                          somewhere further out
+
+argToPat _env _in_scope _val_env arg@(Type {}) _arg_occ
+  = return (False, arg)
+
+argToPat env in_scope val_env (Note _ arg) arg_occ
+  = argToPat env in_scope val_env arg arg_occ
+       -- Note [Notes in call patterns]
+       -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+       -- Ignore Notes.  In particular, we want to ignore any InlineMe notes
+       -- Perhaps we should not ignore profiling notes, but I'm going to
+       -- ride roughshod over them all for now.
+       -- See Note [Notes in RULE matching] in Rules
+
+argToPat env in_scope val_env (Let _ arg) arg_occ
+  = argToPat env in_scope val_env arg arg_occ
+       -- See Note [Matching lets] in Rule.lhs
+       -- Look through let expressions
+       -- e.g.    f (let v = rhs in (v,w))
+       -- Here we can specialise for f (v,w)
+       -- because the rule-matcher will look through the let.
+
+{- Disabled; see Note [Matching cases] in Rule.lhs
+argToPat env in_scope val_env (Case scrut _ _ [(_, _, rhs)]) arg_occ
+  | exprOkForSpeculation scrut -- See Note [Matching cases] in Rule.lhs
+  = argToPat env in_scope val_env rhs arg_occ
+-}
-argToPat env us arg
-  | Just (CV dc args) <- is_con_app_maybe env arg
-  = let
-       (us',args') = argsToPats env us args
-    in
-    (us', mk_con_app dc args')
+argToPat env in_scope val_env (Cast arg co) arg_occ
+  | not (ignoreType env ty2)
+  = do  { (interesting, arg') <- argToPat env in_scope val_env arg arg_occ
+        ; if not interesting then
+             wildCardPat ty2
+          else do
+        { -- Make a wild-card pattern for the coercion
+          uniq <- getUniqueUs
+        ; let co_name = mkSysTvName uniq (fsLit "sg")
+              co_var  = mkCoVar co_name (mkCoKind ty1 ty2)
+        ; return (interesting, Cast arg' (mkTyVarTy co_var)) } }
+  where
+    (ty1, ty2) = coercionKind co
-argToPat env us (Var v)        -- Don't uniqify existing vars,
-  = (us, Var v)                -- so that we can spot when we pass them twice
+
-argToPat env us arg
-  = (us1, Var (mkSysLocal FSLIT("sc") (uniqFromSupply us2) (exprType arg)))
+{- Disabling lambda specialisation for now
+   It's fragile, and the spec_loop can be infinite
+argToPat in_scope val_env arg arg_occ
+  | is_value_lam arg
+  = return (True, arg)
   where
-    (us1,us2) = splitUniqSupply us
+    is_value_lam (Lam v e)        -- Spot a value lambda, even if
+       | isId v    = True         -- it is inside a type lambda
+       | otherwise = is_value_lam e
+    is_value_lam other = False
+-}
-argsToPats :: ConstrEnv -> UniqSupply -> [CoreArg] -> (UniqSupply, [CoreExpr])
-argsToPats env us args = mapAccumL (argToPat env) us args
+
+  -- Check for a constructor application
+  -- NB: this *precedes* the Var case, so that we catch nullary constrs
+argToPat env in_scope val_env arg arg_occ
+  | Just (ConVal dc args) <- isValue val_env arg
+  , not (ignoreAltCon env dc)
+  , case arg_occ of
+       ScrutOcc _ -> True               -- Used only by case scrutinee
+       BothOcc    -> case arg of        -- Used elsewhere
+                        App {} -> True  --     see Note [Reboxing]
+                        _other -> False
+       _other     -> False      -- No point; the arg is not decomposed
+  = do  { args' <- argsToPats env in_scope val_env (args `zip` conArgOccs arg_occ dc)
+        ; return (True, mk_con_app dc (map snd args')) }
+
+  -- Check if the argument is a variable that
+  -- is in scope at the function definition site
+  -- It's worth specialising on this if
+  --    (a) it's used in an interesting way in the body
+  --    (b) we know what its value is
+argToPat env in_scope val_env (Var v) arg_occ
+  | case arg_occ of { UnkOcc -> False; _other -> True },        -- (a)
+    is_value,                                                   -- (b)
+    not (ignoreType env (varType v))
+  = return (True, Var v)
+  where
+    is_value
+       | isLocalId v = v `elemInScopeSet` in_scope
+                       && isJust (lookupVarEnv val_env v)
+               -- Local variables have values in val_env
+       | otherwise   = isValueUnfolding (idUnfolding v)
+               -- Imports have unfoldings
+
+-- I'm really not sure what this comment means
+-- And by not wild-carding we tend to get forall'd
+-- variables that are in scope, which in turn can
+-- expose the weakness in let-matching
+-- See Note [Matching lets] in Rules
+
+  -- Check for a variable bound inside the function.
+  -- Don't make a wild-card, because we may usefully share
+  --    e.g.  f a = let x = ... in f (x,x)
+  -- NB: this case follows the lambda and con-app cases!!
+-- argToPat _in_scope _val_env (Var v) _arg_occ
+--   = return (False, Var v)
+       -- SLPJ : disabling this to avoid proliferation of versions
+       -- also works badly when thinking about seeding the loop
+       -- from the body of the let
+       --     f x y = letrec g z = ... in g (x,y)
+       -- We don't want to specialise for that *particular* x,y
+
+  -- The default case: make a wild-card
+argToPat _env _in_scope _val_env arg _arg_occ
+  = wildCardPat (exprType arg)
+
+wildCardPat :: Type -> UniqSM (Bool, CoreArg)
+wildCardPat ty = do { uniq <- getUniqueUs
+                    ; let id = mkSysLocal (fsLit "sc") uniq ty
+                    ; return (False, Var id) }
+
+argsToPats :: ScEnv -> InScopeSet -> ValueEnv
+           -> [(CoreArg, ArgOcc)]
+           -> UniqSM [(Bool, CoreArg)]
+argsToPats env in_scope val_env args
+  = mapM do_one args
+  where
+    do_one (arg,occ) = argToPat env in_scope val_env arg occ
 \end{code}

 \begin{code}
-is_con_app_maybe :: ConstrEnv -> CoreExpr -> Maybe ConValue
-is_con_app_maybe env (Var v)
-  = case lookupVarEnv env v of
-       Just stuff -> Just stuff   -- You might think we could look in the idUnfolding here
+isValue :: ValueEnv -> CoreExpr -> Maybe Value
+isValue _env (Lit lit)
+  = Just (ConVal (LitAlt lit) [])
+
+isValue env (Var v)
+  | Just stuff <- lookupVarEnv env v
+  = Just stuff        -- You might think we could look in the idUnfolding here
                       -- but that doesn't take account of which branch of a
                       -- case we are in, which is the whole point
-       Nothing | isCheapUnfolding unf
-               -> is_con_app_maybe env (unfoldingTemplate unf)
-               where
-                 unf = idUnfolding v
-       -- However we do want to consult the unfolding as well,
-       -- for let-bound constructors!
+  | not (isLocalId v) && isCheapUnfolding unf
+  = isValue env (unfoldingTemplate unf)
+  where
+    unf = idUnfolding v
+       -- However we do want to consult the unfolding
+       -- as well, for let-bound constructors!
+
+isValue env (Lam b e)
+  | isTyCoVar b = case isValue env e of
+                    Just _  -> Just LambdaVal
+                    Nothing -> Nothing
+  | otherwise   = Just LambdaVal
-       other -> Nothing
+isValue _env expr       -- Maybe it's a constructor application
+  | (Var fun, args) <- collectArgs expr
+  = case isDataConWorkId_maybe fun of
-is_con_app_maybe env (Lit lit)
-  = Just (CV (LitAlt lit) [])
+       Just con | args `lengthAtLeast` dataConRepArity con
+                -- Check saturated; might be > because the
+                -- arity excludes type args
+                -> Just (ConVal (DataAlt con) args)
-is_con_app_maybe env expr
-  = case collectArgs expr of
-       (Var fun, args) | Just con <- isDataConWorkId_maybe fun,
-                         args `lengthAtLeast` dataConRepArity con
-               -- Might be > because the arity excludes type args
-               -> Just (CV (DataAlt con) args)
+       _other | valArgCount args < idArity fun
+                -- Under-applied function
+              -> Just LambdaVal        -- Partial application
-       other -> Nothing
+       _other -> Nothing
+
+isValue _env _expr = Nothing

 mk_con_app :: AltCon -> [CoreArg] -> CoreExpr
 mk_con_app (LitAlt lit)  []   = Lit lit
 mk_con_app (DataAlt con) args = mkConApp con args
+mk_con_app _other _args = panic "SpecConstr.mk_con_app"
+
+samePat :: CallPat -> CallPat -> Bool
+samePat (vs1, as1) (vs2, as2)
+  = all2 same as1 as2
+  where
+    same (Var v1) (Var v2)
+       | v1 `elem` vs1 = v2 `elem` vs2
+       | v2 `elem` vs2 = False
+       | otherwise     = v1 == v2
+
+    same (Lit l1)    (Lit l2)    = l1==l2
+    same (App f1 a1) (App f2 a2) = same f1 f2 && same a1 a2
+
+    same (Type {}) (Type {}) = True     -- Note [Ignore type differences]
+    same (Note _ e1) e2 = same e1 e2    -- Ignore casts and notes
+    same (Cast e1 _) e2 = same e1 e2
+    same e1 (Note _ e2) = same e1 e2
+    same e1 (Cast e2 _) = same e1 e2
+
+    same e1 e2 = WARN( bad e1 || bad e2, ppr e1 $$ ppr e2)
+                 False  -- Let, lambda, case should not occur
+    bad (Case {}) = True
+    bad (Let {})  = True
+    bad (Lam {})  = True
+    bad _other    = False
 \end{code}
+
+Note [Ignore type differences]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We do not want to generate specialisations where the call patterns
+differ only in their type arguments!  Not only is it utterly useless,
+but it also means that (with polymorphic recursion) we can generate
+an infinite number of specialisations.  Example is Data.Sequence.adjustTree,
+I think.
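To make the point of samePat and Note [Ignore type differences] concrete,
here is a small standalone model (a toy Pat type and a made-up samePattern,
deliberately cruder than the real check, which also has to respect the
quantified-variable lists): if type arguments always compare equal, the calls
produced by polymorphic recursion all collapse to one call pattern, so we
generate a single specialisation rather than an unbounded family of them.

  -- Toy sketch only: 'Pat' and 'samePattern' are invented for illustration.
  data Pat = PVar String          -- quantified pattern variable
           | PTy  String          -- a type argument
           | PCon String [Pat]    -- constructor/function application
           deriving (Eq, Show)

  samePattern :: Pat -> Pat -> Bool
  samePattern (PTy _)      (PTy _)      = True    -- ignore type differences
  samePattern (PVar _)     (PVar _)     = True    -- cruder than the real check
  samePattern (PCon c1 ps) (PCon c2 qs) = c1 == c2
                                       && length ps == length qs
                                       && and (zipWith samePattern ps qs)
  samePattern p            q            = p == q

  -- With polymorphic recursion the recursive calls may look like
  --    f @Int (x:xs),  f @[Int] (x:xs),  f @[[Int]] (x:xs), ...
  -- but they all give rise to the same pattern:
  main :: IO ()
  main = print [ samePattern (callAt "Int")   (callAt "[Int]")
               , samePattern (callAt "[Int]") (callAt "[[Int]]") ]
    where
      callAt ty = PCon "f" [PTy ty, PCon ":" [PVar "x", PVar "xs"]]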