X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=compiler%2Fspecialise%2FSpecialise.lhs;h=f6f85a114099ce6c91ae90cdc4b5aee8286a51ac;hp=3564c27380d1d7229bcb74dfc7a454718e6d6461;hb=707ea5881703d680155aab268bdbf7edc113e3b1;hpb=10f18550c3684368b9d8e5b7adcccc14994cf170

diff --git a/compiler/specialise/Specialise.lhs b/compiler/specialise/Specialise.lhs
index 3564c27..f6f85a1 100644
--- a/compiler/specialise/Specialise.lhs
+++ b/compiler/specialise/Specialise.lhs
@@ -4,48 +4,35 @@
 \section[Specialise]{Stamping out overloading, and (optionally) polymorphism}
 
 \begin{code}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
 module Specialise ( specProgram ) where
 
 #include "HsVersions.h"
 
-import DynFlags	( DynFlags, DynFlag(..) )
-import Id		( Id, idName, idType, mkUserLocal, 
-			  idInlinePragma, setInlinePragma ) 
-import TcType		( Type, mkTyVarTy, tcSplitSigmaTy, 
-			  tyVarsOfTypes, tyVarsOfTheta, isClassPred,
-			  tcCmpType, isUnLiftedType
-			)
-import CoreSubst	( Subst, mkEmptySubst, extendTvSubstList, lookupIdSubst,
-			  substBndr, substBndrs, substTy, substInScope,
-			  cloneIdBndr, cloneIdBndrs, cloneRecIdBndrs
-			) 
+import Id
+import TcType
+import CoreMonad
+import CoreSubst 
+import CoreUnfold
 import VarSet
 import VarEnv
 import CoreSyn
 import Rules
-import CoreUtils	( applyTypeToArgs, mkPiTypes )
+import CoreUtils	( exprIsTrivial, applyTypeToArgs, mkPiTypes )
 import CoreFVs		( exprFreeVars, exprsFreeVars, idFreeVars )
-import CoreLint		( showPass, endPass )
-import UniqSupply	( UniqSupply,
-			  UniqSM, initUs_,
-			  MonadUnique(..)
-			)
+import UniqSupply	( UniqSM, initUs_, MonadUnique(..) )
 import Name
 import MkId		( voidArgId, realWorldPrimId )
-import FiniteMap
-import Maybes		( catMaybes, maybeToBool )
-import ErrUtils		( dumpIfSet_dyn )
+import Maybes		( catMaybes, isJust )
+import BasicTypes	
+import HscTypes
 import Bag
 import Util
 import Outputable
 import FastString
 
+import Data.Map (Map)
+import qualified Data.Map as Map
+import qualified FiniteMap as Map
 \end{code}
 
 %************************************************************************
@@ -486,8 +473,6 @@ of this is permanently ruled out.
 Still, this is no great hardship, because we intend to eliminate
 overloading altogether anyway!
 
-
-
 A note about non-tyvar dictionaries
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Some Ids have types like
@@ -512,7 +497,7 @@ Should we specialise wrt this compound-type dictionary?  We used to say
 But it is simpler and more uniform to specialise wrt these dicts too;
 and in future GHC is likely to support full fledged type signatures 
 like
-	f ;: Eq [(a,b)] => ...
+	f :: Eq [(a,b)] => ...
 
 
 %************************************************************************
@@ -575,34 +560,98 @@ Hence, the invariant is this:
 %************************************************************************
 
 \begin{code}
-specProgram :: DynFlags -> UniqSupply -> [CoreBind] -> IO [CoreBind]
-specProgram dflags us binds = do
-   
-	showPass dflags "Specialise"
+specProgram :: ModGuts -> CoreM ModGuts
+specProgram guts 
+  = do { hpt_rules <- getRuleBase
+       ; let local_rules = mg_rules guts
+             rule_base = extendRuleBaseList hpt_rules (mg_rules guts)
 
-	let binds' = initSM us (do (binds', uds') <- go binds
-			           return (dumpAllDictBinds uds' binds'))
+	     -- Specialise the bindings of this module
+       ; (binds', uds) <- runSpecM (go (mg_binds guts))
 
-	endPass dflags "Specialise" Opt_D_dump_spec binds'
+	     -- Specialise imported functions 
+       ; (new_rules, spec_binds) <- specImports emptyVarSet rule_base uds
 
-	dumpIfSet_dyn dflags Opt_D_dump_rules "Top-level specialisations"
-		      (pprRulesForUser (rulesOfBinds binds'))
-
-	return binds'
+       ; return (guts { mg_binds = spec_binds ++ binds'
+                      , mg_rules = local_rules ++ new_rules }) }
   where
 	-- We need to start with a Subst that knows all the things
 	-- that are in scope, so that the substitution engine doesn't
 	-- accidentally re-use a unique that's already in use
 	-- Easiest thing is to do it all at once, as if all the top-level
 	-- decls were mutually recursive
-    top_subst       = mkEmptySubst (mkInScopeSet (mkVarSet (bindersOfBinds binds)))
+    top_subst = mkEmptySubst $ mkInScopeSet $ mkVarSet $ 
+                bindersOfBinds $ mg_binds guts
 
     go []           = return ([], emptyUDs)
     go (bind:binds) = do (binds', uds) <- go binds
                          (bind', uds') <- specBind top_subst bind uds
                          return (bind' ++ binds', uds')
+
+specImports :: VarSet	 	-- Don't specialise these ones
+	       			-- See Note [Avoiding recursive specialisation]
+            -> RuleBase	 	-- Rules from this module and the home package
+	       			-- (but not external packages, which can change)
+            -> UsageDetails	-- Calls for imported things, and floating bindings
+            -> CoreM ( [CoreRule]   -- New rules
+                     , [CoreBind] ) -- Specialised bindings and floating bindings
+specImports done rb uds
+  = do { let import_calls = varEnvElts (ud_calls uds)
+       ; (rules, spec_binds) <- go rb import_calls
+       ; return (rules, wrapDictBinds (ud_binds uds) spec_binds) }
+  where
+    go _ [] = return ([], [])
+    go rb (CIS fn calls_for_fn : other_calls)
+      = do { (rules1, spec_binds1) <- specImport done rb fn (Map.toList calls_for_fn)
+           ; (rules2, spec_binds2) <- go (extendRuleBaseList rb rules1) other_calls
+           ; return (rules1 ++ rules2, spec_binds1 ++ spec_binds2) }
+
+specImport :: VarSet	    	    -- Don't specialise these
+	       			    -- See Note [Avoiding recursive specialisation]
+           -> RuleBase	    	    -- Rules from this module
+           -> Id -> [CallInfo]	    -- Imported function and calls for it
+           -> CoreM ( [CoreRule]    -- New rules
+                    , [CoreBind] )  -- Specialised bindings
+specImport done rb fn calls_for_fn
+  | not (fn `elemVarSet` done)
+  , isInlinablePragma (idInlinePragma fn)
+  , Just rhs <- maybeUnfoldingTemplate (realIdUnfolding fn)
+  = do {     -- Get rules from the external package state
+       	     -- We keep doing this in case we "page-fault in" 
+	     -- more rules as we go along
+       ; hsc_env <- getHscEnv
+       ; eps <- liftIO $ hscEPS hsc_env 
+       ; let full_rb = unionRuleBase rb (eps_rule_base eps)
+             rules_for_fn = getRules full_rb fn 
+
+       ; (rules1, spec_pairs, uds) <- runSpecM $
+              specCalls emptySubst rules_for_fn calls_for_fn fn rhs
+       ; let spec_binds1 = [NonRec b r | (b,r) <- spec_pairs]
+       	     -- After the rules kick in we may get recursion, but 
+	     -- we rely on a global GlomBinds to sort that out later
+       
+	      -- Now specialise any cascaded calls
+       ; (rules2, spec_binds2) <- specImports (extendVarSet done fn) 
+                                              (extendRuleBaseList rb rules1)
+                                              uds
+
+       ; return (rules2 ++ rules1, spec_binds2 ++ spec_binds1) }
+
+  | otherwise
+  = WARN( True, ptext (sLit "specImport discard") <+> ppr fn <+> ppr calls_for_fn )
+    return ([], [])    
 \end{code}
 
+Avoiding recursive specialisation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When we specialise 'f' we may find new overloaded calls to 'g', 'h' in
+'f's RHS.  So we want to specialise g,h.  But we don't want to
+specialise f any more!  It's possible that f's RHS might have a
+recursive yet-more-specialised call, so we'd diverge in that case.
+And if the call is to the same type, one specialisation is enough.
+Avoiding this recursive specialisation loop is the reason for the 
+'done' VarSet passed to specImports and specImport.
+
 %************************************************************************
 %*									*
 \subsubsection{@specExpr@: the main function}
@@ -611,22 +660,22 @@ specProgram dflags us binds = do
 
 \begin{code}
 specVar :: Subst -> Id -> CoreExpr
-specVar subst v = lookupIdSubst subst v
+specVar subst v = lookupIdSubst (text "specVar") subst v
 
 specExpr :: Subst -> CoreExpr -> SpecM (CoreExpr, UsageDetails)
 -- We carry a substitution down:
---	a) we must clone any binding that might flaot outwards,
+--	a) we must clone any binding that might float outwards,
 --	   to avoid name clashes
 --	b) we carry a type substitution to use when analysing
 --	   the RHS of specialised bindings (no type-let!)
 
 ---------------- First the easy cases --------------------
-specExpr subst (Type ty) = return (Type (substTy subst ty), emptyUDs)
+specExpr subst (Type ty) = return (Type (CoreSubst.substTy subst ty), emptyUDs)
 specExpr subst (Var v)   = return (specVar subst v,         emptyUDs)
 specExpr _     (Lit lit) = return (Lit lit,                 emptyUDs)
 specExpr subst (Cast e co) = do
     (e', uds) <- specExpr subst e
-    return ((Cast e' (substTy subst co)), uds)
+    return ((Cast e' (CoreSubst.substTy subst co)), uds)
 specExpr subst (Note note body) = do
     (body', uds) <- specExpr subst body
     return (Note (specNote subst note) body', uds)
@@ -641,35 +690,27 @@ specExpr subst expr@(App {})
                                return (App fun' arg', uds_arg `plusUDs` uds_app)
 
     go (Var f)       args = case specVar subst f of
-                                Var f' -> return (Var f', mkCallUDs subst f' args)
+                                Var f' -> return (Var f', mkCallUDs f' args)
                                 e'     -> return (e', emptyUDs)	-- I don't expect this!
     go other	     _    = specExpr subst other
 
 ---------------- Lambda/case require dumping of usage details --------------------
 specExpr subst e@(Lam _ _) = do
     (body', uds) <- specExpr subst' body
-    let (filtered_uds, body'') = dumpUDs bndrs' uds body'
-    return (mkLams bndrs' body'', filtered_uds)
+    let (free_uds, dumped_dbs) = dumpUDs bndrs' uds 
+    return (mkLams bndrs' (wrapDictBindsE dumped_dbs body'), free_uds)
   where
     (bndrs, body) = collectBinders e
     (subst', bndrs') = substBndrs subst bndrs
 	-- More efficient to collect a group of binders together all at once
 	-- and we don't want to split a lambda group with dumped bindings
 
-specExpr subst (Case scrut case_bndr ty alts) = do
-    (scrut', uds_scrut) <- specExpr subst scrut
-    (alts', uds_alts) <- mapAndCombineSM spec_alt alts
-    return (Case scrut' case_bndr' (substTy subst ty) alts', uds_scrut `plusUDs` uds_alts)
-  where
-    (subst_alt, case_bndr') = substBndr subst case_bndr
-	-- No need to clone case binder; it can't float like a let(rec)
-
-    spec_alt (con, args, rhs) = do
-          (rhs', uds) <- specExpr subst_rhs rhs
-          let (uds', rhs'') = dumpUDs args uds rhs'
-          return ((con, args', rhs''), uds')
-        where
-          (subst_rhs, args') = substBndrs subst_alt args
+specExpr subst (Case scrut case_bndr ty alts) 
+  = do { (scrut', scrut_uds) <- specExpr subst scrut
+       ; (scrut'', case_bndr', alts', alts_uds) 
+             <- specCase subst scrut' case_bndr alts 
+       ; return (Case scrut'' case_bndr' (CoreSubst.substTy subst ty) alts'
+                , scrut_uds `plusUDs` alts_uds) }
 
 ---------------- Finally, let is the interesting case --------------------
 specExpr subst (Let bind body) = do
@@ -688,11 +729,117 @@ specExpr subst (Let bind body) = do
 -- Must apply the type substitution to coerceions
 specNote :: Subst -> Note -> Note
 specNote _ note = note
+
+
+specCase :: Subst 
+         -> CoreExpr	 	-- Scrutinee, already done
+         -> Id -> [CoreAlt]
+         -> SpecM ( CoreExpr	-- New scrutinee
+	    	  , Id
+	    	  , [CoreAlt]
+                  , UsageDetails)
+specCase subst scrut' case_bndr [(con, args, rhs)]
+  | isDictId case_bndr  	 -- See Note [Floating dictionaries out of cases]
+  , interestingDict scrut'
+  , not (isDeadBinder case_bndr && null sc_args')
+  = do { (case_bndr_flt : sc_args_flt) <- mapM clone_me (case_bndr' : sc_args')
+
+       ; let sc_rhss = [ Case (Var case_bndr_flt) case_bndr' (idType sc_arg')
+                              [(con, args', Var sc_arg')]
+                       | sc_arg' <- sc_args' ]
+
+	     -- Extend the substitution for RHS to map the *original* binders
+	     -- to their floated verions.  Attach an unfolding to these floated
+	     -- binders so they look interesting to interestingDict
+	     mb_sc_flts :: [Maybe DictId]
+             mb_sc_flts = map (lookupVarEnv clone_env) args'
+             clone_env  = zipVarEnv sc_args' (zipWith add_unf sc_args_flt sc_rhss)
+             subst_prs  = (case_bndr, Var (add_unf case_bndr_flt scrut'))
+                        : [ (arg, Var sc_flt) 
+                          | (arg, Just sc_flt) <- args `zip` mb_sc_flts ]
+             subst_rhs' = extendIdSubstList subst_rhs subst_prs
+                                                      
+       ; (rhs',   rhs_uds)   <- specExpr subst_rhs' rhs
+       ; let scrut_bind    = mkDB (NonRec case_bndr_flt scrut')
+             case_bndr_set = unitVarSet case_bndr_flt
+             sc_binds      = [(NonRec sc_arg_flt sc_rhs, case_bndr_set)
+                             | (sc_arg_flt, sc_rhs) <- sc_args_flt `zip` sc_rhss ]
+             flt_binds     = scrut_bind : sc_binds
+             (free_uds, dumped_dbs) = dumpUDs (case_bndr':args') rhs_uds
+             all_uds = flt_binds `addDictBinds` free_uds
+             alt'    = (con, args', wrapDictBindsE dumped_dbs rhs')
+       ; return (Var case_bndr_flt, case_bndr', [alt'], all_uds) }
+  where
+    (subst_rhs, (case_bndr':args')) = substBndrs subst (case_bndr:args)
+    sc_args' = filter is_flt_sc_arg args'
+             
+    clone_me bndr = do { uniq <- getUniqueM
+                       ; return (mkUserLocal occ uniq ty loc) }
+       where
+         name = idName bndr
+         ty   = idType bndr
+         occ  = nameOccName name
+         loc  = getSrcSpan name
+
+    add_unf sc_flt sc_rhs  -- Sole purpose: make sc_flt respond True to interestingDictId
+      = setIdUnfolding sc_flt (mkSimpleUnfolding sc_rhs)
+
+    arg_set = mkVarSet args'
+    is_flt_sc_arg var =  isId var
+                      && not (isDeadBinder var)
+	              && isDictTy var_ty
+                      && not (tyVarsOfType var_ty `intersectsVarSet` arg_set)
+       where
+         var_ty = idType var
+
+
+specCase subst scrut case_bndr alts
+  = do { (alts', uds_alts) <- mapAndCombineSM spec_alt alts
+       ; return (scrut, case_bndr', alts', uds_alts) }
+  where
+    (subst_alt, case_bndr') = substBndr subst case_bndr
+    spec_alt (con, args, rhs) = do
+          (rhs', uds) <- specExpr subst_rhs rhs
+          let (free_uds, dumped_dbs) = dumpUDs (case_bndr' : args') uds
+          return ((con, args', wrapDictBindsE dumped_dbs rhs'), free_uds)
+        where
+          (subst_rhs, args') = substBndrs subst_alt args
 \end{code}
 
+Note [Floating dictionaries out of cases]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+   g = \d. case d of { MkD sc ... -> ...(f sc)... }
+Naively we can't float d2's binding out of the case expression,
+because 'sc' is bound by the case, and that in turn means we can't
+specialise f, which seems a pity.  
+
+So we invert the case, by floating out a binding 
+for 'sc_flt' thus:
+    sc_flt = case d of { MkD sc ... -> sc }
+Now we can float the call instance for 'f'.  Indeed this is just
+what'll happen if 'sc' was originally bound with a let binding,
+but case is more efficient, and necessary with equalities. So it's
+good to work with both.
+
+You might think that this won't make any difference, because the
+call instance will only get nuked by the \d.  BUT if 'g' itself is 
+specialised, then transitively we should be able to specialise f.
+
+In general, given
+   case e of cb { MkD sc ... -> ...(f sc)... }
+we transform to
+   let cb_flt = e
+       sc_flt = case cb_flt of { MkD sc ... -> sc }
+   in
+   case cb_flt of bg { MkD sc ... -> ....(f sc_flt)... }
+
+The "_flt" things are the floated binds; we use the current substitution
+to substitute sc -> sc_flt in the RHS
+
 %************************************************************************
 %*									*
-\subsubsection{Dealing with a binding}
+                     Dealing with a binding
 %*									*
 %************************************************************************
 
@@ -703,215 +850,445 @@ specBind :: Subst			-- Use this for RHSs
 	 -> SpecM ([CoreBind],		-- New bindings
 		   UsageDetails)	-- And info to pass upstream
 
-specBind rhs_subst bind body_uds
-  = do	{ (bind', bind_uds) <- specBindItself rhs_subst bind (calls body_uds)
-	; return (finishSpecBind bind' bind_uds body_uds) }
-
-finishSpecBind :: CoreBind -> UsageDetails -> UsageDetails -> ([CoreBind], UsageDetails)
-finishSpecBind bind 
-	(MkUD { dict_binds = rhs_dbs,  calls = rhs_calls,  ud_fvs = rhs_fvs })
-	(MkUD { dict_binds = body_dbs, calls = body_calls, ud_fvs = body_fvs })
-  | not (mkVarSet bndrs `intersectsVarSet` all_fvs)
-		-- Common case 1: the bound variables are not
-		--		  mentioned in the dictionary bindings
-  = ([bind], MkUD { dict_binds = body_dbs `unionBags` rhs_dbs
-			-- It's important that the `unionBags` is this way round,
-			-- because body_uds may bind dictionaries that are
-			-- used in the calls passed to specDefn.  So the
-			-- dictionary bindings in rhs_uds may mention 
-			-- dictionaries bound in body_uds.
-		  , calls  = all_calls
-		  , ud_fvs = all_fvs })
-
-  | case bind of { NonRec {} -> True; Rec {} -> False }
-		-- Common case 2: no specialisation happened, and binding
-		--		  is non-recursive.  But the binding may be
-		--		  mentioned in body_dbs, so we should put it first
-  = ([], MkUD { dict_binds = rhs_dbs `unionBags` ((bind, b_fvs) `consBag` body_dbs)
-	      , calls      = all_calls
-	      , ud_fvs     = all_fvs `unionVarSet` b_fvs })
-
-  | otherwise	-- General case: make a huge Rec (sigh)
-  = ([], MkUD { dict_binds = unitBag (Rec all_db_prs, all_db_fvs)
-	      , calls      = all_calls
-	      , ud_fvs     = all_fvs `unionVarSet` b_fvs })
-  where
-    all_fvs = rhs_fvs `unionVarSet` body_fvs
-    all_calls = zapCalls bndrs (rhs_calls `unionCalls` body_calls)
-
-    bndrs   = bindersOf bind
-    b_fvs   = bind_fvs bind
-
-    (all_db_prs, all_db_fvs) = add (bind, b_fvs) $ 
-			       foldrBag add ([], emptyVarSet) $
-			       rhs_dbs `unionBags` body_dbs
-    add (NonRec b r, b_fvs) (prs, fvs) = ((b,r)  : prs, b_fvs `unionVarSet` fvs)
-    add (Rec b_prs,  b_fvs) (prs, fvs) = (b_prs ++ prs, b_fvs `unionVarSet` fvs)
-
-specBindItself :: Subst -> CoreBind -> CallDetails -> SpecM (CoreBind, UsageDetails)
-
--- specBindItself deals with the RHS, specialising it according
--- to the calls found in the body (if any)
-specBindItself rhs_subst (NonRec bndr rhs) call_info = do
-    ((bndr',rhs'), spec_defns, spec_uds) <- specDefn rhs_subst call_info (bndr,rhs)
-    let
-        new_bind | null spec_defns = NonRec bndr' rhs'
-                 | otherwise       = Rec ((bndr',rhs'):spec_defns)
-		-- bndr' mentions the spec_defns in its SpecEnv
-		-- Not sure why we couln't just put the spec_defns first
-    return (new_bind, spec_uds)
-
-specBindItself rhs_subst (Rec pairs) call_info = do
-    stuff <- mapM (specDefn rhs_subst call_info) pairs
-    let
-	(pairs', spec_defns_s, spec_uds_s) = unzip3 stuff
-	spec_defns = concat spec_defns_s
-	spec_uds   = plusUDList spec_uds_s
-        new_bind   = Rec (spec_defns ++ pairs')
-    return (new_bind, spec_uds)
-
-
-specDefn :: Subst			-- Subst to use for RHS
-	 -> CallDetails			-- Info on how it is used in its scope
-	 -> (Id, CoreExpr)		-- The thing being bound and its un-processed RHS
-	 -> SpecM ((Id, CoreExpr),	-- The thing and its processed RHS
-					-- 	the Id may now have specialisations attached
+-- Returned UsageDetails:
+--    No calls for binders of this bind
+specBind rhs_subst (NonRec fn rhs) body_uds
+  = do { (rhs', rhs_uds) <- specExpr rhs_subst rhs
+       ; (fn', spec_defns, body_uds1) <- specDefn rhs_subst body_uds fn rhs
+
+       ; let pairs = spec_defns ++ [(fn', rhs')]
+	 		-- fn' mentions the spec_defns in its rules, 
+			-- so put the latter first
+
+             combined_uds = body_uds1 `plusUDs` rhs_uds
+		-- This way round a call in rhs_uds of a function f
+		-- at type T will override a call of f at T in body_uds1; and
+		-- that is good because it'll tend to keep "earlier" calls
+		-- See Note [Specialisation of dictionary functions]
+
+	     (free_uds, dump_dbs, float_all) = dumpBindUDs [fn] combined_uds
+	        -- See Note [From non-recursive to recursive]
+
+             final_binds | isEmptyBag dump_dbs = [NonRec b r | (b,r) <- pairs]
+                         | otherwise = [Rec (flattenDictBinds dump_dbs pairs)]
+
+	 ; if float_all then
+	     -- Rather than discard the calls mentioning the bound variables
+	     -- we float this binding along with the others
+	      return ([], free_uds `snocDictBinds` final_binds)
+           else
+	     -- No call in final_uds mentions bound variables, 
+	     -- so we can just leave the binding here
+	      return (final_binds, free_uds) }
+
+
+specBind rhs_subst (Rec pairs) body_uds
+       -- Note [Specialising a recursive group]
+  = do { let (bndrs,rhss) = unzip pairs
+       ; (rhss', rhs_uds) <- mapAndCombineSM (specExpr rhs_subst) rhss
+       ; let scope_uds = body_uds `plusUDs` rhs_uds
+       	     	       -- Includes binds and calls arising from rhss
+
+       ; (bndrs1, spec_defns1, uds1) <- specDefns rhs_subst scope_uds pairs
+
+       ; (bndrs3, spec_defns3, uds3)
+             <- if null spec_defns1  -- Common case: no specialisation
+	        then return (bndrs1, [], uds1)
+		else do {      	     -- Specialisation occurred; do it again
+                          (bndrs2, spec_defns2, uds2)
+                              <- specDefns rhs_subst uds1 (bndrs1 `zip` rhss)
+                        ; return (bndrs2, spec_defns2 ++ spec_defns1, uds2) }
+
+       ; let (final_uds, dumped_dbs, float_all) = dumpBindUDs bndrs uds3
+             bind = Rec (flattenDictBinds dumped_dbs $
+                         spec_defns3 ++ zip bndrs3 rhss')
+             
+       ; if float_all then
+	      return ([], final_uds `snocDictBind` bind)
+           else
+	      return ([bind], final_uds) }
+
+
+---------------------------
+specDefns :: Subst
+	  -> UsageDetails		-- Info on how it is used in its scope
+	  -> [(Id,CoreExpr)]		-- The things being bound and their un-processed RHS
+	  -> SpecM ([Id],		-- Original Ids with RULES added
+	     	    [(Id,CoreExpr)],	-- Extra, specialised bindings
+		    UsageDetails)	-- Stuff to fling upwards from the specialised versions
+
+-- Specialise a list of bindings (the contents of a Rec), but flowing usages
+-- upwards binding by binding.  Example: { f = ...g ...; g = ...f .... }
+-- Then if the input CallDetails has a specialised call for 'g', whose specialisation
+-- in turn generates a specialised call for 'f', we catch that in this one sweep.
+-- But not vice versa (it's a fixpoint problem).
+
+specDefns _subst uds []
+  = return ([], [], uds)
+specDefns subst uds ((bndr,rhs):pairs)
+  = do { (bndrs1, spec_defns1, uds1) <- specDefns subst uds pairs
+       ; (bndr1, spec_defns2, uds2)  <- specDefn subst uds1 bndr rhs
+       ; return (bndr1 : bndrs1, spec_defns1 ++ spec_defns2, uds2) }
+
+---------------------------
+specDefn :: Subst
+	 -> UsageDetails		-- Info on how it is used in its scope
+	 -> Id -> CoreExpr		-- The thing being bound and its un-processed RHS
+	 -> SpecM (Id,			-- Original Id with added RULES
 		   [(Id,CoreExpr)],	-- Extra, specialised bindings
-		   UsageDetails		-- Stuff to fling upwards from the RHS and its
-	    )				-- 	specialised versions
-
-specDefn subst calls (fn, rhs)
+	    	   UsageDetails)	-- Stuff to fling upwards from the specialised versions
+
+specDefn subst body_uds fn rhs
+  = do { let (body_uds_without_me, calls_for_me) = callsForMe fn body_uds
+             rules_for_me = idCoreRules fn
+       ; (rules, spec_defns, spec_uds) <- specCalls subst rules_for_me 
+                                                    calls_for_me fn rhs
+       ; return ( fn `addIdSpecialisations` rules
+                , spec_defns
+                , body_uds_without_me `plusUDs` spec_uds) }
+		-- It's important that the `plusUDs` is this way
+		-- round, because body_uds_without_me may bind
+		-- dictionaries that are used in calls_for_me passed
+		-- to specDefn.  So the dictionary bindings in
+		-- spec_uds may mention dictionaries bound in
+		-- body_uds_without_me
+
+---------------------------
+specCalls :: Subst
+          -> [CoreRule]			-- Existing RULES for the fn
+	  -> [CallInfo]	
+	  -> Id -> CoreExpr
+	  -> SpecM ([CoreRule],		-- New RULES for the fn
+		    [(Id,CoreExpr)],	-- Extra, specialised bindings
+	    	    UsageDetails)	-- New usage details from the specialised RHSs
+
+-- This function checks existing rules, and does not create
+-- duplicate ones. So the caller does not nneed to do this filtering.
+-- See 'already_covered'
+
+specCalls subst rules_for_me calls_for_me fn rhs
 	-- The first case is the interesting one
   |  rhs_tyvars `lengthIs`     n_tyvars -- Rhs of fn's defn has right number of big lambdas
   && rhs_ids    `lengthAtLeast` n_dicts	-- and enough dict args
   && notNull calls_for_me		-- And there are some calls to specialise
+  && not (isNeverActive (idInlineActivation fn))
+	-- Don't specialise NOINLINE things
+	-- See Note [Auto-specialisation and RULES]
 
 --   && not (certainlyWillInline (idUnfolding fn))	-- And it's not small
 --	See Note [Inline specialisation] for why we do not 
---	switch off specialisation for inline functions = do
-  = do
-     -- Specialise the body of the function
-    (rhs', rhs_uds) <- specExpr subst rhs
-
-      -- Make a specialised version for each call in calls_for_me
-    stuff <- mapM spec_call calls_for_me
-    let
-        (spec_defns, spec_uds, spec_rules) = unzip3 stuff
-
-        fn' = addIdSpecialisations fn spec_rules
+--	switch off specialisation for inline functions
 
-    return ((fn',rhs'),
-              spec_defns,
-              rhs_uds `plusUDs` plusUDList spec_uds)
+  = -- pprTrace "specDefn: some" (ppr fn $$ ppr calls_for_me $$ ppr rules_for_me) $
+    do { stuff <- mapM spec_call calls_for_me
+       ; let (spec_defns, spec_uds, spec_rules) = unzip3 (catMaybes stuff)
+       ; return (spec_rules, spec_defns, plusUDList spec_uds) }
 
   | otherwise	-- No calls or RHS doesn't fit our preconceptions
   = WARN( notNull calls_for_me, ptext (sLit "Missed specialisation opportunity for") <+> ppr fn )
 	  -- Note [Specialisation shape]
-    (do  { (rhs', rhs_uds) <- specExpr subst rhs
-	; return ((fn, rhs'), [], rhs_uds) })
+    -- pprTrace "specDefn: none" (ppr fn $$ ppr calls_for_me) $
+    return ([], [], emptyUDs)
   
   where
     fn_type	       = idType fn
+    fn_arity	       = idArity fn
+    fn_unf             = realIdUnfolding fn	-- Ignore loop-breaker-ness here
     (tyvars, theta, _) = tcSplitSigmaTy fn_type
     n_tyvars	       = length tyvars
     n_dicts	       = length theta
-    inline_prag        = idInlinePragma fn
+    inl_prag           = idInlinePragma fn
+    inl_act            = inlinePragmaActivation inl_prag
+    is_local           = isLocalId fn
 
-	-- It's important that we "see past" any INLINE pragma
-	-- else we'll fail to specialise an INLINE thing
-    (inline_rhs, rhs_inside) = dropInline rhs
-    (rhs_tyvars, rhs_ids, rhs_body) = collectTyAndValBinders rhs_inside
+	-- Figure out whether the function has an INLINE pragma
+	-- See Note [Inline specialisations]
 
-    rhs_dicts = take n_dicts rhs_ids
-    body      = mkLams (drop n_dicts rhs_ids) rhs_body
+    spec_arity = unfoldingArity fn_unf - n_dicts  -- Arity of the *specialised* inline rule
+
+    (rhs_tyvars, rhs_ids, rhs_body) = collectTyAndValBinders rhs
+
+    rhs_dict_ids = take n_dicts rhs_ids
+    body         = mkLams (drop n_dicts rhs_ids) rhs_body
 		-- Glue back on the non-dict lambdas
 
-    calls_for_me = case lookupFM calls fn of
-			Nothing -> []
-			Just cs -> fmToList cs
+    already_covered :: [CoreExpr] -> Bool
+    already_covered args	  -- Note [Specialisations already covered]
+       = isJust (lookupRule (const True) realIdUnfolding 
+                            (substInScope subst) 
+       	 		    fn args rules_for_me)
+
+    mk_ty_args :: [Maybe Type] -> [CoreExpr]
+    mk_ty_args call_ts = zipWithEqual "spec_call" mk_ty_arg rhs_tyvars call_ts
+    	       where
+	          mk_ty_arg rhs_tyvar Nothing   = Type (mkTyVarTy rhs_tyvar)
+		  mk_ty_arg _	      (Just ty) = Type ty
 
     ----------------------------------------------------------
 	-- Specialise to one particular call pattern
-    spec_call :: (CallKey, ([DictExpr], VarSet))	-- Call instance
-              -> SpecM ((Id,CoreExpr),			-- Specialised definition
-	                UsageDetails, 			-- Usage details from specialised body
-                	CoreRule)			-- Info for the Id's SpecEnv
+    spec_call :: CallInfo                         -- Call instance
+              -> SpecM (Maybe ((Id,CoreExpr),	  -- Specialised definition
+	                       UsageDetails, 	  -- Usage details from specialised body
+			       CoreRule))	  -- Info for the Id's SpecEnv
     spec_call (CallKey call_ts, (call_ds, _))
-      = ASSERT( call_ts `lengthIs` n_tyvars  && call_ds `lengthIs` n_dicts ) do
-		-- Calls are only recorded for properly-saturated applications
+      = ASSERT( call_ts `lengthIs` n_tyvars  && call_ds `lengthIs` n_dicts )
 	
-	-- Suppose f's defn is 	f = /\ a b c d -> \ d1 d2 -> rhs	
-        -- Supppose the call is for f [Just t1, Nothing, Just t3, Nothing] [dx1, dx2]
+	-- Suppose f's defn is 	f = /\ a b c -> \ d1 d2 -> rhs	
+        -- Supppose the call is for f [Just t1, Nothing, Just t3] [dx1, dx2]
 
 	-- Construct the new binding
-	-- 	f1 = SUBST[a->t1,c->t3, d1->d1', d2->d2'] (/\ b d -> rhs)
+	-- 	f1 = SUBST[a->t1,c->t3, d1->d1', d2->d2'] (/\ b -> rhs)
 	-- PLUS the usage-details
 	--	{ d1' = dx1; d2' = dx2 }
-	-- where d1', d2' are cloned versions of d1,d2, with the type substitution applied.
+	-- where d1', d2' are cloned versions of d1,d2, with the type substitution
+	-- applied.  These auxiliary bindings just avoid duplication of dx1, dx2
 	--
 	-- Note that the substitution is applied to the whole thing.
 	-- This is convenient, but just slightly fragile.  Notably:
-	--	* There had better be no name clashes in a/b/c/d
-	--
-        let
-		-- poly_tyvars = [b,d] in the example above
+	--	* There had better be no name clashes in a/b/c
+        do { let
+		-- poly_tyvars = [b] in the example above
 		-- spec_tyvars = [a,c] 
-		-- ty_args     = [t1,b,t3,d]
-	   poly_tyvars = [tv | (tv, Nothing) <- rhs_tyvars `zip` call_ts]
-           spec_tyvars = [tv | (tv, Just _)  <- rhs_tyvars `zip` call_ts]
-	   ty_args     = zipWithEqual "spec_call" mk_ty_arg rhs_tyvars call_ts
-		       where
-		 	 mk_ty_arg rhs_tyvar Nothing   = Type (mkTyVarTy rhs_tyvar)
-			 mk_ty_arg _	     (Just ty) = Type ty
-
-           spec_ty_args = [ty | Just ty <- call_ts]
-	   rhs_subst  = extendTvSubstList subst (spec_tyvars `zip` spec_ty_args)
-
-	(rhs_subst', rhs_dicts') <- cloneBinders rhs_subst rhs_dicts
-	let
-	   inst_args = ty_args ++ map Var rhs_dicts'
-
-		-- Figure out the type of the specialised function
-	   body_ty = applyTypeToArgs rhs fn_type inst_args
-	   (lam_args, app_args) 		-- Add a dummy argument if body_ty is unlifted
-		| isUnLiftedType body_ty	-- C.f. WwLib.mkWorkerArgs
-		= (poly_tyvars ++ [voidArgId], poly_tyvars ++ [realWorldPrimId])
-		| otherwise = (poly_tyvars, poly_tyvars)
-	   spec_id_ty = mkPiTypes lam_args body_ty
-
-        spec_f <- newIdSM fn spec_id_ty
-        (spec_rhs, rhs_uds) <- specExpr rhs_subst' (mkLams lam_args body)
-	let
+		-- ty_args     = [t1,b,t3]
+		poly_tyvars   = [tv | (tv, Nothing) <- rhs_tyvars `zip` call_ts]
+           	spec_tv_binds = [(tv,ty) | (tv, Just ty) <- rhs_tyvars `zip` call_ts]
+		spec_ty_args  = map snd spec_tv_binds
+	   	ty_args       = mk_ty_args call_ts
+	   	rhs_subst     = CoreSubst.extendTvSubstList subst spec_tv_binds
+
+	   ; (rhs_subst1, inst_dict_ids) <- newDictBndrs rhs_subst rhs_dict_ids
+	     		  -- Clone rhs_dicts, including instantiating their types
+
+	   ; let (rhs_subst2, dx_binds) = bindAuxiliaryDicts rhs_subst1 $
+	     	 	      		  (my_zipEqual rhs_dict_ids inst_dict_ids call_ds)
+	         inst_args = ty_args ++ map Var inst_dict_ids
+
+	   ; if already_covered inst_args then
+	        return Nothing
+	     else do
+	   {	-- Figure out the type of the specialised function
+	     let body_ty = applyTypeToArgs rhs fn_type inst_args
+	         (lam_args, app_args) 		-- Add a dummy argument if body_ty is unlifted
+		   | isUnLiftedType body_ty	-- C.f. WwLib.mkWorkerArgs
+		   = (poly_tyvars ++ [voidArgId], poly_tyvars ++ [realWorldPrimId])
+		   | otherwise = (poly_tyvars, poly_tyvars)
+	         spec_id_ty = mkPiTypes lam_args body_ty
+	
+           ; spec_f <- newSpecIdSM fn spec_id_ty
+           ; (spec_rhs, rhs_uds) <- specExpr rhs_subst2 (mkLams lam_args body)
+	   ; let
 		-- The rule to put in the function's specialisation is:
-		--	forall b,d, d1',d2'.  f t1 b t3 d d1' d2' = f1 b d  
-	   rule_name = mkFastString ("SPEC " ++ showSDoc (ppr fn <+> ppr spec_ty_args))
-           spec_env_rule = mkLocalRule 
-	   		        rule_name
-				inline_prag 	-- Note [Auto-specialisation and RULES]
-			 	(idName fn)
-			        (poly_tyvars ++ rhs_dicts')
-				inst_args 
-				(mkVarApps (Var spec_f) app_args)
+		--	forall b, d1',d2'.  f t1 b t3 d1' d2' = f1 b  
+	        rule_name = mkFastString ("SPEC " ++ showSDoc (ppr fn <+> ppr spec_ty_args))
+  		spec_env_rule = mkRule True {- Auto generated -} is_local
+                                  rule_name
+				  inl_act 	-- Note [Auto-specialisation and RULES]
+			 	  (idName fn)
+			          (poly_tyvars ++ inst_dict_ids)
+				  inst_args 
+				  (mkVarApps (Var spec_f) app_args)
 
 		-- Add the { d1' = dx1; d2' = dx2 } usage stuff
-	   final_uds = foldr addDictBind rhs_uds (my_zipEqual "spec_call" rhs_dicts' call_ds)
+	   	final_uds = foldr consDictBind rhs_uds dx_binds
+
+		-- Add an InlineRule if the parent has one
+		-- See Note [Inline specialisations]
+		spec_unf
+                  = case inlinePragmaSpec inl_prag of
+                      Inline    -> mkInlineUnfolding (Just spec_arity) spec_rhs
+                      Inlinable -> mkInlinableUnfolding spec_rhs
+                      _         -> NoUnfolding
+
+		-- Adding arity information just propagates it a bit faster
+		-- 	See Note [Arity decrease] in Simplify
+		-- Copy InlinePragma information from the parent Id.
+		-- So if f has INLINE[1] so does spec_f
+  	        spec_f_w_arity = spec_f `setIdArity`          max 0 (fn_arity - n_dicts)
+                                        `setInlinePragma` inl_prag
+                                        `setIdUnfolding`  spec_unf
+
+	   ; return (Just ((spec_f_w_arity, spec_rhs), final_uds, spec_env_rule)) } }
+      where
+	my_zipEqual xs ys zs
+	 | debugIsOn && not (equalLength xs ys && equalLength ys zs)
+             = pprPanic "my_zipEqual" (vcat [ ppr xs, ppr ys
+					    , ppr fn <+> ppr call_ts
+					    , ppr (idType fn), ppr theta
+					    , ppr n_dicts, ppr rhs_dict_ids 
+					    , ppr rhs])
+    	 | otherwise = zip3 xs ys zs
+
+bindAuxiliaryDicts
+	:: Subst
+	-> [(DictId,DictId,CoreExpr)]	-- (orig_dict, inst_dict, dx)
+	-> (Subst,			-- Substitute for all orig_dicts
+	    [CoreBind])			-- Auxiliary bindings
+-- Bind any dictionary arguments to fresh names, to preserve sharing
+-- Substitution already substitutes orig_dict -> inst_dict
+bindAuxiliaryDicts subst triples = go subst [] triples
+  where
+    go subst binds []    = (subst, binds)
+    go subst binds ((d, dx_id, dx) : pairs)
+      | exprIsTrivial dx = go (extendIdSubst subst d dx) binds pairs
+             -- No auxiliary binding necessary
+	     -- Note that we bind the *original* dict in the substitution,
+	     -- overriding any d->dx_id binding put there by substBndrs
+
+      | otherwise        = go subst_w_unf (NonRec dx_id dx : binds) pairs
+      where
+        dx_id1 = dx_id `setIdUnfolding` mkSimpleUnfolding dx
+	subst_w_unf = extendIdSubst subst d (Var dx_id1)
+       	     -- Important!  We're going to substitute dx_id1 for d
+	     -- and we want it to look "interesting", else we won't gather *any*
+	     -- consequential calls. E.g.
+	     --	    f d = ...g d....
+	     -- If we specialise f for a call (f (dfun dNumInt)), we'll get 
+	     -- a consequent call (g d') with an auxiliary definition
+	     --	    d' = df dNumInt
+  	     -- We want that consequent call to look interesting
+	     --
+	     -- Again, note that we bind the *original* dict in the substitution,
+	     -- overriding any d->dx_id binding put there by substBndrs
+\end{code}
 
-	   spec_pr | inline_rhs = (spec_f `setInlinePragma` inline_prag, Note InlineMe spec_rhs)
-		   | otherwise  = (spec_f, 				 spec_rhs)
+Note [From non-recursive to recursive]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in the non-recursive case, if any dict-binds depend on 'fn' we might 
+have built a recursive knot
 
-        return (spec_pr, final_uds, spec_env_rule)
+      f a d x = <blah>
+      MkUD { ud_binds = d7 = MkD ..f..
+           , ud_calls = ...(f T d7)... }
 
-      where
-	my_zipEqual doc xs ys 
-	 | debugIsOn && not (equalLength xs ys)
-             = pprPanic "my_zipEqual" (vcat 
-						[ ppr xs, ppr ys
-						, ppr fn <+> ppr call_ts
-						, ppr (idType fn), ppr theta
-						, ppr n_dicts, ppr rhs_dicts 
-						, ppr rhs])
-	 | otherwise		   = zipEqual doc xs ys
-\end{code}
+The we generate
+
+      Rec { fs x = <blah>[T/a, d7/d]
+            f a d x = <blah>
+               RULE f T _ = fs
+            d7 = ...f... }
+
+Here the recursion is only through the RULE.
+
+ 
+Note [Specialisation of dictionary functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Here is a nasty example that bit us badly: see Trac #3591
+
+     dfun a d = MkD a d (meth d)
+     d4 = <blah>
+     d2 = dfun T d4
+     d1 = $p1 d2
+     d3 = dfun T d1
+
+None of these definitions is recursive. What happened was that we 
+generated a specialisation:
+
+     RULE forall d. dfun T d = dT
+     dT = (MkD a d (meth d)) [T/a, d1/d]
+        = MkD T d1 (meth d1)
+
+But now we use the RULE on the RHS of d2, to get
+
+    d2 = dT = MkD d1 (meth d1)
+    d1 = $p1 d2
+
+and now d1 is bottom!  The problem is that when specialising 'dfun' we
+should first dump "below" the binding all floated dictionary bindings
+that mention 'dfun' itself.  So d2 and d3 (and hence d1) must be
+placed below 'dfun', and thus unavailable to it when specialising
+'dfun'.  That in turn means that the call (dfun T d1) must be
+discarded.  On the other hand, the call (dfun T d4) is fine, assuming
+d4 doesn't mention dfun.
+
+But look at this:
+
+  class C a where { foo,bar :: [a] -> [a] }
+
+  instance C Int where 
+     foo x = r_bar x    
+     bar xs = reverse xs
+
+  r_bar :: C a => [a] -> [a]
+  r_bar xs = bar (xs ++ xs)
+
+That translates to:
+
+    r_bar a (c::C a) (xs::[a]) = bar a d (xs ++ xs)
+
+    Rec { $fCInt :: C Int = MkC foo_help reverse
+          foo_help (xs::[Int]) = r_bar Int $fCInt xs }
+
+The call (r_bar $fCInt) mentions $fCInt, 
+                        which mentions foo_help, 
+                        which mentions r_bar
+But we DO want to specialise r_bar at Int:
+
+    Rec { $fCInt :: C Int = MkC foo_help reverse
+          foo_help (xs::[Int]) = r_bar Int $fCInt xs
+
+          r_bar a (c::C a) (xs::[a]) = bar a d (xs ++ xs)
+	    RULE r_bar Int _ = r_bar_Int
+
+          r_bar_Int xs = bar Int $fCInt (xs ++ xs)
+           }
+   
+Note that, because of its RULE, r_bar joins the recursive
+group.  (In this case it'll unravel a short moment later.)
+
+
+Conclusion: we catch the nasty case using filter_dfuns in
+callsForMe. To be honest I'm not 100% certain that this is 100%
+right, but it works.  Sigh.
+
+
+Note [Specialising a recursive group]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+    let rec { f x = ...g x'...
+            ; g y = ...f y'.... }
+    in f 'a'
+Here we specialise 'f' at Char; but that is very likely to lead to 
+a specialisation of 'g' at Char.  We must do the latter, else the
+whole point of specialisation is lost.
+
+But we do not want to keep iterating to a fixpoint, because in the
+presence of polymorphic recursion we might generate an infinite number
+of specialisations.
+
+So we use the following heuristic:
+  * Arrange the rec block in dependency order, so far as possible
+    (the occurrence analyser already does this)
+
+  * Specialise it much like a sequence of lets
+
+  * Then go through the block a second time, feeding call-info from
+    the RHSs back in the bottom, as it were
+
+In effect, the ordering maxmimises the effectiveness of each sweep,
+and we do just two sweeps.   This should catch almost every case of 
+monomorphic recursion -- the exception could be a very knotted-up
+recursion with multiple cycles tied up together.
+
+This plan is implemented in the Rec case of specBindItself.
+ 
+Note [Specialisations already covered]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We obviously don't want to generate two specialisations for the same
+argument pattern.  There are two wrinkles
+
+1. We do the already-covered test in specDefn, not when we generate
+the CallInfo in mkCallUDs.  We used to test in the latter place, but
+we now iterate the specialiser somewhat, and the Id at the call site
+might therefore not have all the RULES that we can see in specDefn
+
+2. What about two specialisations where the second is an *instance*
+of the first?  If the more specific one shows up first, we'll generate
+specialisations for both.  If the *less* specific one shows up first,
+we *don't* currently generate a specialisation for the more specific
+one.  (See the call to lookupRule in already_covered.)  Reasons:
+  (a) lookupRule doesn't say which matches are exact (bad reason)
+  (b) if the earlier specialisation is user-provided, it's
+      far from clear that we should auto-specialise further
 
 Note [Auto-specialisation and RULES]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -935,10 +1312,14 @@ also add
 	RULE f g_spec = 0
 
 But that's a bit complicated.  For now we ask the programmer's help,
-by *copying the INLINE activation pragma* to the auto-specialised rule.
-So if g says {-# NOINLINE[2] g #-}, then the auto-spec rule will also
-not be active until phase 2.  
+by *copying the INLINE activation pragma* to the auto-specialised
+rule.  So if g says {-# NOINLINE[2] g #-}, then the auto-spec rule
+will also not be active until phase 2.  And that's what programmers
+should jolly well do anyway, even aside from specialisation, to ensure
+that g doesn't inline too early.
 
+This in turn means that the RULE would never fire for a NOINLINE
+thing so not much point in generating a specialisation at all.
 
 Note [Specialisation shape]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -951,7 +1332,7 @@ then its body must look like
 Reason: when specialising the body for a call (f ty dexp), we want to
 substitute dexp for d, and pick up specialised calls in the body of f.
 
-This doesn't always work.  One example I came across was htis:
+This doesn't always work.  One example I came across was this:
 	newtype Gen a = MkGen{ unGen :: Int -> a }
 
 	choose :: Eq a => a -> Gen a
@@ -964,12 +1345,12 @@ It's a silly exapmle, but we get
 where choose doesn't have any dict arguments.  Thus far I have not
 tried to fix this (wait till there's a real example).
 
-
 Note [Inline specialisations]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We transfer to the specialised function any INLINE stuff from the
-original.  This means (a) the Activation in the IdInfo, and (b) any
-InlineMe on the RHS.  
+original.  This means 
+   (a) the Activation for its inlining (from its InlinePragma)
+   (b) any InlineRule
 
 This is a change (Jun06).  Previously the idea is that the point of
 inlining was precisely to specialise the function at its call site,
@@ -988,14 +1369,6 @@ arguments alone are enough to specialise (even though the args are too
 boring to trigger inlining), and it's certainly better to call the 
 specialised version.
 
-A case in point is dictionary functions, which are current marked
-INLINE, but which are worth specialising.
-
-\begin{code}
-dropInline :: CoreExpr -> (Bool, CoreExpr)
-dropInline (Note InlineMe rhs) = (True,  rhs)
-dropInline rhs		       = (False, rhs)
-\end{code}
 
 %************************************************************************
 %*									*
@@ -1006,24 +1379,24 @@ dropInline rhs		       = (False, rhs)
 \begin{code}
 data UsageDetails 
   = MkUD {
-	dict_binds :: !(Bag DictBind),
+	ud_binds :: !(Bag DictBind),
 			-- Floated dictionary bindings
 			-- The order is important; 
 			-- in ds1 `union` ds2, bindings in ds2 can depend on those in ds1
 			-- (Remember, Bags preserve order in GHC.)
 
-	calls     :: !CallDetails, 
+	ud_calls :: !CallDetails  
 
-	ud_fvs :: !VarSet	-- A superset of the variables mentioned in 
-				-- either dict_binds or calls
+	-- INVARIANT: suppose bs = bindersOf ud_binds
+        -- Then 'calls' may *mention* 'bs', 
+        -- but there should be no calls *for* bs
     }
 
 instance Outputable UsageDetails where
-  ppr (MkUD { dict_binds = dbs, calls = calls, ud_fvs = fvs })
+  ppr (MkUD { ud_binds = dbs, ud_calls = calls })
 	= ptext (sLit "MkUD") <+> braces (sep (punctuate comma 
 		[ptext (sLit "binds") <+> equals <+> ppr dbs,
-	 	 ptext (sLit "calls") <+> equals <+> ppr calls,
-	 	 ptext (sLit "fvs")   <+> equals <+> ppr fvs]))
+	 	 ptext (sLit "calls") <+> equals <+> ppr calls]))
 
 type DictBind = (CoreBind, VarSet)
 	-- The set is the free vars of the binding
@@ -1032,18 +1405,27 @@ type DictBind = (CoreBind, VarSet)
 type DictExpr = CoreExpr
 
 emptyUDs :: UsageDetails
-emptyUDs = MkUD { dict_binds = emptyBag, calls = emptyFM, ud_fvs = emptyVarSet }
+emptyUDs = MkUD { ud_binds = emptyBag, ud_calls = emptyVarEnv }
 
 ------------------------------------------------------------			
-type CallDetails  = FiniteMap Id CallInfo
+type CallDetails  = IdEnv CallInfoSet
 newtype CallKey   = CallKey [Maybe Type]			-- Nothing => unconstrained type argument
-type CallInfo     = FiniteMap CallKey
-		              ([DictExpr], VarSet)		-- Dict args and the vars of the whole
-								-- call (including tyvars)
-								-- [*not* include the main id itself, of course]
-	-- The finite maps eliminate duplicates
-	-- The list of types and dictionaries is guaranteed to
-	-- match the type of f
+
+-- CallInfo uses a Map, thereby ensuring that
+-- we record only one call instance for any key
+--
+-- The list of types and dictionaries is guaranteed to
+-- match the type of f
+data CallInfoSet = CIS Id (Map CallKey ([DictExpr], VarSet))
+     	      		-- Range is dict args and the vars of the whole
+			-- call (including tyvars)
+			-- [*not* include the main id itself, of course]
+
+type CallInfo = (CallKey, ([DictExpr], VarSet))
+
+instance Outputable CallInfoSet where
+  ppr (CIS fn map) = hang (ptext (sLit "CIS") <+> ppr fn)
+                        2 (ppr map)
 
 instance Outputable CallKey where
   ppr (CallKey ts) = ppr ts
@@ -1062,13 +1444,23 @@ instance Ord CallKey where
 		  cmp (Just t1) (Just t2) = tcCmpType t1 t2
 
 unionCalls :: CallDetails -> CallDetails -> CallDetails
-unionCalls c1 c2 = plusFM_C plusFM c1 c2
+unionCalls c1 c2 = plusVarEnv_C unionCallInfoSet c1 c2
 
+unionCallInfoSet :: CallInfoSet -> CallInfoSet -> CallInfoSet
+unionCallInfoSet (CIS f calls1) (CIS _ calls2) = CIS f (calls1 `Map.union` calls2)
+
+callDetailsFVs :: CallDetails -> VarSet
+callDetailsFVs calls = foldVarEnv (unionVarSet . callInfoFVs) emptyVarSet calls
+
+callInfoFVs :: CallInfoSet -> VarSet
+callInfoFVs (CIS _ call_info) = Map.foldRight (\(_,fv) vs -> unionVarSet fv vs) emptyVarSet call_info
+
+------------------------------------------------------------			
 singleCall :: Id -> [Maybe Type] -> [DictExpr] -> UsageDetails
 singleCall id tys dicts 
-  = MkUD {dict_binds = emptyBag, 
-	  calls      = unitFM id (unitFM (CallKey tys) (dicts, call_fvs)),
-	  ud_fvs     = call_fvs }
+  = MkUD {ud_binds = emptyBag, 
+	  ud_calls = unitVarEnv id $ CIS id $ 
+                     Map.singleton (CallKey tys) (dicts, call_fvs) }
   where
     call_fvs = exprsFreeVars dicts `unionVarSet` tys_fvs
     tys_fvs  = tyVarsOfTypes (catMaybes tys)
@@ -1082,24 +1474,24 @@ singleCall id tys dicts
 	--
 	-- We don't include the 'id' itself.
 
-mkCallUDs :: Subst -> Id -> [CoreExpr] -> UsageDetails
-mkCallUDs subst f args 
-  | null theta
+mkCallUDs :: Id -> [CoreExpr] -> UsageDetails
+mkCallUDs f args 
+  | not (want_calls_for f)  -- Imported from elsewhere
+  || null theta	   	    -- Not overloaded
   || not (all isClassPred theta)	
 	-- Only specialise if all overloading is on class params. 
 	-- In ptic, with implicit params, the type args
 	--  *don't* say what the value of the implicit param is!
   || not (spec_tys `lengthIs` n_tyvars)
   || not ( dicts   `lengthIs` n_dicts)
-  || maybeToBool (lookupRule (\_act -> True) (substInScope subst) emptyRuleBase f args)
-	-- There's already a rule covering this call.  A typical case
-	-- is where there's an explicit user-provided rule.  Then
-	-- we don't want to create a specialised version 
-	-- of the function that overlaps.
-  = emptyUDs	-- Not overloaded, or no specialisation wanted
+  || not (any interestingDict dicts)	-- Note [Interesting dictionary arguments]
+  -- See also Note [Specialisations already covered]
+  = -- pprTrace "mkCallUDs: discarding" (vcat [ppr f, ppr args, ppr n_tyvars, ppr n_dicts, ppr (map interestingDict dicts)]) 
+    emptyUDs	-- Not overloaded, or no specialisation wanted
 
   | otherwise
-  = singleCall f spec_tys dicts
+  = -- pprTrace "mkCallUDs: keeping" (vcat [ppr f, ppr args, ppr n_tyvars, ppr n_dicts, ppr (map interestingDict dicts)]) 
+    singleCall f spec_tys dicts
   where
     (tyvars, theta, _) = tcSplitSigmaTy (idType f)
     constrained_tyvars = tyVarsOfTheta theta 
@@ -1113,21 +1505,44 @@ mkCallUDs subst f args
 	| tyvar `elemVarSet` constrained_tyvars = Just ty
 	| otherwise				= Nothing
 
-------------------------------------------------------------			
+    want_calls_for f = isLocalId f || isInlinablePragma (idInlinePragma f)
+\end{code}
+
+Note [Interesting dictionary arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this
+	 \a.\d:Eq a.  let f = ... in ...(f d)...
+There really is not much point in specialising f wrt the dictionary d,
+because the code for the specialised f is not improved at all, because
+d is lambda-bound.  We simply get junk specialisations.
+
+What is "interesting"?  Just that it has *some* structure.  
+
+\begin{code}
+interestingDict :: CoreExpr -> Bool
+-- A dictionary argument is interesting if it has *some* structure
+interestingDict (Var v) =  hasSomeUnfolding (idUnfolding v)
+			|| isDataConWorkId v
+interestingDict (Type _)	  = False
+interestingDict (App fn (Type _)) = interestingDict fn
+interestingDict (Note _ a)	  = interestingDict a
+interestingDict (Cast e _)	  = interestingDict e
+interestingDict _                 = True
+\end{code}
+
+\begin{code}
 plusUDs :: UsageDetails -> UsageDetails -> UsageDetails
-plusUDs (MkUD {dict_binds = db1, calls = calls1, ud_fvs = fvs1})
-	(MkUD {dict_binds = db2, calls = calls2, ud_fvs = fvs2})
-  = MkUD {dict_binds = d, calls = c, ud_fvs = fvs1 `unionVarSet` fvs2}
-  where
-    d = db1    `unionBags`   db2 
-    c = calls1 `unionCalls`  calls2
+plusUDs (MkUD {ud_binds = db1, ud_calls = calls1})
+	(MkUD {ud_binds = db2, ud_calls = calls2})
+  = MkUD { ud_binds = db1    `unionBags`   db2 
+         , ud_calls = calls1 `unionCalls`  calls2 }
 
 plusUDList :: [UsageDetails] -> UsageDetails
 plusUDList = foldr plusUDs emptyUDs
 
--- zapCalls deletes calls to ids from uds
-zapCalls :: [Id] -> CallDetails -> CallDetails
-zapCalls ids calls = delListFromFM calls ids
+-----------------------------
+_dictBindBndrs :: Bag DictBind -> [Id]
+_dictBindBndrs dbs = foldrBag ((++) . bindersOf . fst) [] dbs
 
 mkDB :: CoreBind -> DictBind
 mkDB bind = (bind, bind_fvs bind)
@@ -1147,45 +1562,102 @@ pair_fvs (bndr, rhs) = exprFreeVars rhs `unionVarSet` idFreeVars bndr
 	--	type T a = Int
 	--	x :: T a = 3
 
-addDictBind :: (Id,CoreExpr) -> UsageDetails -> UsageDetails
-addDictBind (dict,rhs) uds 
-  = uds { dict_binds = db `consBag` dict_binds uds
-	, ud_fvs = ud_fvs uds `unionVarSet` fvs }
+flattenDictBinds :: Bag DictBind -> [(Id,CoreExpr)] -> [(Id,CoreExpr)]
+flattenDictBinds dbs pairs
+  = foldrBag add pairs dbs
   where
-    db@(_, fvs) = mkDB (NonRec dict rhs) 
+    add (NonRec b r,_) pairs = (b,r) : pairs
+    add (Rec prs1, _)  pairs = prs1 ++ pairs
+
+snocDictBinds :: UsageDetails -> [CoreBind] -> UsageDetails
+-- Add ud_binds to the tail end of the bindings in uds
+snocDictBinds uds dbs
+  = uds { ud_binds = ud_binds uds `unionBags` 
+                     foldr (consBag . mkDB) emptyBag dbs }
+
+consDictBind :: CoreBind -> UsageDetails -> UsageDetails
+consDictBind bind uds = uds { ud_binds = mkDB bind `consBag` ud_binds uds }
 
-dumpAllDictBinds :: UsageDetails -> [CoreBind] -> [CoreBind]
-dumpAllDictBinds (MkUD {dict_binds = dbs}) binds
+addDictBinds :: [DictBind] -> UsageDetails -> UsageDetails
+addDictBinds binds uds = uds { ud_binds = listToBag binds `unionBags` ud_binds uds }
+
+snocDictBind :: UsageDetails -> CoreBind -> UsageDetails
+snocDictBind uds bind = uds { ud_binds = ud_binds uds `snocBag` mkDB bind }
+
+wrapDictBinds :: Bag DictBind -> [CoreBind] -> [CoreBind]
+wrapDictBinds dbs binds
   = foldrBag add binds dbs
   where
     add (bind,_) binds = bind : binds
 
-dumpUDs :: [CoreBndr]
-	-> UsageDetails -> CoreExpr
-	-> (UsageDetails, CoreExpr)
-dumpUDs bndrs (MkUD { dict_binds = orig_dbs
-		    , calls = orig_calls
-		    , ud_fvs = fvs}) body
-  = (new_uds, foldrBag add_let body dump_dbs)		
-		-- This may delete fewer variables 
-		-- than in priciple possible
+wrapDictBindsE :: Bag DictBind -> CoreExpr -> CoreExpr
+wrapDictBindsE dbs expr
+  = foldrBag add expr dbs
   where
-    new_uds = 
-     MkUD { dict_binds = free_dbs
-	  , calls      = free_calls 
-	  , ud_fvs     = fvs `minusVarSet` bndr_set}
-
+    add (bind,_) expr = Let bind expr
+
+----------------------
+dumpUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, Bag DictBind)
+-- Used at a lambda or case binder; just dump anything mentioning the binder
+dumpUDs bndrs uds@(MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
+  | null bndrs = (uds, emptyBag)  -- Common in case alternatives
+  | otherwise  = -- pprTrace "dumpUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $
+                 (free_uds, dump_dbs)
+  where
+    free_uds = MkUD { ud_binds = free_dbs, ud_calls = free_calls }
+    bndr_set = mkVarSet bndrs
+    (free_dbs, dump_dbs, dump_set) = splitDictBinds orig_dbs bndr_set
+    free_calls = deleteCallsMentioning dump_set $   -- Drop calls mentioning bndr_set on the floor
+                 deleteCallsFor bndrs orig_calls    -- Discard calls for bndr_set; there should be 
+		 			   	    -- no calls for any of the dicts in dump_dbs
+
+dumpBindUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, Bag DictBind, Bool)
+-- Used at a lambda or case binder; just dump anything mentioning the binder
+dumpBindUDs bndrs (MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
+  = -- pprTrace "dumpBindUDs" (ppr bndrs $$ ppr free_uds $$ ppr dump_dbs) $
+    (free_uds, dump_dbs, float_all)
+  where
+    free_uds = MkUD { ud_binds = free_dbs, ud_calls = free_calls }
     bndr_set = mkVarSet bndrs
-    add_let (bind,_) body = Let bind body
+    (free_dbs, dump_dbs, dump_set) = splitDictBinds orig_dbs bndr_set
+    free_calls = deleteCallsFor bndrs orig_calls
+    float_all = dump_set `intersectsVarSet` callDetailsFVs free_calls
+
+callsForMe :: Id -> UsageDetails -> (UsageDetails, [CallInfo])
+callsForMe fn (MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
+  = -- pprTrace ("callsForMe")
+    --         (vcat [ppr fn, 
+    --                text "Orig dbs ="     <+> ppr (_dictBindBndrs orig_dbs), 
+    --                text "Orig calls ="   <+> ppr orig_calls,
+    --                text "Dep set ="      <+> ppr dep_set, 
+    --                text "Calls for me =" <+> ppr calls_for_me]) $
+    (uds_without_me, calls_for_me)
+  where
+    uds_without_me = MkUD { ud_binds = orig_dbs, ud_calls = delVarEnv orig_calls fn }
+    calls_for_me = case lookupVarEnv orig_calls fn of
+			Nothing -> []
+			Just (CIS _ calls) -> filter_dfuns (Map.toList calls)
 
-    (free_dbs, dump_dbs, dump_set) 
-	= foldlBag dump_db (emptyBag, emptyBag, bndr_set) orig_dbs
-		-- Important that it's foldl not foldr;
-		-- we're accumulating the set of dumped ids in dump_set
+    dep_set = foldlBag go (unitVarSet fn) orig_dbs
+    go dep_set (db,fvs) | fvs `intersectsVarSet` dep_set
+                        = extendVarSetList dep_set (bindersOf db)
+                        | otherwise = dep_set
 
-    free_calls = filterCalls dump_set orig_calls
+	-- Note [Specialisation of dictionary functions]
+    filter_dfuns | isDFunId fn = filter ok_call
+                 | otherwise   = \cs -> cs
 
-    dump_db (free_dbs, dump_dbs, dump_idset) db@(bind, fvs)
+    ok_call (_, (_,fvs)) = not (fvs `intersectsVarSet` dep_set)
+
+----------------------
+splitDictBinds :: Bag DictBind -> IdSet -> (Bag DictBind, Bag DictBind, IdSet)
+-- Returns (free_dbs, dump_dbs, dump_set)
+splitDictBinds dbs bndr_set
+   = foldlBag split_db (emptyBag, emptyBag, bndr_set) dbs
+		-- Important that it's foldl not foldr;
+		-- we're accumulating the set of dumped ids in dump_set
+   where
+    split_db (free_dbs, dump_dbs, dump_idset) db@(bind, fvs)
 	| dump_idset `intersectsVarSet` fvs	-- Dump it
 	= (free_dbs, dump_dbs `snocBag` db,
 	   extendVarSetList dump_idset (bindersOf bind))
@@ -1193,14 +1665,20 @@ dumpUDs bndrs (MkUD { dict_binds = orig_dbs
 	| otherwise	-- Don't dump it
 	= (free_dbs `snocBag` db, dump_dbs, dump_idset)
 
-filterCalls :: VarSet -> CallDetails -> CallDetails
--- Remove any calls that mention the variables
-filterCalls bs calls
-  = mapFM (\_ cs -> filter_calls cs) $
-    filterFM (\k _ -> not (k `elemVarSet` bs)) calls
+
+----------------------
+deleteCallsMentioning :: VarSet -> CallDetails -> CallDetails
+-- Remove calls *mentioning* bs 
+deleteCallsMentioning bs calls
+  = mapVarEnv filter_calls calls
   where
-    filter_calls :: CallInfo -> CallInfo
-    filter_calls = filterFM (\_ (_, fvs) -> not (fvs `intersectsVarSet` bs))
+    filter_calls :: CallInfoSet -> CallInfoSet
+    filter_calls (CIS f calls) = CIS f (Map.filter keep_call calls)
+    keep_call (_, fvs) = not (fvs `intersectsVarSet` bs)
+
+deleteCallsFor :: [Id] -> CallDetails -> CallDetails
+-- Remove calls *for* bs
+deleteCallsFor bs calls = delVarEnvList calls bs
 \end{code}
 
 
@@ -1213,8 +1691,9 @@ filterCalls bs calls
 \begin{code}
 type SpecM a = UniqSM a
 
-initSM :: UniqSupply -> SpecM a -> a
-initSM	  = initUs_
+runSpecM:: SpecM a -> CoreM a
+runSpecM spec = do { us <- getUniqueSupplyM
+                   ; return (initUs_ us spec) }
 
 mapAndCombineSM :: (a -> SpecM (b, UsageDetails)) -> [a] -> SpecM ([b], UsageDetails)
 mapAndCombineSM _ []     = return ([], emptyUDs)
@@ -1235,19 +1714,28 @@ cloneBindSM subst (Rec pairs) = do
     let (subst', bndrs') = cloneRecIdBndrs subst us (map fst pairs)
     return (subst', subst', Rec (bndrs' `zip` map snd pairs))
 
-cloneBinders :: Subst -> [CoreBndr] -> SpecM (Subst, [CoreBndr])
-cloneBinders subst bndrs = do
-    us <- getUniqueSupplyM
-    return (cloneIdBndrs subst us bndrs)
-
-newIdSM :: Id -> Type -> SpecM Id
-newIdSM old_id new_ty = do
-    uniq <- getUniqueM
-    let
-        -- Give the new Id a similar occurrence name to the old one
-        name   = idName old_id
-        new_id = mkUserLocal (mkSpecOcc (nameOccName name)) uniq new_ty (getSrcSpan name)
-    return new_id
+newDictBndrs :: Subst -> [CoreBndr] -> SpecM (Subst, [CoreBndr])
+-- Make up completely fresh binders for the dictionaries
+-- Their bindings are going to float outwards
+newDictBndrs subst bndrs 
+  = do { bndrs' <- mapM new bndrs
+       ; let subst' = extendIdSubstList subst 
+                        [(d, Var d') | (d,d') <- bndrs `zip` bndrs']
+       ; return (subst', bndrs' ) }
+  where
+    new b = do { uniq <- getUniqueM
+    	       ; let n   = idName b
+                     ty' = CoreSubst.substTy subst (idType b)
+               ; return (mkUserLocal (nameOccName n) uniq ty' (getSrcSpan n)) }
+
+newSpecIdSM :: Id -> Type -> SpecM Id
+    -- Give the new Id a similar occurrence name to the old one
+newSpecIdSM old_id new_ty
+  = do	{ uniq <- getUniqueM
+    	; let name    = idName old_id
+	      new_occ = mkSpecOcc (nameOccName name)
+	      new_id  = mkUserLocal new_occ uniq new_ty (getSrcSpan name)
+        ; return new_id }
 \end{code}