Minor refactoring to remove redundant code

[ghc-hetmet.git] / compiler / specialise / Specialise.lhs
diff --git a/compiler/specialise/Specialise.lhs b/compiler/specialise/Specialise.lhs

index 5350406..d738565 100644 (file)
--- a/compiler/specialise/Specialise.lhs
+++ b/compiler/specialise/Specialise.lhs
@@ -4,7 +4,6 @@
  \section[Specialise]{Stamping out overloading, and (optionally) polymorphism}
  
  \begin{code}
-{-# OPTIONS -w #-}
  -- The above warning supression flag is a temporary kludge.
  -- While working on this module you are encouraged to remove it and fix
  -- any warnings in the module. See
@@ -15,38 +14,22 @@ module Specialise ( specProgram ) where
  
  #include "HsVersions.h"
  
-import DynFlags        ( DynFlags, DynFlag(..) )
-import Id              ( Id, idName, idType, mkUserLocal, 
-                         idInlinePragma, setInlinePragma ) 
-import TcType          ( Type, mkTyVarTy, tcSplitSigmaTy, 
-                         tyVarsOfTypes, tyVarsOfTheta, isClassPred,
-                         tcCmpType, isUnLiftedType
-                       )
-import CoreSubst       ( Subst, mkEmptySubst, extendTvSubstList, lookupIdSubst,
-                         substBndr, substBndrs, substTy, substInScope,
-                         cloneIdBndr, cloneIdBndrs, cloneRecIdBndrs
-                       ) 
+import Id
+import TcType
+import CoreSubst 
+import CoreUnfold      ( mkUnfolding, mkInlineRule )
  import VarSet
  import VarEnv
  import CoreSyn
-import CoreUtils       ( applyTypeToArgs, mkPiTypes )
+import Rules
+import CoreUtils       ( exprIsTrivial, applyTypeToArgs, mkPiTypes )
  import CoreFVs         ( exprFreeVars, exprsFreeVars, idFreeVars )
-import CoreTidy                ( tidyRules )
-import CoreLint                ( showPass, endPass )
-import Rules           ( addIdSpecialisations, mkLocalRule, lookupRule, emptyRuleBase, rulesOfBinds )
-import PprCore         ( pprRules )
-import UniqSupply      ( UniqSupply,
-                         UniqSM, initUs_,
-                         MonadUnique(..)
-                       )
+import UniqSupply      ( UniqSupply, UniqSM, initUs_, MonadUnique(..) )
  import Name
  import MkId            ( voidArgId, realWorldPrimId )
  import FiniteMap
-import Maybes          ( catMaybes, maybeToBool )
-import ErrUtils                ( dumpIfSet_dyn )
-import BasicTypes      ( Activation( AlwaysActive ) )
+import Maybes          ( catMaybes, isJust )
  import Bag
-import List            ( partition )
  import Util
  import Outputable
  import FastString
@@ -491,8 +474,6 @@ of this is permanently ruled out.
  Still, this is no great hardship, because we intend to eliminate
  overloading altogether anyway!
  
-
-
  A note about non-tyvar dictionaries
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  Some Ids have types like
@@ -517,7 +498,7 @@ Should we specialise wrt this compound-type dictionary?  We used to say
  But it is simpler and more uniform to specialise wrt these dicts too;
  and in future GHC is likely to support full fledged type signatures 
  like
-       f ;: Eq [(a,b)] => ...
+       f :: Eq [(a,b)] => ...
  
  
  %************************************************************************
@@ -580,20 +561,10 @@ Hence, the invariant is this:
  %************************************************************************
  
  \begin{code}
-specProgram :: DynFlags -> UniqSupply -> [CoreBind] -> IO [CoreBind]
-specProgram dflags us binds = do
-   
-       showPass dflags "Specialise"
-
-       let binds' = initSM us (do (binds', uds') <- go binds
-                                  return (dumpAllDictBinds uds' binds'))
-
-       endPass dflags "Specialise" Opt_D_dump_spec binds'
-
-       dumpIfSet_dyn dflags Opt_D_dump_rules "Top-level specialisations"
-                 (pprRules (tidyRules emptyTidyEnv (rulesOfBinds binds')))
-
-       return binds'
+specProgram :: UniqSupply -> [CoreBind] -> [CoreBind]
+specProgram us binds = initSM us $
+                       do { (binds', uds') <- go binds
+                         ; return (wrapDictBinds (ud_binds uds') binds') }
    where
         -- We need to start with a Subst that knows all the things
         -- that are in scope, so that the substitution engine doesn't
@@ -620,25 +591,25 @@ specVar subst v = lookupIdSubst subst v
  
  specExpr :: Subst -> CoreExpr -> SpecM (CoreExpr, UsageDetails)
  -- We carry a substitution down:
---     a) we must clone any binding that might flaot outwards,
+--     a) we must clone any binding that might float outwards,
  --        to avoid name clashes
  --     b) we carry a type substitution to use when analysing
  --        the RHS of specialised bindings (no type-let!)
  
  ---------------- First the easy cases --------------------
-specExpr subst (Type ty) = return (Type (substTy subst ty), emptyUDs)
+specExpr subst (Type ty) = return (Type (CoreSubst.substTy subst ty), emptyUDs)
  specExpr subst (Var v)   = return (specVar subst v,         emptyUDs)
-specExpr subst (Lit lit) = return (Lit lit,                 emptyUDs)
+specExpr _     (Lit lit) = return (Lit lit,                 emptyUDs)
  specExpr subst (Cast e co) = do
      (e', uds) <- specExpr subst e
-    return ((Cast e' (substTy subst co)), uds)
+    return ((Cast e' (CoreSubst.substTy subst co)), uds)
  specExpr subst (Note note body) = do
      (body', uds) <- specExpr subst body
      return (Note (specNote subst note) body', uds)
  
  
  ---------------- Applications might generate a call instance --------------------
-specExpr subst expr@(App fun arg)
+specExpr subst expr@(App {})
    = go expr []
    where
      go (App fun arg) args = do (arg', uds_arg) <- specExpr subst arg
@@ -646,15 +617,15 @@ specExpr subst expr@(App fun arg)
                                 return (App fun' arg', uds_arg `plusUDs` uds_app)
  
      go (Var f)       args = case specVar subst f of
-                                Var f' -> return (Var f', mkCallUDs subst f' args)
+                                Var f' -> return (Var f', mkCallUDs f' args)
                                  e'     -> return (e', emptyUDs)        -- I don't expect this!
-    go other        args = specExpr subst other
+    go other        _    = specExpr subst other
  
  ---------------- Lambda/case require dumping of usage details --------------------
  specExpr subst e@(Lam _ _) = do
      (body', uds) <- specExpr subst' body
-    let (filtered_uds, body'') = dumpUDs bndrs' uds body'
-    return (mkLams bndrs' body'', filtered_uds)
+    let (free_uds, dumped_dbs) = dumpUDs bndrs' uds 
+    return (mkLams bndrs' (wrapDictBindsE dumped_dbs body'), free_uds)
    where
      (bndrs, body) = collectBinders e
      (subst', bndrs') = substBndrs subst bndrs
@@ -664,15 +635,16 @@ specExpr subst e@(Lam _ _) = do
  specExpr subst (Case scrut case_bndr ty alts) = do
      (scrut', uds_scrut) <- specExpr subst scrut
      (alts', uds_alts) <- mapAndCombineSM spec_alt alts
-    return (Case scrut' case_bndr' (substTy subst ty) alts', uds_scrut `plusUDs` uds_alts)
+    return (Case scrut' case_bndr' (CoreSubst.substTy subst ty) alts', 
+            uds_scrut `plusUDs` uds_alts)
    where
      (subst_alt, case_bndr') = substBndr subst case_bndr
         -- No need to clone case binder; it can't float like a let(rec)
  
      spec_alt (con, args, rhs) = do
            (rhs', uds) <- specExpr subst_rhs rhs
-          let (uds', rhs'') = dumpUDs args uds rhs'
-          return ((con, args', rhs''), uds')
+          let (free_uds, dumped_dbs) = dumpUDs args' uds
+          return ((con, args', wrapDictBindsE dumped_dbs rhs'), free_uds)
          where
            (subst_rhs, args') = substBndrs subst_alt args
  
@@ -691,7 +663,8 @@ specExpr subst (Let bind body) = do
      return (foldr Let body' binds', uds)
  
  -- Must apply the type substitution to coerceions
-specNote subst note          = note
+specNote :: Subst -> Note -> Note
+specNote _ note = note
  \end{code}
  
  %************************************************************************
@@ -707,79 +680,95 @@ specBind :: Subst                 -- Use this for RHSs
          -> SpecM ([CoreBind],          -- New bindings
                    UsageDetails)        -- And info to pass upstream
  
-specBind rhs_subst bind body_uds = do
-    (bind', bind_uds) <- specBindItself rhs_subst bind (calls body_uds)
-    let
-       bndrs   = bindersOf bind
-       all_uds = zapCalls bndrs (body_uds `plusUDs` bind_uds)
-                       -- It's important that the `plusUDs` is this way round,
-                       -- because body_uds may bind dictionaries that are
-                       -- used in the calls passed to specDefn.  So the
-                       -- dictionary bindings in bind_uds may mention 
-                       -- dictionaries bound in body_uds.
-    case splitUDs bndrs all_uds of
-
-       (_, ([],[]))    -- This binding doesn't bind anything needed
-                       -- in the UDs, so put the binding here
-                       -- This is the case for most non-dict bindings, except
-                       -- for the few that are mentioned in a dict binding
-                       -- that is floating upwards in body_uds
-               -> return ([bind'], all_uds)
-
-       (float_uds, (dict_binds, calls))        -- This binding is needed in the UDs, so float it out
-               -> return ([], float_uds `plusUDs` mkBigUD bind' dict_binds calls)
-   
-
--- A truly gruesome function
-mkBigUD bind@(NonRec _ _) dbs calls
-  =    -- Common case: non-recursive and no specialisations
-       -- (if there were any specialistions it would have been made recursive)
-    MkUD { dict_binds = listToBag (mkDB bind : dbs),
-          calls = listToCallDetails calls }
-
-mkBigUD bind dbs calls
-  =    -- General case
-    MkUD { dict_binds = unitBag (mkDB (Rec (bind_prs bind ++ dbsToPairs dbs))),
-                       -- Make a huge Rec
-          calls = listToCallDetails calls }
-  where
-    bind_prs (NonRec b r) = [(b,r)]
-    bind_prs (Rec prs)    = prs
-
-    dbsToPairs []             = []
-    dbsToPairs ((bind,_):dbs) = bind_prs bind ++ dbsToPairs dbs
-
--- specBindItself deals with the RHS, specialising it according
--- to the calls found in the body (if any)
-specBindItself rhs_subst (NonRec bndr rhs) call_info = do
-    ((bndr',rhs'), spec_defns, spec_uds) <- specDefn rhs_subst call_info (bndr,rhs)
-    let
-        new_bind | null spec_defns = NonRec bndr' rhs'
-                 | otherwise       = Rec ((bndr',rhs'):spec_defns)
-               -- bndr' mentions the spec_defns in its SpecEnv
-               -- Not sure why we couln't just put the spec_defns first
-    return (new_bind, spec_uds)
-
-specBindItself rhs_subst (Rec pairs) call_info = do
-    stuff <- mapM (specDefn rhs_subst call_info) pairs
-    let
-       (pairs', spec_defns_s, spec_uds_s) = unzip3 stuff
-       spec_defns = concat spec_defns_s
-       spec_uds   = plusUDList spec_uds_s
-        new_bind   = Rec (spec_defns ++ pairs')
-    return (new_bind, spec_uds)
-
-
-specDefn :: Subst                      -- Subst to use for RHS
-        -> CallDetails                 -- Info on how it is used in its scope
-        -> (Id, CoreExpr)              -- The thing being bound and its un-processed RHS
-        -> SpecM ((Id, CoreExpr),      -- The thing and its processed RHS
-                                       --      the Id may now have specialisations attached
+-- Returned UsageDetails:
+--    No calls for binders of this bind
+specBind rhs_subst (NonRec fn rhs) body_uds
+  = do { (rhs', rhs_uds) <- specExpr rhs_subst rhs
+       ; (fn', spec_defns, body_uds1) <- specDefn rhs_subst body_uds fn rhs
+
+       ; let pairs = spec_defns ++ [(fn', rhs')]
+                       -- fn' mentions the spec_defns in its rules, 
+                       -- so put the latter first
+
+             combined_uds = body_uds1 `plusUDs` rhs_uds
+               -- This way round a call in rhs_uds of a function f
+               -- at type T will override a call of f at T in body_uds1; and
+               -- that is good because it'll tend to keep "earlier" calls
+               -- See Note [Specialisation of dictionary functions]
+
+            (free_uds, dump_dbs, float_all) = dumpBindUDs [fn] combined_uds
+               -- See Note [From non-recursive to recursive]
+
+             final_binds | isEmptyBag dump_dbs = [NonRec b r | (b,r) <- pairs]
+                         | otherwise = [Rec (flattenDictBinds dump_dbs pairs)]
+
+        ; if float_all then
+            -- Rather than discard the calls mentioning the bound variables
+            -- we float this binding along with the others
+             return ([], free_uds `snocDictBinds` final_binds)
+           else
+            -- No call in final_uds mentions bound variables, 
+            -- so we can just leave the binding here
+             return (final_binds, free_uds) }
+
+
+specBind rhs_subst (Rec pairs) body_uds
+       -- Note [Specialising a recursive group]
+  = do { let (bndrs,rhss) = unzip pairs
+       ; (rhss', rhs_uds) <- mapAndCombineSM (specExpr rhs_subst) rhss
+       ; let scope_uds = body_uds `plusUDs` rhs_uds
+                              -- Includes binds and calls arising from rhss
+
+       ; (bndrs1, spec_defns1, uds1) <- specDefns rhs_subst scope_uds pairs
+
+       ; (bndrs3, spec_defns3, uds3)
+             <- if null spec_defns1  -- Common case: no specialisation
+               then return (bndrs1, [], uds1)
+               else do {            -- Specialisation occurred; do it again
+                          (bndrs2, spec_defns2, uds2)
+                              <- specDefns rhs_subst uds1 (bndrs1 `zip` rhss)
+                        ; return (bndrs2, spec_defns2 ++ spec_defns1, uds2) }
+
+       ; let (final_uds, dumped_dbs, float_all) = dumpBindUDs bndrs uds3
+             bind = Rec (flattenDictBinds dumped_dbs $
+                         spec_defns3 ++ zip bndrs3 rhss')
+             
+       ; if float_all then
+             return ([], final_uds `snocDictBind` bind)
+           else
+             return ([bind], final_uds) }
+
+
+---------------------------
+specDefns :: Subst
+         -> UsageDetails               -- Info on how it is used in its scope
+         -> [(Id,CoreExpr)]            -- The things being bound and their un-processed RHS
+         -> SpecM ([Id],               -- Original Ids with RULES added
+                   [(Id,CoreExpr)],    -- Extra, specialised bindings
+                   UsageDetails)       -- Stuff to fling upwards from the specialised versions
+
+-- Specialise a list of bindings (the contents of a Rec), but flowing usages
+-- upwards binding by binding.  Example: { f = ...g ...; g = ...f .... }
+-- Then if the input CallDetails has a specialised call for 'g', whose specialisation
+-- in turn generates a specialised call for 'f', we catch that in this one sweep.
+-- But not vice versa (it's a fixpoint problem).
+
+specDefns _subst uds []
+  = return ([], [], uds)
+specDefns subst uds ((bndr,rhs):pairs)
+  = do { (bndrs1, spec_defns1, uds1) <- specDefns subst uds pairs
+       ; (bndr1, spec_defns2, uds2)  <- specDefn subst uds1 bndr rhs
+       ; return (bndr1 : bndrs1, spec_defns1 ++ spec_defns2, uds2) }
+
+---------------------------
+specDefn :: Subst
+        -> UsageDetails                -- Info on how it is used in its scope
+        -> Id -> CoreExpr              -- The thing being bound and its un-processed RHS
+        -> SpecM (Id,                  -- Original Id with added RULES
                    [(Id,CoreExpr)],     -- Extra, specialised bindings
-                  UsageDetails         -- Stuff to fling upwards from the RHS and its
-           )                           --      specialised versions
+                  UsageDetails)        -- Stuff to fling upwards from the specialised versions
  
-specDefn subst calls (fn, rhs)
+specDefn subst body_uds fn rhs
         -- The first case is the interesting one
    |  rhs_tyvars `lengthIs`     n_tyvars -- Rhs of fn's defn has right number of big lambdas
    && rhs_ids    `lengthAtLeast` n_dicts        -- and enough dict args
@@ -787,127 +776,319 @@ specDefn subst calls (fn, rhs)
  
  --   && not (certainlyWillInline (idUnfolding fn))     -- And it's not small
  --     See Note [Inline specialisation] for why we do not 
---     switch off specialisation for inline functions = do
-  = do
-     -- Specialise the body of the function
-    (rhs', rhs_uds) <- specExpr subst rhs
-
-      -- Make a specialised version for each call in calls_for_me
-    stuff <- mapM spec_call calls_for_me
-    let
-        (spec_defns, spec_uds, spec_rules) = unzip3 stuff
-
-        fn' = addIdSpecialisations fn spec_rules
-
-    return ((fn',rhs'),
-              spec_defns,
-              rhs_uds `plusUDs` plusUDList spec_uds)
+--     switch off specialisation for inline functions
+
+  = do {       -- Make a specialised version for each call in calls_for_me
+         stuff <- mapM spec_call calls_for_me
+       ; let (spec_defns, spec_uds, spec_rules) = unzip3 (catMaybes stuff)
+             fn' = addIdSpecialisations fn spec_rules
+             final_uds = body_uds_without_me `plusUDs` plusUDList spec_uds 
+               -- It's important that the `plusUDs` is this way
+               -- round, because body_uds_without_me may bind
+               -- dictionaries that are used in calls_for_me passed
+               -- to specDefn.  So the dictionary bindings in
+               -- spec_uds may mention dictionaries bound in
+               -- body_uds_without_me
+
+       ; return (fn', spec_defns, final_uds) }
  
    | otherwise  -- No calls or RHS doesn't fit our preconceptions
-  = WARN( notNull calls_for_me, ptext SLIT("Missed specialisation opportunity for") <+> ppr fn )
+  = WARN( notNull calls_for_me, ptext (sLit "Missed specialisation opportunity for") <+> ppr fn )
           -- Note [Specialisation shape]
-    (do  { (rhs', rhs_uds) <- specExpr subst rhs
-       ; return ((fn, rhs'), [], rhs_uds) })
+    return (fn, [], body_uds_without_me)
    
    where
      fn_type           = idType fn
+    fn_arity          = idArity fn
+    fn_unf             = realIdUnfolding fn    -- Ignore loop-breaker-ness here
      (tyvars, theta, _) = tcSplitSigmaTy fn_type
      n_tyvars          = length tyvars
      n_dicts           = length theta
-    inline_prag        = idInlinePragma fn
+    inline_act         = idInlineActivation fn
  
-       -- It's important that we "see past" any INLINE pragma
-       -- else we'll fail to specialise an INLINE thing
-    (inline_rhs, rhs_inside) = dropInline rhs
-    (rhs_tyvars, rhs_ids, rhs_body) = collectTyAndValBinders rhs_inside
+       -- Figure out whether the function has an INLINE pragma
+       -- See Note [Inline specialisations]
+    fn_has_inline_rule :: Maybe Bool   -- Derive sat-flag from existing thing
+    fn_has_inline_rule = case isInlineRule_maybe fn_unf of
+                           Just (_,sat) -> Just sat
+                          Nothing      -> Nothing
  
-    rhs_dicts = take n_dicts rhs_ids
-    rhs_bndrs = rhs_tyvars ++ rhs_dicts
-    body      = mkLams (drop n_dicts rhs_ids) rhs_body
+    spec_arity = unfoldingArity fn_unf - n_dicts  -- Arity of the *specialised* inline rule
+
+    (rhs_tyvars, rhs_ids, rhs_body) = collectTyAndValBinders rhs
+
+    (body_uds_without_me, calls_for_me) = callsForMe fn body_uds
+
+    rhs_dict_ids = take n_dicts rhs_ids
+    body         = mkLams (drop n_dicts rhs_ids) rhs_body
                 -- Glue back on the non-dict lambdas
  
-    calls_for_me = case lookupFM calls fn of
-                       Nothing -> []
-                       Just cs -> fmToList cs
+    already_covered :: [CoreExpr] -> Bool
+    already_covered args         -- Note [Specialisations already covered]
+       = isJust (lookupRule (const True) realIdUnfolding 
+                            (substInScope subst) 
+                                   fn args (idCoreRules fn))
+
+    mk_ty_args :: [Maybe Type] -> [CoreExpr]
+    mk_ty_args call_ts = zipWithEqual "spec_call" mk_ty_arg rhs_tyvars call_ts
+              where
+                 mk_ty_arg rhs_tyvar Nothing   = Type (mkTyVarTy rhs_tyvar)
+                 mk_ty_arg _         (Just ty) = Type ty
  
      ----------------------------------------------------------
         -- Specialise to one particular call pattern
-    spec_call :: (CallKey, ([DictExpr], VarSet))       -- Call instance
-              -> SpecM ((Id,CoreExpr),                 -- Specialised definition
-                       UsageDetails,                   -- Usage details from specialised body
-                       CoreRule)                       -- Info for the Id's SpecEnv
-    spec_call (CallKey call_ts, (call_ds, call_fvs))
-      = ASSERT( call_ts `lengthIs` n_tyvars  && call_ds `lengthIs` n_dicts ) do
-               -- Calls are only recorded for properly-saturated applications
+    spec_call :: CallInfo                         -- Call instance
+              -> SpecM (Maybe ((Id,CoreExpr),    -- Specialised definition
+                              UsageDetails,      -- Usage details from specialised body
+                              CoreRule))         -- Info for the Id's SpecEnv
+    spec_call (CallKey call_ts, (call_ds, _))
+      = ASSERT( call_ts `lengthIs` n_tyvars  && call_ds `lengthIs` n_dicts )
         
-       -- Suppose f's defn is  f = /\ a b c d -> \ d1 d2 -> rhs        
-        -- Supppose the call is for f [Just t1, Nothing, Just t3, Nothing] [dx1, dx2]
+       -- Suppose f's defn is  f = /\ a b c -> \ d1 d2 -> rhs  
+        -- Supppose the call is for f [Just t1, Nothing, Just t3] [dx1, dx2]
  
         -- Construct the new binding
-       --      f1 = SUBST[a->t1,c->t3, d1->d1', d2->d2'] (/\ b d -> rhs)
+       --      f1 = SUBST[a->t1,c->t3, d1->d1', d2->d2'] (/\ b -> rhs)
         -- PLUS the usage-details
         --      { d1' = dx1; d2' = dx2 }
-       -- where d1', d2' are cloned versions of d1,d2, with the type substitution applied.
+       -- where d1', d2' are cloned versions of d1,d2, with the type substitution
+       -- applied.  These auxiliary bindings just avoid duplication of dx1, dx2
         --
         -- Note that the substitution is applied to the whole thing.
         -- This is convenient, but just slightly fragile.  Notably:
-       --      * There had better be no name clashes in a/b/c/d
-       --
-        let
-               -- poly_tyvars = [b,d] in the example above
+       --      * There had better be no name clashes in a/b/c
+        do { let
+               -- poly_tyvars = [b] in the example above
                 -- spec_tyvars = [a,c] 
-               -- ty_args     = [t1,b,t3,d]
-          poly_tyvars = [tv | (tv, Nothing) <- rhs_tyvars `zip` call_ts]
-           spec_tyvars = [tv | (tv, Just _)  <- rhs_tyvars `zip` call_ts]
-          ty_args     = zipWithEqual "spec_call" mk_ty_arg rhs_tyvars call_ts
-                      where
-                        mk_ty_arg rhs_tyvar Nothing   = Type (mkTyVarTy rhs_tyvar)
-                        mk_ty_arg rhs_tyvar (Just ty) = Type ty
-          rhs_subst  = extendTvSubstList subst (spec_tyvars `zip` [ty | Just ty <- call_ts])
-
-       (rhs_subst', rhs_dicts') <- cloneBinders rhs_subst rhs_dicts
-       let
-          inst_args = ty_args ++ map Var rhs_dicts'
-
-               -- Figure out the type of the specialised function
-          body_ty = applyTypeToArgs rhs fn_type inst_args
-          (lam_args, app_args)                 -- Add a dummy argument if body_ty is unlifted
-               | isUnLiftedType body_ty        -- C.f. WwLib.mkWorkerArgs
-               = (poly_tyvars ++ [voidArgId], poly_tyvars ++ [realWorldPrimId])
-               | otherwise = (poly_tyvars, poly_tyvars)
-          spec_id_ty = mkPiTypes lam_args body_ty
-
-        spec_f <- newIdSM fn spec_id_ty
-        (spec_rhs, rhs_uds) <- specExpr rhs_subst' (mkLams lam_args body)
-       let
+               -- ty_args     = [t1,b,t3]
+               poly_tyvars   = [tv | (tv, Nothing) <- rhs_tyvars `zip` call_ts]
+               spec_tv_binds = [(tv,ty) | (tv, Just ty) <- rhs_tyvars `zip` call_ts]
+               spec_ty_args  = map snd spec_tv_binds
+               ty_args       = mk_ty_args call_ts
+               rhs_subst     = CoreSubst.extendTvSubstList subst spec_tv_binds
+
+          ; (rhs_subst1, inst_dict_ids) <- cloneDictBndrs rhs_subst rhs_dict_ids
+                         -- Clone rhs_dicts, including instantiating their types
+
+          ; let (rhs_subst2, dx_binds) = bindAuxiliaryDicts rhs_subst1 $
+                                         (my_zipEqual rhs_dict_ids inst_dict_ids call_ds)
+                inst_args = ty_args ++ map Var inst_dict_ids
+
+          ; if already_covered inst_args then
+               return Nothing
+            else do
+          {    -- Figure out the type of the specialised function
+            let body_ty = applyTypeToArgs rhs fn_type inst_args
+                (lam_args, app_args)           -- Add a dummy argument if body_ty is unlifted
+                  | isUnLiftedType body_ty     -- C.f. WwLib.mkWorkerArgs
+                  = (poly_tyvars ++ [voidArgId], poly_tyvars ++ [realWorldPrimId])
+                  | otherwise = (poly_tyvars, poly_tyvars)
+                spec_id_ty = mkPiTypes lam_args body_ty
+       
+           ; spec_f <- newSpecIdSM fn spec_id_ty
+          ; let spec_f_w_arity = setIdArity spec_f (max 0 (fn_arity - n_dicts))
+               -- Adding arity information just propagates it a bit faster
+               -- See Note [Arity decrease] in Simplify
+
+           ; (spec_rhs, rhs_uds) <- specExpr rhs_subst2 (mkLams lam_args body)
+          ; let
                 -- The rule to put in the function's specialisation is:
-               --      forall b,d, d1',d2'.  f t1 b t3 d d1' d2' = f1 b d  
-           spec_env_rule = mkLocalRule (mkFastString ("SPEC " ++ showSDoc (ppr fn)))
-                               inline_prag     -- Note [Auto-specialisation and RULES]
-                               (idName fn)
-                               (poly_tyvars ++ rhs_dicts')
-                               inst_args 
-                               (mkVarApps (Var spec_f) app_args)
+               --      forall b, d1',d2'.  f t1 b t3 d1' d2' = f1 b  
+               rule_name = mkFastString ("SPEC " ++ showSDoc (ppr fn <+> ppr spec_ty_args))
+               spec_env_rule = mkLocalRule
+                                 rule_name
+                                 inline_act    -- Note [Auto-specialisation and RULES]
+                                 (idName fn)
+                                 (poly_tyvars ++ inst_dict_ids)
+                                 inst_args 
+                                 (mkVarApps (Var spec_f_w_arity) app_args)
  
                 -- Add the { d1' = dx1; d2' = dx2 } usage stuff
-          final_uds = foldr addDictBind rhs_uds (my_zipEqual "spec_call" rhs_dicts' call_ds)
+               final_uds = foldr consDictBind rhs_uds dx_binds
+
+               -- See Note [Inline specialisations]
+               final_spec_f | Just sat <- fn_has_inline_rule
+                            = spec_f_w_arity `setInlineActivation` inline_act
+                                             `setIdUnfolding` mkInlineRule sat spec_rhs spec_arity
+                                               -- I'm not sure this should be unconditionally InlSat
+                            | otherwise 
+                            = spec_f_w_arity
+          ; return (Just ((final_spec_f, spec_rhs), final_uds, spec_env_rule)) } }
+      where
+       my_zipEqual xs ys zs
+        | debugIsOn && not (equalLength xs ys && equalLength ys zs)
+             = pprPanic "my_zipEqual" (vcat [ ppr xs, ppr ys
+                                           , ppr fn <+> ppr call_ts
+                                           , ppr (idType fn), ppr theta
+                                           , ppr n_dicts, ppr rhs_dict_ids 
+                                           , ppr rhs])
+        | otherwise = zip3 xs ys zs
+
+bindAuxiliaryDicts
+       :: Subst
+       -> [(DictId,DictId,CoreExpr)]   -- (orig_dict, inst_dict, dx)
+       -> (Subst,                      -- Substitute for all orig_dicts
+           [CoreBind])                 -- Auxiliary bindings
+-- Bind any dictionary arguments to fresh names, to preserve sharing
+-- Substitution already substitutes orig_dict -> inst_dict
+bindAuxiliaryDicts subst triples = go subst [] triples
+  where
+    go subst binds []    = (subst, binds)
+    go subst binds ((d, dx_id, dx) : pairs)
+      | exprIsTrivial dx = go (extendIdSubst subst d dx) binds pairs
+             -- No auxiliary binding necessary
+      | otherwise        = go subst_w_unf (NonRec dx_id dx : binds) pairs
+      where
+        dx_id1 = dx_id `setIdUnfolding` mkUnfolding False dx
+       subst_w_unf = extendIdSubst subst d (Var dx_id1)
+                    -- Important!  We're going to substitute dx_id1 for d
+            -- and we want it to look "interesting", else we won't gather *any*
+            -- consequential calls. E.g.
+            --     f d = ...g d....
+            -- If we specialise f for a call (f (dfun dNumInt)), we'll get 
+            -- a consequent call (g d') with an auxiliary definition
+            --     d' = df dNumInt
+            -- We want that consequent call to look interesting
+\end{code}
  
-          spec_pr | inline_rhs = (spec_f `setInlinePragma` inline_prag, Note InlineMe spec_rhs)
-                  | otherwise  = (spec_f,                               spec_rhs)
+Note [From non-recursive to recursive]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Even in the non-recursive case, if any dict-binds depend on 'fn' we might 
+have built a recursive knot
  
-        return (spec_pr, final_uds, spec_env_rule)
+      f a d x = <blah>
+      MkUD { ud_binds = d7 = MkD ..f..
+           , ud_calls = ...(f T d7)... }
  
-      where
-       my_zipEqual doc xs ys 
-        | debugIsOn && not (equalLength xs ys)
-             = pprPanic "my_zipEqual" (vcat 
-                                               [ ppr xs, ppr ys
-                                               , ppr fn <+> ppr call_ts
-                                               , ppr (idType fn), ppr theta
-                                               , ppr n_dicts, ppr rhs_dicts 
-                                               , ppr rhs])
-        | otherwise               = zipEqual doc xs ys
-\end{code}
+The we generate
+
+      Rec { fs x = <blah>[T/a, d7/d]
+            f a d x = <blah>
+               RULE f T _ = fs
+            d7 = ...f... }
+
+Here the recursion is only through the RULE.
+
+ 
+Note [Specialisation of dictionary functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Here is a nasty example that bit us badly: see Trac #3591
+
+     dfun a d = MkD a d (meth d)
+     d4 = <blah>
+     d2 = dfun T d4
+     d1 = $p1 d2
+     d3 = dfun T d1
+
+None of these definitions is recursive. What happened was that we 
+generated a specialisation:
+
+     RULE forall d. dfun T d = dT
+     dT = (MkD a d (meth d)) [T/a, d1/d]
+        = MkD T d1 (meth d1)
+
+But now we use the RULE on the RHS of d2, to get
+
+    d2 = dT = MkD d1 (meth d1)
+    d1 = $p1 d2
+
+and now d1 is bottom!  The problem is that when specialising 'dfun' we
+should first dump "below" the binding all floated dictionary bindings
+that mention 'dfun' itself.  So d2 and d3 (and hence d1) must be
+placed below 'dfun', and thus unavailable to it when specialising
+'dfun'.  That in turn means that the call (dfun T d1) must be
+discarded.  On the other hand, the call (dfun T d4) is fine, assuming
+d4 doesn't mention dfun.
+
+But look at this:
+
+  class C a where { foo,bar :: [a] -> [a] }
+
+  instance C Int where 
+     foo x = r_bar x    
+     bar xs = reverse xs
+
+  r_bar :: C a => [a] -> [a]
+  r_bar xs = bar (xs ++ xs)
+
+That translates to:
+
+    r_bar a (c::C a) (xs::[a]) = bar a d (xs ++ xs)
+
+    Rec { $fCInt :: C Int = MkC foo_help reverse
+          foo_help (xs::[Int]) = r_bar Int $fCInt xs }
+
+The call (r_bar $fCInt) mentions $fCInt, 
+                        which mentions foo_help, 
+                        which mentions r_bar
+But we DO want to specialise r_bar at Int:
+
+    Rec { $fCInt :: C Int = MkC foo_help reverse
+          foo_help (xs::[Int]) = r_bar Int $fCInt xs
+
+          r_bar a (c::C a) (xs::[a]) = bar a d (xs ++ xs)
+           RULE r_bar Int _ = r_bar_Int
+
+          r_bar_Int xs = bar Int $fCInt (xs ++ xs)
+           }
+   
+Note that, because of its RULE, r_bar joins the recursive
+group.  (In this case it'll unravel a short moment later.)
+
+
+Conclusion: we catch the nasty case using filter_dfuns in
+callsForMe To be honest I'm not 100% certain that this is 100%
+right, but it works.  Sigh.
+
+
+Note [Specialising a recursive group]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+    let rec { f x = ...g x'...
+            ; g y = ...f y'.... }
+    in f 'a'
+Here we specialise 'f' at Char; but that is very likely to lead to 
+a specialisation of 'g' at Char.  We must do the latter, else the
+whole point of specialisation is lost.
+
+But we do not want to keep iterating to a fixpoint, because in the
+presence of polymorphic recursion we might generate an infinite number
+of specialisations.
+
+So we use the following heuristic:
+  * Arrange the rec block in dependency order, so far as possible
+    (the occurrence analyser already does this)
+
+  * Specialise it much like a sequence of lets
+
+  * Then go through the block a second time, feeding call-info from
+    the RHSs back in the bottom, as it were
+
+In effect, the ordering maxmimises the effectiveness of each sweep,
+and we do just two sweeps.   This should catch almost every case of 
+monomorphic recursion -- the exception could be a very knotted-up
+recursion with multiple cycles tied up together.
+
+This plan is implemented in the Rec case of specBindItself.
+ 
+Note [Specialisations already covered]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We obviously don't want to generate two specialisations for the same
+argument pattern.  There are two wrinkles
+
+1. We do the already-covered test in specDefn, not when we generate
+the CallInfo in mkCallUDs.  We used to test in the latter place, but
+we now iterate the specialiser somewhat, and the Id at the call site
+might therefore not have all the RULES that we can see in specDefn
+
+2. What about two specialisations where the second is an *instance*
+of the first?  If the more specific one shows up first, we'll generate
+specialisations for both.  If the *less* specific one shows up first,
+we *don't* currently generate a specialisation for the more specific
+one.  (See the call to lookupRule in already_covered.)  Reasons:
+  (a) lookupRule doesn't say which matches are exact (bad reason)
+  (b) if the earlier specialisation is user-provided, it's
+      far from clear that we should auto-specialise further
  
  Note [Auto-specialisation and RULES]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -947,7 +1128,7 @@ then its body must look like
  Reason: when specialising the body for a call (f ty dexp), we want to
  substitute dexp for d, and pick up specialised calls in the body of f.
  
-This doesn't always work.  One example I came across was htis:
+This doesn't always work.  One example I came across was this:
         newtype Gen a = MkGen{ unGen :: Int -> a }
  
         choose :: Eq a => a -> Gen a
@@ -965,7 +1146,8 @@ Note [Inline specialisations]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  We transfer to the specialised function any INLINE stuff from the
  original.  This means (a) the Activation in the IdInfo, and (b) any
-InlineMe on the RHS.  
+InlineMe on the RHS.  We do not, however, transfer the RuleMatchInfo
+since we do not expect the specialisation to occur in rewrite rules.
  
  This is a change (Jun06).  Previously the idea is that the point of
  inlining was precisely to specialise the function at its call site,
@@ -987,11 +1169,6 @@ specialised version.
  A case in point is dictionary functions, which are current marked
  INLINE, but which are worth specialising.
  
-\begin{code}
-dropInline :: CoreExpr -> (Bool, CoreExpr)
-dropInline (Note InlineMe rhs) = (True,  rhs)
-dropInline rhs                = (False, rhs)
-\end{code}
  
  %************************************************************************
  %*                                                                     *
@@ -1002,57 +1179,83 @@ dropInline rhs                 = (False, rhs)
  \begin{code}
  data UsageDetails 
    = MkUD {
-       dict_binds :: !(Bag DictBind),
+       ud_binds :: !(Bag DictBind),
                         -- Floated dictionary bindings
                         -- The order is important; 
                         -- in ds1 `union` ds2, bindings in ds2 can depend on those in ds1
                         -- (Remember, Bags preserve order in GHC.)
  
-       calls     :: !CallDetails
+       ud_calls :: !CallDetails  
+
+       -- INVARIANT: suppose bs = bindersOf ud_binds
+        -- Then 'calls' may *mention* 'bs', 
+        -- but there should be no calls *for* bs
      }
  
+instance Outputable UsageDetails where
+  ppr (MkUD { ud_binds = dbs, ud_calls = calls })
+       = ptext (sLit "MkUD") <+> braces (sep (punctuate comma 
+               [ptext (sLit "binds") <+> equals <+> ppr dbs,
+                ptext (sLit "calls") <+> equals <+> ppr calls]))
+
  type DictBind = (CoreBind, VarSet)
         -- The set is the free vars of the binding
         -- both tyvars and dicts
  
  type DictExpr = CoreExpr
  
-emptyUDs = MkUD { dict_binds = emptyBag, calls = emptyFM }
-
-type ProtoUsageDetails = ([DictBind],
-                         [(Id, CallKey, ([DictExpr], VarSet))]
-                        )
+emptyUDs :: UsageDetails
+emptyUDs = MkUD { ud_binds = emptyBag, ud_calls = emptyVarEnv }
  
  ------------------------------------------------------------                   
-type CallDetails  = FiniteMap Id CallInfo
+type CallDetails  = IdEnv CallInfoSet
  newtype CallKey   = CallKey [Maybe Type]                       -- Nothing => unconstrained type argument
-type CallInfo     = FiniteMap CallKey
-                             ([DictExpr], VarSet)              -- Dict args and the vars of the whole
-                                                               -- call (including tyvars)
-                                                               -- [*not* include the main id itself, of course]
-       -- The finite maps eliminate duplicates
-       -- The list of types and dictionaries is guaranteed to
-       -- match the type of f
+
+-- CallInfo uses a FiniteMap, thereby ensuring that
+-- we record only one call instance for any key
+--
+-- The list of types and dictionaries is guaranteed to
+-- match the type of f
+type CallInfoSet = FiniteMap CallKey ([DictExpr], VarSet)
+                       -- Range is dict args and the vars of the whole
+                       -- call (including tyvars)
+                       -- [*not* include the main id itself, of course]
+
+type CallInfo = (CallKey, ([DictExpr], VarSet))
+
+instance Outputable CallKey where
+  ppr (CallKey ts) = ppr ts
  
  -- Type isn't an instance of Ord, so that we can control which
  -- instance we use.  That's tiresome here.  Oh well
  instance Eq CallKey where
-  k1 == k2 = case k1 `compare` k2 of { EQ -> True; other -> False }
+  k1 == k2 = case k1 `compare` k2 of { EQ -> True; _ -> False }
  
  instance Ord CallKey where
    compare (CallKey k1) (CallKey k2) = cmpList cmp k1 k2
                 where
-                 cmp Nothing Nothing     = EQ
-                 cmp Nothing (Just t2)   = LT
-                 cmp (Just t1) Nothing   = GT
+                 cmp Nothing   Nothing   = EQ
+                 cmp Nothing   (Just _)  = LT
+                 cmp (Just _)  Nothing   = GT
                   cmp (Just t1) (Just t2) = tcCmpType t1 t2
  
  unionCalls :: CallDetails -> CallDetails -> CallDetails
-unionCalls c1 c2 = plusFM_C plusFM c1 c2
+unionCalls c1 c2 = plusVarEnv_C plusFM c1 c2
+
+-- plusCalls :: UsageDetails -> CallDetails -> UsageDetails
+-- plusCalls uds call_ds = uds { ud_calls = ud_calls uds `unionCalls` call_ds }
+
+callDetailsFVs :: CallDetails -> VarSet
+callDetailsFVs calls = foldVarEnv (unionVarSet . callInfoFVs) emptyVarSet calls
  
-singleCall :: Id -> [Maybe Type] -> [DictExpr] -> CallDetails
+callInfoFVs :: CallInfoSet -> VarSet
+callInfoFVs call_info = foldFM (\_ (_,fv) vs -> unionVarSet fv vs) emptyVarSet call_info
+
+------------------------------------------------------------                   
+singleCall :: Id -> [Maybe Type] -> [DictExpr] -> UsageDetails
  singleCall id tys dicts 
-  = unitFM id (unitFM (CallKey tys) (dicts, call_fvs))
+  = MkUD {ud_binds = emptyBag, 
+         ud_calls = unitVarEnv id (unitFM (CallKey tys) (dicts, call_fvs)) }
    where
      call_fvs = exprsFreeVars dicts `unionVarSet` tys_fvs
      tys_fvs  = tyVarsOfTypes (catMaybes tys)
@@ -1066,36 +1269,24 @@ singleCall id tys dicts
         --
         -- We don't include the 'id' itself.
  
-listToCallDetails calls
-  = foldr (unionCalls . mk_call) emptyFM calls
-  where
-    mk_call (id, tys, dicts_w_fvs) = unitFM id (unitFM tys dicts_w_fvs)
-       -- NB: the free vars of the call are provided
-
-callDetailsToList calls = [ (id,tys,dicts)
-                         | (id,fm) <- fmToList calls,
-                           (tys, dicts) <- fmToList fm
-                         ]
-
-mkCallUDs subst f args 
-  | null theta
+mkCallUDs :: Id -> [CoreExpr] -> UsageDetails
+mkCallUDs f args 
+  | not (isLocalId f)  -- Imported from elsewhere
+  || null theta                -- Not overloaded
    || not (all isClassPred theta)       
         -- Only specialise if all overloading is on class params. 
         -- In ptic, with implicit params, the type args
         --  *don't* say what the value of the implicit param is!
    || not (spec_tys `lengthIs` n_tyvars)
    || not ( dicts   `lengthIs` n_dicts)
-  || maybeToBool (lookupRule (\act -> True) (substInScope subst) emptyRuleBase f args)
-       -- There's already a rule covering this call.  A typical case
-       -- is where there's an explicit user-provided rule.  Then
-       -- we don't want to create a specialised version 
-       -- of the function that overlaps.
-  = emptyUDs   -- Not overloaded, or no specialisation wanted
+  || not (any interestingDict dicts)   -- Note [Interesting dictionary arguments]
+  -- See also Note [Specialisations already covered]
+  = -- pprTrace "mkCallUDs: discarding" (vcat [ppr f, ppr args, ppr n_tyvars, ppr n_dicts, ppr (map interestingDict dicts)]) 
+    emptyUDs   -- Not overloaded, or no specialisation wanted
  
    | otherwise
-  = MkUD {dict_binds = emptyBag, 
-         calls      = singleCall f spec_tys dicts
-    }
+  = -- pprTrace "mkCallUDs: keeping" (vcat [ppr f, ppr args, ppr n_tyvars, ppr n_dicts, ppr (map interestingDict dicts)]) 
+    singleCall f spec_tys dicts
    where
      (tyvars, theta, _) = tcSplitSigmaTy (idType f)
      constrained_tyvars = tyVarsOfTheta theta 
@@ -1108,29 +1299,55 @@ mkCallUDs subst f args
      mk_spec_ty tyvar ty 
         | tyvar `elemVarSet` constrained_tyvars = Just ty
         | otherwise                             = Nothing
+\end{code}
  
-------------------------------------------------------------                   
+Note [Interesting dictionary arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this
+        \a.\d:Eq a.  let f = ... in ...(f d)...
+There really is not much point in specialising f wrt the dictionary d,
+because the code for the specialised f is not improved at all, because
+d is lambda-bound.  We simply get junk specialisations.
+
+What is "interesting"?  Just that it has *some* structure.
+
+\begin{code}
+interestingDict :: CoreExpr -> Bool
+-- A dictionary argument is interesting if it has *some* structure
+interestingDict (Var v) =  hasSomeUnfolding (idUnfolding v)
+                       || isDataConWorkId v
+interestingDict (Type _)         = False
+interestingDict (App fn (Type _)) = interestingDict fn
+interestingDict (Note _ a)       = interestingDict a
+interestingDict (Cast e _)       = interestingDict e
+interestingDict _                 = True
+\end{code}
+
+\begin{code}
  plusUDs :: UsageDetails -> UsageDetails -> UsageDetails
-plusUDs (MkUD {dict_binds = db1, calls = calls1})
-       (MkUD {dict_binds = db2, calls = calls2})
-  = MkUD {dict_binds = d, calls = c}
-  where
-    d = db1    `unionBags`   db2 
-    c = calls1 `unionCalls`  calls2
+plusUDs (MkUD {ud_binds = db1, ud_calls = calls1})
+       (MkUD {ud_binds = db2, ud_calls = calls2})
+  = MkUD { ud_binds = db1    `unionBags`   db2 
+         , ud_calls = calls1 `unionCalls`  calls2 }
  
+plusUDList :: [UsageDetails] -> UsageDetails
  plusUDList = foldr plusUDs emptyUDs
  
--- zapCalls deletes calls to ids from uds
-zapCalls ids uds = uds {calls = delListFromFM (calls uds) ids}
+-----------------------------
+_dictBindBndrs :: Bag DictBind -> [Id]
+_dictBindBndrs dbs = foldrBag ((++) . bindersOf . fst) [] dbs
  
+mkDB :: CoreBind -> DictBind
  mkDB bind = (bind, bind_fvs bind)
  
+bind_fvs :: CoreBind -> VarSet
  bind_fvs (NonRec bndr rhs) = pair_fvs (bndr,rhs)
  bind_fvs (Rec prs)        = foldl delVarSet rhs_fvs bndrs
                            where
                              bndrs = map fst prs
                              rhs_fvs = unionVarSets (map pair_fvs prs)
  
+pair_fvs :: (Id, CoreExpr) -> VarSet
  pair_fvs (bndr, rhs) = exprFreeVars rhs `unionVarSet` idFreeVars bndr
         -- Don't forget variables mentioned in the
         -- rules of the bndr.  C.f. OccAnal.addRuleUsage
@@ -1138,62 +1355,117 @@ pair_fvs (bndr, rhs) = exprFreeVars rhs `unionVarSet` idFreeVars bndr
         --      type T a = Int
         --      x :: T a = 3
  
-addDictBind (dict,rhs) uds = uds { dict_binds = mkDB (NonRec dict rhs) `consBag` dict_binds uds }
+flattenDictBinds :: Bag DictBind -> [(Id,CoreExpr)] -> [(Id,CoreExpr)]
+flattenDictBinds dbs pairs
+  = foldrBag add pairs dbs
+  where
+    add (NonRec b r,_) pairs = (b,r) : pairs
+    add (Rec prs1, _)  pairs = prs1 ++ pairs
+
+snocDictBinds :: UsageDetails -> [CoreBind] -> UsageDetails
+-- Add ud_binds to the tail end of the bindings in uds
+snocDictBinds uds dbs
+  = uds { ud_binds = ud_binds uds `unionBags` 
+                     foldr (consBag . mkDB) emptyBag dbs }
+
+consDictBind :: CoreBind -> UsageDetails -> UsageDetails
+consDictBind bind uds = uds { ud_binds = mkDB bind `consBag` ud_binds uds }
+
+snocDictBind :: UsageDetails -> CoreBind -> UsageDetails
+snocDictBind uds bind = uds { ud_binds = ud_binds uds `snocBag` mkDB bind }
  
-dumpAllDictBinds (MkUD {dict_binds = dbs}) binds
+wrapDictBinds :: Bag DictBind -> [CoreBind] -> [CoreBind]
+wrapDictBinds dbs binds
    = foldrBag add binds dbs
    where
      add (bind,_) binds = bind : binds
  
-dumpUDs :: [CoreBndr]
-       -> UsageDetails -> CoreExpr
-       -> (UsageDetails, CoreExpr)
-dumpUDs bndrs uds body
-  = (free_uds, foldr add_let body dict_binds)
+wrapDictBindsE :: Bag DictBind -> CoreExpr -> CoreExpr
+wrapDictBindsE dbs expr
+  = foldrBag add expr dbs
    where
-    (free_uds, (dict_binds, _)) = splitUDs bndrs uds
-    add_let (bind,_) body      = Let bind body
-
-splitUDs :: [CoreBndr]
-        -> UsageDetails
-        -> (UsageDetails,              -- These don't mention the binders
-            ProtoUsageDetails)         -- These do
-            
-splitUDs bndrs uds@(MkUD {dict_binds = orig_dbs, 
-                         calls      = orig_calls})
-
-  = if isEmptyBag dump_dbs && null dump_calls then
-       -- Common case: binder doesn't affect floats
-       (uds, ([],[]))  
-
-    else
-       -- Binders bind some of the fvs of the floats
-       (MkUD {dict_binds = free_dbs, 
-              calls      = listToCallDetails free_calls},
-        (bagToList dump_dbs, dump_calls)
-       )
-
+    add (bind,_) expr = Let bind expr
+
+----------------------
+dumpUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, Bag DictBind)
+-- Used at a lambda or case binder; just dump anything mentioning the binder
+dumpUDs bndrs uds@(MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
+  | null bndrs = (uds, emptyBag)  -- Common in case alternatives
+  | otherwise  = (free_uds, dump_dbs)
+  where
+    free_uds = MkUD { ud_binds = free_dbs, ud_calls = free_calls }
+    bndr_set = mkVarSet bndrs
+    (free_dbs, dump_dbs, dump_set) = splitDictBinds orig_dbs bndr_set
+    free_calls = deleteCallsMentioning dump_set $   -- Drop calls mentioning bndr_set on the floor
+                 deleteCallsFor bndrs orig_calls    -- Discard calls for bndr_set; there should be 
+                                                   -- no calls for any of the dicts in dump_dbs
+
+dumpBindUDs :: [CoreBndr] -> UsageDetails -> (UsageDetails, Bag DictBind, Bool)
+-- Used at a lambda or case binder; just dump anything mentioning the binder
+dumpBindUDs bndrs (MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
+  = (free_uds, dump_dbs, float_all)
    where
+    free_uds = MkUD { ud_binds = free_dbs, ud_calls = free_calls }
      bndr_set = mkVarSet bndrs
+    (free_dbs, dump_dbs, dump_set) = splitDictBinds orig_dbs bndr_set
+    free_calls = deleteCallsFor bndrs orig_calls
+    float_all = dump_set `intersectsVarSet` callDetailsFVs free_calls
+
+callsForMe :: Id -> UsageDetails -> (UsageDetails, [CallInfo])
+callsForMe fn (MkUD { ud_binds = orig_dbs, ud_calls = orig_calls })
+  = -- pprTrace ("callsForMe")
+    --         (vcat [ppr fn, 
+    --                text "Orig dbs ="     <+> ppr (_dictBindBndrs orig_dbs), 
+    --                text "Orig calls ="   <+> ppr orig_calls,
+    --                text "Dep set ="      <+> ppr dep_set, 
+    --                text "Calls for me =" <+> ppr calls_for_me]) $
+    (uds_without_me, calls_for_me)
+  where
+    uds_without_me = MkUD { ud_binds = orig_dbs, ud_calls = delVarEnv orig_calls fn }
+    calls_for_me = case lookupVarEnv orig_calls fn of
+                       Nothing -> []
+                       Just cs -> filter_dfuns (fmToList cs)
  
-    (free_dbs, dump_dbs, dump_idset) 
-         = foldlBag dump_db (emptyBag, emptyBag, bndr_set) orig_dbs
-               -- Important that it's foldl not foldr;
-               -- we're accumulating the set of dumped ids in dump_set
+    dep_set = foldlBag go (unitVarSet fn) orig_dbs
+    go dep_set (db,fvs) | fvs `intersectsVarSet` dep_set
+                        = extendVarSetList dep_set (bindersOf db)
+                        | otherwise = fvs
+
+       -- Note [Specialisation of dictionary functions]
+    filter_dfuns | isDFunId fn = filter ok_call
+                 | otherwise   = \cs -> cs
  
-       -- Filter out any calls that mention things that are being dumped
-    orig_call_list                = callDetailsToList orig_calls
-    (dump_calls, free_calls)      = partition captured orig_call_list
-    captured (id,tys,(dicts, fvs)) =  fvs `intersectsVarSet` dump_idset
-                                  || id `elemVarSet` dump_idset
+    ok_call (_, (_,fvs)) = not (fvs `intersectsVarSet` dep_set)
  
-    dump_db (free_dbs, dump_dbs, dump_idset) db@(bind, fvs)
+----------------------
+splitDictBinds :: Bag DictBind -> IdSet -> (Bag DictBind, Bag DictBind, IdSet)
+-- Returns (free_dbs, dump_dbs, dump_set)
+splitDictBinds dbs bndr_set
+   = foldlBag split_db (emptyBag, emptyBag, bndr_set) dbs
+               -- Important that it's foldl not foldr;
+               -- we're accumulating the set of dumped ids in dump_set
+   where
+    split_db (free_dbs, dump_dbs, dump_idset) db@(bind, fvs)
         | dump_idset `intersectsVarSet` fvs     -- Dump it
         = (free_dbs, dump_dbs `snocBag` db,
            extendVarSetList dump_idset (bindersOf bind))
  
         | otherwise     -- Don't dump it
         = (free_dbs `snocBag` db, dump_dbs, dump_idset)
+
+
+----------------------
+deleteCallsMentioning :: VarSet -> CallDetails -> CallDetails
+-- Remove calls *mentioning* bs 
+deleteCallsMentioning bs calls
+  = mapVarEnv filter_calls calls
+  where
+    filter_calls :: CallInfoSet -> CallInfoSet
+    filter_calls = filterFM (\_ (_, fvs) -> not (fvs `intersectsVarSet` bs))
+
+deleteCallsFor :: [Id] -> CallDetails -> CallDetails
+-- Remove calls *for* bs
+deleteCallsFor bs calls = delVarEnvList calls bs
  \end{code}
  
  
@@ -1206,9 +1478,11 @@ splitUDs bndrs uds@(MkUD {dict_binds = orig_dbs,
  \begin{code}
  type SpecM a = UniqSM a
  
+initSM :: UniqSupply -> SpecM a -> a
  initSM   = initUs_
  
-mapAndCombineSM f []     = return ([], emptyUDs)
+mapAndCombineSM :: (a -> SpecM (b, UsageDetails)) -> [a] -> SpecM ([b], UsageDetails)
+mapAndCombineSM _ []     = return ([], emptyUDs)
  mapAndCombineSM f (x:xs) = do (y, uds1) <- f x
                                (ys, uds2) <- mapAndCombineSM f xs
                                return (y:ys, uds1 `plusUDs` uds2)
@@ -1226,17 +1500,20 @@ cloneBindSM subst (Rec pairs) = do
      let (subst', bndrs') = cloneRecIdBndrs subst us (map fst pairs)
      return (subst', subst', Rec (bndrs' `zip` map snd pairs))
  
-cloneBinders subst bndrs = do
-    us <- getUniqueSupplyM
-    return (cloneIdBndrs subst us bndrs)
-
-newIdSM old_id new_ty = do
-    uniq <- getUniqueM
-    let
-        -- Give the new Id a similar occurrence name to the old one
-        name   = idName old_id
-        new_id = mkUserLocal (mkSpecOcc (nameOccName name)) uniq new_ty (getSrcSpan name)
-    return new_id
+cloneDictBndrs :: Subst -> [CoreBndr] -> SpecM (Subst, [CoreBndr])
+cloneDictBndrs subst bndrs 
+  = do { us <- getUniqueSupplyM
+       ; return (cloneIdBndrs subst us bndrs) }
+
+newSpecIdSM :: Id -> Type -> SpecM Id
+    -- Give the new Id a similar occurrence name to the old one
+newSpecIdSM old_id new_ty
+  = do { uniq <- getUniqueM
+       ; let 
+           name    = idName old_id
+           new_occ = mkSpecOcc (nameOccName name)
+           new_id  = mkUserLocal new_occ uniq new_ty (getSrcSpan name)
+        ; return new_id }
  \end{code}