diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c
index 9a21af8..a13cd33 100644
--- a/ghc/rts/GC.c
+++ b/ghc/rts/GC.c
@@ -104,6 +104,10 @@ static rtsBool major_gc;
  */
 static nat evac_gen;
 
+/* Whether to do eager promotion or not.
+ */
+static rtsBool eager_promotion;
+
 /* Weak pointers
  */
 StgWeak *old_weak_ptr_list; // also pending finaliser list
@@ -140,6 +144,14 @@ static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 static lnat thunk_selector_depth = 0;
 #define MAX_THUNK_SELECTOR_DEPTH 8
 
+/* Mut-list stats */
+#ifdef DEBUG
+static nat
+    mutlist_MUTVARS,
+    mutlist_MUTARRS,
+    mutlist_OTHERS;
+#endif
+
 /* -----------------------------------------------------------------------------
    Static function declarations
    -------------------------------------------------------------------------- */
@@ -332,7 +344,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
 {
   bdescr *bd;
   step *stp;
-  lnat live, allocated, collected = 0, copied = 0, scavd_copied = 0;
+  lnat live, allocated, copied = 0, scavd_copied = 0;
   lnat oldgen_saved_blocks = 0;
   nat g, s, i;
 
@@ -363,6 +375,12 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
   memInventory();
 #endif
 
+#ifdef DEBUG
+  mutlist_MUTVARS = 0;
+  mutlist_MUTARRS = 0;
+  mutlist_OTHERS = 0;
+#endif
+
   // Init stats and print par specific (timing) info
   PAR_TICKY_PAR_START();
 
@@ -571,6 +589,8 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
       mark_stack_bdescr = NULL;
   }
 
+  eager_promotion = rtsTrue; // for now
+
   /* -----------------------------------------------------------------------
    * follow all the roots that we know about:
    *   - mutable lists from each generation > N
@@ -792,9 +812,13 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
       // Count the mutable list as bytes "copied" for the purposes of
       // stats.  Every mutable list is copied during every GC.
       if (g > 0) {
+	  nat mut_list_size = 0;
	  for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
-	      copied += (bd->free - bd->start) * sizeof(StgWord);
+	      mut_list_size += bd->free - bd->start;
	  }
+	  copied += mut_list_size;
+
+	  IF_DEBUG(gc, debugBelch("mut_list_size: %ld (%d vars, %d arrays, %d others)\n", mut_list_size * sizeof(W_), mutlist_MUTVARS, mutlist_MUTARRS, mutlist_OTHERS));
       }
 
       for (s = 0; s < generations[g].n_steps; s++) {
@@ -813,18 +837,6 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
 
	// for generations we collected...
	if (g <= N) {
-	    // rough calculation of garbage collected, for stats output
-	    if (stp->is_compacted) {
-		collected += (oldgen_saved_blocks - stp->n_old_blocks) * BLOCK_SIZE_W;
-	    } else {
-		if (g == 0 && s == 0) {
-		    collected += countNurseryBlocks() * BLOCK_SIZE_W;
-		    collected += alloc_blocks;
-		} else {
-		    collected += stp->n_old_blocks * BLOCK_SIZE_W;
-		}
-	    }
-
	    /* free old memory and shift to-space into from-space for all
	     * the collected steps (except the allocation area).  These
	     * freed blocks will probably be quickly recycled.
@@ -1142,7 +1154,9 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
   ACQUIRE_SM_LOCK;
 
   // send exceptions to any threads which were about to die
+  RELEASE_SM_LOCK;
   resurrectThreads(resurrected_threads);
+  ACQUIRE_SM_LOCK;
 
   // Update the stable pointer hash table.
   updateStablePtrTable(major_gc);
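The stats hunks above replace the old per-block byte arithmetic with a single
word count that is reported (under DEBUG) with a breakdown by closure kind. A
minimal standalone sketch of that size walk, using a simplified stand-in for
the RTS block descriptor rather than the real bdescr from includes/Block.h:

    #include <stddef.h>

    typedef unsigned long StgWord;

    /* Simplified stand-in for the RTS block descriptor; the real one
     * has many more fields. */
    typedef struct bdescr_ {
        StgWord        *start;   /* first word of the block        */
        StgWord        *free;    /* first unused word in the block */
        struct bdescr_ *link;    /* next block on the mutable list */
    } bdescr;

    /* Walk a mutable list and total the words in use, the same loop the
     * patch runs before adding the result to the "copied" statistic. */
    static size_t
    mut_list_size_words(bdescr *mut_list)
    {
        size_t size = 0;
        bdescr *bd;
        for (bd = mut_list; bd != NULL; bd = bd->link) {
            size += bd->free - bd->start;
        }
        return size;
    }

The result is in words; the debugBelch call in the hunk scales it by
sizeof(W_) to print bytes.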
@@ -1175,7 +1189,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
 #endif
 
   // ok, GC over: tell the stats department what happened.
-  stat_endGC(allocated, collected, live, copied, scavd_copied, N);
+  stat_endGC(allocated, live, copied, scavd_copied, N);
 
 #if defined(RTS_USER_SIGNALS)
   // unblock signals again
@@ -1561,11 +1575,11 @@ copy(StgClosure *src, nat size, step *stp)
    * by evacuate()).
    */
   if (stp->gen_no < evac_gen) {
-#ifdef NO_EAGER_PROMOTION
-      failed_to_evac = rtsTrue;
-#else
-      stp = &generations[evac_gen].steps[0];
-#endif
+      if (eager_promotion) {
+	  stp = &generations[evac_gen].steps[0];
+      } else {
+	  failed_to_evac = rtsTrue;
+      }
   }
 
   /* chain a new block onto the to-space for the destination step if
@@ -1611,11 +1625,11 @@ copy_noscav(StgClosure *src, nat size, step *stp)
    * by evacuate()).
    */
   if (stp->gen_no < evac_gen) {
-#ifdef NO_EAGER_PROMOTION
-      failed_to_evac = rtsTrue;
-#else
-      stp = &generations[evac_gen].steps[0];
-#endif
+      if (eager_promotion) {
+	  stp = &generations[evac_gen].steps[0];
+      } else {
+	  failed_to_evac = rtsTrue;
+      }
   }
 
   /* chain a new block onto the to-space for the destination step if
@@ -1658,11 +1672,11 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
   TICK_GC_WORDS_COPIED(size_to_copy);
 
   if (stp->gen_no < evac_gen) {
-#ifdef NO_EAGER_PROMOTION
-      failed_to_evac = rtsTrue;
-#else
-      stp = &generations[evac_gen].steps[0];
-#endif
+      if (eager_promotion) {
+	  stp = &generations[evac_gen].steps[0];
+      } else {
+	  failed_to_evac = rtsTrue;
+      }
   }
 
   if (stp->hp + size_to_reserve >= stp->hpLim) {
@@ -1684,7 +1698,7 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
   SET_EVACUAEE_FOR_LDV(src, size_to_reserve);
   // fill the slop
   if (size_to_reserve - size_to_copy_org > 0)
-      FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org));
+      LDV_FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org));
 #endif
   return (StgClosure *)dest;
 }
@@ -1739,11 +1753,11 @@ evacuate_large(StgPtr p)
    */
   stp = bd->step->to;
   if (stp->gen_no < evac_gen) {
-#ifdef NO_EAGER_PROMOTION
-      failed_to_evac = rtsTrue;
-#else
-      stp = &generations[evac_gen].steps[0];
-#endif
+      if (eager_promotion) {
+	  stp = &generations[evac_gen].steps[0];
+      } else {
+	  failed_to_evac = rtsTrue;
+      }
   }
 
   bd->step = stp;
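The same replacement appears four times above, in copy(), copy_noscav(),
copyPart() and evacuate_large(): the compile-time NO_EAGER_PROMOTION switch
becomes a run-time test of the new eager_promotion flag. A sketch of the
shared decision in isolation; the types, the globals and the helper name
choose_dest_gen are simplified stand-ins, not the RTS's own:

    typedef int rtsBool;
    #define rtsFalse 0
    #define rtsTrue  1

    static rtsBool eager_promotion;  /* mirrors the new global above      */
    static rtsBool failed_to_evac;   /* tells the caller the object still
                                      * points into a younger generation  */

    /* Pick the generation to copy an object into. dest_gen is where the
     * object would naturally go; evac_gen is the generation we are
     * trying to evacuate into. */
    static int
    choose_dest_gen(int dest_gen, int evac_gen)
    {
        if (dest_gen < evac_gen) {
            if (eager_promotion) {
                /* promote straight to evac_gen ...                     */
                return evac_gen;
            } else {
                /* ... or record the failure, so the referring object is
                 * marked dirty and kept on the mutable list.           */
                failed_to_evac = rtsTrue;
            }
        }
        return dest_gen;
    }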
@@ -1929,7 +1943,8 @@ loop:
 
   switch (info->type) {
 
-  case MUT_VAR:
+  case MUT_VAR_CLEAN:
+  case MUT_VAR_DIRTY:
   case MVAR:
       return copy(q,sizeW_fromITBL(info),stp);
 
@@ -2003,11 +2018,15 @@ loop:
   case THUNK_SELECTOR:
     {
	StgClosure *p;
+	const StgInfoTable *info_ptr;
 
	if (thunk_selector_depth > MAX_THUNK_SELECTOR_DEPTH) {
	    return copy(q,THUNK_SELECTOR_sizeW(),stp);
	}
 
+	// stashed away for LDV profiling, see below
+	info_ptr = q->header.info;
+
	p = eval_thunk_selector(info->layout.selector_offset,
				(StgSelector *)q);
 
@@ -2020,6 +2039,13 @@ loop:
	    val = evacuate(p);
	    thunk_selector_depth--;
 
+#ifdef PROFILING
+	    // For the purposes of LDV profiling, we have destroyed
+	    // the original selector thunk.
+	    SET_INFO(q, info_ptr);
+	    LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(q);
+#endif
+
	    // Update the THUNK_SELECTOR with an indirection to the
	    // EVACUATED closure now at p.  Why do this rather than
	    // upd_evacuee(q,p)?  Because we have an invariant that an
@@ -2029,12 +2055,10 @@ loop:
	    SET_INFO(q, &stg_IND_info);
	    ((StgInd *)q)->indirectee = p;
 
-#ifdef PROFILING
-	    // We store the size of the just evacuated object in the
-	    // LDV word so that the profiler can guess the position of
-	    // the next object later.
-	    SET_EVACUAEE_FOR_LDV(q, THUNK_SELECTOR_sizeW());
-#endif
+	    // For the purposes of LDV profiling, we have created an
+	    // indirection.
+	    LDV_RECORD_CREATE(q);
+
	    return val;
	}
     }
@@ -2099,7 +2123,8 @@ loop:
     // just copy the block
     return copy_noscav(q,arr_words_sizeW((StgArrWords *)q),stp);
 
-  case MUT_ARR_PTRS:
+  case MUT_ARR_PTRS_CLEAN:
+  case MUT_ARR_PTRS_DIRTY:
   case MUT_ARR_PTRS_FROZEN:
   case MUT_ARR_PTRS_FROZEN0:
     // just copy the block
@@ -2150,7 +2175,7 @@ loop:
     }
 
   case BLOCKED_FETCH:
-    ASSERT(sizeofW(StgBlockedFetch) >= MIN_NONUPD_SIZE);
+    ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOAD_SIZE);
     to = copy(q,sizeofW(StgBlockedFetch),stp);
     IF_DEBUG(gc,
	     debugBelch("@@ evacuate: %p (%s) to %p (%s)",
@@ -2161,7 +2186,7 @@
   case REMOTE_REF:
 # endif
   case FETCH_ME:
-    ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE);
+    ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOAD_SIZE);
     to = copy(q,sizeofW(StgFetchMe),stp);
     IF_DEBUG(gc,
	     debugBelch("@@ evacuate: %p (%s) to %p (%s)",
@@ -2169,7 +2194,7 @@
     return to;
 
   case FETCH_ME_BQ:
-    ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE);
+    ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOAD_SIZE);
     to = copy(q,sizeofW(StgFetchMeBlockingQueue),stp);
     IF_DEBUG(gc,
	     debugBelch("@@ evacuate: %p (%s) to %p (%s)",
@@ -2881,13 +2906,22 @@ scavenge(step *stp)
	p += sizeofW(StgInd);
	break;
 
-    case MUT_VAR:
-	evac_gen = 0;
+    case MUT_VAR_CLEAN:
+    case MUT_VAR_DIRTY: {
+	rtsBool saved_eager_promotion = eager_promotion;
+
+	eager_promotion = rtsFalse;
	((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var);
-	evac_gen = saved_evac_gen;
-	failed_to_evac = rtsTrue; // mutable anyhow
+	eager_promotion = saved_eager_promotion;
+
+	if (failed_to_evac) {
+	    ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+	} else {
+	    ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+	}
	p += sizeofW(StgMutVar);
	break;
+    }
 
     case CAF_BLACKHOLE:
     case SE_CAF_BLACKHOLE:
@@ -2928,18 +2962,32 @@ scavenge(step *stp)
	p += arr_words_sizeW((StgArrWords *)p);
	break;
 
-    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
	// follow everything
     {
	StgPtr next;
-
-	evac_gen = 0;		// repeatedly mutable
+	rtsBool saved_eager;
+
+	// We don't eagerly promote objects pointed to by a mutable
+	// array, but if we find the array only points to objects in
+	// the same or an older generation, we mark it "clean" and
+	// avoid traversing it during minor GCs.
+	saved_eager = eager_promotion;
+	eager_promotion = rtsFalse;
	next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
	for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
	    *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
	}
-	evac_gen = saved_evac_gen;
-	failed_to_evac = rtsTrue; // mutable anyhow.
+	eager_promotion = saved_eager;
+
+	if (failed_to_evac) {
+	    ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+	} else {
+	    ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+	}
+
+	failed_to_evac = rtsTrue; // always put it on the mutable list.
	break;
     }
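Both mutable cases above follow one protocol: scavenge the object's pointer
fields with eager_promotion switched off, then let failed_to_evac decide
whether the object gets the _DIRTY or the _CLEAN info table. A compressed
sketch of that protocol under stand-in types (the real code writes
&stg_MUT_VAR_DIRTY_info and friends into the closure header):

    typedef int rtsBool;
    #define rtsFalse 0
    #define rtsTrue  1

    typedef struct { const void *info; } Closure;

    /* Stand-ins for &stg_MUT_VAR_CLEAN_info / &stg_MUT_VAR_DIRTY_info. */
    static const void *CLEAN_info = "clean";
    static const void *DIRTY_info = "dirty";

    extern rtsBool eager_promotion, failed_to_evac;
    extern void scavenge_fields(Closure *c);  /* evacuate each field */

    static void
    scavenge_mutable(Closure *c)
    {
        rtsBool saved_eager = eager_promotion;

        /* Don't promote eagerly: if every field already points at the
         * same or an older generation, the object can be marked clean. */
        eager_promotion = rtsFalse;
        scavenge_fields(c);
        eager_promotion = saved_eager;

        c->info = failed_to_evac ? DIRTY_info : CLEAN_info;
    }

A clean object provably points only at its own or older generations, so later
minor GCs can skip it; that is the payoff the comment above describes.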
@@ -2953,20 +3001,33 @@ scavenge(step *stp)
	for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
	    *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
	}
-	// it's tempting to recordMutable() if failed_to_evac is
-	// false, but that breaks some assumptions (eg. every
-	// closure on the mutable list is supposed to have the MUT
-	// flag set, and MUT_ARR_PTRS_FROZEN doesn't).
+
+	// If we're going to put this object on the mutable list, then
+	// set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
+	if (failed_to_evac) {
+	    ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+	} else {
+	    ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+	}
	break;
     }
 
     case TSO:
     {
	StgTSO *tso = (StgTSO *)p;
-	evac_gen = 0;
+	rtsBool saved_eager = eager_promotion;
+
+	eager_promotion = rtsFalse;
	scavengeTSO(tso);
-	evac_gen = saved_evac_gen;
-	failed_to_evac = rtsTrue; // mutable anyhow.
+	eager_promotion = saved_eager;
+
+	if (failed_to_evac) {
+	    tso->flags |= TSO_DIRTY;
+	} else {
+	    tso->flags &= ~TSO_DIRTY;
+	}
+
+	failed_to_evac = rtsTrue; // always on the mutable list
	p += tso_sizeW(tso);
	break;
     }
@@ -3047,9 +3108,6 @@ scavenge(step *stp)
	evac_gen = 0;
	tvar->current_value = evacuate((StgClosure*)tvar->current_value);
	tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry);
-#if defined(SMP)
-	tvar->last_update_by = (StgTRecHeader *)evacuate((StgClosure*)tvar->last_update_by);
-#endif
	evac_gen = saved_evac_gen;
	failed_to_evac = rtsTrue; // mutable
	p += sizeofW(StgTVar);
@@ -3249,12 +3307,21 @@ linear_scan:
	    evacuate(((StgInd *)p)->indirectee);
	    break;
 
-	case MUT_VAR:
-	    evac_gen = 0;
+	case MUT_VAR_CLEAN:
+	case MUT_VAR_DIRTY: {
+	    rtsBool saved_eager_promotion = eager_promotion;
+
+	    eager_promotion = rtsFalse;
	    ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var);
-	    evac_gen = saved_evac_gen;
-	    failed_to_evac = rtsTrue;
+	    eager_promotion = saved_eager_promotion;
+
+	    if (failed_to_evac) {
+		((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+	    } else {
+		((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+	    }
	    break;
+	}
 
	case CAF_BLACKHOLE:
	case SE_CAF_BLACKHOLE:
@@ -3288,17 +3355,31 @@ linear_scan:
	    scavenge_AP((StgAP *)p);
	    break;
 
-	case MUT_ARR_PTRS:
+	case MUT_ARR_PTRS_CLEAN:
+	case MUT_ARR_PTRS_DIRTY:
	    // follow everything
	{
	    StgPtr next;
-
-	    evac_gen = 0;		// repeatedly mutable
+	    rtsBool saved_eager;
+
+	    // We don't eagerly promote objects pointed to by a mutable
+	    // array, but if we find the array only points to objects in
+	    // the same or an older generation, we mark it "clean" and
+	    // avoid traversing it during minor GCs.
+	    saved_eager = eager_promotion;
+	    eager_promotion = rtsFalse;
	    next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
	    for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
		*p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
	    }
-	    evac_gen = saved_evac_gen;
+	    eager_promotion = saved_eager;
+
+	    if (failed_to_evac) {
+		((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+	    } else {
+		((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+	    }
+
	    failed_to_evac = rtsTrue; // mutable anyhow.
	    break;
	}
@@ -3307,22 +3388,39 @@ linear_scan:
	case MUT_ARR_PTRS_FROZEN:
	case MUT_ARR_PTRS_FROZEN0:
	    // follow everything
	{
-	    StgPtr next;
+	    StgPtr next, q = p;
 
	    next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
	    for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
		*p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
	    }
+
+	    // If we're going to put this object on the mutable list, then
+	    // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
+	    if (failed_to_evac) {
+		((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+	    } else {
+		((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+	    }
	    break;
	}
 
	case TSO:
	{
	    StgTSO *tso = (StgTSO *)p;
-	    evac_gen = 0;
+	    rtsBool saved_eager = eager_promotion;
+
+	    eager_promotion = rtsFalse;
	    scavengeTSO(tso);
-	    evac_gen = saved_evac_gen;
-	    failed_to_evac = rtsTrue;
+	    eager_promotion = saved_eager;
+
+	    if (failed_to_evac) {
+		tso->flags |= TSO_DIRTY;
+	    } else {
+		tso->flags &= ~TSO_DIRTY;
+	    }
+
+	    failed_to_evac = rtsTrue; // always on the mutable list
	    break;
	}
@@ -3396,9 +3494,6 @@ linear_scan:
	    evac_gen = 0;
	    tvar->current_value = evacuate((StgClosure*)tvar->current_value);
	    tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry);
-#if defined(SMP)
-	    tvar->last_update_by = (StgTRecHeader *)evacuate((StgClosure*)tvar->last_update_by);
-#endif
	    evac_gen = saved_evac_gen;
	    failed_to_evac = rtsTrue; // mutable
	    break;
@@ -3471,12 +3566,12 @@ linear_scan:
 
	// already scavenged?
	if (is_marked(oldgen_scan+1,oldgen_scan_bd)) {
-	    oldgen_scan += sizeofW(StgHeader) + MIN_NONUPD_SIZE;
+	    oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE;
	    goto loop;
	}
 
	push_mark_stack(oldgen_scan);
	// ToDo: bump the linear scan by the actual size of the object
-	oldgen_scan += sizeofW(StgHeader) + MIN_NONUPD_SIZE;
+	oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE;
	goto linear_scan;
     }
@@ -3560,12 +3655,22 @@ scavenge_one(StgPtr p)
	break;
     }
 
-    case MUT_VAR:
-	evac_gen = 0;
+    case MUT_VAR_CLEAN:
+    case MUT_VAR_DIRTY: {
+	StgPtr q = p;
+	rtsBool saved_eager_promotion = eager_promotion;
+
+	eager_promotion = rtsFalse;
	((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var);
-	evac_gen = saved_evac_gen;
-	failed_to_evac = rtsTrue; // mutable anyhow
+	eager_promotion = saved_eager_promotion;
+
+	if (failed_to_evac) {
+	    ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+	} else {
+	    ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+	}
	break;
+    }
 
     case CAF_BLACKHOLE:
     case SE_CAF_BLACKHOLE:
@@ -3602,17 +3707,31 @@ scavenge_one(StgPtr p)
	// nothing to follow
	break;
 
-    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
	{
-	    // follow everything
-	    StgPtr next;
-
-	    evac_gen = 0;		// repeatedly mutable
+	    StgPtr next, q;
+	    rtsBool saved_eager;
+
+	    // We don't eagerly promote objects pointed to by a mutable
+	    // array, but if we find the array only points to objects in
+	    // the same or an older generation, we mark it "clean" and
+	    // avoid traversing it during minor GCs.
+	    saved_eager = eager_promotion;
+	    eager_promotion = rtsFalse;
+	    q = p;
	    next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
	    for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
		*p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
	    }
-	    evac_gen = saved_evac_gen;
+	    eager_promotion = saved_eager;
+
+	    if (failed_to_evac) {
+		((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+	    } else {
+		((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+	    }
+
	    failed_to_evac = rtsTrue;
	    break;
	}
@@ -3621,23 +3740,39 @@ scavenge_one(StgPtr p)
     case MUT_ARR_PTRS_FROZEN:
     case MUT_ARR_PTRS_FROZEN0:
	{
	    // follow everything
-	    StgPtr next;
+	    StgPtr next, q=p;
 
	    next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
	    for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
		*p = (StgWord)(StgPtr)evacuate((StgClosure *)*p);
	    }
+
+	    // If we're going to put this object on the mutable list, then
+	    // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
+	    if (failed_to_evac) {
+		((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+	    } else {
+		((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+	    }
	    break;
	}
 
     case TSO:
     {
	StgTSO *tso = (StgTSO *)p;
-
-	evac_gen = 0;		// repeatedly mutable
+	rtsBool saved_eager = eager_promotion;
+
+	eager_promotion = rtsFalse;
	scavengeTSO(tso);
-	evac_gen = saved_evac_gen;
-	failed_to_evac = rtsTrue;
+	eager_promotion = saved_eager;
+
+	if (failed_to_evac) {
+	    tso->flags |= TSO_DIRTY;
+	} else {
+	    tso->flags &= ~TSO_DIRTY;
+	}
+
+	failed_to_evac = rtsTrue; // always on the mutable list
	break;
     }
@@ -3712,9 +3847,6 @@ scavenge_one(StgPtr p)
	evac_gen = 0;
	tvar->current_value = evacuate((StgClosure*)tvar->current_value);
	tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry);
-#if defined(SMP)
-	tvar->last_update_by = (StgTRecHeader *)evacuate((StgClosure*)tvar->last_update_by);
-#endif
	evac_gen = saved_evac_gen;
	failed_to_evac = rtsTrue; // mutable
	break;
@@ -3823,10 +3955,55 @@ scavenge_mutable_list(generation *gen)
	for (q = bd->start; q < bd->free; q++) {
	    p = (StgPtr)*q;
	    ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+
+#ifdef DEBUG
+	    switch (get_itbl((StgClosure *)p)->type) {
+	    case MUT_VAR_CLEAN:
+		barf("MUT_VAR_CLEAN on mutable list");
+	    case MUT_VAR_DIRTY:
+		mutlist_MUTVARS++; break;
+	    case MUT_ARR_PTRS_CLEAN:
+	    case MUT_ARR_PTRS_DIRTY:
+	    case MUT_ARR_PTRS_FROZEN:
+	    case MUT_ARR_PTRS_FROZEN0:
+		mutlist_MUTARRS++; break;
+	    default:
+		mutlist_OTHERS++; break;
+	    }
+#endif
+
+	    // Check whether this object is "clean", that is it
+	    // definitely doesn't point into a young generation.
+	    // Clean objects don't need to be scavenged.  Some clean
+	    // objects (MUT_VAR_CLEAN) are not kept on the mutable
+	    // list at all; others, such as MUT_ARR_PTRS_CLEAN and
+	    // TSO, are always on the mutable list.
+	    //
+	    switch (get_itbl((StgClosure *)p)->type) {
+	    case MUT_ARR_PTRS_CLEAN:
+		recordMutableGen((StgClosure *)p,gen);
+		continue;
+	    case TSO: {
+		StgTSO *tso = (StgTSO *)p;
+		if ((tso->flags & TSO_DIRTY) == 0) {
+		    // A clean TSO: we don't have to traverse its
+		    // stack.  However, we *do* follow the link field:
+		    // we don't want to have to mark a TSO dirty just
+		    // because we put it on a different queue.
+		    if (tso->why_blocked != BlockedOnBlackHole) {
+			tso->link = (StgTSO *)evacuate((StgClosure *)tso->link);
+		    }
+		    recordMutableGen((StgClosure *)p,gen);
+		    continue;
+		}
+	    }
+	    default:
+		;
+	    }
+
	    if (scavenge_one(p)) {
-		/* didn't manage to promote everything, so put the
-		 * object back on the list.
-		 */
+		// didn't manage to promote everything, so put the
+		// object back on the list.
		recordMutableGen((StgClosure *)p,gen);
	    }
	}
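The scavenge_mutable_list() hunk above adds a fast path for clean objects,
and for TSOs it still follows the link field so that merely moving a thread
between queues never dirties it. A sketch of that fast path with stand-in
types and helpers (evacuate_ptr and record_mutable stand in for evacuate()
and recordMutableGen()):

    #define TSO_DIRTY 1

    enum { BlockedOnBlackHole = 2 };   /* stand-in value */

    struct tso {
        unsigned int flags;
        int          why_blocked;
        struct tso  *link;   /* queue link, followed even when clean */
    };

    extern void *evacuate_ptr(void *p);
    extern void  record_mutable(void *p);

    /* Returns 1 if the TSO was clean and handled without a stack walk. */
    static int
    scavenge_clean_tso(struct tso *t)
    {
        if ((t->flags & TSO_DIRTY) == 0) {
            /* A clean TSO: no need to traverse its stack, but do follow
             * the link field so requeueing never forces a dirty mark. */
            if (t->why_blocked != BlockedOnBlackHole) {
                t->link = evacuate_ptr(t->link);
            }
            record_mutable(t);
            return 1;
        }
        return 0;   /* dirty: fall through to scavenge_one() */
    }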
@@ -3988,6 +4165,32 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
 
     switch (info->i.type) {
 
     case UPDATE_FRAME:
+	// In SMP, we can get update frames that point to indirections
+	// when two threads evaluate the same thunk.  We do attempt to
+	// discover this situation in threadPaused(), but it's
+	// possible that the following sequence occurs:
+	//
+	//        A             B
+	//                  enter T
+	//     enter T
+	//                  blackhole T
+	//     update T
+	//     GC
+	//
+	// Now T is an indirection, and the update frame is already
+	// marked on A's stack, so we won't traverse it again in
+	// threadPaused().  We could traverse the whole stack again
+	// before GC, but that seems like overkill.
+	//
+	// Scavenging this update frame as normal would be disastrous;
+	// the updatee would end up pointing to the value.  So we turn
+	// the indirection into an IND_PERM, so that evacuate will
+	// copy the indirection into the old generation instead of
+	// discarding it.
+	if (get_itbl(((StgUpdateFrame *)p)->updatee)->type == IND) {
+	    ((StgUpdateFrame *)p)->updatee->header.info =
+		(StgInfoTable *)&stg_IND_PERM_info;
+	}
	((StgUpdateFrame *)p)->updatee = evacuate(((StgUpdateFrame *)p)->updatee);
	p += sizeofW(StgUpdateFrame);
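The fix above relies on how evacuate() treats the two kinds of indirection: a
plain IND is followed and dropped, while an IND_PERM is copied like an
ordinary closure, so the marked update frame keeps a live updatee. A sketch
of that distinction, with stand-in types rather than the RTS's closure
layout:

    enum itype { IND, IND_PERM, OTHER };

    struct closure { enum itype type; struct closure *indirectee; };

    extern struct closure *copy_closure(struct closure *c);

    static struct closure *
    evacuate_sketch(struct closure *c)
    {
        if (c->type == IND) {
            /* plain indirections are followed and discarded ... */
            return evacuate_sketch(c->indirectee);
        }
        /* ... while IND_PERM (and everything else) is copied, so an
         * update frame that points at it keeps a valid updatee. */
        return copy_closure(c);
    }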
@@ -4230,74 +4433,6 @@ gcCAFs(void)
 
 /* -----------------------------------------------------------------------------
-   Lazy black holing.
-
-   Whenever a thread returns to the scheduler after possibly doing
-   some work, we have to run down the stack and black-hole all the
-   closures referred to by update frames.
-   -------------------------------------------------------------------------- */
-
-static void
-threadLazyBlackHole(StgTSO *tso)
-{
-    StgClosure *frame;
-    StgRetInfoTable *info;
-    StgClosure *bh;
-    StgPtr stack_end;
-
-    stack_end = &tso->stack[tso->stack_size];
-
-    frame = (StgClosure *)tso->sp;
-
-    while (1) {
-	info = get_ret_itbl(frame);
-
-	switch (info->i.type) {
-
-	case UPDATE_FRAME:
-	    bh = ((StgUpdateFrame *)frame)->updatee;
-
-	    /* if the thunk is already blackholed, it means we've also
-	     * already blackholed the rest of the thunks on this stack,
-	     * so we can stop early.
-	     *
-	     * The blackhole made for a CAF is a CAF_BLACKHOLE, so they
-	     * don't interfere with this optimisation.
-	     */
-	    if (bh->header.info == &stg_BLACKHOLE_info) {
-		return;
-	    }
-
-	    if (bh->header.info != &stg_CAF_BLACKHOLE_info) {
-#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
-		debugBelch("Unexpected lazy BHing required at 0x%04lx\n",(long)bh);
-#endif
-#ifdef PROFILING
-		// @LDV profiling
-		// We pretend that bh is now dead.
-		LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
-#endif
-		SET_INFO(bh,&stg_BLACKHOLE_info);
-
-		// We pretend that bh has just been created.
-		LDV_RECORD_CREATE(bh);
-	    }
-
-	    frame = (StgClosure *) ((StgUpdateFrame *)frame + 1);
-	    break;
-
-	case STOP_FRAME:
-	    return;
-
-	    // normal stack frames; do nothing except advance the pointer
-	default:
-	    frame = (StgClosure *)((StgPtr)frame + stack_frame_sizeW(frame));
-	}
-    }
-}
-
-
-/* -----------------------------------------------------------------------------
  * Stack squeezing
  *
  * Code largely pinched from old RTS, then hacked to bits.  We also do
@@ -4308,12 +4443,11 @@
 struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; };
 
 static void
-threadSqueezeStack(StgTSO *tso)
+stackSqueeze(StgTSO *tso, StgPtr bottom)
 {
     StgPtr frame;
     rtsBool prev_was_update_frame;
     StgClosure *updatee = NULL;
-    StgPtr bottom;
     StgRetInfoTable *info;
     StgWord current_gap_size;
     struct stack_gap *gap;
@@ -4324,8 +4458,6 @@
     // contains two values: the size of the gap, and the distance
     // to the next gap (or the stack top).
 
-    bottom = &(tso->stack[tso->stack_size]);
-
     frame = tso->sp;
 
     ASSERT(frame < bottom);
@@ -4343,20 +4475,6 @@
	{
	    StgUpdateFrame *upd = (StgUpdateFrame *)frame;
 
-	    if (upd->updatee->header.info == &stg_BLACKHOLE_info) {
-
-		// found a BLACKHOLE'd update frame; we've been here
-		// before, in a previous GC, so just break out.
-
-		// Mark the end of the gap, if we're in one.
-		if (current_gap_size != 0) {
-		    gap = (struct stack_gap *)(frame-sizeofW(StgUpdateFrame));
-		}
-
-		frame += sizeofW(StgUpdateFrame);
-		goto done_traversing;
-	    }
-
	    if (prev_was_update_frame) {
 
		TICK_UPD_SQUEEZED();
@@ -4389,31 +4507,6 @@
	    // single update frame, or the topmost update frame in a series
	    else {
-		StgClosure *bh = upd->updatee;
-
-		// Do lazy black-holing
-		if (bh->header.info != &stg_BLACKHOLE_info &&
-		    bh->header.info != &stg_CAF_BLACKHOLE_info) {
-#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
-		    debugBelch("Unexpected lazy BHing required at 0x%04lx",(long)bh);
-#endif
-#ifdef DEBUG
-		    // zero out the slop so that the sanity checker can tell
-		    // where the next closure is.
-		    DEBUG_FILL_SLOP(bh);
-#endif
-#ifdef PROFILING
-		    // We pretend that bh is now dead.
-		    // ToDo: is the slop filling the same as DEBUG_FILL_SLOP?
-		    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
-#endif
-		    // Todo: maybe use SET_HDR() and remove LDV_RECORD_CREATE()?
-		    SET_INFO(bh,&stg_BLACKHOLE_info);
-
-		    // We pretend that bh has just been created.
-		    LDV_RECORD_CREATE(bh);
-		}
-
		prev_was_update_frame = rtsTrue;
		updatee = upd->updatee;
		frame += sizeofW(StgUpdateFrame);
@@ -4436,8 +4529,10 @@
	}
     }
 
-done_traversing:
-
+    if (current_gap_size != 0) {
+	gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
+    }
+
     // Now we have a stack with gaps in it, and we have to walk down
     // shoving the stack up to fill in the gaps.  A diagram might
     // help:
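stackSqueeze() now finishes its traversal unconditionally, so the hunk above
closes the final gap where the removed early-exit path used to do it. Each
gap is a two-word record written over the squeezed-out frame itself. A sketch
of that encoding, assuming only that the dead slot is at least two words:

    typedef unsigned long StgWord;

    /* The same two-word record the RTS writes into the stack. */
    struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; };

    /* Overwrite the now-dead words at 'at' with a gap record of 'size'
     * words, chaining it to the previously recorded gap (or NULL). */
    static struct stack_gap *
    record_gap(StgWord *at, StgWord size, struct stack_gap *prev)
    {
        struct stack_gap *gap = (struct stack_gap *)at;
        gap->gap_size = size;
        gap->next_gap = prev;
        return gap;
    }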
@@ -4495,12 +4590,110 @@
  * turned on.
  * -------------------------------------------------------------------------- */
 
 void
-threadPaused(StgTSO *tso)
+threadPaused(Capability *cap, StgTSO *tso)
 {
-    if ( RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue )
-	threadSqueezeStack(tso);	// does black holing too
-    else
-	threadLazyBlackHole(tso);
+    StgClosure *frame;
+    StgRetInfoTable *info;
+    StgClosure *bh;
+    StgPtr stack_end;
+    nat words_to_squeeze = 0;
+    nat weight           = 0;
+    nat weight_pending   = 0;
+    rtsBool prev_was_update_frame;
+
+    stack_end = &tso->stack[tso->stack_size];
+
+    frame = (StgClosure *)tso->sp;
+
+    while (1) {
+	// If we've already marked this frame, then stop here.
+	if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) {
+	    goto end;
+	}
+
+	info = get_ret_itbl(frame);
+
+	switch (info->i.type) {
+
+	case UPDATE_FRAME:
+
+	    SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info);
+
+	    bh = ((StgUpdateFrame *)frame)->updatee;
+
+	    if (closure_IND(bh) || bh->header.info == &stg_BLACKHOLE_info) {
+		IF_DEBUG(squeeze, debugBelch("suspending duplicate work: %ld words of stack\n", (StgPtr)frame - tso->sp));
+
+		// If this closure is already an indirection, then
+		// suspend the computation up to this point:
+		suspendComputation(cap,tso,(StgPtr)frame);
+
+		// Now drop the update frame, and arrange to return
+		// the value to the frame underneath:
+		tso->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2;
+		tso->sp[1] = (StgWord)bh;
+		tso->sp[0] = (W_)&stg_enter_info;
+
+		// And continue with threadPaused; there might be
+		// yet more computation to suspend.
+		threadPaused(cap,tso);
+		return;
+	    }
+
+	    if (bh->header.info != &stg_CAF_BLACKHOLE_info) {
+#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
+		debugBelch("Unexpected lazy BHing required at 0x%04lx\n",(long)bh);
+#endif
+		// zero out the slop so that the sanity checker can tell
+		// where the next closure is.
+		DEBUG_FILL_SLOP(bh);
+#ifdef PROFILING
+		// @LDV profiling
+		// We pretend that bh is now dead.
+		LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+#endif
+		SET_INFO(bh,&stg_BLACKHOLE_info);
+
+		// We pretend that bh has just been created.
+		LDV_RECORD_CREATE(bh);
+	    }
+
+	    frame = (StgClosure *) ((StgUpdateFrame *)frame + 1);
+	    if (prev_was_update_frame) {
+		words_to_squeeze += sizeofW(StgUpdateFrame);
+		weight += weight_pending;
+		weight_pending = 0;
+	    }
+	    prev_was_update_frame = rtsTrue;
+	    break;
+
+	case STOP_FRAME:
+	    goto end;
+
+	    // normal stack frames; do nothing except advance the pointer
+	default:
+	{
+	    nat frame_size = stack_frame_sizeW(frame);
+	    weight_pending += frame_size;
+	    frame = (StgClosure *)((StgPtr)frame + frame_size);
+	    prev_was_update_frame = rtsFalse;
+	}
+	}
+    }
+
+end:
+    IF_DEBUG(squeeze,
	     debugBelch("words_to_squeeze: %d, weight: %d, squeeze: %s\n",
			words_to_squeeze, weight,
			weight < words_to_squeeze ? "YES" : "NO"));
+
+    // Should we squeeze or not?  Arbitrary heuristic: we squeeze if
+    // the number of words we have to shift down is less than the
+    // number of stack words we squeeze away by doing so.
+    if (RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue &&
+	weight < words_to_squeeze) {
+	stackSqueeze(tso, (StgPtr)frame);
+    }
 }
 
 /* -----------------------------------------------------------------------------
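The rewritten threadPaused() folds lazy black-holing into a single walk and
gates stack squeezing on a cost/benefit count: words_to_squeeze is the payoff
(the update frames that collapse) and weight is the cost (the other frames
that must slide down to close the gaps). Restated as a tiny predicate, with
nat as a stand-in typedef:

    typedef unsigned int nat;

    /* Squeeze only when the stack words reclaimed (words_to_squeeze)
     * outweigh the words that must be shifted to reclaim them (weight). */
    static int
    worth_squeezing(nat words_to_squeeze, nat weight)
    {
        return weight < words_to_squeeze;
    }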