X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FGC.c;h=a13cd33afadd346ed79e20ac72235ea4e62887eb;hb=a1b4e3b88a6987deed7bb7f1bd870b30eef1b475;hp=a470f32c0822ed31238cc8649638a1bad11f11d2;hpb=b4dae163a4830e1984a656cdf66df957e840c77d;p=ghc-hetmet.git diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index a470f32..a13cd33 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -11,6 +11,7 @@ #include "RtsFlags.h" #include "RtsUtils.h" #include "Apply.h" +#include "OSThreads.h" #include "Storage.h" #include "LdvProfile.h" #include "Updates.h" @@ -25,7 +26,7 @@ #include "Prelude.h" #include "ParTicky.h" // ToDo: move into Rts.h #include "GCCompact.h" -#include "Signals.h" +#include "RtsSignals.h" #include "STM.h" #if defined(GRAN) || defined(PAR) # include "GranSimRts.h" @@ -46,6 +47,12 @@ #include +// Turn off inlining when debugging - it obfuscates things +#ifdef DEBUG +# undef STATIC_INLINE +# define STATIC_INLINE static +#endif + /* STATIC OBJECT LIST. * * During GC: @@ -97,6 +104,10 @@ static rtsBool major_gc; */ static nat evac_gen; +/* Whether to do eager promotion or not. + */ +static rtsBool eager_promotion; + /* Weak pointers */ StgWeak *old_weak_ptr_list; // also pending finaliser list @@ -117,13 +128,15 @@ StgTSO *resurrected_threads; */ static rtsBool failed_to_evac; -/* Old to-space (used for two-space collector only) +/* Saved nursery (used for 2-space collector only) */ -static bdescr *old_to_blocks; - +static bdescr *saved_nursery; +static nat saved_n_blocks; + /* Data used for allocation area sizing. */ static lnat new_blocks; // blocks allocated during this GC +static lnat new_scavd_blocks; // ditto, but depth-first blocks static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC /* Used to avoid long recursion due to selector thunks @@ -131,6 +144,14 @@ static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC static lnat thunk_selector_depth = 0; #define MAX_THUNK_SELECTOR_DEPTH 8 +/* Mut-list stats */ +#ifdef DEBUG +static nat + mutlist_MUTVARS, + mutlist_MUTARRS, + mutlist_OTHERS; +#endif + /* ----------------------------------------------------------------------------- Static function declarations -------------------------------------------------------------------------- */ @@ -239,23 +260,51 @@ gc_alloc_block(step *stp) } // Start a new to-space block, chain it on after the previous one. - if (stp->hp_bd == NULL) { - stp->hp_bd = bd; - } else { + if (stp->hp_bd != NULL) { stp->hp_bd->free = stp->hp; stp->hp_bd->link = bd; - stp->hp_bd = bd; } + stp->hp_bd = bd; stp->hp = bd->start; stp->hpLim = stp->hp + BLOCK_SIZE_W; - stp->n_to_blocks++; + stp->n_blocks++; new_blocks++; return bd; } +static bdescr * +gc_alloc_scavd_block(step *stp) +{ + bdescr *bd = allocBlock(); + bd->gen_no = stp->gen_no; + bd->step = stp; + + // blocks in to-space in generations up to and including N + // get the BF_EVACUATED flag. + if (stp->gen_no <= N) { + bd->flags = BF_EVACUATED; + } else { + bd->flags = 0; + } + + bd->link = stp->blocks; + stp->blocks = bd; + + if (stp->scavd_hp != NULL) { + Bdescr(stp->scavd_hp)->free = stp->scavd_hp; + } + stp->scavd_hp = bd->start; + stp->scavd_hpLim = stp->scavd_hp + BLOCK_SIZE_W; + + stp->n_blocks++; + new_scavd_blocks++; + + return bd; +} + /* ----------------------------------------------------------------------------- GarbageCollect @@ -286,7 +335,7 @@ gc_alloc_block(step *stp) - free from-space in each step, and set from-space = to-space. 
- Locks held: sched_mutex + Locks held: all capabilities are held throughout GarbageCollect(). -------------------------------------------------------------------------- */ @@ -295,9 +344,11 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) { bdescr *bd; step *stp; - lnat live, allocated, collected = 0, copied = 0; + lnat live, allocated, copied = 0, scavd_copied = 0; lnat oldgen_saved_blocks = 0; - nat g, s; + nat g, s, i; + + ACQUIRE_SM_LOCK; #ifdef PROFILING CostCentreStack *prev_CCS; @@ -319,6 +370,17 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // tell the stats department that we've started a GC stat_startGC(); +#ifdef DEBUG + // check for memory leaks if DEBUG is on + memInventory(); +#endif + +#ifdef DEBUG + mutlist_MUTVARS = 0; + mutlist_MUTARRS = 0; + mutlist_OTHERS = 0; +#endif + // Init stats and print par specific (timing) info PAR_TICKY_PAR_START(); @@ -367,18 +429,22 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) static_objects = END_OF_STATIC_LIST; scavenged_static_objects = END_OF_STATIC_LIST; - /* Save the old to-space if we're doing a two-space collection + /* Save the nursery if we're doing a two-space collection. + * g0s0->blocks will be used for to-space, so we need to get the + * nursery out of the way. */ if (RtsFlags.GcFlags.generations == 1) { - old_to_blocks = g0s0->to_blocks; - g0s0->to_blocks = NULL; - g0s0->n_to_blocks = 0; + saved_nursery = g0s0->blocks; + saved_n_blocks = g0s0->n_blocks; + g0s0->blocks = NULL; + g0s0->n_blocks = 0; } /* Keep a count of how many new blocks we allocated during this GC * (used for resizing the allocation area, later). */ new_blocks = 0; + new_scavd_blocks = 0; // Initialise to-space in all the generations/steps that we're // collecting. @@ -391,6 +457,10 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) if (g != 0) { freeChain(generations[g].mut_list); generations[g].mut_list = allocBlock(); + for (i = 0; i < n_capabilities; i++) { + freeChain(capabilities[i].mut_lists[g]); + capabilities[i].mut_lists[g] = allocBlock(); + } } for (s = 0; s < generations[g].n_steps; s++) { @@ -404,17 +474,23 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) ASSERT(stp->gen_no == g); // start a new to-space for this step. - stp->hp = NULL; - stp->hp_bd = NULL; - stp->to_blocks = NULL; + stp->old_blocks = stp->blocks; + stp->n_old_blocks = stp->n_blocks; // allocate the first to-space block; extra blocks will be // chained on as necessary. + stp->hp_bd = NULL; bd = gc_alloc_block(stp); - stp->to_blocks = bd; + stp->blocks = bd; + stp->n_blocks = 1; stp->scan = bd->start; stp->scan_bd = bd; + // allocate a block for "already scavenged" objects. This goes + // on the front of the stp->blocks list, so it won't be + // traversed by the scavenging sweep. + gc_alloc_scavd_block(stp); + // initialise the large object queues. 
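/* ---------------------------------------------------------------------------
 * [Editor's illustrative sketch -- not part of the patch.]  The to-space
 * managed by gc_alloc_block() and gc_alloc_scavd_block() above is a chain
 * of fixed-size blocks with a bump pointer (hp/hpLim): allocation is a
 * pointer increment, and a fresh block is chained on when the current one
 * fills.  A self-contained model, with hypothetical simplified types (the
 * real bdescr/step structures carry far more state):
 * ------------------------------------------------------------------------- */

#include <stdlib.h>

#define BLOCK_SIZE_W 1024                   /* words per block (assumption) */

typedef struct block_ {
    unsigned long  start[BLOCK_SIZE_W];     /* payload words */
    unsigned long *free;                    /* high-water mark when retired */
    struct block_ *link;                    /* chain of to-space blocks */
} block;

typedef struct {
    block         *hp_bd;                   /* block we're allocating into */
    unsigned long *hp, *hpLim;              /* bump pointer and its limit */
    unsigned       n_blocks;
} mini_step;

/* Chain a fresh block onto the step, as gc_alloc_block() does. */
static block *mini_alloc_block(mini_step *stp)
{
    block *bd = calloc(1, sizeof(block));
    if (stp->hp_bd != NULL) {               /* retire the current block */
        stp->hp_bd->free = stp->hp;
        stp->hp_bd->link = bd;
    }
    stp->hp_bd = bd;
    stp->hp    = bd->start;
    stp->hpLim = bd->start + BLOCK_SIZE_W;
    stp->n_blocks++;
    return bd;
}

/* Bump-allocate 'size' words (size <= BLOCK_SIZE_W), opening a new block
 * when the current one is full -- the same check copy() makes below. */
static unsigned long *mini_alloc_words(mini_step *stp, unsigned size)
{
    if (stp->hp_bd == NULL || stp->hp + size >= stp->hpLim) {
        mini_alloc_block(stp);
    }
    unsigned long *p = stp->hp;
    stp->hp += size;
    return p;
}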
stp->new_large_objects = NULL; stp->scavenged_large_objects = NULL; @@ -431,10 +507,10 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) bdescr *bitmap_bdescr; StgWord *bitmap; - bitmap_size = stp->n_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); + bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); if (bitmap_size > 0) { - bitmap_bdescr = allocGroup((nat)BLOCK_ROUND_UP(bitmap_size) + bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size) / BLOCK_SIZE); stp->bitmap = bitmap_bdescr; bitmap = bitmap_bdescr->start; @@ -447,7 +523,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // For each block in this step, point to its bitmap from the // block descriptor. - for (bd=stp->blocks; bd != NULL; bd = bd->link) { + for (bd=stp->old_blocks; bd != NULL; bd = bd->link) { bd->u.bitmap = bitmap; bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE); @@ -475,16 +551,31 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) stp->blocks = bd; stp->n_blocks = 1; } + if (stp->scavd_hp == NULL) { + gc_alloc_scavd_block(stp); + stp->n_blocks++; + } /* Set the scan pointer for older generations: remember we * still have to scavenge objects that have been promoted. */ stp->scan = stp->hp; stp->scan_bd = stp->hp_bd; - stp->to_blocks = NULL; - stp->n_to_blocks = 0; stp->new_large_objects = NULL; stp->scavenged_large_objects = NULL; stp->n_scavenged_large_blocks = 0; } + + /* Move the private mutable lists from each capability onto the + * main mutable list for the generation. + */ + for (i = 0; i < n_capabilities; i++) { + for (bd = capabilities[i].mut_lists[g]; + bd->link != NULL; bd = bd->link) { + /* nothing */ + } + bd->link = generations[g].mut_list; + generations[g].mut_list = capabilities[i].mut_lists[g]; + capabilities[i].mut_lists[g] = allocBlock(); + } } /* Allocate a mark stack if we're doing a major collection. @@ -498,6 +589,8 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) mark_stack_bdescr = NULL; } + eager_promotion = rtsTrue; // for now + /* ----------------------------------------------------------------------- * follow all the roots that we know about: * - mutable lists from each generation > N @@ -639,7 +732,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) } } - /* Update the pointers from the "main thread" list - these are + /* Update the pointers from the task list - these are * treated as weak pointers because we want to allow a main thread * to get a BlockedOnDeadMVar exception in the same way as any other * thread. Note that the threads should all have been retained by @@ -647,14 +740,23 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) * updating pointers here. 
*/ { - StgMainThread *m; + Task *task; StgTSO *tso; - for (m = main_threads; m != NULL; m = m->link) { - tso = (StgTSO *) isAlive((StgClosure *)m->tso); - if (tso == NULL) { - barf("main thread has been GC'd"); + for (task = all_tasks; task != NULL; task = task->all_link) { + if (!task->stopped && task->tso) { + ASSERT(task->tso->bound == task); + tso = (StgTSO *) isAlive((StgClosure *)task->tso); + if (tso == NULL) { + barf("task %p: main thread %d has been GC'd", +#ifdef THREADED_RTS + (void *)task->id, +#else + (void *)task, +#endif + task->tso->id); + } + task->tso = tso; } - m->tso = tso; } } @@ -674,6 +776,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { ASSERT(Bdescr(stp->hp) == stp->hp_bd); stp->hp_bd->free = stp->hp; + Bdescr(stp->scavd_hp)->free = stp->scavd_hp; } } } @@ -690,7 +793,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // Finally: compaction of the oldest generation. if (major_gc && oldest_gen->steps[0].is_compacted) { // save number of blocks for stats - oldgen_saved_blocks = oldest_gen->steps[0].n_blocks; + oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks; compact(get_roots); } @@ -699,6 +802,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) /* run through all the generations/steps and tidy up */ copied = new_blocks * BLOCK_SIZE_W; + scavd_copied = new_scavd_blocks * BLOCK_SIZE_W; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { if (g <= N) { @@ -708,9 +812,13 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // Count the mutable list as bytes "copied" for the purposes of // stats. Every mutable list is copied during every GC. if (g > 0) { + nat mut_list_size = 0; for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { - copied += (bd->free - bd->start) * sizeof(StgWord); + mut_list_size += bd->free - bd->start; } + copied += mut_list_size; + + IF_DEBUG(gc, debugBelch("mut_list_size: %ld (%d vars, %d arrays, %d others)\n", mut_list_size * sizeof(W_), mutlist_MUTVARS, mutlist_MUTARRS, mutlist_OTHERS)); } for (s = 0; s < generations[g].n_steps; s++) { @@ -722,19 +830,13 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) if (g <= N) { copied -= stp->hp_bd->start + BLOCK_SIZE_W - stp->hp_bd->free; + scavd_copied -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp; } } // for generations we collected... if (g <= N) { - // rough calculation of garbage collected, for stats output - if (stp->is_compacted) { - collected += (oldgen_saved_blocks - stp->n_blocks) * BLOCK_SIZE_W; - } else { - collected += stp->n_blocks * BLOCK_SIZE_W; - } - /* free old memory and shift to-space into from-space for all * the collected steps (except the allocation area). These * freed blocks will probaby be quickly recycled. @@ -743,37 +845,35 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) if (stp->is_compacted) { // for a compacted step, just shift the new to-space // onto the front of the now-compacted existing blocks. 
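/* ---------------------------------------------------------------------------
 * [Editor's illustrative sketch -- not part of the patch.]  The same splice
 * idiom appears twice in this patch: the per-capability mutable lists are
 * walked to their tail and linked onto the generation's list (above), and
 * the compacted step's old blocks are walked to their tail and linked onto
 * the new to-space blocks (just below).  In miniature, on a hypothetical
 * singly-linked 'chain' type:
 * ------------------------------------------------------------------------- */

typedef struct chain_ { struct chain_ *link; } chain;

/* Destructively append 'tail' after the last cell of 'head'; returns the
 * head of the combined chain.  O(length of head). */
static chain *splice_chains(chain *head, chain *tail)
{
    chain *bd;
    if (head == NULL) return tail;
    for (bd = head; bd->link != NULL; bd = bd->link) {
        /* nothing -- just find the tail */
    }
    bd->link = tail;
    return head;
}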
- for (bd = stp->to_blocks; bd != NULL; bd = bd->link) { + for (bd = stp->blocks; bd != NULL; bd = bd->link) { bd->flags &= ~BF_EVACUATED; // now from-space } // tack the new blocks on the end of the existing blocks - if (stp->blocks == NULL) { - stp->blocks = stp->to_blocks; - } else { - for (bd = stp->blocks; bd != NULL; bd = next) { - next = bd->link; - if (next == NULL) { - bd->link = stp->to_blocks; - } + if (stp->old_blocks != NULL) { + for (bd = stp->old_blocks; bd != NULL; bd = next) { // NB. this step might not be compacted next // time, so reset the BF_COMPACTED flags. // They are set before GC if we're going to // compact. (search for BF_COMPACTED above). bd->flags &= ~BF_COMPACTED; + next = bd->link; + if (next == NULL) { + bd->link = stp->blocks; + } } + stp->blocks = stp->old_blocks; } // add the new blocks to the block tally - stp->n_blocks += stp->n_to_blocks; + stp->n_blocks += stp->n_old_blocks; + ASSERT(countBlocks(stp->blocks) == stp->n_blocks); } else { - freeChain(stp->blocks); - stp->blocks = stp->to_blocks; - stp->n_blocks = stp->n_to_blocks; + freeChain(stp->old_blocks); for (bd = stp->blocks; bd != NULL; bd = bd->link) { bd->flags &= ~BF_EVACUATED; // now from-space } } - stp->to_blocks = NULL; - stp->n_to_blocks = 0; + stp->old_blocks = NULL; + stp->n_old_blocks = 0; } /* LARGE OBJECTS. The current live large objects are chained on @@ -808,8 +908,6 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) } // add the new blocks we promoted during this GC - stp->n_blocks += stp->n_to_blocks; - stp->n_to_blocks = 0; stp->n_large_blocks += stp->n_scavenged_large_blocks; } } @@ -920,8 +1018,9 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) for (g = 0; g <= N; g++) { for (s = 0; s < generations[g].n_steps; s++) { stp = &generations[g].steps[s]; - if (stp->is_compacted && stp->bitmap != NULL) { + if (stp->bitmap != NULL) { freeGroup(stp->bitmap); + stp->bitmap = NULL; } } } @@ -932,12 +1031,16 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) if (RtsFlags.GcFlags.generations == 1) { nat blocks; - if (old_to_blocks != NULL) { - freeChain(old_to_blocks); + if (g0s0->old_blocks != NULL) { + freeChain(g0s0->old_blocks); } - for (bd = g0s0->to_blocks; bd != NULL; bd = bd->link) { + for (bd = g0s0->blocks; bd != NULL; bd = bd->link) { bd->flags = 0; // now from-space } + g0s0->old_blocks = g0s0->blocks; + g0s0->n_old_blocks = g0s0->n_blocks; + g0s0->blocks = saved_nursery; + g0s0->n_blocks = saved_n_blocks; /* For a two-space collector, we need to resize the nursery. */ @@ -955,7 +1058,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) * performance we get from 3L bytes, reducing to the same * performance at 2L bytes. */ - blocks = g0s0->n_to_blocks; + blocks = g0s0->n_old_blocks; if ( RtsFlags.GcFlags.maxHeapSize != 0 && blocks * RtsFlags.GcFlags.oldGenFactor * 2 > @@ -977,7 +1080,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) blocks = RtsFlags.GcFlags.minAllocAreaSize; } } - resizeNursery(blocks); + resizeNurseries(blocks); } else { /* Generational collector: @@ -994,7 +1097,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) * percentage of g0s0 that was live at the last minor GC. 
*/ if (N == 0) { - g0s0_pcnt_kept = (new_blocks * 100) / g0s0->n_blocks; + g0s0_pcnt_kept = (new_blocks * 100) / countNurseryBlocks(); } /* Estimate a size for the allocation area based on the @@ -1017,12 +1120,12 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) blocks = RtsFlags.GcFlags.minAllocAreaSize; } - resizeNursery((nat)blocks); + resizeNurseries((nat)blocks); } else { // we might have added extra large blocks to the nursery, so // resize back to minAllocAreaSize again. - resizeNursery(RtsFlags.GcFlags.minAllocAreaSize); + resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize); } } @@ -1045,15 +1148,15 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // Reset the nursery resetNurseries(); - RELEASE_LOCK(&sched_mutex); - // start any pending finalizers - scheduleFinalizers(old_weak_ptr_list); + RELEASE_SM_LOCK; + scheduleFinalizers(last_free_capability, old_weak_ptr_list); + ACQUIRE_SM_LOCK; // send exceptions to any threads which were about to die + RELEASE_SM_LOCK; resurrectThreads(resurrected_threads); - - ACQUIRE_LOCK(&sched_mutex); + ACQUIRE_SM_LOCK; // Update the stable pointer hash table. updateStablePtrTable(major_gc); @@ -1074,8 +1177,10 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) CCCS = prev_CCS; #endif - // check for memory leaks if sanity checking is on - IF_DEBUG(sanity, memInventory()); +#ifdef DEBUG + // check for memory leaks if DEBUG is on + memInventory(); +#endif #ifdef RTS_GTK_FRONTPANEL if (RtsFlags.GcFlags.frontpanel) { @@ -1084,13 +1189,15 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) #endif // ok, GC over: tell the stats department what happened. - stat_endGC(allocated, collected, live, copied, N); + stat_endGC(allocated, live, copied, scavd_copied, N); #if defined(RTS_USER_SIGNALS) // unblock signals again unblockUserSignals(); #endif + RELEASE_SM_LOCK; + //PAR_TICKY_TP(); } @@ -1259,6 +1366,16 @@ traverse_weak_ptr_list(void) ; } + // Threads blocked on black holes: if the black hole + // is alive, then the thread is alive too. + if (tmp == NULL && t->why_blocked == BlockedOnBlackHole) { + if (isAlive(t->block_info.closure)) { + t = (StgTSO *)evacuate((StgClosure *)t); + tmp = t; + flag = rtsTrue; + } + } + if (tmp == NULL) { // not alive (yet): leave this thread on the // old_all_threads list. @@ -1275,6 +1392,10 @@ traverse_weak_ptr_list(void) } } + /* If we evacuated any threads, we need to go back to the scavenger. + */ + if (flag) return rtsTrue; + /* And resurrect any threads which were about to become garbage. */ { @@ -1287,6 +1408,21 @@ traverse_weak_ptr_list(void) } } + /* Finally, we can update the blackhole_queue. This queue + * simply strings together TSOs blocked on black holes, it is + * not intended to keep anything alive. Hence, we do not follow + * pointers on the blackhole_queue until now, when we have + * determined which TSOs are otherwise reachable. We know at + * this point that all TSOs have been evacuated, however. 
+ */ + { + StgTSO **pt; + for (pt = &blackhole_queue; *pt != END_TSO_QUEUE; pt = &((*pt)->link)) { + *pt = (StgTSO *)isAlive((StgClosure *)*pt); + ASSERT(*pt != NULL); + } + } + weak_stage = WeakDone; // *now* we're done, return rtsTrue; // but one more round of scavenging, please @@ -1425,7 +1561,8 @@ upd_evacuee(StgClosure *p, StgClosure *dest) STATIC_INLINE StgClosure * copy(StgClosure *src, nat size, step *stp) { - P_ to, from, dest; + StgPtr to, from; + nat i; #ifdef PROFILING // @LDV profiling nat size_org = size; @@ -1438,11 +1575,11 @@ copy(StgClosure *src, nat size, step *stp) * by evacuate()). */ if (stp->gen_no < evac_gen) { -#ifdef NO_EAGER_PROMOTION - failed_to_evac = rtsTrue; -#else - stp = &generations[evac_gen].steps[0]; -#endif + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } /* chain a new block onto the to-space for the destination step if @@ -1452,19 +1589,70 @@ copy(StgClosure *src, nat size, step *stp) gc_alloc_block(stp); } - for(to = stp->hp, from = (P_)src; size>0; --size) { - *to++ = *from++; + to = stp->hp; + from = (StgPtr)src; + stp->hp = to + size; + for (i = 0; i < size; i++) { // unroll for small i + to[i] = from[i]; } + upd_evacuee((StgClosure *)from,(StgClosure *)to); - dest = stp->hp; - stp->hp = to; - upd_evacuee(src,(StgClosure *)dest); #ifdef PROFILING // We store the size of the just evacuated object in the LDV word so that // the profiler can guess the position of the next object later. - SET_EVACUAEE_FOR_LDV(src, size_org); + SET_EVACUAEE_FOR_LDV(from, size_org); #endif - return (StgClosure *)dest; + return (StgClosure *)to; +} + +// Same as copy() above, except the object will be allocated in memory +// that will not be scavenged. Used for object that have no pointer +// fields. +STATIC_INLINE StgClosure * +copy_noscav(StgClosure *src, nat size, step *stp) +{ + StgPtr to, from; + nat i; +#ifdef PROFILING + // @LDV profiling + nat size_org = size; +#endif + + TICK_GC_WORDS_COPIED(size); + /* Find out where we're going, using the handy "to" pointer in + * the step of the source object. If it turns out we need to + * evacuate to an older generation, adjust it here (see comment + * by evacuate()). + */ + if (stp->gen_no < evac_gen) { + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } + } + + /* chain a new block onto the to-space for the destination step if + * necessary. + */ + if (stp->scavd_hp + size >= stp->scavd_hpLim) { + gc_alloc_scavd_block(stp); + } + + to = stp->scavd_hp; + from = (StgPtr)src; + stp->scavd_hp = to + size; + for (i = 0; i < size; i++) { // unroll for small i + to[i] = from[i]; + } + upd_evacuee((StgClosure *)from,(StgClosure *)to); + +#ifdef PROFILING + // We store the size of the just evacuated object in the LDV word so that + // the profiler can guess the position of the next object later. 
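/* ---------------------------------------------------------------------------
 * [Editor's illustrative sketch -- not part of the patch.]  copy(),
 * copy_noscav() and upd_evacuee() together implement the classic
 * copying-collector step: bump-allocate in to-space, copy the words, then
 * leave a forwarding pointer in the old object so later references find
 * the new copy.  A stand-alone model; the types are hypothetical, and
 * where GHC overwrites the info pointer with an EVACUATED header this
 * model uses a separate 'forward' field for simplicity:
 * ------------------------------------------------------------------------- */

#include <stddef.h>

typedef struct obj_ {
    struct obj_ *forward;            /* NULL until evacuated */
    size_t       size;               /* number of payload words */
    size_t       payload[];
} obj;

static size_t  to_space[1 << 16];    /* a single to-space arena (assumption) */
static size_t *to_hp = to_space;     /* to-space bump pointer */

static obj *mini_copy(obj *src)
{
    size_t i;
    obj   *dest;

    if (src->forward != NULL) {      /* already evacuated: follow it */
        return src->forward;
    }
    dest = (obj *)to_hp;
    to_hp += 2 + src->size;          /* header words + payload */
    dest->forward = NULL;
    dest->size    = src->size;
    for (i = 0; i < src->size; i++) {
        dest->payload[i] = src->payload[i];
    }
    src->forward = dest;             /* forwarding pointer for later visits */
    return dest;
}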
+ SET_EVACUAEE_FOR_LDV(from, size_org); +#endif + return (StgClosure *)to; } /* Special version of copy() for when we only want to copy the info @@ -1484,11 +1672,11 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) TICK_GC_WORDS_COPIED(size_to_copy); if (stp->gen_no < evac_gen) { -#ifdef NO_EAGER_PROMOTION - failed_to_evac = rtsTrue; -#else - stp = &generations[evac_gen].steps[0]; -#endif + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } if (stp->hp + size_to_reserve >= stp->hpLim) { @@ -1510,7 +1698,7 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) SET_EVACUAEE_FOR_LDV(src, size_to_reserve); // fill the slop if (size_to_reserve - size_to_copy_org > 0) - FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org)); + LDV_FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org)); #endif return (StgClosure *)dest; } @@ -1565,11 +1753,11 @@ evacuate_large(StgPtr p) */ stp = bd->step->to; if (stp->gen_no < evac_gen) { -#ifdef NO_EAGER_PROMOTION - failed_to_evac = rtsTrue; -#else - stp = &generations[evac_gen].steps[0]; -#endif + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } bd->step = stp; @@ -1624,73 +1812,139 @@ evacuate_large(StgPtr p) REGPARM1 static StgClosure * evacuate(StgClosure *q) { +#if defined(PAR) StgClosure *to; +#endif bdescr *bd = NULL; step *stp; const StgInfoTable *info; loop: - if (HEAP_ALLOCED(q)) { - bd = Bdescr((P_)q); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); - if (bd->gen_no > N) { - /* Can't evacuate this object, because it's in a generation - * older than the ones we're collecting. Let's hope that it's - * in evac_gen or older, or we will have to arrange to track - * this pointer using the mutable list. - */ - if (bd->gen_no < evac_gen) { - // nope - failed_to_evac = rtsTrue; - TICK_GC_FAILED_PROMOTION(); - } - return q; - } + if (!HEAP_ALLOCED(q)) { - /* evacuate large objects by re-linking them onto a different list. - */ - if (bd->flags & BF_LARGE) { - info = get_itbl(q); - if (info->type == TSO && - ((StgTSO *)q)->what_next == ThreadRelocated) { - q = (StgClosure *)((StgTSO *)q)->link; - goto loop; - } - evacuate_large((P_)q); - return q; - } + if (!major_gc) return q; - /* If the object is in a step that we're compacting, then we - * need to use an alternative evacuate procedure. - */ - if (bd->flags & BF_COMPACTED) { - if (!is_marked((P_)q,bd)) { - mark((P_)q,bd); - if (mark_stack_full()) { - mark_stack_overflowed = rtsTrue; - reset_mark_stack(); - } - push_mark_stack((P_)q); - } - return q; - } + info = get_itbl(q); + switch (info->type) { + + case THUNK_STATIC: + if (info->srt_bitmap != 0 && + *THUNK_STATIC_LINK((StgClosure *)q) == NULL) { + *THUNK_STATIC_LINK((StgClosure *)q) = static_objects; + static_objects = (StgClosure *)q; + } + return q; + + case FUN_STATIC: + if (info->srt_bitmap != 0 && + *FUN_STATIC_LINK((StgClosure *)q) == NULL) { + *FUN_STATIC_LINK((StgClosure *)q) = static_objects; + static_objects = (StgClosure *)q; + } + return q; + + case IND_STATIC: + /* If q->saved_info != NULL, then it's a revertible CAF - it'll be + * on the CAF list, so don't do anything with it here (we'll + * scavenge it later). 
+ */ + if (((StgIndStatic *)q)->saved_info == NULL + && *IND_STATIC_LINK((StgClosure *)q) == NULL) { + *IND_STATIC_LINK((StgClosure *)q) = static_objects; + static_objects = (StgClosure *)q; + } + return q; + + case CONSTR_STATIC: + if (*STATIC_LINK(info,(StgClosure *)q) == NULL) { + *STATIC_LINK(info,(StgClosure *)q) = static_objects; + static_objects = (StgClosure *)q; + } + return q; + + case CONSTR_INTLIKE: + case CONSTR_CHARLIKE: + case CONSTR_NOCAF_STATIC: + /* no need to put these on the static linked list, they don't need + * to be scavenged. + */ + return q; + + default: + barf("evacuate(static): strange closure type %d", (int)(info->type)); + } + } - /* Object is not already evacuated. */ - ASSERT((bd->flags & BF_EVACUATED) == 0); + bd = Bdescr((P_)q); - stp = bd->step->to; + if (bd->gen_no > N) { + /* Can't evacuate this object, because it's in a generation + * older than the ones we're collecting. Let's hope that it's + * in evac_gen or older, or we will have to arrange to track + * this pointer using the mutable list. + */ + if (bd->gen_no < evac_gen) { + // nope + failed_to_evac = rtsTrue; + TICK_GC_FAILED_PROMOTION(); + } + return q; } -#ifdef DEBUG - else stp = NULL; // make sure copy() will crash if HEAP_ALLOCED is wrong -#endif - // make sure the info pointer is into text space - ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); + if ((bd->flags & (BF_LARGE | BF_COMPACTED | BF_EVACUATED)) != 0) { + + /* pointer into to-space: just return it. This normally + * shouldn't happen, but alllowing it makes certain things + * slightly easier (eg. the mutable list can contain the same + * object twice, for example). + */ + if (bd->flags & BF_EVACUATED) { + if (bd->gen_no < evac_gen) { + failed_to_evac = rtsTrue; + TICK_GC_FAILED_PROMOTION(); + } + return q; + } + + /* evacuate large objects by re-linking them onto a different list. + */ + if (bd->flags & BF_LARGE) { + info = get_itbl(q); + if (info->type == TSO && + ((StgTSO *)q)->what_next == ThreadRelocated) { + q = (StgClosure *)((StgTSO *)q)->link; + goto loop; + } + evacuate_large((P_)q); + return q; + } + + /* If the object is in a step that we're compacting, then we + * need to use an alternative evacuate procedure. + */ + if (bd->flags & BF_COMPACTED) { + if (!is_marked((P_)q,bd)) { + mark((P_)q,bd); + if (mark_stack_full()) { + mark_stack_overflowed = rtsTrue; + reset_mark_stack(); + } + push_mark_stack((P_)q); + } + return q; + } + } + + stp = bd->step->to; + info = get_itbl(q); - switch (info -> type) { + switch (info->type) { - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: case MVAR: return copy(q,sizeW_fromITBL(info),stp); @@ -1706,19 +1960,22 @@ loop: (StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) { return (StgClosure *)INTLIKE_CLOSURE((StgInt)w); } - // else, fall through ... 
+ // else + return copy_noscav(q,sizeofW(StgHeader)+1,stp); } - case FUN_1_0: case FUN_0_1: + case FUN_1_0: case CONSTR_1_0: + return copy(q,sizeofW(StgHeader)+1,stp); + case THUNK_1_0: case THUNK_0_1: - return copy(q,sizeofW(StgHeader)+1,stp); + return copy(q,sizeofW(StgThunk)+1,stp); case THUNK_1_1: - case THUNK_0_2: case THUNK_2_0: + case THUNK_0_2: #ifdef NO_PROMOTE_THUNKS if (bd->gen_no == 0 && bd->step->no != 0 && @@ -1726,23 +1983,26 @@ loop: stp = bd->step; } #endif - return copy(q,sizeofW(StgHeader)+2,stp); + return copy(q,sizeofW(StgThunk)+2,stp); case FUN_1_1: - case FUN_0_2: case FUN_2_0: case CONSTR_1_1: - case CONSTR_0_2: case CONSTR_2_0: + case FUN_0_2: return copy(q,sizeofW(StgHeader)+2,stp); - case FUN: + case CONSTR_0_2: + return copy_noscav(q,sizeofW(StgHeader)+2,stp); + case THUNK: + return copy(q,thunk_sizeW_fromITBL(info),stp); + + case FUN: case CONSTR: case IND_PERM: case IND_OLDGEN_PERM: case WEAK: - case FOREIGN: case STABLE_NAME: return copy(q,sizeW_fromITBL(info),stp); @@ -1755,36 +2015,51 @@ loop: case BLACKHOLE: return copyPart(q,BLACKHOLE_sizeW(),sizeofW(StgHeader),stp); - case BLACKHOLE_BQ: - to = copy(q,BLACKHOLE_sizeW(),stp); - return to; - case THUNK_SELECTOR: { StgClosure *p; + const StgInfoTable *info_ptr; if (thunk_selector_depth > MAX_THUNK_SELECTOR_DEPTH) { return copy(q,THUNK_SELECTOR_sizeW(),stp); } + // stashed away for LDV profiling, see below + info_ptr = q->header.info; + p = eval_thunk_selector(info->layout.selector_offset, (StgSelector *)q); if (p == NULL) { return copy(q,THUNK_SELECTOR_sizeW(),stp); } else { + StgClosure *val; // q is still BLACKHOLE'd. thunk_selector_depth++; - p = evacuate(p); + val = evacuate(p); thunk_selector_depth--; - upd_evacuee(q,p); + #ifdef PROFILING - // We store the size of the just evacuated object in the - // LDV word so that the profiler can guess the position of - // the next object later. - SET_EVACUAEE_FOR_LDV(q, THUNK_SELECTOR_sizeW()); + // For the purposes of LDV profiling, we have destroyed + // the original selector thunk. + SET_INFO(q, info_ptr); + LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(q); #endif - return p; + + // Update the THUNK_SELECTOR with an indirection to the + // EVACUATED closure now at p. Why do this rather than + // upd_evacuee(q,p)? Because we have an invariant that an + // EVACUATED closure always points to an object in the + // same or an older generation (required by the short-cut + // test in the EVACUATED case, below). + SET_INFO(q, &stg_IND_info); + ((StgInd *)q)->indirectee = p; + + // For the purposes of LDV profiling, we have created an + // indirection. + LDV_RECORD_CREATE(q); + + return val; } } @@ -1794,50 +2069,6 @@ loop: q = ((StgInd*)q)->indirectee; goto loop; - case THUNK_STATIC: - if (info->srt_bitmap != 0 && major_gc && - THUNK_STATIC_LINK((StgClosure *)q) == NULL) { - THUNK_STATIC_LINK((StgClosure *)q) = static_objects; - static_objects = (StgClosure *)q; - } - return q; - - case FUN_STATIC: - if (info->srt_bitmap != 0 && major_gc && - FUN_STATIC_LINK((StgClosure *)q) == NULL) { - FUN_STATIC_LINK((StgClosure *)q) = static_objects; - static_objects = (StgClosure *)q; - } - return q; - - case IND_STATIC: - /* If q->saved_info != NULL, then it's a revertible CAF - it'll be - * on the CAF list, so don't do anything with it here (we'll - * scavenge it later). 
- */ - if (major_gc - && ((StgIndStatic *)q)->saved_info == NULL - && IND_STATIC_LINK((StgClosure *)q) == NULL) { - IND_STATIC_LINK((StgClosure *)q) = static_objects; - static_objects = (StgClosure *)q; - } - return q; - - case CONSTR_STATIC: - if (major_gc && STATIC_LINK(info,(StgClosure *)q) == NULL) { - STATIC_LINK(info,(StgClosure *)q) = static_objects; - static_objects = (StgClosure *)q; - } - return q; - - case CONSTR_INTLIKE: - case CONSTR_CHARLIKE: - case CONSTR_NOCAF_STATIC: - /* no need to put these on the static linked list, they don't need - * to be scavenged. - */ - return q; - case RET_BCO: case RET_SMALL: case RET_VEC_SMALL: @@ -1854,9 +2085,11 @@ loop: barf("evacuate: stack frame at %p\n", q); case PAP: - case AP: return copy(q,pap_sizeW((StgPAP*)q),stp); + case AP: + return copy(q,ap_sizeW((StgAP*)q),stp); + case AP_STACK: return copy(q,ap_stack_sizeW((StgAP_STACK*)q),stp); @@ -1868,7 +2101,16 @@ loop: * set the failed_to_evac flag to indicate that we couldn't * manage to promote the object to the desired generation. */ - if (evac_gen > 0) { // optimisation + /* + * Optimisation: the check is fairly expensive, but we can often + * shortcut it if either the required generation is 0, or the + * current object (the EVACUATED) is in a high enough generation. + * We know that an EVACUATED always points to an object in the + * same or an older generation. stp is the lowest step that the + * current object would be evacuated to, so we only do the full + * check if stp is too low. + */ + if (evac_gen > 0 && stp->gen_no < evac_gen) { // optimisation StgClosure *p = ((StgEvacuated*)q)->evacuee; if (HEAP_ALLOCED(p) && Bdescr((P_)p)->gen_no < evac_gen) { failed_to_evac = rtsTrue; @@ -1879,9 +2121,10 @@ loop: case ARR_WORDS: // just copy the block - return copy(q,arr_words_sizeW((StgArrWords *)q),stp); + return copy_noscav(q,arr_words_sizeW((StgArrWords *)q),stp); - case MUT_ARR_PTRS: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: case MUT_ARR_PTRS_FROZEN: case MUT_ARR_PTRS_FROZEN0: // just copy the block @@ -1919,7 +2162,7 @@ loop: } #if defined(PAR) - case RBH: // cf. BLACKHOLE_BQ + case RBH: { //StgInfoTable *rip = get_closure_info(q, &size, &ptrs, &nonptrs, &vhs, str); to = copy(q,BLACKHOLE_sizeW(),stp); @@ -1932,7 +2175,7 @@ loop: } case BLOCKED_FETCH: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_NONUPD_SIZE); + ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOD_SIZE); to = copy(q,sizeofW(StgBlockedFetch),stp); IF_DEBUG(gc, debugBelch("@@ evacuate: %p (%s) to %p (%s)", @@ -1943,7 +2186,7 @@ loop: case REMOTE_REF: # endif case FETCH_ME: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE); + ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOAD_SIZE); to = copy(q,sizeofW(StgFetchMe),stp); IF_DEBUG(gc, debugBelch("@@ evacuate: %p (%s) to %p (%s)", @@ -1951,7 +2194,7 @@ loop: return to; case FETCH_ME_BQ: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE); + ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOAD_SIZE); to = copy(q,sizeofW(StgFetchMeBlockingQueue),stp); IF_DEBUG(gc, debugBelch("@@ evacuate: %p (%s) to %p (%s)", @@ -1991,6 +2234,48 @@ loop: been BLACKHOLE'd, and should be updated with an indirection or a forwarding pointer. If the return value is NULL, then the selector thunk is unchanged. + + *** + ToDo: the treatment of THUNK_SELECTORS could be improved in the + following way (from a suggestion by Ian Lynagh): + + We can have a chain like this: + + sel_0 --> (a,b) + | + |-----> sel_0 --> (a,b) + | + |-----> sel_0 --> ... 
+ + and the depth limit means we don't go all the way to the end of the + chain, which results in a space leak. This affects the recursive + call to evacuate() in the THUNK_SELECTOR case in evacuate(): *not* + the recursive call to eval_thunk_selector() in + eval_thunk_selector(). + + We could eliminate the depth bound in this case, in the following + way: + + - traverse the chain once to discover the *value* of the + THUNK_SELECTOR. Mark all THUNK_SELECTORS that we + visit on the way as having been visited already (somehow). + + - in a second pass, traverse the chain again updating all + THUNK_SEELCTORS that we find on the way with indirections to + the value. + + - if we encounter a "marked" THUNK_SELECTOR in a normal + evacuate(), we konw it can't be updated so just evac it. + + Program that illustrates the problem: + + foo [] = ([], []) + foo (x:xs) = let (ys, zs) = foo xs + in if x >= 0 then (x:ys, zs) else (ys, x:zs) + + main = bar [1..(100000000::Int)] + bar xs = (\(ys, zs) -> print ys >> print zs) (foo xs) + -------------------------------------------------------------------------- */ static inline rtsBool @@ -2167,7 +2452,6 @@ selector_loop: case SE_CAF_BLACKHOLE: case SE_BLACKHOLE: case BLACKHOLE: - case BLACKHOLE_BQ: #if defined(PAR) case RBH: case BLOCKED_FETCH: @@ -2284,6 +2568,8 @@ scavenge_thunk_srt(const StgInfoTable *info) { StgThunkInfoTable *thunk_info; + if (!major_gc) return; + thunk_info = itbl_to_thunk_itbl(info); scavenge_srt((StgClosure **)GET_SRT(thunk_info), thunk_info->i.srt_bitmap); } @@ -2293,19 +2579,12 @@ scavenge_fun_srt(const StgInfoTable *info) { StgFunInfoTable *fun_info; + if (!major_gc) return; + fun_info = itbl_to_fun_itbl(info); scavenge_srt((StgClosure **)GET_FUN_SRT(fun_info), fun_info->i.srt_bitmap); } -STATIC_INLINE void -scavenge_ret_srt(const StgInfoTable *info) -{ - StgRetInfoTable *ret_info; - - ret_info = itbl_to_ret_itbl(info); - scavenge_srt((StgClosure **)GET_SRT(ret_info), ret_info->i.srt_bitmap); -} - /* ----------------------------------------------------------------------------- Scavenge a TSO. -------------------------------------------------------------------------- */ @@ -2313,8 +2592,6 @@ scavenge_ret_srt(const StgInfoTable *info) static void scavengeTSO (StgTSO *tso) { - // chase the link field for any TSOs on the same queue - tso->link = (StgTSO *)evacuate((StgClosure *)tso->link); if ( tso->why_blocked == BlockedOnMVar || tso->why_blocked == BlockedOnBlackHole || tso->why_blocked == BlockedOnException @@ -2330,6 +2607,13 @@ scavengeTSO (StgTSO *tso) (StgTSO *)evacuate((StgClosure *)tso->blocked_exceptions); } + // We don't always chase the link field: TSOs on the blackhole + // queue are not automatically alive, so the link field is a + // "weak" pointer in that case. 
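/* ---------------------------------------------------------------------------
 * [Editor's illustrative sketch -- not part of the patch.]  The TSO link
 * field discussed just above is a general instance of a "weak" edge: it is
 * not traced during ordinary scavenging, because tracing it would keep the
 * target alive by itself; instead liveness is decided first (see
 * traverse_weak_ptr_list() earlier) and the edge is fixed up afterwards.
 * Schematically, with a hypothetical node type and a stub for evacuate():
 * ------------------------------------------------------------------------- */

typedef struct wnode_ {
    int            blocked_on_blackhole;   /* models tso->why_blocked */
    struct wnode_ *link;
} wnode;

static wnode *evac_stub(wnode *p) { return p; /* stands in for evacuate() */ }

static void scan_wnode(wnode *t)
{
    if (!t->blocked_on_blackhole) {
        /* strong edge: tracing it keeps t->link alive */
        t->link = evac_stub(t->link);
    }
    /* otherwise the edge is weak: it is left alone here, and updated by
     * the weak-pointer pass only once the target is known to be alive */
}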
+ if (tso->why_blocked != BlockedOnBlackHole) { + tso->link = (StgTSO *)evacuate((StgClosure *)tso->link); + } + // scavange current transaction record tso->trec = (StgTRecHeader *)evacuate((StgClosure *)tso->trec); @@ -2352,8 +2636,8 @@ scavenge_arg_block (StgFunInfoTable *fun_info, StgClosure **args) p = (StgPtr)args; switch (fun_info->f.fun_type) { case ARG_GEN: - bitmap = BITMAP_BITS(fun_info->f.bitmap); - size = BITMAP_SIZE(fun_info->f.bitmap); + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); + size = BITMAP_SIZE(fun_info->f.b.bitmap); goto small_bitmap; case ARG_GEN_BIG: size = GET_FUN_LARGE_BITMAP(fun_info)->size; @@ -2378,35 +2662,31 @@ scavenge_arg_block (StgFunInfoTable *fun_info, StgClosure **args) } STATIC_INLINE StgPtr -scavenge_PAP (StgPAP *pap) +scavenge_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size) { StgPtr p; - StgWord bitmap, size; + StgWord bitmap; StgFunInfoTable *fun_info; - - pap->fun = evacuate(pap->fun); - fun_info = get_fun_itbl(pap->fun); + + fun_info = get_fun_itbl(fun); ASSERT(fun_info->i.type != PAP); - - p = (StgPtr)pap->payload; - size = pap->n_args; + p = (StgPtr)payload; switch (fun_info->f.fun_type) { case ARG_GEN: - bitmap = BITMAP_BITS(fun_info->f.bitmap); + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); goto small_bitmap; case ARG_GEN_BIG: scavenge_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size); p += size; break; case ARG_BCO: - scavenge_large_bitmap((StgPtr)pap->payload, BCO_BITMAP(pap->fun), size); + scavenge_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), size); p += size; break; default: bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); small_bitmap: - size = pap->n_args; while (size > 0) { if ((bitmap & 1) == 0) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); @@ -2420,9 +2700,23 @@ scavenge_PAP (StgPAP *pap) return p; } -/* ----------------------------------------------------------------------------- - Scavenge a given step until there are no more objects in this step - to scavenge. +STATIC_INLINE StgPtr +scavenge_PAP (StgPAP *pap) +{ + pap->fun = evacuate(pap->fun); + return scavenge_PAP_payload (pap->fun, pap->payload, pap->n_args); +} + +STATIC_INLINE StgPtr +scavenge_AP (StgAP *ap) +{ + ap->fun = evacuate(ap->fun); + return scavenge_PAP_payload (ap->fun, ap->payload, ap->n_args); +} + +/* ----------------------------------------------------------------------------- + Scavenge a given step until there are no more objects in this step + to scavenge. 
evac_gen is set by the caller to be either zero (for a step in a generation < N) or G where G is the generation of the step being @@ -2489,6 +2783,11 @@ scavenge(step *stp) case THUNK_2_0: scavenge_thunk_srt(info); + ((StgThunk *)p)->payload[1] = evacuate(((StgThunk *)p)->payload[1]); + ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]); + p += sizeofW(StgThunk) + 2; + break; + case CONSTR_2_0: ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); @@ -2497,8 +2796,8 @@ scavenge(step *stp) case THUNK_1_0: scavenge_thunk_srt(info); - ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); - p += sizeofW(StgHeader) + 1; + ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]); + p += sizeofW(StgThunk) + 1; break; case FUN_1_0: @@ -2510,7 +2809,7 @@ scavenge(step *stp) case THUNK_0_1: scavenge_thunk_srt(info); - p += sizeofW(StgHeader) + 1; + p += sizeofW(StgThunk) + 1; break; case FUN_0_1: @@ -2521,7 +2820,7 @@ scavenge(step *stp) case THUNK_0_2: scavenge_thunk_srt(info); - p += sizeofW(StgHeader) + 2; + p += sizeofW(StgThunk) + 2; break; case FUN_0_2: @@ -2532,8 +2831,8 @@ scavenge(step *stp) case THUNK_1_1: scavenge_thunk_srt(info); - ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); - p += sizeofW(StgHeader) + 2; + ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]); + p += sizeofW(StgThunk) + 2; break; case FUN_1_1: @@ -2548,13 +2847,21 @@ scavenge(step *stp) goto gen_obj; case THUNK: + { + StgPtr end; + scavenge_thunk_srt(info); - // fall through + end = (P_)((StgThunk *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgThunk *)p)->payload; p < end; p++) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + p += info->layout.payload.nptrs; + break; + } gen_obj: case CONSTR: case WEAK: - case FOREIGN: case STABLE_NAME: { StgPtr end; @@ -2599,13 +2906,22 @@ scavenge(step *stp) p += sizeofW(StgInd); break; - case MUT_VAR: - evac_gen = 0; + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; // mutable anyhow + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } p += sizeofW(StgMutVar); break; + } case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -2614,16 +2930,6 @@ scavenge(step *stp) p += BLACKHOLE_sizeW(); break; - case BLACKHOLE_BQ: - { - StgBlockingQueue *bh = (StgBlockingQueue *)p; - bh->blocking_queue = - (StgTSO *)evacuate((StgClosure *)bh->blocking_queue); - failed_to_evac = rtsTrue; - p += BLACKHOLE_sizeW(); - break; - } - case THUNK_SELECTOR: { StgSelector *s = (StgSelector *)p; @@ -2644,27 +2950,44 @@ scavenge(step *stp) } case PAP: - case AP: p = scavenge_PAP((StgPAP *)p); break; + case AP: + p = scavenge_AP((StgAP *)p); + break; + case ARR_WORDS: // nothing to follow p += arr_words_sizeW((StgArrWords *)p); break; - case MUT_ARR_PTRS: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: // follow everything { StgPtr next; - - evac_gen = 0; // repeatedly mutable + rtsBool saved_eager; + + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it 
"clean" and + // avoid traversing it during minor GCs. + saved_eager = eager_promotion; + eager_promotion = rtsFalse; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; // mutable anyhow. + eager_promotion = saved_eager; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; + } + + failed_to_evac = rtsTrue; // always put it on the mutable list. break; } @@ -2678,26 +3001,39 @@ scavenge(step *stp) for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } - // it's tempting to recordMutable() if failed_to_evac is - // false, but that breaks some assumptions (eg. every - // closure on the mutable list is supposed to have the MUT - // flag set, and MUT_ARR_PTRS_FROZEN doesn't). + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } break; } case TSO: { StgTSO *tso = (StgTSO *)p; - evac_gen = 0; + rtsBool saved_eager = eager_promotion; + + eager_promotion = rtsFalse; scavengeTSO(tso); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; // mutable anyhow. + eager_promotion = saved_eager; + + if (failed_to_evac) { + tso->flags |= TSO_DIRTY; + } else { + tso->flags &= ~TSO_DIRTY; + } + + failed_to_evac = rtsTrue; // always on the mutable list p += tso_sizeW(tso); break; } #if defined(PAR) - case RBH: // cf. BLACKHOLE_BQ + case RBH: { #if 0 nat size, ptrs, nonptrs, vhs; @@ -2740,7 +3076,7 @@ scavenge(step *stp) p += sizeofW(StgFetchMe); break; // nothing to do in this case - case FETCH_ME_BQ: // cf. 
BLACKHOLE_BQ + case FETCH_ME_BQ: { StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p; (StgClosure *)fmbq->blocking_queue = @@ -2822,7 +3158,9 @@ scavenge(step *stp) */ if (failed_to_evac) { failed_to_evac = rtsFalse; - recordMutableGen((StgClosure *)q, stp->gen); + if (stp->gen_no > 0) { + recordMutableGen((StgClosure *)q, stp->gen); + } } } @@ -2878,6 +3216,10 @@ linear_scan: case THUNK_2_0: scavenge_thunk_srt(info); + ((StgThunk *)p)->payload[1] = evacuate(((StgThunk *)p)->payload[1]); + ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]); + break; + case CONSTR_2_0: ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); @@ -2892,6 +3234,9 @@ linear_scan: case THUNK_1_0: case THUNK_1_1: scavenge_thunk_srt(info); + ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]); + break; + case CONSTR_1_0: case CONSTR_1_1: ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); @@ -2916,13 +3261,20 @@ linear_scan: goto gen_obj; case THUNK: + { + StgPtr end; + scavenge_thunk_srt(info); - // fall through + end = (P_)((StgThunk *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgThunk *)p)->payload; p < end; p++) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + break; + } gen_obj: case CONSTR: case WEAK: - case FOREIGN: case STABLE_NAME: { StgPtr end; @@ -2955,12 +3307,21 @@ linear_scan: evacuate(((StgInd *)p)->indirectee); break; - case MUT_VAR: - evac_gen = 0; + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } break; + } case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -2969,15 +3330,6 @@ linear_scan: case ARR_WORDS: break; - case BLACKHOLE_BQ: - { - StgBlockingQueue *bh = (StgBlockingQueue *)p; - bh->blocking_queue = - (StgTSO *)evacuate((StgClosure *)bh->blocking_queue); - failed_to_evac = rtsTrue; - break; - } - case THUNK_SELECTOR: { StgSelector *s = (StgSelector *)p; @@ -2996,21 +3348,38 @@ linear_scan: } case PAP: - case AP: scavenge_PAP((StgPAP *)p); break; + + case AP: + scavenge_AP((StgAP *)p); + break; - case MUT_ARR_PTRS: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: // follow everything { StgPtr next; - - evac_gen = 0; // repeatedly mutable + rtsBool saved_eager; + + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it "clean" and + // avoid traversing it during minor GCs. + saved_eager = eager_promotion; + eager_promotion = rtsFalse; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } - evac_gen = saved_evac_gen; + eager_promotion = saved_eager; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; + } + failed_to_evac = rtsTrue; // mutable anyhow. 
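/* ---------------------------------------------------------------------------
 * [Editor's illustrative sketch -- not part of the patch.]  The CLEAN/DIRTY
 * protocol used in the case above: scavenge the mutable array with eager
 * promotion off, and if no element ended up pointing into a younger
 * generation, flip the header to the _CLEAN variant so that minor GCs can
 * skip the payload entirely.  Schematically, with hypothetical types in
 * place of the real info pointers and block descriptors:
 * ------------------------------------------------------------------------- */

typedef struct { unsigned gen_no; } elem;

typedef struct {
    int      dirty;        /* models the _DIRTY vs _CLEAN info pointer */
    unsigned n;
    elem    *payload[8];
} mini_marr;

static void scan_mut_array(mini_marr *a, unsigned my_gen)
{
    unsigned i;
    int points_young = 0;

    for (i = 0; i < a->n; i++) {
        /* the real loop evacuates payload[i] here, with eager_promotion
         * turned off; failed_to_evac reports a young-generation target */
        if (a->payload[i] != NULL && a->payload[i]->gen_no < my_gen) {
            points_young = 1;
        }
    }
    a->dirty = points_young;   /* clean arrays are skipped by minor GCs */
}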
break; } @@ -3019,27 +3388,44 @@ linear_scan: case MUT_ARR_PTRS_FROZEN0: // follow everything { - StgPtr next; + StgPtr next, q = p; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } break; } case TSO: { StgTSO *tso = (StgTSO *)p; - evac_gen = 0; + rtsBool saved_eager = eager_promotion; + + eager_promotion = rtsFalse; scavengeTSO(tso); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; + eager_promotion = saved_eager; + + if (failed_to_evac) { + tso->flags |= TSO_DIRTY; + } else { + tso->flags &= ~TSO_DIRTY; + } + + failed_to_evac = rtsTrue; // always on the mutable list break; } #if defined(PAR) - case RBH: // cf. BLACKHOLE_BQ + case RBH: { #if 0 nat size, ptrs, nonptrs, vhs; @@ -3078,7 +3464,7 @@ linear_scan: case FETCH_ME: break; // nothing to do in this case - case FETCH_ME_BQ: // cf. BLACKHOLE_BQ + case FETCH_ME_BQ: { StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p; (StgClosure *)fmbq->blocking_queue = @@ -3088,7 +3474,7 @@ linear_scan: p, info_type((StgClosure *)p))); break; } -#endif // PAR +#endif /* PAR */ case TVAR_WAIT_QUEUE: { @@ -3148,7 +3534,9 @@ linear_scan: if (failed_to_evac) { failed_to_evac = rtsFalse; - recordMutableGen((StgClosure *)q, &generations[evac_gen]); + if (evac_gen > 0) { + recordMutableGen((StgClosure *)q, &generations[evac_gen]); + } } // mark the next bit to indicate "scavenged" @@ -3160,7 +3548,7 @@ linear_scan: if (mark_stack_overflowed && oldgen_scan_bd == NULL) { IF_DEBUG(gc, debugBelch("scavenge_mark_stack: starting linear scan")); mark_stack_overflowed = rtsFalse; - oldgen_scan_bd = oldest_gen->steps[0].blocks; + oldgen_scan_bd = oldest_gen->steps[0].old_blocks; oldgen_scan = oldgen_scan_bd->start; } @@ -3178,12 +3566,12 @@ linear_scan: // already scavenged? 
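/* ---------------------------------------------------------------------------
 * [Editor's illustrative sketch -- not part of the patch.]  The overflow
 * discipline used by the mark stack here: a push onto a full stack merely
 * sets a flag and discards the pointer, and once the stack runs dry the
 * collector sweeps the marked blocks linearly (the loop below), re-pushing
 * anything marked but not yet scavenged.  The core of that discipline,
 * with a deliberately tiny stack:
 * ------------------------------------------------------------------------- */

#define MINI_MARK_CAP 4          /* tiny on purpose, to show overflow */

static void *mini_mark_stack[MINI_MARK_CAP];
static int   mini_mark_sp;
static int   mini_overflowed;

static void mini_push_mark(void *p)
{
    if (mini_mark_sp == MINI_MARK_CAP) {
        mini_overflowed = 1;     /* lost, but the linear scan will find it */
        return;
    }
    mini_mark_stack[mini_mark_sp++] = p;
}

static void *mini_pop_mark(void)
{
    return mini_mark_sp > 0 ? mini_mark_stack[--mini_mark_sp] : NULL;
}

/* When mini_pop_mark() returns NULL and mini_overflowed is set, reset the
 * flag and walk the marked region in address order, pushing every object
 * whose mark bit is set but which has not yet been scavenged -- which is
 * exactly what the oldgen_scan loop below does. */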
if (is_marked(oldgen_scan+1,oldgen_scan_bd)) { - oldgen_scan += sizeofW(StgHeader) + MIN_NONUPD_SIZE; + oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE; goto loop; } push_mark_stack(oldgen_scan); // ToDo: bump the linear scan by the actual size of the object - oldgen_scan += sizeofW(StgHeader) + MIN_NONUPD_SIZE; + oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE; goto linear_scan; } @@ -3227,18 +3615,28 @@ scavenge_one(StgPtr p) break; } - case FUN: - case FUN_1_0: // hardly worth specialising these guys - case FUN_0_1: - case FUN_1_1: - case FUN_0_2: - case FUN_2_0: case THUNK: case THUNK_1_0: case THUNK_0_1: case THUNK_1_1: case THUNK_0_2: case THUNK_2_0: + { + StgPtr q, end; + + end = (StgPtr)((StgThunk *)p)->payload + info->layout.payload.ptrs; + for (q = (StgPtr)((StgThunk *)p)->payload; q < end; q++) { + *q = (StgWord)(StgPtr)evacuate((StgClosure *)*q); + } + break; + } + + case FUN: + case FUN_1_0: // hardly worth specialising these guys + case FUN_0_1: + case FUN_1_1: + case FUN_0_2: + case FUN_2_0: case CONSTR: case CONSTR_1_0: case CONSTR_0_1: @@ -3246,7 +3644,6 @@ scavenge_one(StgPtr p) case CONSTR_0_2: case CONSTR_2_0: case WEAK: - case FOREIGN: case IND_PERM: { StgPtr q, end; @@ -3258,12 +3655,22 @@ scavenge_one(StgPtr p) break; } - case MUT_VAR: - evac_gen = 0; + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + StgPtr q = p; + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; // mutable anyhow + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } break; + } case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -3271,16 +3678,6 @@ scavenge_one(StgPtr p) case BLACKHOLE: break; - case BLACKHOLE_BQ: - { - StgBlockingQueue *bh = (StgBlockingQueue *)p; - evac_gen = 0; // repeatedly mutable - bh->blocking_queue = - (StgTSO *)evacuate((StgClosure *)bh->blocking_queue); - failed_to_evac = rtsTrue; - break; - } - case THUNK_SELECTOR: { StgSelector *s = (StgSelector *)p; @@ -3299,25 +3696,42 @@ scavenge_one(StgPtr p) } case PAP: - case AP: p = scavenge_PAP((StgPAP *)p); break; + case AP: + p = scavenge_AP((StgAP *)p); + break; + case ARR_WORDS: // nothing to follow break; - case MUT_ARR_PTRS: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: { - // follow everything - StgPtr next; - - evac_gen = 0; // repeatedly mutable + StgPtr next, q; + rtsBool saved_eager; + + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it "clean" and + // avoid traversing it during minor GCs. 
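/* ---------------------------------------------------------------------------
 * [Editor's illustrative sketch -- not part of the patch.]  The
 * save/restore of eager_promotion just below follows the pattern the patch
 * threads through copy(), copy_noscav() and evacuate_large(): an object
 * that must reach at least generation evac_gen is either promoted there
 * eagerly, or left where it is with failed_to_evac set so the referring
 * object stays on a mutable list.  The decision, distilled (hypothetical
 * step table):
 * ------------------------------------------------------------------------- */

typedef struct { unsigned gen_no; } dest_step;

static dest_step first_step_of_gen[8];  /* generations[g].steps[0], roughly */
static int       mini_eager  = 1;       /* models eager_promotion */
static int       mini_failed = 0;       /* models failed_to_evac */

static dest_step *choose_dest(dest_step *stp, unsigned evac_gen)
{
    if (stp->gen_no < evac_gen) {
        if (mini_eager) {
            stp = &first_step_of_gen[evac_gen];   /* promote eagerly */
        } else {
            mini_failed = 1;   /* caller must record a mutable reference */
        }
    }
    return stp;
}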
+ saved_eager = eager_promotion; + eager_promotion = rtsFalse; + q = p; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } - evac_gen = saved_evac_gen; + eager_promotion = saved_eager; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; + } + failed_to_evac = rtsTrue; break; } @@ -3326,28 +3740,44 @@ scavenge_one(StgPtr p) case MUT_ARR_PTRS_FROZEN0: { // follow everything - StgPtr next; + StgPtr next, q=p; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } break; } case TSO: { StgTSO *tso = (StgTSO *)p; - - evac_gen = 0; // repeatedly mutable + rtsBool saved_eager = eager_promotion; + + eager_promotion = rtsFalse; scavengeTSO(tso); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; + eager_promotion = saved_eager; + + if (failed_to_evac) { + tso->flags |= TSO_DIRTY; + } else { + tso->flags &= ~TSO_DIRTY; + } + + failed_to_evac = rtsTrue; // always on the mutable list break; } #if defined(PAR) - case RBH: // cf. BLACKHOLE_BQ + case RBH: { #if 0 nat size, ptrs, nonptrs, vhs; @@ -3387,7 +3817,7 @@ scavenge_one(StgPtr p) case FETCH_ME: break; // nothing to do in this case - case FETCH_ME_BQ: // cf. BLACKHOLE_BQ + case FETCH_ME_BQ: { StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p; (StgClosure *)fmbq->blocking_queue = @@ -3525,10 +3955,55 @@ scavenge_mutable_list(generation *gen) for (q = bd->start; q < bd->free; q++) { p = (StgPtr)*q; ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + +#ifdef DEBUG + switch (get_itbl((StgClosure *)p)->type) { + case MUT_VAR_CLEAN: + barf("MUT_VAR_CLEAN on mutable list"); + case MUT_VAR_DIRTY: + mutlist_MUTVARS++; break; + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + mutlist_MUTARRS++; break; + default: + mutlist_OTHERS++; break; + } +#endif + + // Check whether this object is "clean", that is it + // definitely doesn't point into a young generation. + // Clean objects don't need to be scavenged. Some clean + // objects (MUT_VAR_CLEAN) are not kept on the mutable + // list at all; others, such as MUT_ARR_PTRS_CLEAN and + // TSO, are always on the mutable list. + // + switch (get_itbl((StgClosure *)p)->type) { + case MUT_ARR_PTRS_CLEAN: + recordMutableGen((StgClosure *)p,gen); + continue; + case TSO: { + StgTSO *tso = (StgTSO *)p; + if ((tso->flags & TSO_DIRTY) == 0) { + // A clean TSO: we don't have to traverse its + // stack. However, we *do* follow the link field: + // we don't want to have to mark a TSO dirty just + // because we put it on a different queue. + if (tso->why_blocked != BlockedOnBlackHole) { + tso->link = (StgTSO *)evacuate((StgClosure *)tso->link); + } + recordMutableGen((StgClosure *)p,gen); + continue; + } + } + default: + ; + } + if (scavenge_one(p)) { - /* didn't manage to promote everything, so put the - * object back on the list. - */ + // didn't manage to promote everything, so put the + // object back on the list. 
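/* ---------------------------------------------------------------------------
 * [Editor's illustrative sketch -- not part of the patch.]
 * recordMutableGen(), used just below, is the remembered-set half of the
 * generational design: an old-generation object still holding a pointer
 * into a younger generation is pushed (back) onto its generation's mutable
 * list, so the next minor GC can scavenge it without tracing the whole old
 * generation.  Duplicates are tolerated -- evacuate() copes with seeing an
 * already-evacuated object.  A minimal remembered set:
 * ------------------------------------------------------------------------- */

#include <stdlib.h>

typedef struct mut_cell_ {
    void             *obj;
    struct mut_cell_ *next;
} mut_cell;

static mut_cell *mini_mut_list;    /* one per generation in the real RTS */

static void mini_record_mutable(void *obj)
{
    mut_cell *c = malloc(sizeof(*c));
    c->obj  = obj;
    c->next = mini_mut_list;       /* push; duplicates are fine */
    mini_mut_list = c;
}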
 		recordMutableGen((StgClosure *)p,gen);
 	    }
 	}
     }
@@ -3565,8 +4040,8 @@ scavenge_static(void)
     /* Take this object *off* the static_objects list,
      * and put it on the scavenged_static_objects list.
      */
-    static_objects = STATIC_LINK(info,p);
-    STATIC_LINK(info,p) = scavenged_static_objects;
+    static_objects = *STATIC_LINK(info,p);
+    *STATIC_LINK(info,p) = scavenged_static_objects;
     scavenged_static_objects = p;
 
     switch (info -> type) {
@@ -3690,6 +4165,32 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
     switch (info->i.type) {
 
     case UPDATE_FRAME:
+	// In SMP, we can get update frames that point to indirections
+	// when two threads evaluate the same thunk.  We do attempt to
+	// discover this situation in threadPaused(), but it's
+	// possible that the following sequence occurs:
+	//
+	//          A              B
+	//       enter T
+	//                      enter T
+	//     blackhole T
+	//                      update T
+	//         GC
+	//
+	// Now T is an indirection, and the update frame is already
+	// marked on A's stack, so we won't traverse it again in
+	// threadPaused().  We could traverse the whole stack again
+	// before GC, but that seems like overkill.
+	//
+	// Scavenging this update frame as normal would be disastrous;
+	// the updatee would end up pointing to the value.  So we turn
+	// the indirection into an IND_PERM, so that evacuate will
+	// copy the indirection into the old generation instead of
+	// discarding it.
+	if (get_itbl(((StgUpdateFrame *)p)->updatee)->type == IND) {
+	    ((StgUpdateFrame *)p)->updatee->header.info =
+		(StgInfoTable *)&stg_IND_PERM_info;
+	}
 	((StgUpdateFrame *)p)->updatee
 	    = evacuate(((StgUpdateFrame *)p)->updatee);
 	p += sizeofW(StgUpdateFrame);
@@ -3711,7 +4212,8 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
 	p = scavenge_small_bitmap(p, size, bitmap);
 
     follow_srt:
-	scavenge_srt((StgClosure **)GET_SRT(info), info->i.srt_bitmap);
+	if (major_gc)
+	    scavenge_srt((StgClosure **)GET_SRT(info), info->i.srt_bitmap);
 	continue;
 
     case RET_BCO: {
@@ -3817,7 +4319,9 @@ scavenge_large(step *stp)
 	p = bd->start;
 	if (scavenge_one(p)) {
-	    recordMutableGen((StgClosure *)p, stp->gen);
+	    if (stp->gen_no > 0) {
+		recordMutableGen((StgClosure *)p, stp->gen);
+	    }
 	}
     }
 }
@@ -3835,8 +4339,8 @@ zero_static_object_list(StgClosure* first_static)
   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
     info = get_itbl(p);
-    link = STATIC_LINK(info, p);
-    STATIC_LINK(info,p) = NULL;
+    link = *STATIC_LINK(info, p);
+    *STATIC_LINK(info,p) = NULL;
   }
 }
 
@@ -3849,14 +4353,14 @@ revertCAFs( void )
 {
     StgIndStatic *c;
 
-    for (c = (StgIndStatic *)caf_list; c != NULL;
+    for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
 	 c = (StgIndStatic *)c->static_link)
     {
 	SET_INFO(c, c->saved_info);
 	c->saved_info = NULL;
 	// could, but not necessary: c->static_link = NULL;
     }
-    caf_list = NULL;
+    revertible_caf_list = NULL;
 }
 
 void
@@ -3869,6 +4373,11 @@ markCAFs( evac_fn evac )
     {
 	evac(&c->indirectee);
     }
+    for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
+	 c = (StgIndStatic *)c->static_link)
+    {
+	evac(&c->indirectee);
+    }
 }
 
 /* -----------------------------------------------------------------------------
@@ -3924,75 +4433,6 @@ gcCAFs(void)
 
 /* -----------------------------------------------------------------------------
-   Lazy black holing.
-
-   Whenever a thread returns to the scheduler after possibly doing
-   some work, we have to run down the stack and black-hole all the
-   closures referred to by update frames.
-   -------------------------------------------------------------------------- */
-
-static void
-threadLazyBlackHole(StgTSO *tso)
-{
-    StgClosure *frame;
-    StgRetInfoTable *info;
-    StgBlockingQueue *bh;
-    StgPtr stack_end;
-
-    stack_end = &tso->stack[tso->stack_size];
-
-    frame = (StgClosure *)tso->sp;
-
-    while (1) {
-	info = get_ret_itbl(frame);
-
-	switch (info->i.type) {
-
-	case UPDATE_FRAME:
-	    bh = (StgBlockingQueue *)((StgUpdateFrame *)frame)->updatee;
-
-	    /* if the thunk is already blackholed, it means we've also
-	     * already blackholed the rest of the thunks on this stack,
-	     * so we can stop early.
-	     *
-	     * The blackhole made for a CAF is a CAF_BLACKHOLE, so they
-	     * don't interfere with this optimisation.
-	     */
-	    if (bh->header.info == &stg_BLACKHOLE_info) {
-		return;
-	    }
-
-	    if (bh->header.info != &stg_BLACKHOLE_BQ_info &&
-		bh->header.info != &stg_CAF_BLACKHOLE_info) {
-#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
-		debugBelch("Unexpected lazy BHing required at 0x%04x",(int)bh);
-#endif
-#ifdef PROFILING
-		// @LDV profiling
-		// We pretend that bh is now dead.
-		LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
-#endif
-		SET_INFO(bh,&stg_BLACKHOLE_info);
-
-		// We pretend that bh has just been created.
-		LDV_RECORD_CREATE(bh);
-	    }
-
-	    frame = (StgClosure *) ((StgUpdateFrame *)frame + 1);
-	    break;
-
-	case STOP_FRAME:
-	    return;
-
-	    // normal stack frames; do nothing except advance the pointer
-	default:
-	    frame = (StgClosure *)((StgPtr)frame + stack_frame_sizeW(frame));
-	}
-    }
-}
-
-
-/* -----------------------------------------------------------------------------
  * Stack squeezing
  *
  * Code largely pinched from old RTS, then hacked to bits.  We also do
 *
@@ -4003,12 +4443,11 @@ threadLazyBlackHole(StgTSO *tso)
 
 struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; };
 
 static void
-threadSqueezeStack(StgTSO *tso)
+stackSqueeze(StgTSO *tso, StgPtr bottom)
 {
     StgPtr frame;
     rtsBool prev_was_update_frame;
    StgClosure *updatee = NULL;
-    StgPtr bottom;
     StgRetInfoTable *info;
     StgWord current_gap_size;
     struct stack_gap *gap;
@@ -4019,8 +4458,6 @@ threadSqueezeStack(StgTSO *tso)
     // contains two values: the size of the gap, and the distance
     // to the next gap (or the stack top).
 
-    bottom = &(tso->stack[tso->stack_size]);
-
     frame = tso->sp;
 
     ASSERT(frame < bottom);
@@ -4038,20 +4475,6 @@ threadSqueezeStack(StgTSO *tso)
 	{
 	    StgUpdateFrame *upd = (StgUpdateFrame *)frame;
 
-	    if (upd->updatee->header.info == &stg_BLACKHOLE_info) {
-
-		// found a BLACKHOLE'd update frame; we've been here
-		// before, in a previous GC, so just break out.
-
-		// Mark the end of the gap, if we're in one.
-		if (current_gap_size != 0) {
-		    gap = (struct stack_gap *)(frame-sizeofW(StgUpdateFrame));
-		}
-
-		frame += sizeofW(StgUpdateFrame);
-		goto done_traversing;
-	    }
-
 	    if (prev_was_update_frame) {
 
 		TICK_UPD_SQUEEZED();
@@ -4067,7 +4490,6 @@ threadSqueezeStack(StgTSO *tso)
 	     * screw us up if we don't check.
 	     */
 	    if (upd->updatee != updatee && !closure_IND(upd->updatee)) {
-		// this wakes the threads up
 		UPD_IND_NOLOCK(upd->updatee, updatee);
 	    }
@@ -4085,46 +4507,6 @@ threadSqueezeStack(StgTSO *tso)
 	    // single update frame, or the topmost update frame in a series
 	    else {
-		StgBlockingQueue *bh = (StgBlockingQueue *)upd->updatee;
-
-		// Do lazy black-holing
-		if (bh->header.info != &stg_BLACKHOLE_info &&
-		    bh->header.info != &stg_BLACKHOLE_BQ_info &&
-		    bh->header.info != &stg_CAF_BLACKHOLE_info) {
-#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
-		    debugBelch("Unexpected lazy BHing required at 0x%04x",(int)bh);
-#endif
-#ifdef DEBUG
-		    /* zero out the slop so that the sanity checker can tell
-		     * where the next closure is.
-		     */
-		    {
-			StgInfoTable *bh_info = get_itbl(bh);
-			nat np = bh_info->layout.payload.ptrs,
-			    nw = bh_info->layout.payload.nptrs, i;
-			/* don't zero out slop for a THUNK_SELECTOR,
-			 * because its layout info is used for a
-			 * different purpose, and it's exactly the
-			 * same size as a BLACKHOLE in any case.
-			 */
-			if (bh_info->type != THUNK_SELECTOR) {
-			    for (i = 0; i < np + nw; i++) {
-				((StgClosure *)bh)->payload[i] = INVALID_OBJECT;
-			    }
-			}
-		    }
-#endif
-#ifdef PROFILING
-		    // We pretend that bh is now dead.
-		    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
-#endif
-		    // Todo: maybe use SET_HDR() and remove LDV_RECORD_CREATE()?
-		    SET_INFO(bh,&stg_BLACKHOLE_info);
-
-		    // We pretend that bh has just been created.
-		    LDV_RECORD_CREATE(bh);
-		}
-
 		prev_was_update_frame = rtsTrue;
 		updatee = upd->updatee;
 		frame += sizeofW(StgUpdateFrame);
@@ -4147,8 +4529,10 @@ threadSqueezeStack(StgTSO *tso)
 	}
     }
 
-done_traversing:
-
+    if (current_gap_size != 0) {
+	gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
+    }
+
     // Now we have a stack with gaps in it, and we have to walk down
     // shoving the stack up to fill in the gaps.  A diagram might
     // help:
@@ -4206,12 +4590,110 @@ done_traversing:
  *   turned on.
  * -------------------------------------------------------------------------- */
 
 void
-threadPaused(StgTSO *tso)
+threadPaused(Capability *cap, StgTSO *tso)
 {
-    if ( RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue )
-	threadSqueezeStack(tso);	// does black holing too
-    else
-	threadLazyBlackHole(tso);
+    StgClosure *frame;
+    StgRetInfoTable *info;
+    StgClosure *bh;
+    StgPtr stack_end;
+    nat words_to_squeeze = 0;
+    nat weight = 0;
+    nat weight_pending = 0;
+    rtsBool prev_was_update_frame = rtsFalse;
+
+    stack_end = &tso->stack[tso->stack_size];
+
+    frame = (StgClosure *)tso->sp;
+
+    while (1) {
+	// If we've already marked this frame, then stop here.
+	if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) {
+	    goto end;
+	}
+
+	info = get_ret_itbl(frame);
+
+	switch (info->i.type) {
+
+	case UPDATE_FRAME:
+
+	    SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info);
+
+	    bh = ((StgUpdateFrame *)frame)->updatee;
+
+	    if (closure_IND(bh) || bh->header.info == &stg_BLACKHOLE_info) {
+		IF_DEBUG(squeeze, debugBelch("suspending duplicate work: %ld words of stack\n", (StgPtr)frame - tso->sp));
+
+		// If this closure is already an indirection, then
+		// suspend the computation up to this point:
+		suspendComputation(cap,tso,(StgPtr)frame);
+
+		// Now drop the update frame, and arrange to return
+		// the value to the frame underneath:
+		tso->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2;
+		tso->sp[1] = (StgWord)bh;
+		tso->sp[0] = (W_)&stg_enter_info;
+
+		// And continue with threadPaused; there might be
+		// yet more computation to suspend.
+		threadPaused(cap,tso);
+		return;
+	    }
+
+	    if (bh->header.info != &stg_CAF_BLACKHOLE_info) {
+#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
+		debugBelch("Unexpected lazy BHing required at 0x%04lx\n",(long)bh);
+#endif
+		// zero out the slop so that the sanity checker can tell
+		// where the next closure is.
+		DEBUG_FILL_SLOP(bh);
+#ifdef PROFILING
+		// @LDV profiling
+		// We pretend that bh is now dead.
+		LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+#endif
+		SET_INFO(bh,&stg_BLACKHOLE_info);
+
+		// We pretend that bh has just been created.
+		LDV_RECORD_CREATE(bh);
+	    }
+
+	    frame = (StgClosure *) ((StgUpdateFrame *)frame + 1);
+	    if (prev_was_update_frame) {
+		words_to_squeeze += sizeofW(StgUpdateFrame);
+		weight += weight_pending;
+		weight_pending = 0;
+	    }
+	    prev_was_update_frame = rtsTrue;
+	    break;
+
+	case STOP_FRAME:
+	    goto end;
+
+	    // normal stack frames; do nothing except advance the pointer
+	default:
+	{
+	    nat frame_size = stack_frame_sizeW(frame);
+	    weight_pending += frame_size;
+	    frame = (StgClosure *)((StgPtr)frame + frame_size);
+	    prev_was_update_frame = rtsFalse;
+	}
+	}
    }
 
+end:
+    IF_DEBUG(squeeze,
+	     debugBelch("words_to_squeeze: %d, weight: %d, squeeze: %s\n",
			words_to_squeeze, weight,
			weight < words_to_squeeze ? "YES" : "NO"));
+
+    // Should we squeeze or not?  Arbitrary heuristic: we squeeze if
+    // the number of words we have to shift down is less than the
+    // number of stack words we squeeze away by doing so.
+    if (RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue &&
+	weight < words_to_squeeze) {
+	stackSqueeze(tso, (StgPtr)frame);
+    }
 }
 
 /* -----------------------------------------------------------------------------
@@ -4234,20 +4716,4 @@ printMutableList(generation *gen)
     }
     debugBelch("\n");
 }
-
-STATIC_INLINE rtsBool
-maybeLarge(StgClosure *closure)
-{
-  StgInfoTable *info = get_itbl(closure);
-
-  /* closure types that may be found on the new_large_objects list;
-     see scavenge_large */
-  return (info->type == MUT_ARR_PTRS ||
-	  info->type == MUT_ARR_PTRS_FROZEN ||
-	  info->type == MUT_ARR_PTRS_FROZEN0 ||
-	  info->type == TSO ||
-	  info->type == ARR_WORDS);
-}
-
-
-#endif // DEBUG
+#endif /* DEBUG */
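
The CLEAN/DIRTY split introduced in scavenge_one() above follows one pattern for every mutable object: turn eager promotion off, evacuate the object's pointer fields, then choose the object's info table according to whether failed_to_evac was set. The standalone sketch below models just that decision; it is not RTS code, and the Obj and MutArr types, the generation numbers, and the dirty flag (standing in for the MUT_ARR_PTRS_CLEAN/DIRTY info pointer) are all invented for illustration.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins: gen 0 is the youngest generation, and the
 * dirty flag plays the role of the CLEAN/DIRTY info pointer. */
typedef struct { int gen; } Obj;

typedef struct {
    int  gen;
    bool dirty;
    int  n;
    Obj *payload[8];
} MutArr;

/* Mirrors the scavenge logic: after visiting every field, the array is
 * clean exactly when no field still points into a younger generation. */
static void scavenge_mut_arr(MutArr *a)
{
    bool failed_to_evac = false;   /* set when a field stays young */
    for (int i = 0; i < a->n; i++) {
        if (a->payload[i]->gen < a->gen) {
            failed_to_evac = true;
        }
    }
    a->dirty = failed_to_evac;
}

int main(void)
{
    Obj young = { 0 }, old = { 1 };
    MutArr a = { .gen = 1, .dirty = true, .n = 2,
                 .payload = { &old, &young } };
    scavenge_mut_arr(&a);
    /* prints "DIRTY": the array still points at a gen-0 object, so a
     * minor GC must keep traversing it */
    printf("%s\n", a.dirty ? "DIRTY" : "CLEAN");
    return 0;
}

Drop the pointer to young and the same pass reports CLEAN, which is exactly the case where the real collector can skip (or merely re-record) the array on the next minor GC.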
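stackSqueeze() leaves stack_gap records where adjacent update frames were squeezed out, then walks down the stack "shoving" the live frames over the gaps. The sketch below reproduces only that sliding pass on a plain int array; the GAP sentinel replaces the real two-word stack_gap records, and the frame contents are arbitrary numbers, so this is an illustration of the compaction idea rather than the RTS routine.

#include <stdio.h>

#define GAP 0   /* sentinel standing in for a squeezed-out stack_gap */

int main(void)
{
    /* index 0 is the stack top (sp); the stack grows towards index 0 */
    int stack[] = { 7, 7, GAP, GAP, 5, 5, 5, GAP, 9 };
    int n = sizeof(stack) / sizeof(stack[0]);
    int dst = n;                  /* one past the stack bottom */

    /* walk from the bottom towards the top, sliding live words down
     * over the gaps */
    for (int src = n; src-- > 0; ) {
        if (stack[src] != GAP) {
            stack[--dst] = stack[src];
        }
    }

    /* live words are now packed against the bottom; the new sp is
     * &stack[dst].  Prints: 7 7 5 5 5 9 */
    for (int i = dst; i < n; i++) {
        printf("%d ", stack[i]);
    }
    printf("\n");
    return 0;
}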
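The new threadPaused() separates the decision from the work: one pass black-holes thunks and counts, and stackSqueeze() only runs if the words reclaimed (words_to_squeeze) exceed the words that would have to be shifted down (weight). The standalone sketch below replays that bookkeeping on a made-up frame sequence; UPD_FRAME_WORDS is an assumed frame size for the example, not the real sizeofW(StgUpdateFrame).

#include <stdbool.h>
#include <stdio.h>

#define UPD_FRAME_WORDS 3   /* assumed update-frame size in words */

struct frame { bool is_upd; unsigned size; };

int main(void)
{
    /* walked from the stack top (sp) downwards, as threadPaused does */
    struct frame stack[] = {
        { true,  UPD_FRAME_WORDS },  /* adjacent pair: squeezable */
        { true,  UPD_FRAME_WORDS },
        { false, 10              },  /* ordinary frame            */
        { true,  UPD_FRAME_WORDS },  /* second squeezable pair    */
        { true,  UPD_FRAME_WORDS },
    };
    unsigned words_to_squeeze = 0;   /* words reclaimed by squeezing */
    unsigned weight = 0;             /* words that must shift down   */
    unsigned weight_pending = 0;
    bool prev_was_upd = false;

    for (unsigned i = 0; i < sizeof(stack) / sizeof(stack[0]); i++) {
        if (stack[i].is_upd) {
            if (prev_was_upd) {
                /* two adjacent update frames squeeze into one */
                words_to_squeeze += UPD_FRAME_WORDS;
                /* everything seen above this point must move down */
                weight += weight_pending;
                weight_pending = 0;
            }
            prev_was_upd = true;
        } else {
            weight_pending += stack[i].size;
            prev_was_upd = false;
        }
    }

    /* prints "squeeze 6, shift 10 -> don't squeeze" */
    printf("squeeze %u, shift %u -> %s\n", words_to_squeeze, weight,
           weight < words_to_squeeze ? "squeeze" : "don't squeeze");
    return 0;
}

Here squeezing would save 6 words but cost a 10-word shift, so by the heuristic in the diff it is skipped; remove the 10-word frame between the pairs and the decision flips.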