X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FGC.c;h=67d744aa93de0524834d725b22a996b734fdccd5;hb=a1b4e3b88a6987deed7bb7f1bd870b30eef1b475;hp=63526f1fc7ad48e03420d634671aed9054c5ad4c;hpb=18f73b07621d157c948ec6a86c173c9915712896;p=ghc-hetmet.git diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index 63526f1..a13cd33 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -1,49 +1,33 @@ /* ----------------------------------------------------------------------------- - * $Id: GC.c,v 1.97 2001/03/02 14:28:44 simonmar Exp $ * - * (c) The GHC Team 1998-1999 + * (c) The GHC Team 1998-2003 * * Generational garbage collector * * ---------------------------------------------------------------------------*/ -//@menu -//* Includes:: -//* STATIC OBJECT LIST:: -//* Static function declarations:: -//* Garbage Collect:: -//* Weak Pointers:: -//* Evacuation:: -//* Scavenging:: -//* Reverting CAFs:: -//* Sanity code for CAF garbage collection:: -//* Lazy black holing:: -//* Stack squeezing:: -//* Pausing a thread:: -//* Index:: -//@end menu - -//@node Includes, STATIC OBJECT LIST -//@subsection Includes - +#include "PosixSource.h" #include "Rts.h" #include "RtsFlags.h" #include "RtsUtils.h" +#include "Apply.h" +#include "OSThreads.h" #include "Storage.h" -#include "StoragePriv.h" +#include "LdvProfile.h" +#include "Updates.h" #include "Stats.h" #include "Schedule.h" -#include "SchedAPI.h" /* for ReverCAFs prototype */ #include "Sanity.h" -#include "GC.h" #include "BlockAlloc.h" #include "MBlock.h" -#include "Main.h" #include "ProfHeap.h" #include "SchedAPI.h" #include "Weak.h" -#include "StablePriv.h" #include "Prelude.h" +#include "ParTicky.h" // ToDo: move into Rts.h +#include "GCCompact.h" +#include "RtsSignals.h" +#include "STM.h" #if defined(GRAN) || defined(PAR) # include "GranSimRts.h" # include "ParallelRts.h" @@ -59,8 +43,15 @@ #include "FrontPanel.h" #endif -//@node STATIC OBJECT LIST, Static function declarations, Includes -//@subsection STATIC OBJECT LIST +#include "RetainerProfile.h" + +#include + +// Turn off inlining when debugging - it obfuscates things +#ifdef DEBUG +# undef STATIC_INLINE +# define STATIC_INLINE static +#endif /* STATIC OBJECT LIST. * @@ -96,8 +87,8 @@ * We build up a static object list while collecting generations 0..N, * which is then appended to the static object list of generation N+1. */ -StgClosure* static_objects; /* live static objects */ -StgClosure* scavenged_static_objects; /* static objects scavenged so far */ +static StgClosure* static_objects; // live static objects +StgClosure* scavenged_static_objects; // static objects scavenged so far /* N is the oldest generation being collected, where the generations * are numbered starting at 0. A major GC (indicated by the major_gc @@ -113,105 +104,287 @@ static rtsBool major_gc; */ static nat evac_gen; +/* Whether to do eager promotion or not. + */ +static rtsBool eager_promotion; + /* Weak pointers */ -static StgWeak *old_weak_ptr_list; /* also pending finaliser list */ -static rtsBool weak_done; /* all done for this pass */ +StgWeak *old_weak_ptr_list; // also pending finaliser list + +/* Which stage of processing various kinds of weak pointer are we at? + * (see traverse_weak_ptr_list() below for discussion). + */ +typedef enum { WeakPtrs, WeakThreads, WeakDone } WeakStage; +static WeakStage weak_stage; /* List of all threads during GC */ static StgTSO *old_all_threads; -static StgTSO *resurrected_threads; +StgTSO *resurrected_threads; /* Flag indicating failure to evacuate an object to the desired * generation. 
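  * When this flag is set, an entry for the cross-generation pointer
  * has to be made on a mutable list, so that the pointer is traced
  * again at the next GC (see the outline in GarbageCollect() below).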
*/ static rtsBool failed_to_evac; -/* Old to-space (used for two-space collector only) +/* Saved nursery (used for 2-space collector only) */ -bdescr *old_to_space; - +static bdescr *saved_nursery; +static nat saved_n_blocks; + /* Data used for allocation area sizing. */ -lnat new_blocks; /* blocks allocated during this GC */ -lnat g0s0_pcnt_kept = 30; /* percentage of g0s0 live at last minor GC */ +static lnat new_blocks; // blocks allocated during this GC +static lnat new_scavd_blocks; // ditto, but depth-first blocks +static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC + +/* Used to avoid long recursion due to selector thunks + */ +static lnat thunk_selector_depth = 0; +#define MAX_THUNK_SELECTOR_DEPTH 8 -//@node Static function declarations, Garbage Collect, STATIC OBJECT LIST -//@subsection Static function declarations +/* Mut-list stats */ +#ifdef DEBUG +static nat + mutlist_MUTVARS, + mutlist_MUTARRS, + mutlist_OTHERS; +#endif /* ----------------------------------------------------------------------------- Static function declarations -------------------------------------------------------------------------- */ -static StgClosure * evacuate ( StgClosure *q ); +static bdescr * gc_alloc_block ( step *stp ); +static void mark_root ( StgClosure **root ); + +// Use a register argument for evacuate, if available. +#if __GNUC__ >= 2 +#define REGPARM1 __attribute__((regparm(1))) +#else +#define REGPARM1 +#endif + +REGPARM1 static StgClosure * evacuate (StgClosure *q); + static void zero_static_object_list ( StgClosure* first_static ); -static void zero_mutable_list ( StgMutClosure *first ); static rtsBool traverse_weak_ptr_list ( void ); -static void cleanup_weak_ptr_list ( StgWeak **list ); +static void mark_weak_ptr_list ( StgWeak **list ); -static void scavenge_stack ( StgPtr p, StgPtr stack_end ); -static void scavenge_large ( step * ); -static void scavenge ( step * ); -static void scavenge_static ( void ); -static void scavenge_mutable_list ( generation *g ); -static void scavenge_mut_once_list ( generation *g ); +static StgClosure * eval_thunk_selector ( nat field, StgSelector * p ); -#ifdef DEBUG + +static void scavenge ( step * ); +static void scavenge_mark_stack ( void ); +static void scavenge_stack ( StgPtr p, StgPtr stack_end ); +static rtsBool scavenge_one ( StgPtr p ); +static void scavenge_large ( step * ); +static void scavenge_static ( void ); +static void scavenge_mutable_list ( generation *g ); + +static void scavenge_large_bitmap ( StgPtr p, + StgLargeBitmap *large_bitmap, + nat size ); + +#if 0 && defined(DEBUG) static void gcCAFs ( void ); #endif -void revertCAFs ( void ); -void scavengeCAFs ( void ); +/* ----------------------------------------------------------------------------- + inline functions etc. for dealing with the mark bitmap & stack. + -------------------------------------------------------------------------- */ + +#define MARK_STACK_BLOCKS 4 + +static bdescr *mark_stack_bdescr; +static StgPtr *mark_stack; +static StgPtr *mark_sp; +static StgPtr *mark_splim; + +// Flag and pointers used for falling back to a linear scan when the +// mark stack overflows. 
+static rtsBool mark_stack_overflowed; +static bdescr *oldgen_scan_bd; +static StgPtr oldgen_scan; + +STATIC_INLINE rtsBool +mark_stack_empty(void) +{ + return mark_sp == mark_stack; +} + +STATIC_INLINE rtsBool +mark_stack_full(void) +{ + return mark_sp >= mark_splim; +} + +STATIC_INLINE void +reset_mark_stack(void) +{ + mark_sp = mark_stack; +} + +STATIC_INLINE void +push_mark_stack(StgPtr p) +{ + *mark_sp++ = p; +} + +STATIC_INLINE StgPtr +pop_mark_stack(void) +{ + return *--mark_sp; +} + +/* ----------------------------------------------------------------------------- + Allocate a new to-space block in the given step. + -------------------------------------------------------------------------- */ + +static bdescr * +gc_alloc_block(step *stp) +{ + bdescr *bd = allocBlock(); + bd->gen_no = stp->gen_no; + bd->step = stp; + bd->link = NULL; + + // blocks in to-space in generations up to and including N + // get the BF_EVACUATED flag. + if (stp->gen_no <= N) { + bd->flags = BF_EVACUATED; + } else { + bd->flags = 0; + } + + // Start a new to-space block, chain it on after the previous one. + if (stp->hp_bd != NULL) { + stp->hp_bd->free = stp->hp; + stp->hp_bd->link = bd; + } + + stp->hp_bd = bd; + stp->hp = bd->start; + stp->hpLim = stp->hp + BLOCK_SIZE_W; + + stp->n_blocks++; + new_blocks++; + + return bd; +} + +static bdescr * +gc_alloc_scavd_block(step *stp) +{ + bdescr *bd = allocBlock(); + bd->gen_no = stp->gen_no; + bd->step = stp; + + // blocks in to-space in generations up to and including N + // get the BF_EVACUATED flag. + if (stp->gen_no <= N) { + bd->flags = BF_EVACUATED; + } else { + bd->flags = 0; + } + + bd->link = stp->blocks; + stp->blocks = bd; + + if (stp->scavd_hp != NULL) { + Bdescr(stp->scavd_hp)->free = stp->scavd_hp; + } + stp->scavd_hp = bd->start; + stp->scavd_hpLim = stp->scavd_hp + BLOCK_SIZE_W; + + stp->n_blocks++; + new_scavd_blocks++; -//@node Garbage Collect, Weak Pointers, Static function declarations -//@subsection Garbage Collect + return bd; +} /* ----------------------------------------------------------------------------- GarbageCollect - For garbage collecting generation N (and all younger generations): + Rough outline of the algorithm: for garbage collecting generation N + (and all younger generations): - follow all pointers in the root set. the root set includes all - mutable objects in all steps in all generations. + mutable objects in all generations (mutable_list). - for each pointer, evacuate the object it points to into either - + to-space in the next higher step in that generation, if one exists, - + if the object's generation == N, then evacuate it to the next - generation if one exists, or else to-space in the current - generation. - + if the object's generation < N, then evacuate it to to-space - in the next generation. + + + to-space of the step given by step->to, which is the next + highest step in this generation or the first step in the next + generation if this is the last step. + + + to-space of generations[evac_gen]->steps[0], if evac_gen != 0. + When we evacuate an object we attempt to evacuate + everything it points to into the same generation - this is + achieved by setting evac_gen to the desired generation. If + we can't do this, then an entry in the mut list has to + be made for the cross-generation pointer. + + + if the object is already in a generation > N, then leave + it alone. - repeatedly scavenge to-space from each step in each generation being collected until no more objects can be evacuated. 
- free from-space in each step, and set from-space = to-space. + Locks held: all capabilities are held throughout GarbageCollect(). + -------------------------------------------------------------------------- */ -//@cindex GarbageCollect -void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) +void +GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) { bdescr *bd; step *stp; - lnat live, allocated, collected = 0, copied = 0; - nat g, s; + lnat live, allocated, copied = 0, scavd_copied = 0; + lnat oldgen_saved_blocks = 0; + nat g, s, i; + + ACQUIRE_SM_LOCK; #ifdef PROFILING CostCentreStack *prev_CCS; #endif #if defined(DEBUG) && defined(GRAN) - IF_DEBUG(gc, belch("@@ Starting garbage collection at %ld (%lx)\n", + IF_DEBUG(gc, debugBelch("@@ Starting garbage collection at %ld (%lx)\n", Now, Now)); #endif - /* tell the stats department that we've started a GC */ +#if defined(RTS_USER_SIGNALS) + // block signals + blockUserSignals(); +#endif + + // tell the STM to discard any cached closures its hoping to re-use + stmPreGCHook(); + + // tell the stats department that we've started a GC stat_startGC(); - /* attribute any costs to CCS_GC */ +#ifdef DEBUG + // check for memory leaks if DEBUG is on + memInventory(); +#endif + +#ifdef DEBUG + mutlist_MUTVARS = 0; + mutlist_MUTARRS = 0; + mutlist_OTHERS = 0; +#endif + + // Init stats and print par specific (timing) info + PAR_TICKY_PAR_START(); + + // attribute any costs to CCS_GC #ifdef PROFILING prev_CCS = CCCS; CCCS = CCS_GC; @@ -230,7 +403,9 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) } else { N = 0; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - if (generations[g].steps[0].n_blocks >= generations[g].max_blocks) { + if (generations[g].steps[0].n_blocks + + generations[g].steps[0].n_large_blocks + >= generations[g].max_blocks) { N = g; } } @@ -243,7 +418,7 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) } #endif - /* check stack sanity *before* GC (ToDo: check all threads) */ + // check stack sanity *before* GC (ToDo: check all threads) #if defined(GRAN) // ToDo!: check sanity IF_DEBUG(sanity, checkTSOsSanity()); #endif @@ -254,97 +429,168 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) static_objects = END_OF_STATIC_LIST; scavenged_static_objects = END_OF_STATIC_LIST; - /* zero the mutable list for the oldest generation (see comment by - * zero_mutable_list below). - */ - if (major_gc) { - zero_mutable_list(generations[RtsFlags.GcFlags.generations-1].mut_once_list); - } - - /* Save the old to-space if we're doing a two-space collection + /* Save the nursery if we're doing a two-space collection. + * g0s0->blocks will be used for to-space, so we need to get the + * nursery out of the way. */ if (RtsFlags.GcFlags.generations == 1) { - old_to_space = g0s0->to_space; - g0s0->to_space = NULL; + saved_nursery = g0s0->blocks; + saved_n_blocks = g0s0->n_blocks; + g0s0->blocks = NULL; + g0s0->n_blocks = 0; } /* Keep a count of how many new blocks we allocated during this GC * (used for resizing the allocation area, later). */ new_blocks = 0; + new_scavd_blocks = 0; - /* Initialise to-space in all the generations/steps that we're - * collecting. - */ + // Initialise to-space in all the generations/steps that we're + // collecting. + // for (g = 0; g <= N; g++) { - generations[g].mut_once_list = END_MUT_LIST; - generations[g].mut_list = END_MUT_LIST; + + // throw away the mutable list. 
Invariant: the mutable list + // always has at least one block; this means we can avoid a check for + // NULL in recordMutable(). + if (g != 0) { + freeChain(generations[g].mut_list); + generations[g].mut_list = allocBlock(); + for (i = 0; i < n_capabilities; i++) { + freeChain(capabilities[i].mut_lists[g]); + capabilities[i].mut_lists[g] = allocBlock(); + } + } for (s = 0; s < generations[g].n_steps; s++) { - /* generation 0, step 0 doesn't need to-space */ + // generation 0, step 0 doesn't need to-space if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { continue; } - /* Get a free block for to-space. Extra blocks will be chained on - * as necessary. - */ - bd = allocBlock(); stp = &generations[g].steps[s]; - ASSERT(stp->gen->no == g); - ASSERT(stp->hp ? Bdescr(stp->hp)->step == stp : rtsTrue); - bd->gen = &generations[g]; - bd->step = stp; - bd->link = NULL; - bd->evacuated = 1; /* it's a to-space block */ - stp->hp = bd->start; - stp->hpLim = stp->hp + BLOCK_SIZE_W; - stp->hp_bd = bd; - stp->to_space = bd; - stp->to_blocks = 1; - stp->scan = bd->start; - stp->scan_bd = bd; + ASSERT(stp->gen_no == g); + + // start a new to-space for this step. + stp->old_blocks = stp->blocks; + stp->n_old_blocks = stp->n_blocks; + + // allocate the first to-space block; extra blocks will be + // chained on as necessary. + stp->hp_bd = NULL; + bd = gc_alloc_block(stp); + stp->blocks = bd; + stp->n_blocks = 1; + stp->scan = bd->start; + stp->scan_bd = bd; + + // allocate a block for "already scavenged" objects. This goes + // on the front of the stp->blocks list, so it won't be + // traversed by the scavenging sweep. + gc_alloc_scavd_block(stp); + + // initialise the large object queues. stp->new_large_objects = NULL; stp->scavenged_large_objects = NULL; - new_blocks++; - /* mark the large objects as not evacuated yet */ + stp->n_scavenged_large_blocks = 0; + + // mark the large objects as not evacuated yet for (bd = stp->large_objects; bd; bd = bd->link) { - bd->evacuated = 0; + bd->flags &= ~BF_EVACUATED; + } + + // for a compacted step, we need to allocate the bitmap + if (stp->is_compacted) { + nat bitmap_size; // in bytes + bdescr *bitmap_bdescr; + StgWord *bitmap; + + bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); + + if (bitmap_size > 0) { + bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size) + / BLOCK_SIZE); + stp->bitmap = bitmap_bdescr; + bitmap = bitmap_bdescr->start; + + IF_DEBUG(gc, debugBelch("bitmap_size: %d, bitmap: %p", + bitmap_size, bitmap);); + + // don't forget to fill it with zeros! + memset(bitmap, 0, bitmap_size); + + // For each block in this step, point to its bitmap from the + // block descriptor. + for (bd=stp->old_blocks; bd != NULL; bd = bd->link) { + bd->u.bitmap = bitmap; + bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE); + + // Also at this point we set the BF_COMPACTED flag + // for this block. The invariant is that + // BF_COMPACTED is always unset, except during GC + // when it is set on those blocks which will be + // compacted. + bd->flags |= BF_COMPACTED; + } + } } } } /* make sure the older generations have at least one block to - * allocate into (this makes things easier for copy(), see below. + * allocate into (this makes things easier for copy(), see below). 
*/ for (g = N+1; g < RtsFlags.GcFlags.generations; g++) { for (s = 0; s < generations[g].n_steps; s++) { stp = &generations[g].steps[s]; if (stp->hp_bd == NULL) { - bd = allocBlock(); - bd->gen = &generations[g]; - bd->step = stp; - bd->link = NULL; - bd->evacuated = 0; /* *not* a to-space block */ - stp->hp = bd->start; - stp->hpLim = stp->hp + BLOCK_SIZE_W; - stp->hp_bd = bd; - stp->blocks = bd; - stp->n_blocks = 1; - new_blocks++; + ASSERT(stp->blocks == NULL); + bd = gc_alloc_block(stp); + stp->blocks = bd; + stp->n_blocks = 1; + } + if (stp->scavd_hp == NULL) { + gc_alloc_scavd_block(stp); + stp->n_blocks++; } /* Set the scan pointer for older generations: remember we * still have to scavenge objects that have been promoted. */ stp->scan = stp->hp; stp->scan_bd = stp->hp_bd; - stp->to_space = NULL; - stp->to_blocks = 0; stp->new_large_objects = NULL; stp->scavenged_large_objects = NULL; + stp->n_scavenged_large_blocks = 0; + } + + /* Move the private mutable lists from each capability onto the + * main mutable list for the generation. + */ + for (i = 0; i < n_capabilities; i++) { + for (bd = capabilities[i].mut_lists[g]; + bd->link != NULL; bd = bd->link) { + /* nothing */ + } + bd->link = generations[g].mut_list; + generations[g].mut_list = capabilities[i].mut_lists[g]; + capabilities[i].mut_lists[g] = allocBlock(); } } + /* Allocate a mark stack if we're doing a major collection. + */ + if (major_gc) { + mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS); + mark_stack = (StgPtr *)mark_stack_bdescr->start; + mark_sp = mark_stack; + mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W); + } else { + mark_stack_bdescr = NULL; + } + + eager_promotion = rtsTrue; // for now + /* ----------------------------------------------------------------------- * follow all the roots that we know about: * - mutable lists from each generation > N @@ -361,23 +607,12 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) int st; for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { generations[g].saved_mut_list = generations[g].mut_list; - generations[g].mut_list = END_MUT_LIST; - } - - /* Do the mut-once lists first */ - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { - IF_PAR_DEBUG(verbose, - printMutOnceList(&generations[g])); - scavenge_mut_once_list(&generations[g]); - evac_gen = g; - for (st = generations[g].n_steps-1; st >= 0; st--) { - scavenge(&generations[g].steps[st]); - } + generations[g].mut_list = allocBlock(); + // mut_list always has at least one block. } for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { - IF_PAR_DEBUG(verbose, - printMutableList(&generations[g])); + IF_PAR_DEBUG(verbose, printMutableList(&generations[g])); scavenge_mutable_list(&generations[g]); evac_gen = g; for (st = generations[g].n_steps-1; st >= 0; st--) { @@ -386,12 +621,15 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) } } - scavengeCAFs(); + /* follow roots from the CAF list (used by GHCi) + */ + evac_gen = 0; + markCAFs(mark_root); /* follow all the roots that the application knows about. 
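    * (get_roots() applies mark_root() to each root; mark_root() simply
    * evacuates the closure that the root points to - see mark_root()
    * below.)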
*/ evac_gen = 0; - get_roots(); + get_roots(mark_root); #if defined(PAR) /* And don't forget to mark the TSO if we got here direct from @@ -402,16 +640,19 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) } */ - /* Mark the entries in the GALA table of the parallel system */ + // Mark the entries in the GALA table of the parallel system markLocalGAs(major_gc); + // Mark all entries on the list of pending fetches + markPendingFetches(major_gc); #endif /* Mark the weak pointer list, and prepare to detect dead weak * pointers. */ + mark_weak_ptr_list(&weak_ptr_list); old_weak_ptr_list = weak_ptr_list; weak_ptr_list = NULL; - weak_done = rtsFalse; + weak_stage = WeakPtrs; /* The all_threads list is like the weak_ptr_list. * See traverse_weak_ptr_list() for the details. @@ -422,18 +663,7 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) /* Mark the stable pointer table. */ - markStablePtrTable(major_gc); - -#ifdef INTERPRETER - { - /* ToDo: To fix the caf leak, we need to make the commented out - * parts of this code do something sensible - as described in - * the CAF document. - */ - extern void markHugsObjects(void); - markHugsObjects(); - } -#endif + markStablePtrTable(mark_root); /* ------------------------------------------------------------------------- * Repeatedly scavenge all the areas we know about until there's no @@ -444,11 +674,10 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) loop: flag = rtsFalse; - /* scavenge static objects */ + // scavenge static objects if (major_gc && static_objects != END_OF_STATIC_LIST) { - IF_DEBUG(sanity, - checkStaticObjects()); - scavenge_static(); + IF_DEBUG(sanity, checkStaticObjects(static_objects)); + scavenge_static(); } /* When scavenging the older generations: Objects may have been @@ -460,12 +689,21 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) * generation. */ - /* scavenge each step in generations 0..maxgen */ + // scavenge each step in generations 0..maxgen { - int gen, st; + long gen; + int st; + loop2: - for (gen = RtsFlags.GcFlags.generations-1; gen >= 0; gen--) { - for (st = generations[gen].n_steps-1; st >= 0 ; st--) { + // scavenge objects in compacted generation + if (mark_stack_overflowed || oldgen_scan_bd != NULL || + (mark_stack_bdescr != NULL && !mark_stack_empty())) { + scavenge_mark_stack(); + flag = rtsTrue; + } + + for (gen = RtsFlags.GcFlags.generations; --gen >= 0; ) { + for (st = generations[gen].n_steps; --st >= 0; ) { if (gen == 0 && st == 0 && RtsFlags.GcFlags.generations > 1) { continue; } @@ -484,62 +722,103 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) } } } + if (flag) { goto loop; } - /* must be last... */ - if (traverse_weak_ptr_list()) { /* returns rtsTrue if evaced something */ + // must be last... invariant is that everything is fully + // scavenged at this point. + if (traverse_weak_ptr_list()) { // returns rtsTrue if evaced something goto loop; } } - /* Final traversal of the weak pointer list (see comment by - * cleanUpWeakPtrList below). - */ - cleanup_weak_ptr_list(&weak_ptr_list); - - /* Now see which stable names are still alive. + /* Update the pointers from the task list - these are + * treated as weak pointers because we want to allow a main thread + * to get a BlockedOnDeadMVar exception in the same way as any other + * thread. 
Note that the threads should all have been retained by + * GC by virtue of being on the all_threads list, we're just + * updating pointers here. */ - gcStablePtrTable(major_gc); + { + Task *task; + StgTSO *tso; + for (task = all_tasks; task != NULL; task = task->all_link) { + if (!task->stopped && task->tso) { + ASSERT(task->tso->bound == task); + tso = (StgTSO *) isAlive((StgClosure *)task->tso); + if (tso == NULL) { + barf("task %p: main thread %d has been GC'd", +#ifdef THREADED_RTS + (void *)task->id, +#else + (void *)task, +#endif + task->tso->id); + } + task->tso = tso; + } + } + } #if defined(PAR) - /* Reconstruct the Global Address tables used in GUM */ + // Reconstruct the Global Address tables used in GUM rebuildGAtables(major_gc); - IF_DEBUG(sanity, checkGlobalTSOList(rtsTrue/*check TSOs, too*/)); IF_DEBUG(sanity, checkLAGAtable(rtsTrue/*check closures, too*/)); #endif - /* Set the maximum blocks for the oldest generation, based on twice - * the amount of live data now, adjusted to fit the maximum heap - * size if necessary. - * - * This is an approximation, since in the worst case we'll need - * twice the amount of live data plus whatever space the other - * generations need. - */ - if (RtsFlags.GcFlags.generations > 1) { - if (major_gc) { - oldest_gen->max_blocks = - stg_max(oldest_gen->steps[0].to_blocks * RtsFlags.GcFlags.oldGenFactor, - RtsFlags.GcFlags.minOldGenSize); - if (oldest_gen->max_blocks > RtsFlags.GcFlags.maxHeapSize / 2) { - oldest_gen->max_blocks = RtsFlags.GcFlags.maxHeapSize / 2; - if (((int)oldest_gen->max_blocks - - (int)oldest_gen->steps[0].to_blocks) < - (RtsFlags.GcFlags.pcFreeHeap * - RtsFlags.GcFlags.maxHeapSize / 200)) { - heapOverflow(); - } + // Now see which stable names are still alive. + gcStablePtrTable(); + + // Tidy the end of the to-space chains + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + stp = &generations[g].steps[s]; + if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { + ASSERT(Bdescr(stp->hp) == stp->hp_bd); + stp->hp_bd->free = stp->hp; + Bdescr(stp->scavd_hp)->free = stp->scavd_hp; + } } - } } +#ifdef PROFILING + // We call processHeapClosureForDead() on every closure destroyed during + // the current garbage collection, so we invoke LdvCensusForDead(). + if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV + || RtsFlags.ProfFlags.bioSelector != NULL) + LdvCensusForDead(N); +#endif + + // NO MORE EVACUATION AFTER THIS POINT! + // Finally: compaction of the oldest generation. + if (major_gc && oldest_gen->steps[0].is_compacted) { + // save number of blocks for stats + oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks; + compact(get_roots); + } + + IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse)); + /* run through all the generations/steps and tidy up */ copied = new_blocks * BLOCK_SIZE_W; + scavd_copied = new_scavd_blocks * BLOCK_SIZE_W; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { if (g <= N) { - generations[g].collections++; /* for stats */ + generations[g].collections++; // for stats + } + + // Count the mutable list as bytes "copied" for the purposes of + // stats. Every mutable list is copied during every GC. 
+ if (g > 0) { + nat mut_list_size = 0; + for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { + mut_list_size += bd->free - bd->start; + } + copied += mut_list_size; + + IF_DEBUG(gc, debugBelch("mut_list_size: %ld (%d vars, %d arrays, %d others)\n", mut_list_size * sizeof(W_), mutlist_MUTVARS, mutlist_MUTARRS, mutlist_OTHERS)); } for (s = 0; s < generations[g].n_steps; s++) { @@ -547,34 +826,54 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) stp = &generations[g].steps[s]; if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { - /* Tidy the end of the to-space chains */ - stp->hp_bd->free = stp->hp; - stp->hp_bd->link = NULL; - /* stats information: how much we copied */ + // stats information: how much we copied if (g <= N) { copied -= stp->hp_bd->start + BLOCK_SIZE_W - stp->hp_bd->free; + scavd_copied -= (P_)(BLOCK_ROUND_UP(stp->scavd_hp)) - stp->scavd_hp; } } - /* for generations we collected... */ + // for generations we collected... if (g <= N) { - collected += stp->n_blocks * BLOCK_SIZE_W; /* for stats */ - /* free old memory and shift to-space into from-space for all * the collected steps (except the allocation area). These * freed blocks will probaby be quickly recycled. */ if (!(g == 0 && s == 0)) { - freeChain(stp->blocks); - stp->blocks = stp->to_space; - stp->n_blocks = stp->to_blocks; - stp->to_space = NULL; - stp->to_blocks = 0; - for (bd = stp->blocks; bd != NULL; bd = bd->link) { - bd->evacuated = 0; /* now from-space */ - } + if (stp->is_compacted) { + // for a compacted step, just shift the new to-space + // onto the front of the now-compacted existing blocks. + for (bd = stp->blocks; bd != NULL; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; // now from-space + } + // tack the new blocks on the end of the existing blocks + if (stp->old_blocks != NULL) { + for (bd = stp->old_blocks; bd != NULL; bd = next) { + // NB. this step might not be compacted next + // time, so reset the BF_COMPACTED flags. + // They are set before GC if we're going to + // compact. (search for BF_COMPACTED above). + bd->flags &= ~BF_COMPACTED; + next = bd->link; + if (next == NULL) { + bd->link = stp->blocks; + } + } + stp->blocks = stp->old_blocks; + } + // add the new blocks to the block tally + stp->n_blocks += stp->n_old_blocks; + ASSERT(countBlocks(stp->blocks) == stp->n_blocks); + } else { + freeChain(stp->old_blocks); + for (bd = stp->blocks; bd != NULL; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; // now from-space + } + } + stp->old_blocks = NULL; + stp->n_old_blocks = 0; } /* LARGE OBJECTS. The current live large objects are chained on @@ -587,29 +886,16 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) freeGroup(bd); bd = next; } - for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) { - bd->evacuated = 0; - } - stp->large_objects = stp->scavenged_large_objects; - /* Set the maximum blocks for this generation, interpolating - * between the maximum size of the oldest and youngest - * generations. 
- * - * max_blocks = oldgen_max_blocks * G - * ---------------------- - * oldest_gen - */ - if (g != 0) { -#if 0 - generations[g].max_blocks = (oldest_gen->max_blocks * g) - / (RtsFlags.GcFlags.generations-1); -#endif - generations[g].max_blocks = oldest_gen->max_blocks; + // update the count of blocks used by large objects + for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; } + stp->large_objects = stp->scavenged_large_objects; + stp->n_large_blocks = stp->n_scavenged_large_blocks; - /* for older generations... */ } else { + // for older generations... /* For older generations, we need to append the * scavenged_large_object list (i.e. large objects that have been @@ -617,17 +903,93 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) */ for (bd = stp->scavenged_large_objects; bd; bd = next) { next = bd->link; - bd->evacuated = 0; + bd->flags &= ~BF_EVACUATED; dbl_link_onto(bd, &stp->large_objects); } - /* add the new blocks we promoted during this GC */ - stp->n_blocks += stp->to_blocks; + // add the new blocks we promoted during this GC + stp->n_large_blocks += stp->n_scavenged_large_blocks; } } } - - /* Guess the amount of live data for stats. */ + + /* Reset the sizes of the older generations when we do a major + * collection. + * + * CURRENT STRATEGY: make all generations except zero the same size. + * We have to stay within the maximum heap size, and leave a certain + * percentage of the maximum heap size available to allocate into. + */ + if (major_gc && RtsFlags.GcFlags.generations > 1) { + nat live, size, min_alloc; + nat max = RtsFlags.GcFlags.maxHeapSize; + nat gens = RtsFlags.GcFlags.generations; + + // live in the oldest generations + live = oldest_gen->steps[0].n_blocks + + oldest_gen->steps[0].n_large_blocks; + + // default max size for all generations except zero + size = stg_max(live * RtsFlags.GcFlags.oldGenFactor, + RtsFlags.GcFlags.minOldGenSize); + + // minimum size for generation zero + min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200, + RtsFlags.GcFlags.minAllocAreaSize); + + // Auto-enable compaction when the residency reaches a + // certain percentage of the maximum heap size (default: 30%). + if (RtsFlags.GcFlags.generations > 1 && + (RtsFlags.GcFlags.compact || + (max > 0 && + oldest_gen->steps[0].n_blocks > + (RtsFlags.GcFlags.compactThreshold * max) / 100))) { + oldest_gen->steps[0].is_compacted = 1; +// debugBelch("compaction: on\n", live); + } else { + oldest_gen->steps[0].is_compacted = 0; +// debugBelch("compaction: off\n", live); + } + + // if we're going to go over the maximum heap size, reduce the + // size of the generations accordingly. The calculation is + // different if compaction is turned on, because we don't need + // to double the space required to collect the old generation. + if (max != 0) { + + // this test is necessary to ensure that the calculations + // below don't have any negative results - we're working + // with unsigned values here. 
+ if (max < min_alloc) { + heapOverflow(); + } + + if (oldest_gen->steps[0].is_compacted) { + if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) { + size = (max - min_alloc) / ((gens - 1) * 2 - 1); + } + } else { + if ( (size * (gens - 1) * 2) + min_alloc > max ) { + size = (max - min_alloc) / ((gens - 1) * 2); + } + } + + if (size < live) { + heapOverflow(); + } + } + +#if 0 + debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live, + min_alloc, size, max); +#endif + + for (g = 0; g < gens; g++) { + generations[g].max_blocks = size; + } + } + + // Guess the amount of live data for stats. live = calcLive(); /* Free the small objects allocated via allocate(), since this will @@ -642,25 +1004,50 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) alloc_HpLim = NULL; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; + // Start a new pinned_object_block + pinned_object_block = NULL; + + /* Free the mark stack. + */ + if (mark_stack_bdescr != NULL) { + freeGroup(mark_stack_bdescr); + } + + /* Free any bitmaps. + */ + for (g = 0; g <= N; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + stp = &generations[g].steps[s]; + if (stp->bitmap != NULL) { + freeGroup(stp->bitmap); + stp->bitmap = NULL; + } + } + } + /* Two-space collector: * Free the old to-space, and estimate the amount of live data. */ if (RtsFlags.GcFlags.generations == 1) { nat blocks; - if (old_to_space != NULL) { - freeChain(old_to_space); + if (g0s0->old_blocks != NULL) { + freeChain(g0s0->old_blocks); } - for (bd = g0s0->to_space; bd != NULL; bd = bd->link) { - bd->evacuated = 0; /* now from-space */ + for (bd = g0s0->blocks; bd != NULL; bd = bd->link) { + bd->flags = 0; // now from-space } + g0s0->old_blocks = g0s0->blocks; + g0s0->n_old_blocks = g0s0->n_blocks; + g0s0->blocks = saved_nursery; + g0s0->n_blocks = saved_n_blocks; /* For a two-space collector, we need to resize the nursery. */ /* set up a new nursery. Allocate a nursery size based on a - * function of the amount of live data (currently a factor of 2, - * should be configurable (ToDo)). Use the blocks from the old - * nursery if possible, freeing up any left over blocks. + * function of the amount of live data (by default a factor of 2) + * Use the blocks from the old nursery if possible, freeing up any + * left over blocks. * * If we get near the maximum heap size, then adjust our nursery * size accordingly. If the nursery is the same size as the live @@ -669,17 +1056,18 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) * * A normal 2-space collector would need 4L bytes to give the same * performance we get from 3L bytes, reducing to the same - * performance at 2L bytes. + * performance at 2L bytes. 
*/ - blocks = g0s0->to_blocks; + blocks = g0s0->n_old_blocks; - if ( blocks * RtsFlags.GcFlags.oldGenFactor * 2 > - RtsFlags.GcFlags.maxHeapSize ) { - int adjusted_blocks; /* signed on purpose */ + if ( RtsFlags.GcFlags.maxHeapSize != 0 && + blocks * RtsFlags.GcFlags.oldGenFactor * 2 > + RtsFlags.GcFlags.maxHeapSize ) { + long adjusted_blocks; // signed on purpose int pc_free; adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks); - IF_DEBUG(gc, fprintf(stderr, "@@ Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %d\n", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks)); + IF_DEBUG(gc, debugBelch("@@ Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks)); pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize; if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */ { heapOverflow(); @@ -692,7 +1080,7 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) blocks = RtsFlags.GcFlags.minAllocAreaSize; } } - resizeNursery(blocks); + resizeNurseries(blocks); } else { /* Generational collector: @@ -701,15 +1089,15 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) */ if (RtsFlags.GcFlags.heapSizeSuggestion) { - int blocks; - nat needed = calcNeeded(); /* approx blocks needed at next GC */ + long blocks; + nat needed = calcNeeded(); // approx blocks needed at next GC /* Guess how much will be live in generation 0 step 0 next time. - * A good approximation is the obtained by finding the + * A good approximation is obtained by finding the * percentage of g0s0 that was live at the last minor GC. */ if (N == 0) { - g0s0_pcnt_kept = (new_blocks * 100) / g0s0->n_blocks; + g0s0_pcnt_kept = (new_blocks * 100) / countNurseryBlocks(); } /* Estimate a size for the allocation area based on the @@ -725,58 +1113,74 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) * collection for collecting all steps except g0s0. */ blocks = - (((int)RtsFlags.GcFlags.heapSizeSuggestion - (int)needed) * 100) / - (100 + (int)g0s0_pcnt_kept); + (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) / + (100 + (long)g0s0_pcnt_kept); - if (blocks < (int)RtsFlags.GcFlags.minAllocAreaSize) { + if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) { blocks = RtsFlags.GcFlags.minAllocAreaSize; } - resizeNursery((nat)blocks); + resizeNurseries((nat)blocks); + + } else { + // we might have added extra large blocks to the nursery, so + // resize back to minAllocAreaSize again. + resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize); } } - /* mark the garbage collected CAFs as dead */ -#if 0 /* doesn't work at the moment */ -#if defined(DEBUG) + // mark the garbage collected CAFs as dead +#if 0 && defined(DEBUG) // doesn't work at the moment if (major_gc) { gcCAFs(); } #endif -#endif - /* zero the scavenged static object list */ +#ifdef PROFILING + // resetStaticObjectForRetainerProfiling() must be called before + // zeroing below. 
+ resetStaticObjectForRetainerProfiling(); +#endif + + // zero the scavenged static object list if (major_gc) { zero_static_object_list(scavenged_static_objects); } - /* Reset the nursery - */ + // Reset the nursery resetNurseries(); - /* start any pending finalizers */ - scheduleFinalizers(old_weak_ptr_list); + // start any pending finalizers + RELEASE_SM_LOCK; + scheduleFinalizers(last_free_capability, old_weak_ptr_list); + ACQUIRE_SM_LOCK; - /* send exceptions to any threads which were about to die */ + // send exceptions to any threads which were about to die + RELEASE_SM_LOCK; resurrectThreads(resurrected_threads); + ACQUIRE_SM_LOCK; + + // Update the stable pointer hash table. + updateStablePtrTable(major_gc); - /* check sanity after GC */ - IF_DEBUG(sanity, checkSanity(N)); + // check sanity after GC + IF_DEBUG(sanity, checkSanity()); - /* extra GC trace info */ - IF_DEBUG(gc, stat_describe_gens()); + // extra GC trace info + IF_DEBUG(gc, statDescribeGens()); #ifdef DEBUG - /* symbol-table based profiling */ - /* heapCensus(to_space); */ /* ToDo */ + // symbol-table based profiling + /* heapCensus(to_blocks); */ /* ToDo */ #endif - /* restore enclosing cost centre */ + // restore enclosing cost centre #ifdef PROFILING - heapCensus(); CCCS = prev_CCS; #endif - /* check for memory leaks if sanity checking is on */ - IF_DEBUG(sanity, memInventory()); +#ifdef DEBUG + // check for memory leaks if DEBUG is on + memInventory(); +#endif #ifdef RTS_GTK_FRONTPANEL if (RtsFlags.GcFlags.frontpanel) { @@ -784,12 +1188,19 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) } #endif - /* ok, GC over: tell the stats department what happened. */ - stat_endGC(allocated, collected, live, copied, N); + // ok, GC over: tell the stats department what happened. + stat_endGC(allocated, live, copied, scavd_copied, N); + +#if defined(RTS_USER_SIGNALS) + // unblock signals again + unblockUserSignals(); +#endif + + RELEASE_SM_LOCK; + + //PAR_TICKY_TP(); } -//@node Weak Pointers, Evacuation, Garbage Collect -//@subsection Weak Pointers /* ----------------------------------------------------------------------------- Weak Pointers @@ -809,8 +1220,31 @@ void GarbageCollect ( void (*get_roots)(void), rtsBool force_major_gc ) older generations than the one we're collecting. This could probably be optimised by keeping per-generation lists of weak pointers, but for a few weak pointers this scheme will work. + + There are three distinct stages to processing weak pointers: + + - weak_stage == WeakPtrs + + We process all the weak pointers whos keys are alive (evacuate + their values and finalizers), and repeat until we can find no new + live keys. If no live keys are found in this pass, then we + evacuate the finalizers of all the dead weak pointers in order to + run them. + + - weak_stage == WeakThreads + + Now, we discover which *threads* are still alive. Pointers to + threads from the all_threads and main thread lists are the + weakest of all: a pointers from the finalizer of a dead weak + pointer can keep a thread alive. Any threads found to be unreachable + are evacuated and placed on the resurrected_threads list so we + can send them a signal later. + + - weak_stage == WeakDone + + No more evacuation is done. 
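+
+     (traverse_weak_ptr_list() below moves through these stages; it is
+      called repeatedly from the scavenging loop in GarbageCollect()
+      until it returns rtsFalse.)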
+ -------------------------------------------------------------------------- */ -//@cindex traverse_weak_ptr_list static rtsBool traverse_weak_ptr_list(void) @@ -819,127 +1253,184 @@ traverse_weak_ptr_list(void) StgClosure *new; rtsBool flag = rtsFalse; - if (weak_done) { return rtsFalse; } - - /* doesn't matter where we evacuate values/finalizers to, since - * these pointers are treated as roots (iff the keys are alive). - */ - evac_gen = 0; + switch (weak_stage) { - last_w = &old_weak_ptr_list; - for (w = old_weak_ptr_list; w; w = next_w) { + case WeakDone: + return rtsFalse; - /* First, this weak pointer might have been evacuated. If so, - * remove the forwarding pointer from the weak_ptr_list. - */ - if (get_itbl(w)->type == EVACUATED) { - w = (StgWeak *)((StgEvacuated *)w)->evacuee; - *last_w = w; - } + case WeakPtrs: + /* doesn't matter where we evacuate values/finalizers to, since + * these pointers are treated as roots (iff the keys are alive). + */ + evac_gen = 0; + + last_w = &old_weak_ptr_list; + for (w = old_weak_ptr_list; w != NULL; w = next_w) { + + /* There might be a DEAD_WEAK on the list if finalizeWeak# was + * called on a live weak pointer object. Just remove it. + */ + if (w->header.info == &stg_DEAD_WEAK_info) { + next_w = ((StgDeadWeak *)w)->link; + *last_w = next_w; + continue; + } + + switch (get_itbl(w)->type) { - /* There might be a DEAD_WEAK on the list if finalizeWeak# was - * called on a live weak pointer object. Just remove it. - */ - if (w->header.info == &stg_DEAD_WEAK_info) { - next_w = ((StgDeadWeak *)w)->link; - *last_w = next_w; - continue; - } + case EVACUATED: + next_w = (StgWeak *)((StgEvacuated *)w)->evacuee; + *last_w = next_w; + continue; - ASSERT(get_itbl(w)->type == WEAK); + case WEAK: + /* Now, check whether the key is reachable. + */ + new = isAlive(w->key); + if (new != NULL) { + w->key = new; + // evacuate the value and finalizer + w->value = evacuate(w->value); + w->finalizer = evacuate(w->finalizer); + // remove this weak ptr from the old_weak_ptr list + *last_w = w->link; + // and put it on the new weak ptr list + next_w = w->link; + w->link = weak_ptr_list; + weak_ptr_list = w; + flag = rtsTrue; + IF_DEBUG(weak, debugBelch("Weak pointer still alive at %p -> %p", + w, w->key)); + continue; + } + else { + last_w = &(w->link); + next_w = w->link; + continue; + } - /* Now, check whether the key is reachable. - */ - if ((new = isAlive(w->key))) { - w->key = new; - /* evacuate the value and finalizer */ - w->value = evacuate(w->value); - w->finalizer = evacuate(w->finalizer); - /* remove this weak ptr from the old_weak_ptr list */ - *last_w = w->link; - /* and put it on the new weak ptr list */ - next_w = w->link; - w->link = weak_ptr_list; - weak_ptr_list = w; - flag = rtsTrue; - IF_DEBUG(weak, fprintf(stderr,"Weak pointer still alive at %p -> %p\n", w, w->key)); - continue; - } - else { - last_w = &(w->link); - next_w = w->link; - continue; - } - } + default: + barf("traverse_weak_ptr_list: not WEAK"); + } + } + + /* If we didn't make any changes, then we can go round and kill all + * the dead weak pointers. The old_weak_ptr list is used as a list + * of pending finalizers later on. + */ + if (flag == rtsFalse) { + for (w = old_weak_ptr_list; w; w = w->link) { + w->finalizer = evacuate(w->finalizer); + } - /* Now deal with the all_threads list, which behaves somewhat like - * the weak ptr list. If we discover any threads that are about to - * become garbage, we wake them up and administer an exception. 
- */ - { - StgTSO *t, *tmp, *next, **prev; + // Next, move to the WeakThreads stage after fully + // scavenging the finalizers we've just evacuated. + weak_stage = WeakThreads; + } - prev = &old_all_threads; - for (t = old_all_threads; t != END_TSO_QUEUE; t = next) { + return rtsTrue; - /* Threads which have finished or died get dropped from - * the list. + case WeakThreads: + /* Now deal with the all_threads list, which behaves somewhat like + * the weak ptr list. If we discover any threads that are about to + * become garbage, we wake them up and administer an exception. */ - switch (t->what_next) { - case ThreadRelocated: - next = t->link; - *prev = next; - continue; - case ThreadKilled: - case ThreadComplete: - next = t->global_link; - *prev = next; - continue; - default: ; - } - - /* Threads which have already been determined to be alive are - * moved onto the all_threads list. - */ - (StgClosure *)tmp = isAlive((StgClosure *)t); - if (tmp != NULL) { - next = tmp->global_link; - tmp->global_link = all_threads; - all_threads = tmp; - *prev = next; - } else { - prev = &(t->global_link); - next = t->global_link; - } - } - } + { + StgTSO *t, *tmp, *next, **prev; + + prev = &old_all_threads; + for (t = old_all_threads; t != END_TSO_QUEUE; t = next) { + + tmp = (StgTSO *)isAlive((StgClosure *)t); + + if (tmp != NULL) { + t = tmp; + } + + ASSERT(get_itbl(t)->type == TSO); + switch (t->what_next) { + case ThreadRelocated: + next = t->link; + *prev = next; + continue; + case ThreadKilled: + case ThreadComplete: + // finshed or died. The thread might still be alive, but we + // don't keep it on the all_threads list. Don't forget to + // stub out its global_link field. + next = t->global_link; + t->global_link = END_TSO_QUEUE; + *prev = next; + continue; + default: + ; + } + + // Threads blocked on black holes: if the black hole + // is alive, then the thread is alive too. + if (tmp == NULL && t->why_blocked == BlockedOnBlackHole) { + if (isAlive(t->block_info.closure)) { + t = (StgTSO *)evacuate((StgClosure *)t); + tmp = t; + flag = rtsTrue; + } + } - /* If we didn't make any changes, then we can go round and kill all - * the dead weak pointers. The old_weak_ptr list is used as a list - * of pending finalizers later on. - */ - if (flag == rtsFalse) { - cleanup_weak_ptr_list(&old_weak_ptr_list); - for (w = old_weak_ptr_list; w; w = w->link) { - w->finalizer = evacuate(w->finalizer); - } + if (tmp == NULL) { + // not alive (yet): leave this thread on the + // old_all_threads list. + prev = &(t->global_link); + next = t->global_link; + } + else { + // alive: move this thread onto the all_threads list. + next = t->global_link; + t->global_link = all_threads; + all_threads = t; + *prev = next; + } + } + } + + /* If we evacuated any threads, we need to go back to the scavenger. + */ + if (flag) return rtsTrue; - /* And resurrect any threads which were about to become garbage. - */ - { - StgTSO *t, *tmp, *next; - for (t = old_all_threads; t != END_TSO_QUEUE; t = next) { - next = t->global_link; - (StgClosure *)tmp = evacuate((StgClosure *)t); - tmp->global_link = resurrected_threads; - resurrected_threads = tmp; + /* And resurrect any threads which were about to become garbage. + */ + { + StgTSO *t, *tmp, *next; + for (t = old_all_threads; t != END_TSO_QUEUE; t = next) { + next = t->global_link; + tmp = (StgTSO *)evacuate((StgClosure *)t); + tmp->global_link = resurrected_threads; + resurrected_threads = tmp; + } + } + + /* Finally, we can update the blackhole_queue. 
This queue + * simply strings together TSOs blocked on black holes, it is + * not intended to keep anything alive. Hence, we do not follow + * pointers on the blackhole_queue until now, when we have + * determined which TSOs are otherwise reachable. We know at + * this point that all TSOs have been evacuated, however. + */ + { + StgTSO **pt; + for (pt = &blackhole_queue; *pt != END_TSO_QUEUE; pt = &((*pt)->link)) { + *pt = (StgTSO *)isAlive((StgClosure *)*pt); + ASSERT(*pt != NULL); + } } - } - weak_done = rtsTrue; + weak_stage = WeakDone; // *now* we're done, + return rtsTrue; // but one more round of scavenging, please + + default: + barf("traverse_weak_ptr_list"); + return rtsTrue; } - return rtsTrue; } /* ----------------------------------------------------------------------------- @@ -954,26 +1445,20 @@ traverse_weak_ptr_list(void) evacuated need to be evacuated now. -------------------------------------------------------------------------- */ -//@cindex cleanup_weak_ptr_list static void -cleanup_weak_ptr_list ( StgWeak **list ) +mark_weak_ptr_list ( StgWeak **list ) { StgWeak *w, **last_w; last_w = list; for (w = *list; w; w = w->link) { - - if (get_itbl(w)->type == EVACUATED) { - w = (StgWeak *)((StgEvacuated *)w)->evacuee; - *last_w = w; - } - - if (Bdescr((P_)w)->evacuated == 0) { - (StgClosure *)w = evacuate((StgClosure *)w); + // w might be WEAK, EVACUATED, or DEAD_WEAK (actually CON_STATIC) here + ASSERT(w->header.info == &stg_DEAD_WEAK_info + || get_itbl(w)->type == WEAK || get_itbl(w)->type == EVACUATED); + w = (StgWeak *)evacuate((StgClosure *)w); *last_w = w; - } - last_w = &(w->link); + last_w = &(w->link); } } @@ -981,126 +1466,157 @@ cleanup_weak_ptr_list ( StgWeak **list ) isAlive determines whether the given closure is still alive (after a garbage collection) or not. It returns the new address of the closure if it is alive, or NULL otherwise. + + NOTE: Use it before compaction only! -------------------------------------------------------------------------- */ -//@cindex isAlive StgClosure * isAlive(StgClosure *p) { const StgInfoTable *info; - nat size; + bdescr *bd; while (1) { + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl(p); - /* ToDo: for static closures, check the static link field. - * Problem here is that we sometimes don't set the link field, eg. - * for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. - */ + // ignore static closures + // + // ToDo: for static closures, check the static link field. + // Problem here is that we sometimes don't set the link field, eg. + // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. + // + if (!HEAP_ALLOCED(p)) { + return p; + } - /* ignore closures in generations that we're not collecting. */ - if (LOOKS_LIKE_STATIC(p) || Bdescr((P_)p)->gen->no > N) { - return p; + // ignore closures in generations that we're not collecting. 
+ bd = Bdescr((P_)p); + if (bd->gen_no > N) { + return p; } - + + // if it's a pointer into to-space, then we're done + if (bd->flags & BF_EVACUATED) { + return p; + } + + // large objects use the evacuated flag + if (bd->flags & BF_LARGE) { + return NULL; + } + + // check the mark bit for compacted steps + if ((bd->flags & BF_COMPACTED) && is_marked((P_)p,bd)) { + return p; + } + switch (info->type) { - + case IND: case IND_STATIC: case IND_PERM: - case IND_OLDGEN: /* rely on compatible layout with StgInd */ + case IND_OLDGEN: // rely on compatible layout with StgInd case IND_OLDGEN_PERM: - /* follow indirections */ + // follow indirections p = ((StgInd *)p)->indirectee; continue; - + case EVACUATED: - /* alive! */ + // alive! return ((StgEvacuated *)p)->evacuee; - case ARR_WORDS: - size = arr_words_sizeW((StgArrWords *)p); - goto large; - - case MUT_ARR_PTRS: - case MUT_ARR_PTRS_FROZEN: - size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p); - goto large; - case TSO: if (((StgTSO *)p)->what_next == ThreadRelocated) { p = (StgClosure *)((StgTSO *)p)->link; continue; - } - - size = tso_sizeW((StgTSO *)p); - large: - if (size >= LARGE_OBJECT_THRESHOLD/sizeof(W_) - && Bdescr((P_)p)->evacuated) - return p; - else - return NULL; + } + return NULL; default: - /* dead. */ + // dead. return NULL; } } } -//@cindex MarkRoot -StgClosure * -MarkRoot(StgClosure *root) +static void +mark_root(StgClosure **root) { -# if 0 && defined(PAR) && defined(DEBUG) - StgClosure *foo = evacuate(root); - // ASSERT(closure_STATIC(foo) || maybeLarge(foo) || Bdescr(foo)->evacuated); - ASSERT(isAlive(foo)); // must be in to-space - return foo; -# else - return evacuate(root); -# endif + *root = evacuate(*root); } -//@cindex addBlock -static void addBlock(step *stp) +STATIC_INLINE void +upd_evacuee(StgClosure *p, StgClosure *dest) { - bdescr *bd = allocBlock(); - bd->gen = stp->gen; - bd->step = stp; + // not true: (ToDo: perhaps it should be) + // ASSERT(Bdescr((P_)dest)->flags & BF_EVACUATED); + SET_INFO(p, &stg_EVACUATED_info); + ((StgEvacuated *)p)->evacuee = dest; +} - if (stp->gen->no <= N) { - bd->evacuated = 1; - } else { - bd->evacuated = 0; + +STATIC_INLINE StgClosure * +copy(StgClosure *src, nat size, step *stp) +{ + StgPtr to, from; + nat i; +#ifdef PROFILING + // @LDV profiling + nat size_org = size; +#endif + + TICK_GC_WORDS_COPIED(size); + /* Find out where we're going, using the handy "to" pointer in + * the step of the source object. If it turns out we need to + * evacuate to an older generation, adjust it here (see comment + * by evacuate()). + */ + if (stp->gen_no < evac_gen) { + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } - stp->hp_bd->free = stp->hp; - stp->hp_bd->link = bd; - stp->hp = bd->start; - stp->hpLim = stp->hp + BLOCK_SIZE_W; - stp->hp_bd = bd; - stp->to_blocks++; - new_blocks++; -} + /* chain a new block onto the to-space for the destination step if + * necessary. 
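    * (gc_alloc_block() chains on a fresh block and resets stp->hp and
    * stp->hpLim to point into it.)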
+ */ + if (stp->hp + size >= stp->hpLim) { + gc_alloc_block(stp); + } -//@cindex upd_evacuee + to = stp->hp; + from = (StgPtr)src; + stp->hp = to + size; + for (i = 0; i < size; i++) { // unroll for small i + to[i] = from[i]; + } + upd_evacuee((StgClosure *)from,(StgClosure *)to); -static __inline__ void -upd_evacuee(StgClosure *p, StgClosure *dest) -{ - p->header.info = &stg_EVACUATED_info; - ((StgEvacuated *)p)->evacuee = dest; +#ifdef PROFILING + // We store the size of the just evacuated object in the LDV word so that + // the profiler can guess the position of the next object later. + SET_EVACUAEE_FOR_LDV(from, size_org); +#endif + return (StgClosure *)to; } -//@cindex copy - -static __inline__ StgClosure * -copy(StgClosure *src, nat size, step *stp) +// Same as copy() above, except the object will be allocated in memory +// that will not be scavenged. Used for object that have no pointer +// fields. +STATIC_INLINE StgClosure * +copy_noscav(StgClosure *src, nat size, step *stp) { - P_ to, from, dest; + StgPtr to, from; + nat i; +#ifdef PROFILING + // @LDV profiling + nat size_org = size; +#endif TICK_GC_WORDS_COPIED(size); /* Find out where we're going, using the handy "to" pointer in @@ -1108,29 +1624,35 @@ copy(StgClosure *src, nat size, step *stp) * evacuate to an older generation, adjust it here (see comment * by evacuate()). */ - if (stp->gen->no < evac_gen) { -#ifdef NO_EAGER_PROMOTION - failed_to_evac = rtsTrue; -#else - stp = &generations[evac_gen].steps[0]; -#endif + if (stp->gen_no < evac_gen) { + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } /* chain a new block onto the to-space for the destination step if * necessary. */ - if (stp->hp + size >= stp->hpLim) { - addBlock(stp); + if (stp->scavd_hp + size >= stp->scavd_hpLim) { + gc_alloc_scavd_block(stp); } - for(to = stp->hp, from = (P_)src; size>0; --size) { - *to++ = *from++; + to = stp->scavd_hp; + from = (StgPtr)src; + stp->scavd_hp = to + size; + for (i = 0; i < size; i++) { // unroll for small i + to[i] = from[i]; } + upd_evacuee((StgClosure *)from,(StgClosure *)to); - dest = stp->hp; - stp->hp = to; - upd_evacuee(src,(StgClosure *)dest); - return (StgClosure *)dest; +#ifdef PROFILING + // We store the size of the just evacuated object in the LDV word so that + // the profiler can guess the position of the next object later. + SET_EVACUAEE_FOR_LDV(from, size_org); +#endif + return (StgClosure *)to; } /* Special version of copy() for when we only want to copy the info @@ -1138,24 +1660,27 @@ copy(StgClosure *src, nat size, step *stp) * used to optimise evacuation of BLACKHOLEs. 
*/ -//@cindex copyPart -static __inline__ StgClosure * +static StgClosure * copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) { P_ dest, to, from; +#ifdef PROFILING + // @LDV profiling + nat size_to_copy_org = size_to_copy; +#endif TICK_GC_WORDS_COPIED(size_to_copy); - if (stp->gen->no < evac_gen) { -#ifdef NO_EAGER_PROMOTION - failed_to_evac = rtsTrue; -#else - stp = &generations[evac_gen].steps[0]; -#endif + if (stp->gen_no < evac_gen) { + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } if (stp->hp + size_to_reserve >= stp->hpLim) { - addBlock(stp); + gc_alloc_block(stp); } for(to = stp->hp, from = (P_)src; size_to_copy>0; --size_to_copy) { @@ -1165,40 +1690,48 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) dest = stp->hp; stp->hp += size_to_reserve; upd_evacuee(src,(StgClosure *)dest); +#ifdef PROFILING + // We store the size of the just evacuated object in the LDV word so that + // the profiler can guess the position of the next object later. + // size_to_copy_org is wrong because the closure already occupies size_to_reserve + // words. + SET_EVACUAEE_FOR_LDV(src, size_to_reserve); + // fill the slop + if (size_to_reserve - size_to_copy_org > 0) + LDV_FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org)); +#endif return (StgClosure *)dest; } -//@node Evacuation, Scavenging, Weak Pointers -//@subsection Evacuation /* ----------------------------------------------------------------------------- Evacuate a large object This just consists of removing the object from the (doubly-linked) - large_alloc_list, and linking it on to the (singly-linked) - new_large_objects list, from where it will be scavenged later. + step->large_objects list, and linking it on to the (singly-linked) + step->new_large_objects list, from where it will be scavenged later. - Convention: bd->evacuated is /= 0 for a large object that has been - evacuated, or 0 otherwise. + Convention: bd->flags has BF_EVACUATED set for a large object + that has been evacuated, or unset otherwise. -------------------------------------------------------------------------- */ -//@cindex evacuate_large -static inline void -evacuate_large(StgPtr p, rtsBool mutable) +STATIC_INLINE void +evacuate_large(StgPtr p) { bdescr *bd = Bdescr(p); step *stp; - /* should point to the beginning of the block */ - ASSERT(((W_)p & BLOCK_MASK) == 0); - - /* already evacuated? */ - if (bd->evacuated) { + // object must be at the beginning of the block (or be a ByteArray) + ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS || + (((W_)p & BLOCK_MASK) == 0)); + + // already evacuated? + if (bd->flags & BF_EVACUATED) { /* Don't forget to set the failed_to_evac flag if we didn't get * the desired destination (see comments in evacuate()). 
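+     *
+     * The other half of that protocol lives in the scavenging code: after
+     * scavenging each object, scavenge() ends with (see below)
+     *
+     *     if (failed_to_evac) {
+     *         failed_to_evac = rtsFalse;
+     *         if (stp->gen_no > 0) {
+     *             recordMutableGen((StgClosure *)q, stp->gen);
+     *         }
+     *     }
+     *
+     * i.e. an object that still points into a younger generation is put
+     * on its generation's mutable list and re-examined at the next GC.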
*/ - if (bd->gen->no < evac_gen) { + if (bd->gen_no < evac_gen) { failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } @@ -1206,71 +1739,32 @@ evacuate_large(StgPtr p, rtsBool mutable) } stp = bd->step; - /* remove from large_object list */ - if (bd->back) { - bd->back->link = bd->link; - } else { /* first object in the list */ + // remove from large_object list + if (bd->u.back) { + bd->u.back->link = bd->link; + } else { // first object in the list stp->large_objects = bd->link; } if (bd->link) { - bd->link->back = bd->back; + bd->link->u.back = bd->u.back; } /* link it on to the evacuated large object list of the destination step */ stp = bd->step->to; - if (stp->gen->no < evac_gen) { -#ifdef NO_EAGER_PROMOTION - failed_to_evac = rtsTrue; -#else - stp = &generations[evac_gen].steps[0]; -#endif + if (stp->gen_no < evac_gen) { + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } bd->step = stp; - bd->gen = stp->gen; + bd->gen_no = stp->gen_no; bd->link = stp->new_large_objects; stp->new_large_objects = bd; - bd->evacuated = 1; - - if (mutable) { - recordMutable((StgMutClosure *)p); - } -} - -/* ----------------------------------------------------------------------------- - Adding a MUT_CONS to an older generation. - - This is necessary from time to time when we end up with an - old-to-new generation pointer in a non-mutable object. We defer - the promotion until the next GC. - -------------------------------------------------------------------------- */ - -//@cindex mkMutCons - -static StgClosure * -mkMutCons(StgClosure *ptr, generation *gen) -{ - StgMutVar *q; - step *stp; - - stp = &gen->steps[0]; - - /* chain a new block onto the to-space for the destination step if - * necessary. - */ - if (stp->hp + sizeofW(StgIndOldGen) >= stp->hpLim) { - addBlock(stp); - } - - q = (StgMutVar *)stp->hp; - stp->hp += sizeofW(StgMutVar); - - SET_HDR(q,&stg_MUT_CONS_info,CCS_GC); - q->var = ptr; - recordOldToNewPtrs((StgMutClosure *)q); - - return (StgClosure *)q; + bd->flags |= BF_EVACUATED; } /* ----------------------------------------------------------------------------- @@ -1296,65 +1790,169 @@ mkMutCons(StgClosure *ptr, generation *gen) if M < evac_gen set failed_to_evac flag to indicate that we didn't manage to evacuate this object into evac_gen. + + OPTIMISATION NOTES: + + evacuate() is the single most important function performance-wise + in the GC. Various things have been tried to speed it up, but as + far as I can tell the code generated by gcc 3.2 with -O2 is about + as good as it's going to get. We pass the argument to evacuate() + in a register using the 'regparm' attribute (see the prototype for + evacuate() near the top of this file). + + Changing evacuate() to take an (StgClosure **) rather than + returning the new pointer seems attractive, because we can avoid + writing back the pointer when it hasn't changed (eg. for a static + object, or an object in a generation > N). However, I tried it and + it doesn't help. One reason is that the (StgClosure **) pointer + gets spilled to the stack inside evacuate(), resulting in far more + extra reads/writes than we save. 
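+
+  As a usage sketch (this is how the scavenging code below drives it, not
+  a new interface):
+
+      evac_gen = 0;   // or whichever generation the caller needs to reach
+      ((StgClosure *)p)->payload[i] = evacuate(((StgClosure *)p)->payload[i]);
+      if (failed_to_evac) {
+          // the scavenged object must go on a mutable list; see scavenge()
+      }
+
+  evacuate() returns the new address of the object and leaves an
+  EVACUATED (forwarding) closure behind at the old address, so a later
+  evacuation of the same object simply returns the forwarding pointer.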
-------------------------------------------------------------------------- */ -//@cindex evacuate -static StgClosure * +REGPARM1 static StgClosure * evacuate(StgClosure *q) { +#if defined(PAR) StgClosure *to; +#endif bdescr *bd = NULL; step *stp; const StgInfoTable *info; loop: - if (HEAP_ALLOCED(q)) { - bd = Bdescr((P_)q); - if (bd->gen->no > N) { + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); + + if (!HEAP_ALLOCED(q)) { + + if (!major_gc) return q; + + info = get_itbl(q); + switch (info->type) { + + case THUNK_STATIC: + if (info->srt_bitmap != 0 && + *THUNK_STATIC_LINK((StgClosure *)q) == NULL) { + *THUNK_STATIC_LINK((StgClosure *)q) = static_objects; + static_objects = (StgClosure *)q; + } + return q; + + case FUN_STATIC: + if (info->srt_bitmap != 0 && + *FUN_STATIC_LINK((StgClosure *)q) == NULL) { + *FUN_STATIC_LINK((StgClosure *)q) = static_objects; + static_objects = (StgClosure *)q; + } + return q; + + case IND_STATIC: + /* If q->saved_info != NULL, then it's a revertible CAF - it'll be + * on the CAF list, so don't do anything with it here (we'll + * scavenge it later). + */ + if (((StgIndStatic *)q)->saved_info == NULL + && *IND_STATIC_LINK((StgClosure *)q) == NULL) { + *IND_STATIC_LINK((StgClosure *)q) = static_objects; + static_objects = (StgClosure *)q; + } + return q; + + case CONSTR_STATIC: + if (*STATIC_LINK(info,(StgClosure *)q) == NULL) { + *STATIC_LINK(info,(StgClosure *)q) = static_objects; + static_objects = (StgClosure *)q; + } + return q; + + case CONSTR_INTLIKE: + case CONSTR_CHARLIKE: + case CONSTR_NOCAF_STATIC: + /* no need to put these on the static linked list, they don't need + * to be scavenged. + */ + return q; + + default: + barf("evacuate(static): strange closure type %d", (int)(info->type)); + } + } + + bd = Bdescr((P_)q); + + if (bd->gen_no > N) { /* Can't evacuate this object, because it's in a generation * older than the ones we're collecting. Let's hope that it's - * in evac_gen or older, or we will have to make an IND_OLDGEN object. + * in evac_gen or older, or we will have to arrange to track + * this pointer using the mutable list. */ - if (bd->gen->no < evac_gen) { - /* nope */ - failed_to_evac = rtsTrue; - TICK_GC_FAILED_PROMOTION(); + if (bd->gen_no < evac_gen) { + // nope + failed_to_evac = rtsTrue; + TICK_GC_FAILED_PROMOTION(); } return q; - } - stp = bd->step->to; } -#ifdef DEBUG - else stp = NULL; /* make sure copy() will crash if HEAP_ALLOCED is wrong */ -#endif - /* make sure the info pointer is into text space */ - ASSERT(q && (LOOKS_LIKE_GHC_INFO(GET_INFO(q)) - || IS_HUGS_CONSTR_INFO(GET_INFO(q)))); - info = get_itbl(q); - /* - if (info->type==RBH) { - info = REVERT_INFOPTR(info); - IF_DEBUG(gc, - belch("@_ Trying to evacuate an RBH %p (%s); reverting to IP %p (%s)", - q, info_type(q), info, info_type_by_ip(info))); + if ((bd->flags & (BF_LARGE | BF_COMPACTED | BF_EVACUATED)) != 0) { + + /* pointer into to-space: just return it. This normally + * shouldn't happen, but alllowing it makes certain things + * slightly easier (eg. the mutable list can contain the same + * object twice, for example). + */ + if (bd->flags & BF_EVACUATED) { + if (bd->gen_no < evac_gen) { + failed_to_evac = rtsTrue; + TICK_GC_FAILED_PROMOTION(); + } + return q; + } + + /* evacuate large objects by re-linking them onto a different list. 
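+	 *
+	 * "Re-linking" means the payload itself never moves:
+	 * evacuate_large() (above) just unhooks the object's block
+	 * descriptor from stp->large_objects and chains it onto the
+	 * destination step, roughly
+	 *
+	 *     bd->step   = stp;                    // stp = bd->step->to
+	 *     bd->gen_no = stp->gen_no;
+	 *     bd->link   = stp->new_large_objects;
+	 *     stp->new_large_objects = bd;
+	 *     bd->flags |= BF_EVACUATED;
+	 *
+	 * so the pointer q returned below is unchanged.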
+ */ + if (bd->flags & BF_LARGE) { + info = get_itbl(q); + if (info->type == TSO && + ((StgTSO *)q)->what_next == ThreadRelocated) { + q = (StgClosure *)((StgTSO *)q)->link; + goto loop; + } + evacuate_large((P_)q); + return q; + } + + /* If the object is in a step that we're compacting, then we + * need to use an alternative evacuate procedure. + */ + if (bd->flags & BF_COMPACTED) { + if (!is_marked((P_)q,bd)) { + mark((P_)q,bd); + if (mark_stack_full()) { + mark_stack_overflowed = rtsTrue; + reset_mark_stack(); + } + push_mark_stack((P_)q); + } + return q; + } } - */ + + stp = bd->step->to; + + info = get_itbl(q); - switch (info -> type) { + switch (info->type) { - case MUT_VAR: - ASSERT(q->header.info != &stg_MUT_CONS_info); + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: case MVAR: - to = copy(q,sizeW_fromITBL(info),stp); - recordMutable((StgMutClosure *)to); - return to; + return copy(q,sizeW_fromITBL(info),stp); case CONSTR_0_1: { StgWord w = (StgWord)q->payload[0]; if (q->header.info == Czh_con_info && - /* unsigned, so always true: (StgChar)w >= MIN_CHARLIKE && */ + // unsigned, so always true: (StgChar)w >= MIN_CHARLIKE && (StgChar)w <= MAX_CHARLIKE) { return (StgClosure *)CHARLIKE_CLOSURE((StgChar)w); } @@ -1362,192 +1960,115 @@ loop: (StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) { return (StgClosure *)INTLIKE_CLOSURE((StgInt)w); } - /* else, fall through ... */ + // else + return copy_noscav(q,sizeofW(StgHeader)+1,stp); } - case FUN_1_0: case FUN_0_1: + case FUN_1_0: case CONSTR_1_0: return copy(q,sizeofW(StgHeader)+1,stp); - case THUNK_1_0: /* here because of MIN_UPD_SIZE */ + case THUNK_1_0: case THUNK_0_1: + return copy(q,sizeofW(StgThunk)+1,stp); + case THUNK_1_1: - case THUNK_0_2: case THUNK_2_0: + case THUNK_0_2: #ifdef NO_PROMOTE_THUNKS - if (bd->gen->no == 0 && + if (bd->gen_no == 0 && bd->step->no != 0 && - bd->step->no == bd->gen->n_steps-1) { + bd->step->no == generations[bd->gen_no].n_steps-1) { stp = bd->step; } #endif - return copy(q,sizeofW(StgHeader)+2,stp); + return copy(q,sizeofW(StgThunk)+2,stp); case FUN_1_1: - case FUN_0_2: case FUN_2_0: case CONSTR_1_1: - case CONSTR_0_2: case CONSTR_2_0: + case FUN_0_2: return copy(q,sizeofW(StgHeader)+2,stp); - case FUN: + case CONSTR_0_2: + return copy_noscav(q,sizeofW(StgHeader)+2,stp); + case THUNK: + return copy(q,thunk_sizeW_fromITBL(info),stp); + + case FUN: case CONSTR: case IND_PERM: case IND_OLDGEN_PERM: case WEAK: - case FOREIGN: case STABLE_NAME: - case BCO: return copy(q,sizeW_fromITBL(info),stp); + case BCO: + return copy(q,bco_sizeW((StgBCO *)q),stp); + case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: case SE_BLACKHOLE: case BLACKHOLE: return copyPart(q,BLACKHOLE_sizeW(),sizeofW(StgHeader),stp); - case BLACKHOLE_BQ: - to = copy(q,BLACKHOLE_sizeW(),stp); - recordMutable((StgMutClosure *)to); - return to; - case THUNK_SELECTOR: { - const StgInfoTable* selectee_info; - StgClosure* selectee = ((StgSelector*)q)->selectee; + StgClosure *p; + const StgInfoTable *info_ptr; - selector_loop: - selectee_info = get_itbl(selectee); - switch (selectee_info->type) { - case CONSTR: - case CONSTR_1_0: - case CONSTR_0_1: - case CONSTR_2_0: - case CONSTR_1_1: - case CONSTR_0_2: - case CONSTR_STATIC: - { - StgWord32 offset = info->layout.selector_offset; + if (thunk_selector_depth > MAX_THUNK_SELECTOR_DEPTH) { + return copy(q,THUNK_SELECTOR_sizeW(),stp); + } - /* check that the size is in range */ - ASSERT(offset < - (StgWord32)(selectee_info->layout.payload.ptrs + - selectee_info->layout.payload.nptrs)); + // stashed away for 
LDV profiling, see below + info_ptr = q->header.info; - /* perform the selection! */ - q = selectee->payload[offset]; + p = eval_thunk_selector(info->layout.selector_offset, + (StgSelector *)q); - /* if we're already in to-space, there's no need to continue - * with the evacuation, just update the source address with - * a pointer to the (evacuated) constructor field. - */ - if (HEAP_ALLOCED(q)) { - bdescr *bd = Bdescr((P_)q); - if (bd->evacuated) { - if (bd->gen->no < evac_gen) { - failed_to_evac = rtsTrue; - TICK_GC_FAILED_PROMOTION(); - } - return q; - } - } + if (p == NULL) { + return copy(q,THUNK_SELECTOR_sizeW(),stp); + } else { + StgClosure *val; + // q is still BLACKHOLE'd. + thunk_selector_depth++; + val = evacuate(p); + thunk_selector_depth--; - /* otherwise, carry on and evacuate this constructor field, - * (but not the constructor itself) - */ - goto loop; - } +#ifdef PROFILING + // For the purposes of LDV profiling, we have destroyed + // the original selector thunk. + SET_INFO(q, info_ptr); + LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(q); +#endif - case IND: - case IND_STATIC: - case IND_PERM: - case IND_OLDGEN: - case IND_OLDGEN_PERM: - selectee = ((StgInd *)selectee)->indirectee; - goto selector_loop; + // Update the THUNK_SELECTOR with an indirection to the + // EVACUATED closure now at p. Why do this rather than + // upd_evacuee(q,p)? Because we have an invariant that an + // EVACUATED closure always points to an object in the + // same or an older generation (required by the short-cut + // test in the EVACUATED case, below). + SET_INFO(q, &stg_IND_info); + ((StgInd *)q)->indirectee = p; - case EVACUATED: - selectee = ((StgEvacuated *)selectee)->evacuee; - goto selector_loop; + // For the purposes of LDV profiling, we have created an + // indirection. + LDV_RECORD_CREATE(q); - case AP_UPD: - case THUNK: - case THUNK_1_0: - case THUNK_0_1: - case THUNK_2_0: - case THUNK_1_1: - case THUNK_0_2: - case THUNK_STATIC: - case THUNK_SELECTOR: - /* aargh - do recursively???? */ - case CAF_BLACKHOLE: - case SE_CAF_BLACKHOLE: - case SE_BLACKHOLE: - case BLACKHOLE: - case BLACKHOLE_BQ: - /* not evaluated yet */ - break; - - default: - barf("evacuate: THUNK_SELECTOR: strange selectee %d", - (int)(selectee_info->type)); - } + return val; + } } - return copy(q,THUNK_SELECTOR_sizeW(),stp); case IND: case IND_OLDGEN: - /* follow chains of indirections, don't evacuate them */ + // follow chains of indirections, don't evacuate them q = ((StgInd*)q)->indirectee; goto loop; - case THUNK_STATIC: - if (info->srt_len > 0 && major_gc && - THUNK_STATIC_LINK((StgClosure *)q) == NULL) { - THUNK_STATIC_LINK((StgClosure *)q) = static_objects; - static_objects = (StgClosure *)q; - } - return q; - - case FUN_STATIC: - if (info->srt_len > 0 && major_gc && - FUN_STATIC_LINK((StgClosure *)q) == NULL) { - FUN_STATIC_LINK((StgClosure *)q) = static_objects; - static_objects = (StgClosure *)q; - } - return q; - - case IND_STATIC: - /* a revertible CAF - it'll be on the CAF list, so don't do - * anything with it here (we'll scavenge it later). 
- */ - if (((StgIndStatic *)q)->saved_info != NULL) { - return q; - } - if (major_gc && IND_STATIC_LINK((StgClosure *)q) == NULL) { - IND_STATIC_LINK((StgClosure *)q) = static_objects; - static_objects = (StgClosure *)q; - } - return q; - - case CONSTR_STATIC: - if (major_gc && STATIC_LINK(info,(StgClosure *)q) == NULL) { - STATIC_LINK(info,(StgClosure *)q) = static_objects; - static_objects = (StgClosure *)q; - } - return q; - - case CONSTR_INTLIKE: - case CONSTR_CHARLIKE: - case CONSTR_NOCAF_STATIC: - /* no need to put these on the static linked list, they don't need - * to be scavenged. - */ - return q; - case RET_BCO: case RET_SMALL: case RET_VEC_SMALL: @@ -1557,28 +2078,20 @@ loop: case UPDATE_FRAME: case STOP_FRAME: case CATCH_FRAME: - case SEQ_FRAME: - /* shouldn't see these */ + case CATCH_STM_FRAME: + case CATCH_RETRY_FRAME: + case ATOMICALLY_FRAME: + // shouldn't see these barf("evacuate: stack frame at %p\n", q); - case AP_UPD: case PAP: - /* PAPs and AP_UPDs are special - the payload is a copy of a chunk - * of stack, tagging and all. - * - * They can be larger than a block in size. Both are only - * allocated via allocate(), so they should be chained on to the - * large_object list. - */ - { - nat size = pap_sizeW((StgPAP*)q); - if (size >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - evacuate_large((P_)q, rtsFalse); - return q; - } else { - return copy(q,size,stp); - } - } + return copy(q,pap_sizeW((StgPAP*)q),stp); + + case AP: + return copy(q,ap_sizeW((StgAP*)q),stp); + + case AP_STACK: + return copy(q,ap_stack_sizeW((StgAP_STACK*)q),stp); case EVACUATED: /* Already evacuated, just return the forwarding address. @@ -1588,10 +2101,18 @@ loop: * set the failed_to_evac flag to indicate that we couldn't * manage to promote the object to the desired generation. */ - if (evac_gen > 0) { /* optimisation */ + /* + * Optimisation: the check is fairly expensive, but we can often + * shortcut it if either the required generation is 0, or the + * current object (the EVACUATED) is in a high enough generation. + * We know that an EVACUATED always points to an object in the + * same or an older generation. stp is the lowest step that the + * current object would be evacuated to, so we only do the full + * check if stp is too low. 
+ */ + if (evac_gen > 0 && stp->gen_no < evac_gen) { // optimisation StgClosure *p = ((StgEvacuated*)q)->evacuee; - if (Bdescr((P_)p)->gen->no < evac_gen) { - IF_DEBUG(gc, belch("@@ evacuate: evac of EVACUATED node %p failed!", p)); + if (HEAP_ALLOCED(p) && Bdescr((P_)p)->gen_no < evac_gen) { failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } @@ -1599,41 +2120,19 @@ loop: return ((StgEvacuated*)q)->evacuee; case ARR_WORDS: - { - nat size = arr_words_sizeW((StgArrWords *)q); - - if (size >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - evacuate_large((P_)q, rtsFalse); - return q; - } else { - /* just copy the block */ - return copy(q,size,stp); - } - } + // just copy the block + return copy_noscav(q,arr_words_sizeW((StgArrWords *)q),stp); - case MUT_ARR_PTRS: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: case MUT_ARR_PTRS_FROZEN: - { - nat size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)q); - - if (size >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - evacuate_large((P_)q, info->type == MUT_ARR_PTRS); - to = q; - } else { - /* just copy the block */ - to = copy(q,size,stp); - if (info->type == MUT_ARR_PTRS) { - recordMutable((StgMutClosure *)to); - } - } - return to; - } + case MUT_ARR_PTRS_FROZEN0: + // just copy the block + return copy(q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),stp); case TSO: { StgTSO *tso = (StgTSO *)q; - nat size = tso_sizeW(tso); - int diff; /* Deal with redirected TSOs (a TSO that's had its stack enlarged). */ @@ -1642,70 +2141,79 @@ loop: goto loop; } - /* Large TSOs don't get moved, so no relocation is required. - */ - if (size >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - evacuate_large((P_)q, rtsTrue); - return q; - /* To evacuate a small TSO, we need to relocate the update frame * list it contains. */ - } else { - StgTSO *new_tso = (StgTSO *)copy((StgClosure *)tso,tso_sizeW(tso),stp); - - diff = (StgPtr)new_tso - (StgPtr)tso; /* In *words* */ - - /* relocate the stack pointers... */ - new_tso->su = (StgUpdateFrame *) ((StgPtr)new_tso->su + diff); - new_tso->sp = (StgPtr)new_tso->sp + diff; - - relocate_TSO(tso, new_tso); - - recordMutable((StgMutClosure *)new_tso); - return (StgClosure *)new_tso; + { + StgTSO *new_tso; + StgPtr p, q; + + new_tso = (StgTSO *)copyPart((StgClosure *)tso, + tso_sizeW(tso), + sizeofW(StgTSO), stp); + move_TSO(tso, new_tso); + for (p = tso->sp, q = new_tso->sp; + p < tso->stack+tso->stack_size;) { + *q++ = *p++; + } + + return (StgClosure *)new_tso; } } #if defined(PAR) - case RBH: // cf. 
BLACKHOLE_BQ + case RBH: { //StgInfoTable *rip = get_closure_info(q, &size, &ptrs, &nonptrs, &vhs, str); to = copy(q,BLACKHOLE_sizeW(),stp); //ToDo: derive size etc from reverted IP //to = copy(q,size,stp); - recordMutable((StgMutClosure *)to); IF_DEBUG(gc, - belch("@@ evacuate: RBH %p (%s) to %p (%s)", + debugBelch("@@ evacuate: RBH %p (%s) to %p (%s)", q, info_type(q), to, info_type(to))); return to; } case BLOCKED_FETCH: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_NONUPD_SIZE); + ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOD_SIZE); to = copy(q,sizeofW(StgBlockedFetch),stp); IF_DEBUG(gc, - belch("@@ evacuate: %p (%s) to %p (%s)", + debugBelch("@@ evacuate: %p (%s) to %p (%s)", q, info_type(q), to, info_type(to))); return to; +# ifdef DIST + case REMOTE_REF: +# endif case FETCH_ME: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE); + ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOAD_SIZE); to = copy(q,sizeofW(StgFetchMe),stp); IF_DEBUG(gc, - belch("@@ evacuate: %p (%s) to %p (%s)", + debugBelch("@@ evacuate: %p (%s) to %p (%s)", q, info_type(q), to, info_type(to))); return to; case FETCH_ME_BQ: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE); + ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOAD_SIZE); to = copy(q,sizeofW(StgFetchMeBlockingQueue),stp); IF_DEBUG(gc, - belch("@@ evacuate: %p (%s) to %p (%s)", + debugBelch("@@ evacuate: %p (%s) to %p (%s)", q, info_type(q), to, info_type(to))); return to; #endif + case TREC_HEADER: + return copy(q,sizeofW(StgTRecHeader),stp); + + case TVAR_WAIT_QUEUE: + return copy(q,sizeofW(StgTVarWaitQueue),stp); + + case TVAR: + return copy(q,sizeofW(StgTVar),stp); + + case TREC_CHUNK: + return copy(q,sizeofW(StgTRecChunk),stp); + default: barf("evacuate: strange closure type %d", (int)(info->type)); } @@ -1714,96 +2222,369 @@ loop: } /* ----------------------------------------------------------------------------- - relocate_TSO is called just after a TSO has been copied from src to - dest. It adjusts the update frame list for the new location. + Evaluate a THUNK_SELECTOR if possible. + + returns: NULL if we couldn't evaluate this THUNK_SELECTOR, or + a closure pointer if we evaluated it and this is the result. Note + that "evaluating" the THUNK_SELECTOR doesn't necessarily mean + reducing it to HNF, just that we have eliminated the selection. + The result might be another thunk, or even another THUNK_SELECTOR. + + If the return value is non-NULL, the original selector thunk has + been BLACKHOLE'd, and should be updated with an indirection or a + forwarding pointer. If the return value is NULL, then the selector + thunk is unchanged. + + *** + ToDo: the treatment of THUNK_SELECTORS could be improved in the + following way (from a suggestion by Ian Lynagh): + + We can have a chain like this: + + sel_0 --> (a,b) + | + |-----> sel_0 --> (a,b) + | + |-----> sel_0 --> ... + + and the depth limit means we don't go all the way to the end of the + chain, which results in a space leak. This affects the recursive + call to evacuate() in the THUNK_SELECTOR case in evacuate(): *not* + the recursive call to eval_thunk_selector() in + eval_thunk_selector(). + + We could eliminate the depth bound in this case, in the following + way: + + - traverse the chain once to discover the *value* of the + THUNK_SELECTOR. Mark all THUNK_SELECTORS that we + visit on the way as having been visited already (somehow). + + - in a second pass, traverse the chain again updating all + THUNK_SEELCTORS that we find on the way with indirections to + the value. 
+ + - if we encounter a "marked" THUNK_SELECTOR in a normal + evacuate(), we konw it can't be updated so just evac it. + + Program that illustrates the problem: + + foo [] = ([], []) + foo (x:xs) = let (ys, zs) = foo xs + in if x >= 0 then (x:ys, zs) else (ys, x:zs) + + main = bar [1..(100000000::Int)] + bar xs = (\(ys, zs) -> print ys >> print zs) (foo xs) + -------------------------------------------------------------------------- */ -//@cindex relocate_TSO -StgTSO * -relocate_TSO(StgTSO *src, StgTSO *dest) +static inline rtsBool +is_to_space ( StgClosure *p ) { - StgUpdateFrame *su; - StgCatchFrame *cf; - StgSeqFrame *sf; - int diff; + bdescr *bd; + + bd = Bdescr((StgPtr)p); + if (HEAP_ALLOCED(p) && + ((bd->flags & BF_EVACUATED) + || ((bd->flags & BF_COMPACTED) && + is_marked((P_)p,bd)))) { + return rtsTrue; + } else { + return rtsFalse; + } +} - diff = (StgPtr)dest->sp - (StgPtr)src->sp; /* In *words* */ +static StgClosure * +eval_thunk_selector( nat field, StgSelector * p ) +{ + StgInfoTable *info; + const StgInfoTable *info_ptr; + StgClosure *selectee; + + selectee = p->selectee; - su = dest->su; + // Save the real info pointer (NOTE: not the same as get_itbl()). + info_ptr = p->header.info; - while ((P_)su < dest->stack + dest->stack_size) { - switch (get_itbl(su)->type) { - - /* GCC actually manages to common up these three cases! */ + // If the THUNK_SELECTOR is in a generation that we are not + // collecting, then bail out early. We won't be able to save any + // space in any case, and updating with an indirection is trickier + // in an old gen. + if (Bdescr((StgPtr)p)->gen_no > N) { + return NULL; + } - case UPDATE_FRAME: - su->link = (StgUpdateFrame *) ((StgPtr)su->link + diff); - su = su->link; - continue; + // BLACKHOLE the selector thunk, since it is now under evaluation. + // This is important to stop us going into an infinite loop if + // this selector thunk eventually refers to itself. + SET_INFO(p,&stg_BLACKHOLE_info); + +selector_loop: + + // We don't want to end up in to-space, because this causes + // problems when the GC later tries to evacuate the result of + // eval_thunk_selector(). There are various ways this could + // happen: + // + // 1. following an IND_STATIC + // + // 2. when the old generation is compacted, the mark phase updates + // from-space pointers to be to-space pointers, and we can't + // reliably tell which we're following (eg. from an IND_STATIC). + // + // 3. compacting GC again: if we're looking at a constructor in + // the compacted generation, it might point directly to objects + // in to-space. We must bale out here, otherwise doing the selection + // will result in a to-space pointer being returned. + // + // (1) is dealt with using a BF_EVACUATED test on the + // selectee. (2) and (3): we can tell if we're looking at an + // object in the compacted generation that might point to + // to-space objects by testing that (a) it is BF_COMPACTED, (b) + // the compacted generation is being collected, and (c) the + // object is marked. Only a marked object may have pointers that + // point to to-space objects, because that happens when + // scavenging. + // + // The to-space test is now embodied in the in_to_space() inline + // function, as it is re-used below. 
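+    //
+    // What the loop below boils down to, as a sketch:
+    //
+    //     IND*            -> follow ->indirectee and try again
+    //     CONSTR*         -> return selectee->payload[field]
+    //                        (unless that word is in to-space)
+    //     THUNK_SELECTOR  -> evaluate it first, bounded by
+    //                        MAX_THUNK_SELECTOR_DEPTH
+    //     anything else   -> not evaluated yet, give up (return NULL)
+    //
+    // e.g. a thunk for 'snd pair' is a THUNK_SELECTOR with field == 1;
+    // if 'pair' is already a constructor we can hand back its second
+    // payload word without copying the selector thunk at all.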
+ // + if (is_to_space(selectee)) { + goto bale_out; + } - case CATCH_FRAME: - cf = (StgCatchFrame *)su; - cf->link = (StgUpdateFrame *) ((StgPtr)cf->link + diff); - su = cf->link; - continue; + info = get_itbl(selectee); + switch (info->type) { + case CONSTR: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_2_0: + case CONSTR_1_1: + case CONSTR_0_2: + case CONSTR_STATIC: + case CONSTR_NOCAF_STATIC: + // check that the size is in range + ASSERT(field < (StgWord32)(info->layout.payload.ptrs + + info->layout.payload.nptrs)); + + // Select the right field from the constructor, and check + // that the result isn't in to-space. It might be in + // to-space if, for example, this constructor contains + // pointers to younger-gen objects (and is on the mut-once + // list). + // + { + StgClosure *q; + q = selectee->payload[field]; + if (is_to_space(q)) { + goto bale_out; + } else { + return q; + } + } - case SEQ_FRAME: - sf = (StgSeqFrame *)su; - sf->link = (StgUpdateFrame *) ((StgPtr)sf->link + diff); - su = sf->link; - continue; + case IND: + case IND_PERM: + case IND_OLDGEN: + case IND_OLDGEN_PERM: + case IND_STATIC: + selectee = ((StgInd *)selectee)->indirectee; + goto selector_loop; - case STOP_FRAME: - /* all done! */ - break; + case EVACUATED: + // We don't follow pointers into to-space; the constructor + // has already been evacuated, so we won't save any space + // leaks by evaluating this selector thunk anyhow. + break; - default: - barf("relocate_TSO %d", (int)(get_itbl(su)->type)); + case THUNK_SELECTOR: + { + StgClosure *val; + + // check that we don't recurse too much, re-using the + // depth bound also used in evacuate(). + if (thunk_selector_depth >= MAX_THUNK_SELECTOR_DEPTH) { + break; + } + thunk_selector_depth++; + + val = eval_thunk_selector(info->layout.selector_offset, + (StgSelector *)selectee); + + thunk_selector_depth--; + + if (val == NULL) { + break; + } else { + // We evaluated this selector thunk, so update it with + // an indirection. NOTE: we don't use UPD_IND here, + // because we are guaranteed that p is in a generation + // that we are collecting, and we never want to put the + // indirection on a mutable list. +#ifdef PROFILING + // For the purposes of LDV profiling, we have destroyed + // the original selector thunk. + SET_INFO(p, info_ptr); + LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(selectee); +#endif + ((StgInd *)selectee)->indirectee = val; + SET_INFO(selectee,&stg_IND_info); + + // For the purposes of LDV profiling, we have created an + // indirection. + LDV_RECORD_CREATE(selectee); + + selectee = val; + goto selector_loop; + } + } + + case AP: + case AP_STACK: + case THUNK: + case THUNK_1_0: + case THUNK_0_1: + case THUNK_2_0: + case THUNK_1_1: + case THUNK_0_2: + case THUNK_STATIC: + case CAF_BLACKHOLE: + case SE_CAF_BLACKHOLE: + case SE_BLACKHOLE: + case BLACKHOLE: +#if defined(PAR) + case RBH: + case BLOCKED_FETCH: +# ifdef DIST + case REMOTE_REF: +# endif + case FETCH_ME: + case FETCH_ME_BQ: +#endif + // not evaluated yet + break; + + default: + barf("eval_thunk_selector: strange selectee %d", + (int)(info->type)); } - break; - } - return dest; +bale_out: + // We didn't manage to evaluate this thunk; restore the old info pointer + SET_INFO(p, info_ptr); + return NULL; } -//@node Scavenging, Reverting CAFs, Evacuation -//@subsection Scavenging +/* ----------------------------------------------------------------------------- + move_TSO is called to update the TSO structure after it has been + moved from one place to another. 
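+
+   For example (assuming a 32-bit word, so sizeof(W_) == 4): if the TSO
+   was copied from src == 0x1000 to dest == 0x2000, then
+
+       diff = (StgPtr)dest - (StgPtr)src;   // 0x400 words
+       dest->sp = (StgPtr)dest->sp + diff;  // e.g. 0x1800 -> 0x2800
+
+   so dest->sp points into the copied stack rather than the old one.
+   The stack words themselves are copied by the caller; see the TSO case
+   in evacuate().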
+ -------------------------------------------------------------------------- */ + +void +move_TSO (StgTSO *src, StgTSO *dest) +{ + ptrdiff_t diff; + + // relocate the stack pointer... + diff = (StgPtr)dest - (StgPtr)src; // In *words* + dest->sp = (StgPtr)dest->sp + diff; +} -//@cindex scavenge_srt +/* Similar to scavenge_large_bitmap(), but we don't write back the + * pointers we get back from evacuate(). + */ +static void +scavenge_large_srt_bitmap( StgLargeSRT *large_srt ) +{ + nat i, b, size; + StgWord bitmap; + StgClosure **p; + + b = 0; + bitmap = large_srt->l.bitmap[b]; + size = (nat)large_srt->l.size; + p = (StgClosure **)large_srt->srt; + for (i = 0; i < size; ) { + if ((bitmap & 1) != 0) { + evacuate(*p); + } + i++; + p++; + if (i % BITS_IN(W_) == 0) { + b++; + bitmap = large_srt->l.bitmap[b]; + } else { + bitmap = bitmap >> 1; + } + } +} -static inline void -scavenge_srt(const StgInfoTable *info) +/* evacuate the SRT. If srt_bitmap is zero, then there isn't an + * srt field in the info table. That's ok, because we'll + * never dereference it. + */ +STATIC_INLINE void +scavenge_srt (StgClosure **srt, nat srt_bitmap) { - StgClosure **srt, **srt_end; + nat bitmap; + StgClosure **p; - /* evacuate the SRT. If srt_len is zero, then there isn't an - * srt field in the info table. That's ok, because we'll - * never dereference it. - */ - srt = (StgClosure **)(info->srt); - srt_end = srt + info->srt_len; - for (; srt < srt_end; srt++) { - /* Special-case to handle references to closures hiding out in DLLs, since - double indirections required to get at those. The code generator knows - which is which when generating the SRT, so it stores the (indirect) - reference to the DLL closure in the table by first adding one to it. - We check for this here, and undo the addition before evacuating it. - - If the SRT entry hasn't got bit 0 set, the SRT entry points to a - closure that's fixed at link-time, and no extra magic is required. - */ + bitmap = srt_bitmap; + p = srt; + + if (bitmap == (StgHalfWord)(-1)) { + scavenge_large_srt_bitmap( (StgLargeSRT *)srt ); + return; + } + + while (bitmap != 0) { + if ((bitmap & 1) != 0) { #ifdef ENABLE_WIN32_DLL_SUPPORT - if ( (unsigned long)(*srt) & 0x1 ) { - evacuate(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1))); - } else { - evacuate(*srt); - } + // Special-case to handle references to closures hiding out in DLLs, since + // double indirections required to get at those. The code generator knows + // which is which when generating the SRT, so it stores the (indirect) + // reference to the DLL closure in the table by first adding one to it. + // We check for this here, and undo the addition before evacuating it. + // + // If the SRT entry hasn't got bit 0 set, the SRT entry points to a + // closure that's fixed at link-time, and no extra magic is required. 
+ if ( (unsigned long)(*srt) & 0x1 ) { + evacuate(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1))); + } else { + evacuate(*p); + } #else - evacuate(*srt); + evacuate(*p); #endif + } + p++; + bitmap = bitmap >> 1; } } + +STATIC_INLINE void +scavenge_thunk_srt(const StgInfoTable *info) +{ + StgThunkInfoTable *thunk_info; + + if (!major_gc) return; + + thunk_info = itbl_to_thunk_itbl(info); + scavenge_srt((StgClosure **)GET_SRT(thunk_info), thunk_info->i.srt_bitmap); +} + +STATIC_INLINE void +scavenge_fun_srt(const StgInfoTable *info) +{ + StgFunInfoTable *fun_info; + + if (!major_gc) return; + + fun_info = itbl_to_fun_itbl(info); + scavenge_srt((StgClosure **)GET_FUN_SRT(fun_info), fun_info->i.srt_bitmap); +} + /* ----------------------------------------------------------------------------- Scavenge a TSO. -------------------------------------------------------------------------- */ @@ -1811,24 +2592,126 @@ scavenge_srt(const StgInfoTable *info) static void scavengeTSO (StgTSO *tso) { - /* chase the link field for any TSOs on the same queue */ - (StgClosure *)tso->link = evacuate((StgClosure *)tso->link); - if ( tso->why_blocked == BlockedOnMVar - || tso->why_blocked == BlockedOnBlackHole - || tso->why_blocked == BlockedOnException + if ( tso->why_blocked == BlockedOnMVar + || tso->why_blocked == BlockedOnBlackHole + || tso->why_blocked == BlockedOnException #if defined(PAR) - || tso->why_blocked == BlockedOnGA - || tso->why_blocked == BlockedOnGA_NoSend + || tso->why_blocked == BlockedOnGA + || tso->why_blocked == BlockedOnGA_NoSend #endif - ) { - tso->block_info.closure = evacuate(tso->block_info.closure); - } - if ( tso->blocked_exceptions != NULL ) { - tso->blocked_exceptions = - (StgTSO *)evacuate((StgClosure *)tso->blocked_exceptions); - } - /* scavenge this thread's stack */ - scavenge_stack(tso->sp, &(tso->stack[tso->stack_size])); + ) { + tso->block_info.closure = evacuate(tso->block_info.closure); + } + if ( tso->blocked_exceptions != NULL ) { + tso->blocked_exceptions = + (StgTSO *)evacuate((StgClosure *)tso->blocked_exceptions); + } + + // We don't always chase the link field: TSOs on the blackhole + // queue are not automatically alive, so the link field is a + // "weak" pointer in that case. + if (tso->why_blocked != BlockedOnBlackHole) { + tso->link = (StgTSO *)evacuate((StgClosure *)tso->link); + } + + // scavange current transaction record + tso->trec = (StgTRecHeader *)evacuate((StgClosure *)tso->trec); + + // scavenge this thread's stack + scavenge_stack(tso->sp, &(tso->stack[tso->stack_size])); +} + +/* ----------------------------------------------------------------------------- + Blocks of function args occur on the stack (at the top) and + in PAPs. 
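+
+   The helpers below walk those blocks using a small bitmap: one bit per
+   argument word, where a clear bit means "heap pointer, evacuate it" and
+   a set bit means "non-pointer, skip it".  For example, with the
+   (hypothetical) values
+
+       bitmap = 0x2;  size = 3;
+
+   words 0 and 2 are evacuated and word 1 is left alone.  ARG_GEN_BIG and
+   BCO payloads carry a large bitmap and go through
+   scavenge_large_bitmap() instead.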
+ -------------------------------------------------------------------------- */ + +STATIC_INLINE StgPtr +scavenge_arg_block (StgFunInfoTable *fun_info, StgClosure **args) +{ + StgPtr p; + StgWord bitmap; + nat size; + + p = (StgPtr)args; + switch (fun_info->f.fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); + size = BITMAP_SIZE(fun_info->f.b.bitmap); + goto small_bitmap; + case ARG_GEN_BIG: + size = GET_FUN_LARGE_BITMAP(fun_info)->size; + scavenge_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size); + p += size; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); + size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]); + small_bitmap: + while (size > 0) { + if ((bitmap & 1) == 0) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + p++; + bitmap = bitmap >> 1; + size--; + } + break; + } + return p; +} + +STATIC_INLINE StgPtr +scavenge_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size) +{ + StgPtr p; + StgWord bitmap; + StgFunInfoTable *fun_info; + + fun_info = get_fun_itbl(fun); + ASSERT(fun_info->i.type != PAP); + p = (StgPtr)payload; + + switch (fun_info->f.fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); + goto small_bitmap; + case ARG_GEN_BIG: + scavenge_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size); + p += size; + break; + case ARG_BCO: + scavenge_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), size); + p += size; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); + small_bitmap: + while (size > 0) { + if ((bitmap & 1) == 0) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + p++; + bitmap = bitmap >> 1; + size--; + } + break; + } + return p; +} + +STATIC_INLINE StgPtr +scavenge_PAP (StgPAP *pap) +{ + pap->fun = evacuate(pap->fun); + return scavenge_PAP_payload (pap->fun, pap->payload, pap->n_args); +} + +STATIC_INLINE StgPtr +scavenge_AP (StgAP *ap) +{ + ap->fun = evacuate(ap->fun); + return scavenge_PAP_payload (ap->fun, ap->payload, ap->n_args); } /* ----------------------------------------------------------------------------- @@ -1843,15 +2726,14 @@ scavengeTSO (StgTSO *tso) scavenging a mutable object where early promotion isn't such a good idea. -------------------------------------------------------------------------- */ -//@cindex scavenge static void scavenge(step *stp) { StgPtr p, q; - const StgInfoTable *info; + StgInfoTable *info; bdescr *bd; - nat saved_evac_gen = evac_gen; /* used for temporarily changing evac_gen */ + nat saved_evac_gen = evac_gen; p = stp->scan; bd = stp->scan_bd; @@ -1864,134 +2746,182 @@ scavenge(step *stp) while (bd != stp->hp_bd || p < stp->hp) { - /* If we're at the end of this block, move on to the next block */ + // If we're at the end of this block, move on to the next block if (bd != stp->hp_bd && p == bd->free) { bd = bd->link; p = bd->start; continue; } - q = p; /* save ptr to object */ - - ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO((StgClosure *)p)) - || IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p)))); - + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl((StgClosure *)p); - /* - if (info->type==RBH) - info = REVERT_INFOPTR(info); - */ + + ASSERT(thunk_selector_depth == 0); - switch (info -> type) { + q = p; + switch (info->type) { case MVAR: - /* treat MVars specially, because we don't want to evacuate the - * mut_link field in the middle of the closure. 
- */ - { + { StgMVar *mvar = ((StgMVar *)p); evac_gen = 0; - (StgClosure *)mvar->head = evacuate((StgClosure *)mvar->head); - (StgClosure *)mvar->tail = evacuate((StgClosure *)mvar->tail); - (StgClosure *)mvar->value = evacuate((StgClosure *)mvar->value); - p += sizeofW(StgMVar); + mvar->head = (StgTSO *)evacuate((StgClosure *)mvar->head); + mvar->tail = (StgTSO *)evacuate((StgClosure *)mvar->tail); + mvar->value = evacuate((StgClosure *)mvar->value); evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable. + p += sizeofW(StgMVar); break; - } + } - case THUNK_2_0: case FUN_2_0: - scavenge_srt(info); - case CONSTR_2_0: - ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); - ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); - p += sizeofW(StgHeader) + 2; - break; + scavenge_fun_srt(info); + ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + p += sizeofW(StgHeader) + 2; + break; - case THUNK_1_0: - scavenge_srt(info); - ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); - p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */ - break; - - case FUN_1_0: - scavenge_srt(info); - case CONSTR_1_0: - ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); - p += sizeofW(StgHeader) + 1; - break; + case THUNK_2_0: + scavenge_thunk_srt(info); + ((StgThunk *)p)->payload[1] = evacuate(((StgThunk *)p)->payload[1]); + ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]); + p += sizeofW(StgThunk) + 2; + break; + case CONSTR_2_0: + ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + p += sizeofW(StgHeader) + 2; + break; + + case THUNK_1_0: + scavenge_thunk_srt(info); + ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]); + p += sizeofW(StgThunk) + 1; + break; + + case FUN_1_0: + scavenge_fun_srt(info); + case CONSTR_1_0: + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + p += sizeofW(StgHeader) + 1; + break; + case THUNK_0_1: - scavenge_srt(info); - p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */ - break; - + scavenge_thunk_srt(info); + p += sizeofW(StgThunk) + 1; + break; + case FUN_0_1: - scavenge_srt(info); + scavenge_fun_srt(info); case CONSTR_0_1: - p += sizeofW(StgHeader) + 1; - break; - + p += sizeofW(StgHeader) + 1; + break; + case THUNK_0_2: + scavenge_thunk_srt(info); + p += sizeofW(StgThunk) + 2; + break; + case FUN_0_2: - scavenge_srt(info); + scavenge_fun_srt(info); case CONSTR_0_2: - p += sizeofW(StgHeader) + 2; - break; - + p += sizeofW(StgHeader) + 2; + break; + case THUNK_1_1: + scavenge_thunk_srt(info); + ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]); + p += sizeofW(StgThunk) + 2; + break; + case FUN_1_1: - scavenge_srt(info); + scavenge_fun_srt(info); case CONSTR_1_1: - ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); - p += sizeofW(StgHeader) + 2; - break; - + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + p += sizeofW(StgHeader) + 2; + break; + case FUN: + scavenge_fun_srt(info); + goto gen_obj; + case THUNK: - scavenge_srt(info); - /* fall through */ + { + StgPtr end; + scavenge_thunk_srt(info); + end = (P_)((StgThunk *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgThunk *)p)->payload; p < end; p++) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + p += 
info->layout.payload.nptrs; + break; + } + + gen_obj: case CONSTR: case WEAK: - case FOREIGN: case STABLE_NAME: - case BCO: - { + { StgPtr end; end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs; for (p = (P_)((StgClosure *)p)->payload; p < end; p++) { - (StgClosure *)*p = evacuate((StgClosure *)*p); + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } p += info->layout.payload.nptrs; break; - } + } + + case BCO: { + StgBCO *bco = (StgBCO *)p; + bco->instrs = (StgArrWords *)evacuate((StgClosure *)bco->instrs); + bco->literals = (StgArrWords *)evacuate((StgClosure *)bco->literals); + bco->ptrs = (StgMutArrPtrs *)evacuate((StgClosure *)bco->ptrs); + bco->itbls = (StgArrWords *)evacuate((StgClosure *)bco->itbls); + p += bco_sizeW(bco); + break; + } case IND_PERM: if (stp->gen->no != 0) { +#ifdef PROFILING + // @LDV profiling + // No need to call LDV_recordDead_FILL_SLOP_DYNAMIC() because an + // IND_OLDGEN_PERM closure is larger than an IND_PERM closure. + LDV_recordDead((StgClosure *)p, sizeofW(StgInd)); +#endif + // + // Todo: maybe use SET_HDR() and remove LDV_RECORD_CREATE()? + // SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info); + + // We pretend that p has just been created. + LDV_RECORD_CREATE((StgClosure *)p); } - /* fall through */ + // fall through case IND_OLDGEN_PERM: - ((StgIndOldGen *)p)->indirectee = - evacuate(((StgIndOldGen *)p)->indirectee); - if (failed_to_evac) { - failed_to_evac = rtsFalse; - recordOldToNewPtrs((StgMutClosure *)p); - } - p += sizeofW(StgIndOldGen); - break; + ((StgInd *)p)->indirectee = evacuate(((StgInd *)p)->indirectee); + p += sizeofW(StgInd); + break; - case MUT_VAR: - /* ignore MUT_CONSs */ - if (((StgMutVar *)p)->header.info != &stg_MUT_CONS_info) { - evac_gen = 0; + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - evac_gen = saved_evac_gen; - } - p += sizeofW(StgMutVar); - break; + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } + p += sizeofW(StgMutVar); + break; + } case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -2000,196 +2930,237 @@ scavenge(step *stp) p += BLACKHOLE_sizeW(); break; - case BLACKHOLE_BQ: - { - StgBlockingQueue *bh = (StgBlockingQueue *)p; - (StgClosure *)bh->blocking_queue = - evacuate((StgClosure *)bh->blocking_queue); - if (failed_to_evac) { - failed_to_evac = rtsFalse; - recordMutable((StgMutClosure *)bh); - } - p += BLACKHOLE_sizeW(); - break; - } - case THUNK_SELECTOR: - { + { StgSelector *s = (StgSelector *)p; s->selectee = evacuate(s->selectee); p += THUNK_SELECTOR_sizeW(); break; - } - - case IND: - case IND_OLDGEN: - barf("scavenge:IND???\n"); + } - case CONSTR_INTLIKE: - case CONSTR_CHARLIKE: - case CONSTR_STATIC: - case CONSTR_NOCAF_STATIC: - case THUNK_STATIC: - case FUN_STATIC: - case IND_STATIC: - /* Shouldn't see a static object here. */ - barf("scavenge: STATIC object\n"); + // A chunk of stack saved in a heap object + case AP_STACK: + { + StgAP_STACK *ap = (StgAP_STACK *)p; - case RET_BCO: - case RET_SMALL: - case RET_VEC_SMALL: - case RET_BIG: - case RET_VEC_BIG: - case RET_DYN: - case UPDATE_FRAME: - case STOP_FRAME: - case CATCH_FRAME: - case SEQ_FRAME: - /* Shouldn't see stack frames here. 
*/ - barf("scavenge: stack frame\n"); + ap->fun = evacuate(ap->fun); + scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size); + p = (StgPtr)ap->payload + ap->size; + break; + } - case AP_UPD: /* same as PAPs */ case PAP: - /* Treat a PAP just like a section of stack, not forgetting to - * evacuate the function pointer too... - */ - { - StgPAP* pap = (StgPAP *)p; + p = scavenge_PAP((StgPAP *)p); + break; - pap->fun = evacuate(pap->fun); - scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args); - p += pap_sizeW(pap); + case AP: + p = scavenge_AP((StgAP *)p); break; - } - + case ARR_WORDS: - /* nothing to follow */ - p += arr_words_sizeW((StgArrWords *)p); - break; + // nothing to follow + p += arr_words_sizeW((StgArrWords *)p); + break; - case MUT_ARR_PTRS: - /* follow everything */ - { + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + // follow everything + { StgPtr next; - - evac_gen = 0; /* repeatedly mutable */ + rtsBool saved_eager; + + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it "clean" and + // avoid traversing it during minor GCs. + saved_eager = eager_promotion; + eager_promotion = rtsFalse; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { - (StgClosure *)*p = evacuate((StgClosure *)*p); + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } - evac_gen = saved_evac_gen; + eager_promotion = saved_eager; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; + } + + failed_to_evac = rtsTrue; // always put it on the mutable list. break; - } + } case MUT_ARR_PTRS_FROZEN: - /* follow everything */ - { - StgPtr start = p, next; + case MUT_ARR_PTRS_FROZEN0: + // follow everything + { + StgPtr next; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { - (StgClosure *)*p = evacuate((StgClosure *)*p); + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. if (failed_to_evac) { - /* we can do this easier... */ - recordMutable((StgMutClosure *)start); - failed_to_evac = rtsFalse; + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; } break; - } + } case TSO: - { + { StgTSO *tso = (StgTSO *)p; - evac_gen = 0; + rtsBool saved_eager = eager_promotion; + + eager_promotion = rtsFalse; scavengeTSO(tso); - evac_gen = saved_evac_gen; + eager_promotion = saved_eager; + + if (failed_to_evac) { + tso->flags |= TSO_DIRTY; + } else { + tso->flags &= ~TSO_DIRTY; + } + + failed_to_evac = rtsTrue; // always on the mutable list p += tso_sizeW(tso); break; - } + } #if defined(PAR) - case RBH: // cf. 
BLACKHOLE_BQ - { - // nat size, ptrs, nonptrs, vhs; - // char str[80]; - // StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str); + case RBH: + { +#if 0 + nat size, ptrs, nonptrs, vhs; + char str[80]; + StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str); +#endif StgRBH *rbh = (StgRBH *)p; (StgClosure *)rbh->blocking_queue = - evacuate((StgClosure *)rbh->blocking_queue); - if (failed_to_evac) { - failed_to_evac = rtsFalse; - recordMutable((StgMutClosure *)rbh); - } + evacuate((StgClosure *)rbh->blocking_queue); + failed_to_evac = rtsTrue; // mutable anyhow. IF_DEBUG(gc, - belch("@@ scavenge: RBH %p (%s) (new blocking_queue link=%p)", + debugBelch("@@ scavenge: RBH %p (%s) (new blocking_queue link=%p)", p, info_type(p), (StgClosure *)rbh->blocking_queue)); // ToDo: use size of reverted closure here! p += BLACKHOLE_sizeW(); break; - } + } case BLOCKED_FETCH: - { + { StgBlockedFetch *bf = (StgBlockedFetch *)p; - /* follow the pointer to the node which is being demanded */ + // follow the pointer to the node which is being demanded (StgClosure *)bf->node = - evacuate((StgClosure *)bf->node); - /* follow the link to the rest of the blocking queue */ + evacuate((StgClosure *)bf->node); + // follow the link to the rest of the blocking queue (StgClosure *)bf->link = - evacuate((StgClosure *)bf->link); - if (failed_to_evac) { - failed_to_evac = rtsFalse; - recordMutable((StgMutClosure *)bf); - } + evacuate((StgClosure *)bf->link); IF_DEBUG(gc, - belch("@@ scavenge: %p (%s); node is now %p; exciting, isn't it", - bf, info_type((StgClosure *)bf), - bf->node, info_type(bf->node))); + debugBelch("@@ scavenge: %p (%s); node is now %p; exciting, isn't it", + bf, info_type((StgClosure *)bf), + bf->node, info_type(bf->node))); p += sizeofW(StgBlockedFetch); break; - } + } +#ifdef DIST + case REMOTE_REF: +#endif case FETCH_ME: - IF_DEBUG(gc, - belch("@@ scavenge: HWL claims nothing to do for %p (%s)", - p, info_type((StgClosure *)p))); - p += sizeofW(StgFetchMe); - break; // nothing to do in this case + p += sizeofW(StgFetchMe); + break; // nothing to do in this case - case FETCH_ME_BQ: // cf. 
BLACKHOLE_BQ - { + case FETCH_ME_BQ: + { StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p; (StgClosure *)fmbq->blocking_queue = - evacuate((StgClosure *)fmbq->blocking_queue); - if (failed_to_evac) { - failed_to_evac = rtsFalse; - recordMutable((StgMutClosure *)fmbq); - } + evacuate((StgClosure *)fmbq->blocking_queue); IF_DEBUG(gc, - belch("@@ scavenge: %p (%s) exciting, isn't it", - p, info_type((StgClosure *)p))); + debugBelch("@@ scavenge: %p (%s) exciting, isn't it", + p, info_type((StgClosure *)p))); p += sizeofW(StgFetchMeBlockingQueue); break; - } + } #endif - case EVACUATED: - barf("scavenge: unimplemented/strange closure type %d @ %p", - info->type, p); + case TVAR_WAIT_QUEUE: + { + StgTVarWaitQueue *wq = ((StgTVarWaitQueue *) p); + evac_gen = 0; + wq->waiting_tso = (StgTSO *)evacuate((StgClosure*)wq->waiting_tso); + wq->next_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->next_queue_entry); + wq->prev_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->prev_queue_entry); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + p += sizeofW(StgTVarWaitQueue); + break; + } + + case TVAR: + { + StgTVar *tvar = ((StgTVar *) p); + evac_gen = 0; + tvar->current_value = evacuate((StgClosure*)tvar->current_value); + tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + p += sizeofW(StgTVar); + break; + } + + case TREC_HEADER: + { + StgTRecHeader *trec = ((StgTRecHeader *) p); + evac_gen = 0; + trec->enclosing_trec = (StgTRecHeader *)evacuate((StgClosure*)trec->enclosing_trec); + trec->current_chunk = (StgTRecChunk *)evacuate((StgClosure*)trec->current_chunk); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + p += sizeofW(StgTRecHeader); + break; + } + + case TREC_CHUNK: + { + StgWord i; + StgTRecChunk *tc = ((StgTRecChunk *) p); + TRecEntry *e = &(tc -> entries[0]); + evac_gen = 0; + tc->prev_chunk = (StgTRecChunk *)evacuate((StgClosure*)tc->prev_chunk); + for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { + e->tvar = (StgTVar *)evacuate((StgClosure*)e->tvar); + e->expected_value = evacuate((StgClosure*)e->expected_value); + e->new_value = evacuate((StgClosure*)e->new_value); + } + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + p += sizeofW(StgTRecChunk); + break; + } default: - barf("scavenge: unimplemented/strange closure type %d @ %p", - info->type, p); + barf("scavenge: unimplemented/strange closure type %d @ %p", + info->type, p); } - /* If we didn't manage to promote all the objects pointed to by - * the current object, then we have to designate this object as - * mutable (because it contains old-to-new generation pointers). + /* + * We need to record the current object on the mutable list if + * (a) It is actually mutable, or + * (b) It contains pointers to a younger generation. + * Case (b) arises if we didn't manage to promote everything that + * the current object points to into the current generation. */ if (failed_to_evac) { - mkMutCons((StgClosure *)q, &generations[evac_gen]); - failed_to_evac = rtsFalse; + failed_to_evac = rtsFalse; + if (stp->gen_no > 0) { + recordMutableGen((StgClosure *)q, stp->gen); + } } } @@ -2198,153 +3169,736 @@ scavenge(step *stp) } /* ----------------------------------------------------------------------------- + Scavenge everything on the mark stack. 
+ + This is slightly different from scavenge(): + - we don't walk linearly through the objects, so the scavenger + doesn't need to advance the pointer on to the next object. + -------------------------------------------------------------------------- */ + +static void +scavenge_mark_stack(void) +{ + StgPtr p, q; + StgInfoTable *info; + nat saved_evac_gen; + + evac_gen = oldest_gen->no; + saved_evac_gen = evac_gen; + +linear_scan: + while (!mark_stack_empty()) { + p = pop_mark_stack(); + + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + info = get_itbl((StgClosure *)p); + + q = p; + switch (info->type) { + + case MVAR: + { + StgMVar *mvar = ((StgMVar *)p); + evac_gen = 0; + mvar->head = (StgTSO *)evacuate((StgClosure *)mvar->head); + mvar->tail = (StgTSO *)evacuate((StgClosure *)mvar->tail); + mvar->value = evacuate((StgClosure *)mvar->value); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable. + break; + } + + case FUN_2_0: + scavenge_fun_srt(info); + ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + break; + + case THUNK_2_0: + scavenge_thunk_srt(info); + ((StgThunk *)p)->payload[1] = evacuate(((StgThunk *)p)->payload[1]); + ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]); + break; + + case CONSTR_2_0: + ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + break; + + case FUN_1_0: + case FUN_1_1: + scavenge_fun_srt(info); + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + break; + + case THUNK_1_0: + case THUNK_1_1: + scavenge_thunk_srt(info); + ((StgThunk *)p)->payload[0] = evacuate(((StgThunk *)p)->payload[0]); + break; + + case CONSTR_1_0: + case CONSTR_1_1: + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + break; + + case FUN_0_1: + case FUN_0_2: + scavenge_fun_srt(info); + break; + + case THUNK_0_1: + case THUNK_0_2: + scavenge_thunk_srt(info); + break; + + case CONSTR_0_1: + case CONSTR_0_2: + break; + + case FUN: + scavenge_fun_srt(info); + goto gen_obj; + + case THUNK: + { + StgPtr end; + + scavenge_thunk_srt(info); + end = (P_)((StgThunk *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgThunk *)p)->payload; p < end; p++) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + break; + } + + gen_obj: + case CONSTR: + case WEAK: + case STABLE_NAME: + { + StgPtr end; + + end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs; + for (p = (P_)((StgClosure *)p)->payload; p < end; p++) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + break; + } + + case BCO: { + StgBCO *bco = (StgBCO *)p; + bco->instrs = (StgArrWords *)evacuate((StgClosure *)bco->instrs); + bco->literals = (StgArrWords *)evacuate((StgClosure *)bco->literals); + bco->ptrs = (StgMutArrPtrs *)evacuate((StgClosure *)bco->ptrs); + bco->itbls = (StgArrWords *)evacuate((StgClosure *)bco->itbls); + break; + } + + case IND_PERM: + // don't need to do anything here: the only possible case + // is that we're in a 1-space compacting collector, with + // no "old" generation. 
+ break; + + case IND_OLDGEN: + case IND_OLDGEN_PERM: + ((StgInd *)p)->indirectee = + evacuate(((StgInd *)p)->indirectee); + break; + + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; + ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } + break; + } + + case CAF_BLACKHOLE: + case SE_CAF_BLACKHOLE: + case SE_BLACKHOLE: + case BLACKHOLE: + case ARR_WORDS: + break; + + case THUNK_SELECTOR: + { + StgSelector *s = (StgSelector *)p; + s->selectee = evacuate(s->selectee); + break; + } + + // A chunk of stack saved in a heap object + case AP_STACK: + { + StgAP_STACK *ap = (StgAP_STACK *)p; + + ap->fun = evacuate(ap->fun); + scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size); + break; + } + + case PAP: + scavenge_PAP((StgPAP *)p); + break; + + case AP: + scavenge_AP((StgAP *)p); + break; + + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + // follow everything + { + StgPtr next; + rtsBool saved_eager; + + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it "clean" and + // avoid traversing it during minor GCs. + saved_eager = eager_promotion; + eager_promotion = rtsFalse; + next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); + for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + eager_promotion = saved_eager; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; + } + + failed_to_evac = rtsTrue; // mutable anyhow. + break; + } + + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + // follow everything + { + StgPtr next, q = p; + + next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); + for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } + break; + } + + case TSO: + { + StgTSO *tso = (StgTSO *)p; + rtsBool saved_eager = eager_promotion; + + eager_promotion = rtsFalse; + scavengeTSO(tso); + eager_promotion = saved_eager; + + if (failed_to_evac) { + tso->flags |= TSO_DIRTY; + } else { + tso->flags &= ~TSO_DIRTY; + } + + failed_to_evac = rtsTrue; // always on the mutable list + break; + } + +#if defined(PAR) + case RBH: + { +#if 0 + nat size, ptrs, nonptrs, vhs; + char str[80]; + StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str); +#endif + StgRBH *rbh = (StgRBH *)p; + bh->blocking_queue = + (StgTSO *)evacuate((StgClosure *)bh->blocking_queue); + failed_to_evac = rtsTrue; // mutable anyhow. 
+ IF_DEBUG(gc, + debugBelch("@@ scavenge: RBH %p (%s) (new blocking_queue link=%p)", + p, info_type(p), (StgClosure *)rbh->blocking_queue)); + break; + } + + case BLOCKED_FETCH: + { + StgBlockedFetch *bf = (StgBlockedFetch *)p; + // follow the pointer to the node which is being demanded + (StgClosure *)bf->node = + evacuate((StgClosure *)bf->node); + // follow the link to the rest of the blocking queue + (StgClosure *)bf->link = + evacuate((StgClosure *)bf->link); + IF_DEBUG(gc, + debugBelch("@@ scavenge: %p (%s); node is now %p; exciting, isn't it", + bf, info_type((StgClosure *)bf), + bf->node, info_type(bf->node))); + break; + } + +#ifdef DIST + case REMOTE_REF: +#endif + case FETCH_ME: + break; // nothing to do in this case + + case FETCH_ME_BQ: + { + StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p; + (StgClosure *)fmbq->blocking_queue = + evacuate((StgClosure *)fmbq->blocking_queue); + IF_DEBUG(gc, + debugBelch("@@ scavenge: %p (%s) exciting, isn't it", + p, info_type((StgClosure *)p))); + break; + } +#endif /* PAR */ + + case TVAR_WAIT_QUEUE: + { + StgTVarWaitQueue *wq = ((StgTVarWaitQueue *) p); + evac_gen = 0; + wq->waiting_tso = (StgTSO *)evacuate((StgClosure*)wq->waiting_tso); + wq->next_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->next_queue_entry); + wq->prev_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->prev_queue_entry); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + break; + } + + case TVAR: + { + StgTVar *tvar = ((StgTVar *) p); + evac_gen = 0; + tvar->current_value = evacuate((StgClosure*)tvar->current_value); + tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + break; + } + + case TREC_CHUNK: + { + StgWord i; + StgTRecChunk *tc = ((StgTRecChunk *) p); + TRecEntry *e = &(tc -> entries[0]); + evac_gen = 0; + tc->prev_chunk = (StgTRecChunk *)evacuate((StgClosure*)tc->prev_chunk); + for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { + e->tvar = (StgTVar *)evacuate((StgClosure*)e->tvar); + e->expected_value = evacuate((StgClosure*)e->expected_value); + e->new_value = evacuate((StgClosure*)e->new_value); + } + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + break; + } + + case TREC_HEADER: + { + StgTRecHeader *trec = ((StgTRecHeader *) p); + evac_gen = 0; + trec->enclosing_trec = (StgTRecHeader *)evacuate((StgClosure*)trec->enclosing_trec); + trec->current_chunk = (StgTRecChunk *)evacuate((StgClosure*)trec->current_chunk); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + break; + } + + default: + barf("scavenge_mark_stack: unimplemented/strange closure type %d @ %p", + info->type, p); + } + + if (failed_to_evac) { + failed_to_evac = rtsFalse; + if (evac_gen > 0) { + recordMutableGen((StgClosure *)q, &generations[evac_gen]); + } + } + + // mark the next bit to indicate "scavenged" + mark(q+1, Bdescr(q)); + + } // while (!mark_stack_empty()) + + // start a new linear scan if the mark stack overflowed at some point + if (mark_stack_overflowed && oldgen_scan_bd == NULL) { + IF_DEBUG(gc, debugBelch("scavenge_mark_stack: starting linear scan")); + mark_stack_overflowed = rtsFalse; + oldgen_scan_bd = oldest_gen->steps[0].old_blocks; + oldgen_scan = oldgen_scan_bd->start; + } + + if (oldgen_scan_bd) { + // push a new thing on the mark stack + loop: + // find a closure that is marked but not scavenged, and start + // from there. 
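The linear-scan fallback above relies on a two-bit convention in the block's mark bitmap: the bit for an object's first word says the object is live, and the bit for the word after it (set by mark(q+1, ...) once the object has been processed) says it has already been scavenged. A minimal, self-contained sketch of that lookup, assuming a flat word-indexed bitmap and a fixed minimum object size rather than the RTS's per-block bitmaps:

#include <stddef.h>
#include <stdint.h>

#define SK_HEAP_WORDS    1024
#define SK_MIN_OBJ_WORDS 2      /* stands in for header + minimum payload */

/* One bit per heap word, plus one spare word so testing bit (i+1) at the
 * end of the range stays in bounds. */
static uint32_t sk_bits[SK_HEAP_WORDS / 32 + 1];

static void sk_set (size_t i) { sk_bits[i / 32] |= (uint32_t)1 << (i % 32); }
static int  sk_test(size_t i) { return (sk_bits[i / 32] >> (i % 32)) & 1; }

/* Find the next object that is marked live (bit i) but not yet scavenged
 * (bit i+1), starting at word index 'scan'; returns SK_HEAP_WORDS when the
 * scan runs off the end of the range. */
static size_t sk_next_unscavenged(size_t scan)
{
    while (scan < SK_HEAP_WORDS) {
        if (!sk_test(scan)) {
            scan++;                    /* unmarked word: keep looking       */
        } else if (sk_test(scan + 1)) {
            scan += SK_MIN_OBJ_WORDS;  /* already scavenged: step over it   */
        } else {
            return scan;               /* live and still to be scavenged    */
        }
    }
    return SK_HEAP_WORDS;
}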
+ while (oldgen_scan < oldgen_scan_bd->free + && !is_marked(oldgen_scan,oldgen_scan_bd)) { + oldgen_scan++; + } + + if (oldgen_scan < oldgen_scan_bd->free) { + + // already scavenged? + if (is_marked(oldgen_scan+1,oldgen_scan_bd)) { + oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE; + goto loop; + } + push_mark_stack(oldgen_scan); + // ToDo: bump the linear scan by the actual size of the object + oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE; + goto linear_scan; + } + + oldgen_scan_bd = oldgen_scan_bd->link; + if (oldgen_scan_bd != NULL) { + oldgen_scan = oldgen_scan_bd->start; + goto loop; + } + } +} + +/* ----------------------------------------------------------------------------- Scavenge one object. This is used for objects that are temporarily marked as mutable because they contain old-to-new generation pointers. Only certain objects can have this property. -------------------------------------------------------------------------- */ -//@cindex scavenge_one static rtsBool -scavenge_one(StgClosure *p) +scavenge_one(StgPtr p) { - const StgInfoTable *info; - rtsBool no_luck; + const StgInfoTable *info; + nat saved_evac_gen = evac_gen; + rtsBool no_luck; + + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + info = get_itbl((StgClosure *)p); + + switch (info->type) { + + case MVAR: + { + StgMVar *mvar = ((StgMVar *)p); + evac_gen = 0; + mvar->head = (StgTSO *)evacuate((StgClosure *)mvar->head); + mvar->tail = (StgTSO *)evacuate((StgClosure *)mvar->tail); + mvar->value = evacuate((StgClosure *)mvar->value); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable. + break; + } - ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO(p)) - || IS_HUGS_CONSTR_INFO(GET_INFO(p)))); + case THUNK: + case THUNK_1_0: + case THUNK_0_1: + case THUNK_1_1: + case THUNK_0_2: + case THUNK_2_0: + { + StgPtr q, end; + + end = (StgPtr)((StgThunk *)p)->payload + info->layout.payload.ptrs; + for (q = (StgPtr)((StgThunk *)p)->payload; q < end; q++) { + *q = (StgWord)(StgPtr)evacuate((StgClosure *)*q); + } + break; + } - info = get_itbl(p); + case FUN: + case FUN_1_0: // hardly worth specialising these guys + case FUN_0_1: + case FUN_1_1: + case FUN_0_2: + case FUN_2_0: + case CONSTR: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_1_1: + case CONSTR_0_2: + case CONSTR_2_0: + case WEAK: + case IND_PERM: + { + StgPtr q, end; + + end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs; + for (q = (StgPtr)((StgClosure *)p)->payload; q < end; q++) { + *q = (StgWord)(StgPtr)evacuate((StgClosure *)*q); + } + break; + } + + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + StgPtr q = p; + rtsBool saved_eager_promotion = eager_promotion; - /* ngoq moHqu'! 
- if (info->type==RBH) - info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure - */ + eager_promotion = rtsFalse; + ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); + eager_promotion = saved_eager_promotion; - switch (info -> type) { + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } + break; + } - case FUN: - case FUN_1_0: /* hardly worth specialising these guys */ - case FUN_0_1: - case FUN_1_1: - case FUN_0_2: - case FUN_2_0: - case THUNK: - case THUNK_1_0: - case THUNK_0_1: - case THUNK_1_1: - case THUNK_0_2: - case THUNK_2_0: - case CONSTR: - case CONSTR_1_0: - case CONSTR_0_1: - case CONSTR_1_1: - case CONSTR_0_2: - case CONSTR_2_0: - case WEAK: - case FOREIGN: - case IND_PERM: - case IND_OLDGEN_PERM: + case CAF_BLACKHOLE: + case SE_CAF_BLACKHOLE: + case SE_BLACKHOLE: + case BLACKHOLE: + break; + + case THUNK_SELECTOR: + { + StgSelector *s = (StgSelector *)p; + s->selectee = evacuate(s->selectee); + break; + } + + case AP_STACK: { - StgPtr q, end; - - end = (P_)p->payload + info->layout.payload.ptrs; - for (q = (P_)p->payload; q < end; q++) { - (StgClosure *)*q = evacuate((StgClosure *)*q); - } - break; + StgAP_STACK *ap = (StgAP_STACK *)p; + + ap->fun = evacuate(ap->fun); + scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size); + p = (StgPtr)ap->payload + ap->size; + break; } - case CAF_BLACKHOLE: - case SE_CAF_BLACKHOLE: - case SE_BLACKHOLE: - case BLACKHOLE: - break; + case PAP: + p = scavenge_PAP((StgPAP *)p); + break; - case THUNK_SELECTOR: - { - StgSelector *s = (StgSelector *)p; - s->selectee = evacuate(s->selectee); - break; + case AP: + p = scavenge_AP((StgAP *)p); + break; + + case ARR_WORDS: + // nothing to follow + break; + + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + { + StgPtr next, q; + rtsBool saved_eager; + + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it "clean" and + // avoid traversing it during minor GCs. + saved_eager = eager_promotion; + eager_promotion = rtsFalse; + q = p; + next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); + for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + eager_promotion = saved_eager; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; + } + + failed_to_evac = rtsTrue; + break; } - - case AP_UPD: /* same as PAPs */ - case PAP: - /* Treat a PAP just like a section of stack, not forgetting to - * evacuate the function pointer too... - */ - { - StgPAP* pap = (StgPAP *)p; + + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + { + // follow everything + StgPtr next, q=p; - pap->fun = evacuate(pap->fun); - scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args); - break; + next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); + for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. 
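The FROZEN/FROZEN0 retagging above is one instance of a pattern this patch uses for every mutable object (MUT_VAR_CLEAN/MUT_VAR_DIRTY, MUT_ARR_PTRS_CLEAN/MUT_ARR_PTRS_DIRTY): after scavenging the object's fields, flip its info pointer to record whether it can still hold an old-to-young pointer, so later minor collections know whether it has to be revisited. A minimal sketch of the idiom, with invented stand-ins for the closure layout and info tables:

typedef enum { SK_TAG_CLEAN, SK_TAG_DIRTY } SkInfoTag;

typedef struct {
    SkInfoTag tag;      /* stands in for the closure's info pointer */
    /* ... payload ... */
} SkMutClosure;

/* 'points_into_young' plays the role of failed_to_evac: it is set during
 * scavenging if some field could not be promoted out of a younger
 * generation. */
static void sk_retag_after_scavenge(SkMutClosure *c, int points_into_young)
{
    /* A DIRTY object must stay on (or be put back on) its generation's
     * mutable list; a CLEAN one can be skipped by later minor GCs. */
    c->tag = points_into_young ? SK_TAG_DIRTY : SK_TAG_CLEAN;
}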
+ if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } + break; } - case IND_OLDGEN: - /* This might happen if for instance a MUT_CONS was pointing to a - * THUNK which has since been updated. The IND_OLDGEN will - * be on the mutable list anyway, so we don't need to do anything - * here. - */ - break; + case TSO: + { + StgTSO *tso = (StgTSO *)p; + rtsBool saved_eager = eager_promotion; - default: - barf("scavenge_one: strange object %d", (int)(info->type)); - } + eager_promotion = rtsFalse; + scavengeTSO(tso); + eager_promotion = saved_eager; - no_luck = failed_to_evac; - failed_to_evac = rtsFalse; - return (no_luck); -} + if (failed_to_evac) { + tso->flags |= TSO_DIRTY; + } else { + tso->flags &= ~TSO_DIRTY; + } + failed_to_evac = rtsTrue; // always on the mutable list + break; + } + +#if defined(PAR) + case RBH: + { +#if 0 + nat size, ptrs, nonptrs, vhs; + char str[80]; + StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str); +#endif + StgRBH *rbh = (StgRBH *)p; + (StgClosure *)rbh->blocking_queue = + evacuate((StgClosure *)rbh->blocking_queue); + failed_to_evac = rtsTrue; // mutable anyhow. + IF_DEBUG(gc, + debugBelch("@@ scavenge: RBH %p (%s) (new blocking_queue link=%p)", + p, info_type(p), (StgClosure *)rbh->blocking_queue)); + // ToDo: use size of reverted closure here! + break; + } -/* ----------------------------------------------------------------------------- - Scavenging mutable lists. + case BLOCKED_FETCH: + { + StgBlockedFetch *bf = (StgBlockedFetch *)p; + // follow the pointer to the node which is being demanded + (StgClosure *)bf->node = + evacuate((StgClosure *)bf->node); + // follow the link to the rest of the blocking queue + (StgClosure *)bf->link = + evacuate((StgClosure *)bf->link); + IF_DEBUG(gc, + debugBelch("@@ scavenge: %p (%s); node is now %p; exciting, isn't it", + bf, info_type((StgClosure *)bf), + bf->node, info_type(bf->node))); + break; + } - We treat the mutable list of each generation > N (i.e. all the - generations older than the one being collected) as roots. We also - remove non-mutable objects from the mutable list at this point. 
- -------------------------------------------------------------------------- */ -//@cindex scavenge_mut_once_list +#ifdef DIST + case REMOTE_REF: +#endif + case FETCH_ME: + break; // nothing to do in this case -static void -scavenge_mut_once_list(generation *gen) -{ - const StgInfoTable *info; - StgMutClosure *p, *next, *new_list; + case FETCH_ME_BQ: + { + StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p; + (StgClosure *)fmbq->blocking_queue = + evacuate((StgClosure *)fmbq->blocking_queue); + IF_DEBUG(gc, + debugBelch("@@ scavenge: %p (%s) exciting, isn't it", + p, info_type((StgClosure *)p))); + break; + } +#endif - p = gen->mut_once_list; - new_list = END_MUT_LIST; - next = p->mut_link; + case TVAR_WAIT_QUEUE: + { + StgTVarWaitQueue *wq = ((StgTVarWaitQueue *) p); + evac_gen = 0; + wq->waiting_tso = (StgTSO *)evacuate((StgClosure*)wq->waiting_tso); + wq->next_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->next_queue_entry); + wq->prev_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)wq->prev_queue_entry); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + break; + } - evac_gen = gen->no; - failed_to_evac = rtsFalse; + case TVAR: + { + StgTVar *tvar = ((StgTVar *) p); + evac_gen = 0; + tvar->current_value = evacuate((StgClosure*)tvar->current_value); + tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + break; + } - for (; p != END_MUT_LIST; p = next, next = p->mut_link) { + case TREC_HEADER: + { + StgTRecHeader *trec = ((StgTRecHeader *) p); + evac_gen = 0; + trec->enclosing_trec = (StgTRecHeader *)evacuate((StgClosure*)trec->enclosing_trec); + trec->current_chunk = (StgTRecChunk *)evacuate((StgClosure*)trec->current_chunk); + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + break; + } + + case TREC_CHUNK: + { + StgWord i; + StgTRecChunk *tc = ((StgTRecChunk *) p); + TRecEntry *e = &(tc -> entries[0]); + evac_gen = 0; + tc->prev_chunk = (StgTRecChunk *)evacuate((StgClosure*)tc->prev_chunk); + for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { + e->tvar = (StgTVar *)evacuate((StgClosure*)e->tvar); + e->expected_value = evacuate((StgClosure*)e->expected_value); + e->new_value = evacuate((StgClosure*)e->new_value); + } + evac_gen = saved_evac_gen; + failed_to_evac = rtsTrue; // mutable + break; + } - /* make sure the info pointer is into text space */ - ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO(p)) - || IS_HUGS_CONSTR_INFO(GET_INFO(p)))); - - info = get_itbl(p); - /* - if (info->type==RBH) - info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure - */ - switch(info->type) { - case IND_OLDGEN: case IND_OLDGEN_PERM: case IND_STATIC: - /* Try to pull the indirectee into this generation, so we can - * remove the indirection from the mutable list. - */ - ((StgIndOldGen *)p)->indirectee = - evacuate(((StgIndOldGen *)p)->indirectee); - -#ifdef DEBUG + { + /* Careful here: a THUNK can be on the mutable list because + * it contains pointers to young gen objects. If such a thunk + * is updated, the IND_OLDGEN will be added to the mutable + * list again, and we'll scavenge it twice. evacuate() + * doesn't check whether the object has already been + * evacuated, so we perform that check here. 
+ */ + StgClosure *q = ((StgInd *)p)->indirectee; + if (HEAP_ALLOCED(q) && Bdescr((StgPtr)q)->flags & BF_EVACUATED) { + break; + } + ((StgInd *)p)->indirectee = evacuate(q); + } + +#if 0 && defined(DEBUG) if (RtsFlags.DebugFlags.gc) /* Debugging code to print out the size of the thing we just * promoted @@ -2366,249 +3920,100 @@ scavenge_mut_once_list(generation *gen) } else { size = gen->steps[0].scan - start; } - fprintf(stderr,"evac IND_OLDGEN: %d bytes\n", size * sizeof(W_)); + debugBelch("evac IND_OLDGEN: %ld bytes", size * sizeof(W_)); } #endif + break; - /* failed_to_evac might happen if we've got more than two - * generations, we're collecting only generation 0, the - * indirection resides in generation 2 and the indirectee is - * in generation 1. - */ - if (failed_to_evac) { - failed_to_evac = rtsFalse; - p->mut_link = new_list; - new_list = p; - } else { - /* the mut_link field of an IND_STATIC is overloaded as the - * static link field too (it just so happens that we don't need - * both at the same time), so we need to NULL it out when - * removing this object from the mutable list because the static - * link fields are all assumed to be NULL before doing a major - * collection. - */ - p->mut_link = NULL; - } - continue; - - case MUT_VAR: - /* MUT_CONS is a kind of MUT_VAR, except it that we try to remove - * it from the mutable list if possible by promoting whatever it - * points to. - */ - ASSERT(p->header.info == &stg_MUT_CONS_info); - if (scavenge_one(((StgMutVar *)p)->var) == rtsTrue) { - /* didn't manage to promote everything, so put the - * MUT_CONS back on the list. - */ - p->mut_link = new_list; - new_list = p; - } - continue; - default: - /* shouldn't have anything else on the mutables list */ - barf("scavenge_mut_once_list: strange object? %d", (int)(info->type)); - } - } + barf("scavenge_one: strange object %d", (int)(info->type)); + } - gen->mut_once_list = new_list; + no_luck = failed_to_evac; + failed_to_evac = rtsFalse; + return (no_luck); } -//@cindex scavenge_mutable_list +/* ----------------------------------------------------------------------------- + Scavenging mutable lists. + + We treat the mutable list of each generation > N (i.e. all the + generations older than the one being collected) as roots. We also + remove non-mutable objects from the mutable list at this point. + -------------------------------------------------------------------------- */ static void scavenge_mutable_list(generation *gen) { - const StgInfoTable *info; - StgMutClosure *p, *next; - - p = gen->saved_mut_list; - next = p->mut_link; - - evac_gen = 0; - failed_to_evac = rtsFalse; - - for (; p != END_MUT_LIST; p = next, next = p->mut_link) { - - /* make sure the info pointer is into text space */ - ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO(p)) - || IS_HUGS_CONSTR_INFO(GET_INFO(p)))); - - info = get_itbl(p); - /* - if (info->type==RBH) - info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure - */ - switch(info->type) { - - case MUT_ARR_PTRS_FROZEN: - /* remove this guy from the mutable list, but follow the ptrs - * anyway (and make sure they get promoted to this gen). 
- */ - { - StgPtr end, q; - - end = (P_)p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); - evac_gen = gen->no; - for (q = (P_)((StgMutArrPtrs *)p)->payload; q < end; q++) { - (StgClosure *)*q = evacuate((StgClosure *)*q); - } - evac_gen = 0; - - if (failed_to_evac) { - failed_to_evac = rtsFalse; - p->mut_link = gen->mut_list; - gen->mut_list = p; - } - continue; - } - - case MUT_ARR_PTRS: - /* follow everything */ - p->mut_link = gen->mut_list; - gen->mut_list = p; - { - StgPtr end, q; - - end = (P_)p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); - for (q = (P_)((StgMutArrPtrs *)p)->payload; q < end; q++) { - (StgClosure *)*q = evacuate((StgClosure *)*q); - } - continue; - } - - case MUT_VAR: - /* MUT_CONS is a kind of MUT_VAR, except that we try to remove - * it from the mutable list if possible by promoting whatever it - * points to. - */ - ASSERT(p->header.info != &stg_MUT_CONS_info); - ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - p->mut_link = gen->mut_list; - gen->mut_list = p; - continue; - - case MVAR: - { - StgMVar *mvar = (StgMVar *)p; - (StgClosure *)mvar->head = evacuate((StgClosure *)mvar->head); - (StgClosure *)mvar->tail = evacuate((StgClosure *)mvar->tail); - (StgClosure *)mvar->value = evacuate((StgClosure *)mvar->value); - p->mut_link = gen->mut_list; - gen->mut_list = p; - continue; - } - - case TSO: - { - StgTSO *tso = (StgTSO *)p; - - scavengeTSO(tso); - - /* Don't take this TSO off the mutable list - it might still - * point to some younger objects (because we set evac_gen to 0 - * above). - */ - tso->mut_link = gen->mut_list; - gen->mut_list = (StgMutClosure *)tso; - continue; - } - - case BLACKHOLE_BQ: - { - StgBlockingQueue *bh = (StgBlockingQueue *)p; - (StgClosure *)bh->blocking_queue = - evacuate((StgClosure *)bh->blocking_queue); - p->mut_link = gen->mut_list; - gen->mut_list = p; - continue; - } - - /* Happens if a BLACKHOLE_BQ in the old generation is updated: - */ - case IND_OLDGEN: - case IND_OLDGEN_PERM: - /* Try to pull the indirectee into this generation, so we can - * remove the indirection from the mutable list. - */ - evac_gen = gen->no; - ((StgIndOldGen *)p)->indirectee = - evacuate(((StgIndOldGen *)p)->indirectee); - evac_gen = 0; - - if (failed_to_evac) { - failed_to_evac = rtsFalse; - p->mut_link = gen->mut_once_list; - gen->mut_once_list = p; - } else { - p->mut_link = NULL; - } - continue; - -#if defined(PAR) - // HWL: check whether all of these are necessary - - case RBH: // cf. BLACKHOLE_BQ - { - // nat size, ptrs, nonptrs, vhs; - // char str[80]; - // StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str); - StgRBH *rbh = (StgRBH *)p; - (StgClosure *)rbh->blocking_queue = - evacuate((StgClosure *)rbh->blocking_queue); - if (failed_to_evac) { - failed_to_evac = rtsFalse; - recordMutable((StgMutClosure *)rbh); - } - // ToDo: use size of reverted closure here! 
- p += BLACKHOLE_sizeW(); - break; - } - - case BLOCKED_FETCH: - { - StgBlockedFetch *bf = (StgBlockedFetch *)p; - /* follow the pointer to the node which is being demanded */ - (StgClosure *)bf->node = - evacuate((StgClosure *)bf->node); - /* follow the link to the rest of the blocking queue */ - (StgClosure *)bf->link = - evacuate((StgClosure *)bf->link); - if (failed_to_evac) { - failed_to_evac = rtsFalse; - recordMutable((StgMutClosure *)bf); - } - p += sizeofW(StgBlockedFetch); - break; - } + bdescr *bd; + StgPtr p, q; + + bd = gen->saved_mut_list; + + evac_gen = gen->no; + for (; bd != NULL; bd = bd->link) { + for (q = bd->start; q < bd->free; q++) { + p = (StgPtr)*q; + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + +#ifdef DEBUG + switch (get_itbl((StgClosure *)p)->type) { + case MUT_VAR_CLEAN: + barf("MUT_VAR_CLEAN on mutable list"); + case MUT_VAR_DIRTY: + mutlist_MUTVARS++; break; + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + mutlist_MUTARRS++; break; + default: + mutlist_OTHERS++; break; + } +#endif - case FETCH_ME: - p += sizeofW(StgFetchMe); - break; // nothing to do in this case + // Check whether this object is "clean", that is it + // definitely doesn't point into a young generation. + // Clean objects don't need to be scavenged. Some clean + // objects (MUT_VAR_CLEAN) are not kept on the mutable + // list at all; others, such as MUT_ARR_PTRS_CLEAN and + // TSO, are always on the mutable list. + // + switch (get_itbl((StgClosure *)p)->type) { + case MUT_ARR_PTRS_CLEAN: + recordMutableGen((StgClosure *)p,gen); + continue; + case TSO: { + StgTSO *tso = (StgTSO *)p; + if ((tso->flags & TSO_DIRTY) == 0) { + // A clean TSO: we don't have to traverse its + // stack. However, we *do* follow the link field: + // we don't want to have to mark a TSO dirty just + // because we put it on a different queue. + if (tso->why_blocked != BlockedOnBlackHole) { + tso->link = (StgTSO *)evacuate((StgClosure *)tso->link); + } + recordMutableGen((StgClosure *)p,gen); + continue; + } + } + default: + ; + } - case FETCH_ME_BQ: // cf. BLACKHOLE_BQ - { - StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p; - (StgClosure *)fmbq->blocking_queue = - evacuate((StgClosure *)fmbq->blocking_queue); - if (failed_to_evac) { - failed_to_evac = rtsFalse; - recordMutable((StgMutClosure *)fmbq); + if (scavenge_one(p)) { + // didn't manage to promote everything, so put the + // object back on the list. + recordMutableGen((StgClosure *)p,gen); + } } - p += sizeofW(StgFetchMeBlockingQueue); - break; - } -#endif - - default: - /* shouldn't have anything else on the mutables list */ - barf("scavenge_mutable_list: strange object? %d", (int)(info->type)); } - } + + // free the old mut_list + freeChain(gen->saved_mut_list); + gen->saved_mut_list = NULL; } -//@cindex scavenge_static static void scavenge_static(void) @@ -2624,20 +4029,19 @@ scavenge_static(void) list... */ while (p != END_OF_STATIC_LIST) { + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl(p); /* if (info->type==RBH) info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure */ - /* make sure the info pointer is into text space */ - ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO(p)) - || IS_HUGS_CONSTR_INFO(GET_INFO(p)))); + // make sure the info pointer is into text space /* Take this object *off* the static_objects list, * and put it on the scavenged_static_objects list. 
*/ - static_objects = STATIC_LINK(info,p); - STATIC_LINK(info,p) = scavenged_static_objects; + static_objects = *STATIC_LINK(info,p); + *STATIC_LINK(info,p) = scavenged_static_objects; scavenged_static_objects = p; switch (info -> type) { @@ -2648,32 +4052,33 @@ scavenge_static(void) ind->indirectee = evacuate(ind->indirectee); /* might fail to evacuate it, in which case we have to pop it - * back on the mutable list (and take it off the - * scavenged_static list because the static link and mut link - * pointers are one and the same). + * back on the mutable list of the oldest generation. We + * leave it *on* the scavenged_static_objects list, though, + * in case we visit this object again. */ if (failed_to_evac) { failed_to_evac = rtsFalse; - scavenged_static_objects = STATIC_LINK(info,p); - ((StgMutClosure *)ind)->mut_link = oldest_gen->mut_once_list; - oldest_gen->mut_once_list = (StgMutClosure *)ind; + recordMutableGen((StgClosure *)p,oldest_gen); } break; } case THUNK_STATIC: + scavenge_thunk_srt(info); + break; + case FUN_STATIC: - scavenge_srt(info); - /* fall through */ + scavenge_fun_srt(info); + break; case CONSTR_STATIC: { StgPtr q, next; next = (P_)p->payload + info->layout.payload.ptrs; - /* evacuate the pointers */ + // evacuate the pointers for (q = (P_)p->payload; q < next; q++) { - (StgClosure *)*q = evacuate((StgClosure *)*q); + *q = (StgWord)(StgPtr)evacuate((StgClosure *)*q); } break; } @@ -2693,194 +4098,193 @@ scavenge_static(void) } /* ----------------------------------------------------------------------------- + scavenge a chunk of memory described by a bitmap + -------------------------------------------------------------------------- */ + +static void +scavenge_large_bitmap( StgPtr p, StgLargeBitmap *large_bitmap, nat size ) +{ + nat i, b; + StgWord bitmap; + + b = 0; + bitmap = large_bitmap->bitmap[b]; + for (i = 0; i < size; ) { + if ((bitmap & 1) == 0) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + i++; + p++; + if (i % BITS_IN(W_) == 0) { + b++; + bitmap = large_bitmap->bitmap[b]; + } else { + bitmap = bitmap >> 1; + } + } +} + +STATIC_INLINE StgPtr +scavenge_small_bitmap (StgPtr p, nat size, StgWord bitmap) +{ + while (size > 0) { + if ((bitmap & 1) == 0) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + } + p++; + bitmap = bitmap >> 1; + size--; + } + return p; +} + +/* ----------------------------------------------------------------------------- scavenge_stack walks over a section of stack and evacuates all the objects pointed to by it. We can use the same code for walking - PAPs, since these are just sections of copied stack. + AP_STACK_UPDs, since these are just sections of copied stack. -------------------------------------------------------------------------- */ -//@cindex scavenge_stack + static void scavenge_stack(StgPtr p, StgPtr stack_end) { - StgPtr q; - const StgInfoTable* info; - StgWord32 bitmap; + const StgRetInfoTable* info; + StgWord bitmap; + nat size; - //IF_DEBUG(sanity, belch(" scavenging stack between %p and %p", p, stack_end)); + //IF_DEBUG(sanity, debugBelch(" scavenging stack between %p and %p", p, stack_end)); /* * Each time around this loop, we are looking at a chunk of stack - * that starts with either a pending argument section or an - * activation record. + * that starts with an activation record. */ while (p < stack_end) { - q = *(P_ *)p; - - /* If we've got a tag, skip over that many words on the stack */ - if (IS_ARG_TAG((W_)q)) { - p += ARG_SIZE(q); - p++; continue; - } - - /* Is q a pointer to a closure? 
- */ - if (! LOOKS_LIKE_GHC_INFO(q) ) { -#ifdef DEBUG - if ( 0 && LOOKS_LIKE_STATIC_CLOSURE(q) ) { /* Is it a static closure? */ - ASSERT(closure_STATIC((StgClosure *)q)); - } - /* otherwise, must be a pointer into the allocation space. */ -#endif - - (StgClosure *)*p = evacuate((StgClosure *)q); - p++; - continue; - } - - /* - * Otherwise, q must be the info pointer of an activation - * record. All activation records have 'bitmap' style layout - * info. - */ - info = get_itbl((StgClosure *)p); + info = get_ret_itbl((StgClosure *)p); - switch (info->type) { + switch (info->i.type) { - /* Dynamic bitmap: the mask is stored on the stack */ - case RET_DYN: - bitmap = ((StgRetDyn *)p)->liveness; - p = (P_)&((StgRetDyn *)p)->payload[0]; - goto small_bitmap; - - /* probably a slow-entry point return address: */ - case FUN: - case FUN_STATIC: - { -#if 0 - StgPtr old_p = p; - p++; p++; - IF_DEBUG(sanity, - belch("HWL: scavenge_stack: FUN(_STATIC) adjusting p from %p to %p (instead of %p)", - old_p, p, old_p+1)); -#else - p++; /* what if FHS!=1 !? -- HWL */ -#endif - goto follow_srt; - } - - /* Specialised code for update frames, since they're so common. - * We *know* the updatee points to a BLACKHOLE, CAF_BLACKHOLE, - * or BLACKHOLE_BQ, so just inline the code to evacuate it here. - */ case UPDATE_FRAME: - { - StgUpdateFrame *frame = (StgUpdateFrame *)p; - StgClosure *to; - nat type = get_itbl(frame->updatee)->type; - - p += sizeofW(StgUpdateFrame); - if (type == EVACUATED) { - frame->updatee = evacuate(frame->updatee); - continue; - } else { - bdescr *bd = Bdescr((P_)frame->updatee); - step *stp; - if (bd->gen->no > N) { - if (bd->gen->no < evac_gen) { - failed_to_evac = rtsTrue; - } - continue; - } - - /* Don't promote blackholes */ - stp = bd->step; - if (!(stp->gen->no == 0 && - stp->no != 0 && - stp->no == stp->gen->n_steps-1)) { - stp = stp->to; - } - - switch (type) { - case BLACKHOLE: - case CAF_BLACKHOLE: - to = copyPart(frame->updatee, BLACKHOLE_sizeW(), - sizeofW(StgHeader), stp); - frame->updatee = to; - continue; - case BLACKHOLE_BQ: - to = copy(frame->updatee, BLACKHOLE_sizeW(), stp); - frame->updatee = to; - recordMutable((StgMutClosure *)to); - continue; - default: - /* will never be SE_{,CAF_}BLACKHOLE, since we - don't push an update frame for single-entry thunks. KSW 1999-01. */ - barf("scavenge_stack: UPDATE_FRAME updatee"); - } + // In SMP, we can get update frames that point to indirections + // when two threads evaluate the same thunk. We do attempt to + // discover this situation in threadPaused(), but it's + // possible that the following sequence occurs: + // + // A B + // enter T + // enter T + // blackhole T + // update T + // GC + // + // Now T is an indirection, and the update frame is already + // marked on A's stack, so we won't traverse it again in + // threadPaused(). We could traverse the whole stack again + // before GC, but that seems like overkill. + // + // Scavenging this update frame as normal would be disastrous; + // the updatee would end up pointing to the value. So we turn + // the indirection into an IND_PERM, so that evacuate will + // copy the indirection into the old generation instead of + // discarding it. 
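A stand-alone sketch of the retagging step that follows, using simplified closure types rather than the real info tables: if another thread has already updated this thunk, the updatee reached through the frame is a plain indirection, and flipping it to a permanent indirection keeps the indirection itself alive across the collection instead of letting the frame end up pointing straight at the value.

typedef enum { SK_THUNK, SK_IND, SK_IND_PERM, SK_VALUE } SkClosureType;

typedef struct SkClosure {
    SkClosureType     type;        /* stands in for the info pointer     */
    struct SkClosure *indirectee;  /* meaningful for the IND forms only  */
} SkClosure;

/* Called on an update frame's updatee before it is evacuated. */
static void sk_preserve_duplicate_update(SkClosure *updatee)
{
    if (updatee->type == SK_IND) {
        /* Make the evacuation copy the indirection rather than
         * short-circuit through it. */
        updatee->type = SK_IND_PERM;
    }
}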
+ if (get_itbl(((StgUpdateFrame *)p)->updatee)->type == IND) { + ((StgUpdateFrame *)p)->updatee->header.info = + (StgInfoTable *)&stg_IND_PERM_info; } - } + ((StgUpdateFrame *)p)->updatee + = evacuate(((StgUpdateFrame *)p)->updatee); + p += sizeofW(StgUpdateFrame); + continue; - /* small bitmap (< 32 entries, or 64 on a 64-bit machine) */ + // small bitmap (< 32 entries, or 64 on a 64-bit machine) + case CATCH_STM_FRAME: + case CATCH_RETRY_FRAME: + case ATOMICALLY_FRAME: case STOP_FRAME: - case CATCH_FRAME: - case SEQ_FRAME: - case RET_BCO: - case RET_SMALL: - case RET_VEC_SMALL: - bitmap = info->layout.bitmap; - p++; - /* this assumes that the payload starts immediately after the info-ptr */ - small_bitmap: - while (bitmap != 0) { - if ((bitmap & 1) == 0) { - (StgClosure *)*p = evacuate((StgClosure *)*p); - } + case CATCH_FRAME: + case RET_SMALL: + case RET_VEC_SMALL: + bitmap = BITMAP_BITS(info->i.layout.bitmap); + size = BITMAP_SIZE(info->i.layout.bitmap); + // NOTE: the payload starts immediately after the info-ptr, we + // don't have an StgHeader in the same sense as a heap closure. p++; - bitmap = bitmap >> 1; - } - + p = scavenge_small_bitmap(p, size, bitmap); + follow_srt: - scavenge_srt(info); - continue; + if (major_gc) + scavenge_srt((StgClosure **)GET_SRT(info), info->i.srt_bitmap); + continue; + + case RET_BCO: { + StgBCO *bco; + nat size; + + p++; + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); + bco = (StgBCO *)*p; + p++; + size = BCO_BITMAP_SIZE(bco); + scavenge_large_bitmap(p, BCO_BITMAP(bco), size); + p += size; + continue; + } - /* large bitmap (> 32 entries) */ + // large bitmap (> 32 entries, or > 64 on a 64-bit machine) case RET_BIG: case RET_VEC_BIG: - { - StgPtr q; - StgLargeBitmap *large_bitmap; - nat i; + { + nat size; - large_bitmap = info->layout.large_bitmap; + size = GET_LARGE_BITMAP(&info->i)->size; p++; + scavenge_large_bitmap(p, GET_LARGE_BITMAP(&info->i), size); + p += size; + // and don't forget to follow the SRT + goto follow_srt; + } - for (i=0; isize; i++) { - bitmap = large_bitmap->bitmap[i]; - q = p + sizeof(W_) * 8; - while (bitmap != 0) { - if ((bitmap & 1) == 0) { - (StgClosure *)*p = evacuate((StgClosure *)*p); - } + // Dynamic bitmap: the mask is stored on the stack, and + // there are a number of non-pointers followed by a number + // of pointers above the bitmapped area. (see StgMacros.h, + // HEAP_CHK_GEN). 
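As a rough illustration of what the RET_DYN case below has to recover from the liveness word: a small register bitmap, a count of non-pointer words to skip, and a count of pointer words to follow. The packing used here (field widths and shifts) is invented for the sketch; the real layout is whatever the RET_DYN_LIVENESS, RET_DYN_NONPTRS and RET_DYN_PTRS macros decode.

#include <stdint.h>

#define SK_BITMAP_BITS    8   /* assumed width of the bitmapped area       */
#define SK_NONPTRS_SHIFT  8   /* assumed position of the non-pointer count */
#define SK_PTRS_SHIFT    16   /* assumed position of the pointer count     */

typedef struct {
    uint32_t bitmap;    /* liveness bits for the bitmapped slots        */
    uint32_t nonptrs;   /* words to skip before the pointer block       */
    uint32_t ptrs;      /* pointer words that must be evacuated         */
} SkRetDyn;

static SkRetDyn sk_decode_liveness(uint32_t dyn)
{
    SkRetDyn d;
    d.bitmap  = dyn & ((1u << SK_BITMAP_BITS) - 1);
    d.nonptrs = (dyn >> SK_NONPTRS_SHIFT) & 0xffu;
    d.ptrs    = (dyn >> SK_PTRS_SHIFT)    & 0xffu;
    return d;
}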
+ case RET_DYN: + { + StgWord dyn; + dyn = ((StgRetDyn *)p)->liveness; + + // traverse the bitmap first + bitmap = RET_DYN_LIVENESS(dyn); + p = (P_)&((StgRetDyn *)p)->payload[0]; + size = RET_DYN_BITMAP_SIZE; + p = scavenge_small_bitmap(p, size, bitmap); + + // skip over the non-ptr words + p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE; + + // follow the ptr words + for (size = RET_DYN_PTRS(dyn); size > 0; size--) { + *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); p++; - bitmap = bitmap >> 1; - } - if (i+1 < large_bitmap->size) { - while (p < q) { - (StgClosure *)*p = evacuate((StgClosure *)*p); - p++; - } - } } + continue; + } - /* and don't forget to follow the SRT */ + case RET_FUN: + { + StgRetFun *ret_fun = (StgRetFun *)p; + StgFunInfoTable *fun_info; + + ret_fun->fun = evacuate(ret_fun->fun); + fun_info = get_fun_itbl(ret_fun->fun); + p = scavenge_arg_block(fun_info, ret_fun->payload); goto follow_srt; - } + } default: - barf("scavenge_stack: weird activation record found on stack: %d", (int)(info->type)); + barf("scavenge_stack: weird activation record found on stack: %d", (int)(info->i.type)); } - } + } } /*----------------------------------------------------------------------------- @@ -2891,17 +4295,13 @@ scavenge_stack(StgPtr p, StgPtr stack_end) objects are (repeatedly) mutable, so most of the time evac_gen will be zero. --------------------------------------------------------------------------- */ -//@cindex scavenge_large static void scavenge_large(step *stp) { bdescr *bd; StgPtr p; - const StgInfoTable* info; - nat saved_evac_gen = evac_gen; /* used for temporarily changing evac_gen */ - evac_gen = 0; /* most objects are mutable */ bd = stp->new_large_objects; for (; bd != NULL; bd = stp->new_large_objects) { @@ -2914,69 +4314,21 @@ scavenge_large(step *stp) stp->new_large_objects = bd->link; dbl_link_onto(bd, &stp->scavenged_large_objects); - p = bd->start; - info = get_itbl((StgClosure *)p); - - switch (info->type) { - - /* only certain objects can be "large"... */ - - case ARR_WORDS: - /* nothing to follow */ - continue; - - case MUT_ARR_PTRS: - /* follow everything */ - { - StgPtr next; - - next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); - for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { - (StgClosure *)*p = evacuate((StgClosure *)*p); - } - continue; - } - - case MUT_ARR_PTRS_FROZEN: - /* follow everything */ - { - StgPtr start = p, next; + // update the block count in this step. 
+ stp->n_scavenged_large_blocks += bd->blocks; - evac_gen = saved_evac_gen; /* not really mutable */ - next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); - for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { - (StgClosure *)*p = evacuate((StgClosure *)*p); - } - evac_gen = 0; - if (failed_to_evac) { - recordMutable((StgMutClosure *)start); + p = bd->start; + if (scavenge_one(p)) { + if (stp->gen_no > 0) { + recordMutableGen((StgClosure *)p, stp->gen); } - continue; - } - - case TSO: - scavengeTSO((StgTSO *)p); - continue; - - case AP_UPD: - case PAP: - { - StgPAP* pap = (StgPAP *)p; - - evac_gen = saved_evac_gen; /* not really mutable */ - pap->fun = evacuate(pap->fun); - scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args); - evac_gen = 0; - continue; - } - - default: - barf("scavenge_large: unknown/strange object %d", (int)(info->type)); } } } -//@cindex zero_static_object_list +/* ----------------------------------------------------------------------------- + Initialising the static object & mutable lists + -------------------------------------------------------------------------- */ static void zero_static_object_list(StgClosure* first_static) @@ -2987,28 +4339,8 @@ zero_static_object_list(StgClosure* first_static) for (p = first_static; p != END_OF_STATIC_LIST; p = link) { info = get_itbl(p); - link = STATIC_LINK(info, p); - STATIC_LINK(info,p) = NULL; - } -} - -/* This function is only needed because we share the mutable link - * field with the static link field in an IND_STATIC, so we have to - * zero the mut_link field before doing a major GC, which needs the - * static link field. - * - * It doesn't do any harm to zero all the mutable link fields on the - * mutable list. - */ - -static void -zero_mutable_list( StgMutClosure *first ) -{ - StgMutClosure *next, *c; - - for (c = first; c != END_MUT_LIST; c = next) { - next = c->mut_link; - c->mut_link = NULL; + link = *STATIC_LINK(info, p); + *STATIC_LINK(info,p) = NULL; } } @@ -3021,26 +4353,30 @@ revertCAFs( void ) { StgIndStatic *c; - for (c = (StgIndStatic *)caf_list; c != NULL; + for (c = (StgIndStatic *)revertible_caf_list; c != NULL; c = (StgIndStatic *)c->static_link) { - c->header.info = c->saved_info; + SET_INFO(c, c->saved_info); c->saved_info = NULL; - /* could, but not necessary: c->static_link = NULL; */ + // could, but not necessary: c->static_link = NULL; } - caf_list = NULL; + revertible_caf_list = NULL; } void -scavengeCAFs( void ) +markCAFs( evac_fn evac ) { StgIndStatic *c; - evac_gen = 0; for (c = (StgIndStatic *)caf_list; c != NULL; c = (StgIndStatic *)c->static_link) { - c->indirectee = evacuate(c->indirectee); + evac(&c->indirectee); + } + for (c = (StgIndStatic *)revertible_caf_list; c != NULL; + c = (StgIndStatic *)c->static_link) + { + evac(&c->indirectee); } } @@ -3056,8 +4392,7 @@ scavengeCAFs( void ) time. 
-------------------------------------------------------------------------- */ -#ifdef DEBUG -//@cindex gcCAFs +#if 0 && defined(DEBUG) static void gcCAFs(void) @@ -3078,8 +4413,8 @@ gcCAFs(void) ASSERT(info->type == IND_STATIC); if (STATIC_LINK(info,p) == NULL) { - IF_DEBUG(gccafs, fprintf(stderr, "CAF gc'd at 0x%04x\n", (int)p)); - /* black hole it */ + IF_DEBUG(gccafs, debugBelch("CAF gc'd at 0x%04lx", (long)p)); + // black hole it SET_INFO(p,&stg_BLACKHOLE_info); p = STATIC_LINK2(info,p); *pp = p; @@ -3092,357 +4427,273 @@ gcCAFs(void) } - /* fprintf(stderr, "%d CAFs live\n", i); */ + // debugBelch("%d CAFs live", i); } #endif -//@node Lazy black holing, Stack squeezing, Sanity code for CAF garbage collection -//@subsection Lazy black holing /* ----------------------------------------------------------------------------- - Lazy black holing. + * Stack squeezing + * + * Code largely pinched from old RTS, then hacked to bits. We also do + * lazy black holing here. + * + * -------------------------------------------------------------------------- */ - Whenever a thread returns to the scheduler after possibly doing - some work, we have to run down the stack and black-hole all the - closures referred to by update frames. - -------------------------------------------------------------------------- */ -//@cindex threadLazyBlackHole +struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; }; static void -threadLazyBlackHole(StgTSO *tso) +stackSqueeze(StgTSO *tso, StgPtr bottom) { - StgUpdateFrame *update_frame; - StgBlockingQueue *bh; - StgPtr stack_end; + StgPtr frame; + rtsBool prev_was_update_frame; + StgClosure *updatee = NULL; + StgRetInfoTable *info; + StgWord current_gap_size; + struct stack_gap *gap; + + // Stage 1: + // Traverse the stack upwards, replacing adjacent update frames + // with a single update frame and a "stack gap". A stack gap + // contains two values: the size of the gap, and the distance + // to the next gap (or the stack top). + + frame = tso->sp; + + ASSERT(frame < bottom); + + prev_was_update_frame = rtsFalse; + current_gap_size = 0; + gap = (struct stack_gap *) (tso->sp - sizeofW(StgUpdateFrame)); - stack_end = &tso->stack[tso->stack_size]; - update_frame = tso->su; + while (frame < bottom) { + + info = get_ret_itbl((StgClosure *)frame); + switch (info->i.type) { - while (1) { - switch (get_itbl(update_frame)->type) { + case UPDATE_FRAME: + { + StgUpdateFrame *upd = (StgUpdateFrame *)frame; + + if (prev_was_update_frame) { + + TICK_UPD_SQUEEZED(); + /* wasn't there something about update squeezing and ticky to be + * sorted out? oh yes: we aren't counting each enter properly + * in this case. See the log somewhere. KSW 1999-04-21 + * + * Check two things: that the two update frames don't point to + * the same object, and that the updatee_bypass isn't already an + * indirection. Both of these cases only happen when we're in a + * block hole-style loop (and there are multiple update frames + * on the stack pointing to the same closure), but they can both + * screw us up if we don't check. + */ + if (upd->updatee != updatee && !closure_IND(upd->updatee)) { + UPD_IND_NOLOCK(upd->updatee, updatee); + } - case CATCH_FRAME: - update_frame = ((StgCatchFrame *)update_frame)->link; - break; + // now mark this update frame as a stack gap. The gap + // marker resides in the bottom-most update frame of + // the series of adjacent frames, and covers all the + // frames in this series. 
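Each run of adjacent update frames is therefore collapsed into a single stack_gap record: gap_size covers the whole run, and next_gap links it to the previously recorded gap (the initial marker sits just below the original stack pointer). A minimal sketch of walking such a chain, assuming for simplicity that it is NULL-terminated rather than bounded by tso->sp:

struct sk_stack_gap {
    unsigned long        gap_size;  /* stack words reclaimed by this gap */
    struct sk_stack_gap *next_gap;  /* previously recorded gap, or NULL  */
};

/* Add up how many stack words squeezing the whole chain will reclaim. */
static unsigned long sk_total_squeezed(const struct sk_stack_gap *g)
{
    unsigned long total = 0;
    for (; g != NULL; g = g->next_gap) {
        total += g->gap_size;
    }
    return total;
}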
+ current_gap_size += sizeofW(StgUpdateFrame); + ((struct stack_gap *)frame)->gap_size = current_gap_size; + ((struct stack_gap *)frame)->next_gap = gap; + + frame += sizeofW(StgUpdateFrame); + continue; + } + + // single update frame, or the topmost update frame in a series + else { + prev_was_update_frame = rtsTrue; + updatee = upd->updatee; + frame += sizeofW(StgUpdateFrame); + continue; + } + } + + default: + prev_was_update_frame = rtsFalse; + + // we're not in a gap... check whether this is the end of a gap + // (an update frame can't be the end of a gap). + if (current_gap_size != 0) { + gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame)); + } + current_gap_size = 0; - case UPDATE_FRAME: - bh = (StgBlockingQueue *)update_frame->updatee; + frame += stack_frame_sizeW((StgClosure *)frame); + continue; + } + } - /* if the thunk is already blackholed, it means we've also - * already blackholed the rest of the thunks on this stack, - * so we can stop early. - * - * The blackhole made for a CAF is a CAF_BLACKHOLE, so they - * don't interfere with this optimisation. - */ - if (bh->header.info == &stg_BLACKHOLE_info) { - return; - } + if (current_gap_size != 0) { + gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame)); + } - if (bh->header.info != &stg_BLACKHOLE_BQ_info && - bh->header.info != &stg_CAF_BLACKHOLE_info) { -#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) - fprintf(stderr,"Unexpected lazy BHing required at 0x%04x\n",(int)bh); -#endif - SET_INFO(bh,&stg_BLACKHOLE_info); - } + // Now we have a stack with gaps in it, and we have to walk down + // shoving the stack up to fill in the gaps. A diagram might + // help: + // + // +| ********* | + // | ********* | <- sp + // | | + // | | <- gap_start + // | ......... | | + // | stack_gap | <- gap | chunk_size + // | ......... | | + // | ......... | <- gap_end v + // | ********* | + // | ********* | + // | ********* | + // -| ********* | + // + // 'sp' points the the current top-of-stack + // 'gap' points to the stack_gap structure inside the gap + // ***** indicates real stack data + // ..... indicates gap + // indicates unused + // + { + void *sp; + void *gap_start, *next_gap_start, *gap_end; + nat chunk_size; - update_frame = update_frame->link; - break; + next_gap_start = (void *)((unsigned char*)gap + sizeof(StgUpdateFrame)); + sp = next_gap_start; - case SEQ_FRAME: - update_frame = ((StgSeqFrame *)update_frame)->link; - break; + while ((StgPtr)gap > tso->sp) { - case STOP_FRAME: - return; - default: - barf("threadPaused"); - } - } -} + // we're working in *bytes* now... + gap_start = next_gap_start; + gap_end = (void*) ((unsigned char*)gap_start - gap->gap_size * sizeof(W_)); + + gap = gap->next_gap; + next_gap_start = (void *)((unsigned char*)gap + sizeof(StgUpdateFrame)); + + chunk_size = (unsigned char*)gap_end - (unsigned char*)next_gap_start; + sp -= chunk_size; + memmove(sp, next_gap_start, chunk_size); + } -//@node Stack squeezing, Pausing a thread, Lazy black holing -//@subsection Stack squeezing + tso->sp = (StgPtr)sp; + } +} /* ----------------------------------------------------------------------------- - * Stack squeezing - * - * Code largely pinched from old RTS, then hacked to bits. We also do - * lazy black holing here. - * + * Pausing a thread + * + * We have to prepare for GC - this means doing lazy black holing + * here. We also take the opportunity to do stack squeezing if it's + * turned on. 
* -------------------------------------------------------------------------- */ -//@cindex threadSqueezeStack - -static void -threadSqueezeStack(StgTSO *tso) +void +threadPaused(Capability *cap, StgTSO *tso) { - lnat displacement = 0; - StgUpdateFrame *frame; - StgUpdateFrame *next_frame; /* Temporally next */ - StgUpdateFrame *prev_frame; /* Temporally previous */ - StgPtr bottom; - rtsBool prev_was_update_frame; -#if DEBUG - StgUpdateFrame *top_frame; - nat upd_frames=0, stop_frames=0, catch_frames=0, seq_frames=0, - bhs=0, squeezes=0; - void printObj( StgClosure *obj ); // from Printer.c - - top_frame = tso->su; -#endif - - bottom = &(tso->stack[tso->stack_size]); - frame = tso->su; + StgClosure *frame; + StgRetInfoTable *info; + StgClosure *bh; + StgPtr stack_end; + nat words_to_squeeze = 0; + nat weight = 0; + nat weight_pending = 0; + rtsBool prev_was_update_frame; + + stack_end = &tso->stack[tso->stack_size]; + + frame = (StgClosure *)tso->sp; - /* There must be at least one frame, namely the STOP_FRAME. - */ - ASSERT((P_)frame < bottom); + while (1) { + // If we've already marked this frame, then stop here. + if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) { + goto end; + } - /* Walk down the stack, reversing the links between frames so that - * we can walk back up as we squeeze from the bottom. Note that - * next_frame and prev_frame refer to next and previous as they were - * added to the stack, rather than the way we see them in this - * walk. (It makes the next loop less confusing.) - * - * Stop if we find an update frame pointing to a black hole - * (see comment in threadLazyBlackHole()). - */ - - next_frame = NULL; - /* bottom - sizeof(StgStopFrame) is the STOP_FRAME */ - while ((P_)frame < bottom - sizeofW(StgStopFrame)) { - prev_frame = frame->link; - frame->link = next_frame; - next_frame = frame; - frame = prev_frame; -#if DEBUG - IF_DEBUG(sanity, - if (!(frame>=top_frame && frame<=(StgUpdateFrame *)bottom)) { - printObj((StgClosure *)prev_frame); - barf("threadSqueezeStack: current frame is rubbish %p; previous was %p\n", - frame, prev_frame); - }) - switch (get_itbl(frame)->type) { - case UPDATE_FRAME: - upd_frames++; - if (frame->updatee->header.info == &stg_BLACKHOLE_info) - bhs++; - break; - case STOP_FRAME: - stop_frames++; - break; - case CATCH_FRAME: - catch_frames++; - break; - case SEQ_FRAME: - seq_frames++; - break; - default: - barf("Found non-frame during stack squeezing at %p (prev frame was %p)\n", - frame, prev_frame); - printObj((StgClosure *)prev_frame); - } -#endif - if (get_itbl(frame)->type == UPDATE_FRAME - && frame->updatee->header.info == &stg_BLACKHOLE_info) { - break; - } - } + info = get_ret_itbl(frame); + + switch (info->i.type) { + + case UPDATE_FRAME: - /* Now, we're at the bottom. Frame points to the lowest update - * frame on the stack, and its link actually points to the frame - * above. We have to walk back up the stack, squeezing out empty - * update frames and turning the pointers back around on the way - * back up. - * - * The bottom-most frame (the STOP_FRAME) has not been altered, and - * we never want to eliminate it anyway. Just walk one step up - * before starting to squeeze. When you get to the topmost frame, - * remember that there are still some words above it that might have - * to be moved. 
- */ - - prev_frame = frame; - frame = next_frame; + SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info); - prev_was_update_frame = (get_itbl(prev_frame)->type == UPDATE_FRAME); + bh = ((StgUpdateFrame *)frame)->updatee; - /* - * Loop through all of the frames (everything except the very - * bottom). Things are complicated by the fact that we have - * CATCH_FRAMEs and SEQ_FRAMEs interspersed with the update frames. - * We can only squeeze when there are two consecutive UPDATE_FRAMEs. - */ - while (frame != NULL) { - StgPtr sp; - StgPtr frame_bottom = (P_)frame + sizeofW(StgUpdateFrame); - rtsBool is_update_frame; - - next_frame = frame->link; - is_update_frame = (get_itbl(frame)->type == UPDATE_FRAME); + if (closure_IND(bh) || bh->header.info == &stg_BLACKHOLE_info) { + IF_DEBUG(squeeze, debugBelch("suspending duplicate work: %ld words of stack\n", (StgPtr)frame - tso->sp)); - /* Check to see if - * 1. both the previous and current frame are update frames - * 2. the current frame is empty - */ - if (prev_was_update_frame && is_update_frame && - (P_)prev_frame == frame_bottom + displacement) { - - /* Now squeeze out the current frame */ - StgClosure *updatee_keep = prev_frame->updatee; - StgClosure *updatee_bypass = frame->updatee; - -#if DEBUG - IF_DEBUG(gc, fprintf(stderr, "@@ squeezing frame at %p\n", frame)); - squeezes++; -#endif + // If this closure is already an indirection, then + // suspend the computation up to this point: + suspendComputation(cap,tso,(StgPtr)frame); - /* Deal with blocking queues. If both updatees have blocked - * threads, then we should merge the queues into the update - * frame that we're keeping. - * - * Alternatively, we could just wake them up: they'll just go - * straight to sleep on the proper blackhole! This is less code - * and probably less bug prone, although it's probably much - * slower --SDM - */ -#if 0 /* do it properly... */ -# if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) -# error Unimplemented lazy BH warning. (KSW 1999-01) -# endif - if (GET_INFO(updatee_bypass) == stg_BLACKHOLE_BQ_info - || GET_INFO(updatee_bypass) == stg_CAF_BLACKHOLE_info - ) { - /* Sigh. It has one. Don't lose those threads! */ - if (GET_INFO(updatee_keep) == stg_BLACKHOLE_BQ_info) { - /* Urgh. Two queues. Merge them. */ - P_ keep_tso = ((StgBlockingQueue *)updatee_keep)->blocking_queue; - - while (keep_tso->link != END_TSO_QUEUE) { - keep_tso = keep_tso->link; - } - keep_tso->link = ((StgBlockingQueue *)updatee_bypass)->blocking_queue; + // Now drop the update frame, and arrange to return + // the value to the frame underneath: + tso->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2; + tso->sp[1] = (StgWord)bh; + tso->sp[0] = (W_)&stg_enter_info; - } else { - /* For simplicity, just swap the BQ for the BH */ - P_ temp = updatee_keep; - - updatee_keep = updatee_bypass; - updatee_bypass = temp; - - /* Record the swap in the kept frame (below) */ - prev_frame->updatee = updatee_keep; - } - } -#endif + // And continue with threadPaused; there might be + // yet more computation to suspend. + threadPaused(cap,tso); + return; + } - TICK_UPD_SQUEEZED(); - /* wasn't there something about update squeezing and ticky to be - * sorted out? oh yes: we aren't counting each enter properly - * in this case. See the log somewhere. 
KSW 1999-04-21 - */ - if (updatee_bypass != updatee_keep) { - /* this wakes the threads up */ - UPD_IND_NOLOCK(updatee_bypass, updatee_keep); - } - - sp = (P_)frame - 1; /* sp = stuff to slide */ - displacement += sizeofW(StgUpdateFrame); - - } else { - /* No squeeze for this frame */ - sp = frame_bottom - 1; /* Keep the current frame */ - - /* Do lazy black-holing. - */ - if (is_update_frame) { - StgBlockingQueue *bh = (StgBlockingQueue *)frame->updatee; - if (bh->header.info != &stg_BLACKHOLE_info && - bh->header.info != &stg_BLACKHOLE_BQ_info && - bh->header.info != &stg_CAF_BLACKHOLE_info) { + if (bh->header.info != &stg_CAF_BLACKHOLE_info) { #if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) - fprintf(stderr,"Unexpected lazy BHing required at 0x%04x\n",(int)bh); + debugBelch("Unexpected lazy BHing required at 0x%04lx\n",(long)bh); #endif -#ifdef DEBUG - /* zero out the slop so that the sanity checker can tell - * where the next closure is. - */ - { - StgInfoTable *info = get_itbl(bh); - nat np = info->layout.payload.ptrs, nw = info->layout.payload.nptrs, i; - /* don't zero out slop for a THUNK_SELECTOR, because it's layout - * info is used for a different purpose, and it's exactly the - * same size as a BLACKHOLE in any case. - */ - if (info->type != THUNK_SELECTOR) { - for (i = np; i < np + nw; i++) { - ((StgClosure *)bh)->payload[i] = 0; - } - } - } + // zero out the slop so that the sanity checker can tell + // where the next closure is. + DEBUG_FILL_SLOP(bh); +#ifdef PROFILING + // @LDV profiling + // We pretend that bh is now dead. + LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); #endif - SET_INFO(bh,&stg_BLACKHOLE_info); - } - } + SET_INFO(bh,&stg_BLACKHOLE_info); - /* Fix the link in the current frame (should point to the frame below) */ - frame->link = prev_frame; - prev_was_update_frame = is_update_frame; + // We pretend that bh has just been created. + LDV_RECORD_CREATE(bh); + } + + frame = (StgClosure *) ((StgUpdateFrame *)frame + 1); + if (prev_was_update_frame) { + words_to_squeeze += sizeofW(StgUpdateFrame); + weight += weight_pending; + weight_pending = 0; + } + prev_was_update_frame = rtsTrue; + break; + + case STOP_FRAME: + goto end; + + // normal stack frames; do nothing except advance the pointer + default: + { + nat frame_size = stack_frame_sizeW(frame); + weight_pending += frame_size; + frame = (StgClosure *)((StgPtr)frame + frame_size); + prev_was_update_frame = rtsFalse; + } + } } - - /* Now slide all words from sp up to the next frame */ - - if (displacement > 0) { - P_ next_frame_bottom; - if (next_frame != NULL) - next_frame_bottom = (P_)next_frame + sizeofW(StgUpdateFrame); - else - next_frame_bottom = tso->sp - 1; - -#if DEBUG - IF_DEBUG(gc, - fprintf(stderr, "sliding [%p, %p] by %ld\n", sp, next_frame_bottom, - displacement)) -#endif - - while (sp >= next_frame_bottom) { - sp[displacement] = *sp; - sp -= 1; - } +end: + IF_DEBUG(squeeze, + debugBelch("words_to_squeeze: %d, weight: %d, squeeze: %s\n", + words_to_squeeze, weight, + weight < words_to_squeeze ? "YES" : "NO")); + + // Should we squeeze or not? Arbitrary heuristic: we squeeze if + // the number of words we have to shift down is less than the + // number of stack words we squeeze away by doing so. 
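A tiny worked example of that heuristic, with invented numbers: weight approximates the live frame words that would have to slide down, words_to_squeeze the update-frame words the squeeze reclaims.

#include <stdio.h>

static int sk_worth_squeezing(unsigned weight, unsigned words_to_squeeze)
{
    return weight < words_to_squeeze;
}

int main(void)
{
    /* 4 words of live frames sitting on 6 words of adjacent update
     * frames: the copy is cheap and reclaims more than it moves.      */
    printf("%s\n", sk_worth_squeezing(4, 6) ? "squeeze" : "leave alone");

    /* 40 words of live frames above only 6 reclaimable words: the copy
     * costs more than it saves, so the stack is left as it is.        */
    printf("%s\n", sk_worth_squeezing(40, 6) ? "squeeze" : "leave alone");
    return 0;
}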
+ if (RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue && + weight < words_to_squeeze) { + stackSqueeze(tso, (StgPtr)frame); } - (P_)prev_frame = (P_)frame + displacement; - frame = next_frame; - } - - tso->sp += displacement; - tso->su = prev_frame; -#if DEBUG - IF_DEBUG(gc, - fprintf(stderr, "@@ threadSqueezeStack: squeezed %d update-frames; found %d BHs; found %d update-, %d stop-, %d catch, %d seq-frames\n", - squeezes, bhs, upd_frames, stop_frames, catch_frames, seq_frames)) -#endif -} - -//@node Pausing a thread, Index, Stack squeezing -//@subsection Pausing a thread - -/* ----------------------------------------------------------------------------- - * Pausing a thread - * - * We have to prepare for GC - this means doing lazy black holing - * here. We also take the opportunity to do stack squeezing if it's - * turned on. - * -------------------------------------------------------------------------- */ -//@cindex threadPaused -void -threadPaused(StgTSO *tso) -{ - if ( RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue ) - threadSqueezeStack(tso); /* does black holing too */ - else - threadLazyBlackHole(tso); } /* ----------------------------------------------------------------------------- @@ -3450,90 +4701,19 @@ threadPaused(StgTSO *tso) * -------------------------------------------------------------------------- */ #if DEBUG -//@cindex printMutOnceList -void -printMutOnceList(generation *gen) -{ - StgMutClosure *p, *next; - - p = gen->mut_once_list; - next = p->mut_link; - - fprintf(stderr, "@@ Mut once list %p: ", gen->mut_once_list); - for (; p != END_MUT_LIST; p = next, next = p->mut_link) { - fprintf(stderr, "%p (%s), ", - p, info_type((StgClosure *)p)); - } - fputc('\n', stderr); -} - -//@cindex printMutableList void printMutableList(generation *gen) { - StgMutClosure *p, *next; + bdescr *bd; + StgPtr p; - p = gen->mut_list; - next = p->mut_link; - - fprintf(stderr, "@@ Mutable list %p: ", gen->mut_list); - for (; p != END_MUT_LIST; p = next, next = p->mut_link) { - fprintf(stderr, "%p (%s), ", - p, info_type((StgClosure *)p)); - } - fputc('\n', stderr); -} + debugBelch("@@ Mutable list %p: ", gen->mut_list); -//@cindex maybeLarge -static inline rtsBool -maybeLarge(StgClosure *closure) -{ - StgInfoTable *info = get_itbl(closure); - - /* closure types that may be found on the new_large_objects list; - see scavenge_large */ - return (info->type == MUT_ARR_PTRS || - info->type == MUT_ARR_PTRS_FROZEN || - info->type == TSO || - info->type == ARR_WORDS); + for (bd = gen->mut_list; bd != NULL; bd = bd->link) { + for (p = bd->start; p < bd->free; p++) { + debugBelch("%p (%s), ", (void *)*p, info_type((StgClosure *)*p)); + } + } + debugBelch("\n"); } - - #endif /* DEBUG */ - -//@node Index, , Pausing a thread -//@subsection Index - -//@index -//* GarbageCollect:: @cindex\s-+GarbageCollect -//* MarkRoot:: @cindex\s-+MarkRoot -//* RevertCAFs:: @cindex\s-+RevertCAFs -//* addBlock:: @cindex\s-+addBlock -//* cleanup_weak_ptr_list:: @cindex\s-+cleanup_weak_ptr_list -//* copy:: @cindex\s-+copy -//* copyPart:: @cindex\s-+copyPart -//* evacuate:: @cindex\s-+evacuate -//* evacuate_large:: @cindex\s-+evacuate_large -//* gcCAFs:: @cindex\s-+gcCAFs -//* isAlive:: @cindex\s-+isAlive -//* maybeLarge:: @cindex\s-+maybeLarge -//* mkMutCons:: @cindex\s-+mkMutCons -//* printMutOnceList:: @cindex\s-+printMutOnceList -//* printMutableList:: @cindex\s-+printMutableList -//* relocate_TSO:: @cindex\s-+relocate_TSO -//* scavenge:: @cindex\s-+scavenge -//* scavenge_large:: @cindex\s-+scavenge_large -//* 
scavenge_mut_once_list:: @cindex\s-+scavenge_mut_once_list -//* scavenge_mutable_list:: @cindex\s-+scavenge_mutable_list -//* scavenge_one:: @cindex\s-+scavenge_one -//* scavenge_srt:: @cindex\s-+scavenge_srt -//* scavenge_stack:: @cindex\s-+scavenge_stack -//* scavenge_static:: @cindex\s-+scavenge_static -//* threadLazyBlackHole:: @cindex\s-+threadLazyBlackHole -//* threadPaused:: @cindex\s-+threadPaused -//* threadSqueezeStack:: @cindex\s-+threadSqueezeStack -//* traverse_weak_ptr_list:: @cindex\s-+traverse_weak_ptr_list -//* upd_evacuee:: @cindex\s-+upd_evacuee -//* zero_mutable_list:: @cindex\s-+zero_mutable_list -//* zero_static_object_list:: @cindex\s-+zero_static_object_list -//@end index
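
The new code in this hunk collects two totals while walking the stack: words_to_squeeze (the size of the update frames that squeezing adjacent frames would eliminate) and weight (the other stack words that would have to be slid down to close the gap), and only calls stackSqueeze when the trade is favourable. Below is a minimal standalone sketch of just that final decision, using made-up numbers rather than a real TSO stack; worth_squeezing is a hypothetical name for illustration, not an RTS function. In the patch the same comparison is additionally guarded by RtsFlags.GcFlags.squeezeUpdFrames.

#include <stdio.h>
#include <stdbool.h>

typedef unsigned long W_;   /* stands in for StgWord */

/* Squeeze only if the words that would have to be slid down (weight)
 * are fewer than the update-frame words that squeezing would remove. */
static bool worth_squeezing(W_ words_to_squeeze, W_ weight)
{
    return weight < words_to_squeeze;
}

int main(void)
{
    /* a couple of update frames buried under 40 words of other frames:
     * sliding everything down costs more than it saves. */
    printf("%s\n", worth_squeezing(6, 40) ? "YES" : "NO");    /* NO  */

    /* a long chain of update frames with little else on top: squeeze. */
    printf("%s\n", worth_squeezing(120, 16) ? "YES" : "NO");  /* YES */
    return 0;
}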
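
Both the old and the new code black-hole the updatee of any update frame left on the stack (SET_INFO(bh,&stg_BLACKHOLE_info)), and in a DEBUG build first zero the slop left behind when a larger thunk is overwritten by a BLACKHOLE, so that the heap sanity checker can still find the next closure; the old code did this inline, the new code via DEBUG_FILL_SLOP. The following is a rough sketch of that idea based on the layout fields visible in the old code; it is not the real macro, and fill_slop is an invented name.

#include <stdio.h>

/* Zero the non-pointer payload words of a closure that is about to be
 * overwritten by a (smaller) BLACKHOLE, so that a heap walker never
 * misreads the leftover words as a closure header.  'ptrs' and 'nptrs'
 * mirror info->layout.payload.{ptrs,nptrs} in the old code. */
static void fill_slop(unsigned long *payload, unsigned ptrs, unsigned nptrs)
{
    unsigned i;
    for (i = ptrs; i < ptrs + nptrs; i++) {
        payload[i] = 0;
    }
}

int main(void)
{
    unsigned long payload[4] = { 0xAAAA, 0xBBBB, 0xCCCC, 0xDDDD };
    fill_slop(payload, 1, 3);   /* keep 1 pointer word, zero the 3 others */
    printf("%lx %lx %lx %lx\n", payload[0], payload[1], payload[2], payload[3]);
    return 0;
}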
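
The rewritten printMutableList also reflects the new representation of a generation's mutable list: instead of closures chained through mut_link and terminated by END_MUT_LIST, gen->mut_list is now a chain of block descriptors, each holding a dense run of pointers between bd->start and bd->free. Below is a simplified, self-contained sketch of that shape; Block, push_ptr and the other names are invented stand-ins for illustration, not the RTS's bdescr API.

#include <stdio.h>
#include <stdlib.h>

#define BLOCK_SLOTS 1024

typedef struct Block_ {
    void          **start;   /* first slot in this block */
    void          **free;    /* next unused slot         */
    struct Block_  *link;    /* next block in the list   */
    void           *slots[BLOCK_SLOTS];
} Block;

static Block *new_block(Block *rest)
{
    Block *bd = malloc(sizeof(Block));
    bd->start = bd->free = bd->slots;
    bd->link  = rest;
    return bd;
}

/* Record a pointer, starting a fresh block when the current one is full. */
static Block *push_ptr(Block *bd, void *p)
{
    if (bd == NULL || bd->free == bd->start + BLOCK_SLOTS) {
        bd = new_block(bd);
    }
    *bd->free++ = p;
    return bd;
}

/* Same traversal shape as the new printMutableList. */
static void print_mut_list(Block *mut_list)
{
    Block *bd;
    void **p;
    for (bd = mut_list; bd != NULL; bd = bd->link) {
        for (p = bd->start; p < bd->free; p++) {
            printf("%p, ", *p);
        }
    }
    printf("\n");
}

int main(void)
{
    int a, b, c;
    Block *mut_list = NULL;
    mut_list = push_ptr(mut_list, &a);
    mut_list = push_ptr(mut_list, &b);
    mut_list = push_ptr(mut_list, &c);
    print_mut_list(mut_list);
    return 0;
}

One consequence visible in the debug printer above is that traversal is per block and per word: the objects on the mutable list no longer need to carry a link field of their own.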