From 214b3663d5d7598c13643f9221e43d5a7735b47f Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Thu, 3 Dec 2009 15:07:28 +0000 Subject: [PATCH] GC refactoring, remove "steps" The GC had a two-level structure, G generations each of T steps. Steps are for aging within a generation, mostly to avoid premature promotion. Measurements show that more than 2 steps is almost never worthwhile, and 1 step is usually worse than 2. In theory fractional steps are possible, so the ideal number of steps is somewhere between 1 and 3. GHC's default has always been 2. We can implement 2 steps quite straightforwardly by having each block point to the generation to which objects in that block should be promoted, so blocks in the nursery point to generation 0, and blocks in gen 0 point to gen 1, and so on. This commit removes the explicit step structures, merging generations with steps, thus simplifying a lot of code. Performance is unaffected. The tunable number of steps is now gone, although it may be replaced in the future by a way to tune the aging in generation 0. --- includes/Cmm.h | 2 +- includes/mkDerivedConstants.c | 3 +- includes/rts/storage/Block.h | 4 +- includes/rts/storage/GC.h | 67 ++--- includes/stg/Regs.h | 8 +- rts/Arena.c | 2 +- rts/Printer.c | 14 +- rts/ProfHeap.c | 18 +- rts/Schedule.c | 52 ++-- rts/Stats.c | 46 ++- rts/Threads.c | 10 +- rts/sm/BlockAlloc.c | 4 +- rts/sm/Compact.c | 46 ++- rts/sm/Evac.c | 163 +++++------ rts/sm/GC.c | 648 +++++++++++++++++++---------------------- rts/sm/GCThread.h | 45 +-- rts/sm/GCUtils.c | 32 +- rts/sm/GCUtils.h | 8 +- rts/sm/MarkWeak.c | 53 ++-- rts/sm/Sanity.c | 68 ++--- rts/sm/Sanity.h | 2 +- rts/sm/Scav.c | 163 +++++------ rts/sm/Storage.c | 286 +++++++----------- rts/sm/Storage.h | 4 +- rts/sm/Sweep.c | 18 +- rts/sm/Sweep.h | 2 +- 26 files changed, 790 insertions(+), 978 deletions(-) diff --git a/includes/Cmm.h b/includes/Cmm.h index 59081e2..6d39b45 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -385,7 +385,7 @@ // allocate() - this includes many of the primops. #define MAYBE_GC(liveness,reentry) \ if (bdescr_link(CurrentNursery) == NULL || \ - step_n_large_blocks(StgRegTable_rNursery(BaseReg)) >= CInt[alloc_blocks_lim]) { \ + generation_n_large_blocks(W_[g0]) >= CInt[alloc_blocks_lim]) { \ R9 = liveness; \ R10 = reentry; \ HpAlloc = 0; \ diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c index ddd2e65..23a4ecd 100644 --- a/includes/mkDerivedConstants.c +++ b/includes/mkDerivedConstants.c @@ -249,8 +249,7 @@ main(int argc, char *argv[]) struct_size(generation); struct_field(generation, mut_list); - - struct_field(step, n_large_blocks); + struct_field(generation, n_large_blocks); struct_size(CostCentreStack); struct_field(CostCentreStack, ccsID); diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h index e99a03e..3114bea 100644 --- a/includes/rts/storage/Block.h +++ b/includes/rts/storage/Block.h @@ -57,8 +57,8 @@ typedef struct bdescr_ { StgPtr scan; /* scan pointer for copying GC */ } u; - struct step_ *step; /* step */ - struct step_ *dest; /* destination step */ + struct generation_ *gen; /* generation */ + struct generation_ *dest; /* destination gen */ StgWord32 blocks; /* no. 
of blocks (if grp head, 0 otherwise) */ diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index 1cd57c9..e7b2e84 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -53,24 +53,32 @@ * * ------------------------------------------------------------------------- */ -typedef struct step_ { - unsigned int no; // step number in this generation - unsigned int abs_no; // absolute step number +typedef struct nursery_ { + bdescr * blocks; + unsigned int n_blocks; +} nursery; - struct generation_ * gen; // generation this step belongs to - unsigned int gen_no; // generation number (cached) - - bdescr * blocks; // blocks in this step - unsigned int n_blocks; // number of blocks - unsigned int n_words; // number of words +typedef struct generation_ { + unsigned int no; // generation number - struct step_ * to; // destination step for live objects + bdescr * blocks; // blocks in this gen + unsigned int n_blocks; // number of blocks + unsigned int n_words; // number of words - bdescr * large_objects; // large objects (doubly linked) - unsigned int n_large_blocks; // no. of blocks used by large objs + bdescr * large_objects; // large objects (doubly linked) + unsigned int n_large_blocks; // no. of blocks used by large objs - StgTSO * threads; // threads in this step + unsigned int max_blocks; // max blocks + bdescr *mut_list; // mut objects in this gen (not G0) + + StgTSO * threads; // threads in this gen // linked via global_link + struct generation_ *to; // destination gen for live objects + + // stats information + unsigned int collections; + unsigned int par_collections; + unsigned int failed_promotions; // ------------------------------------ // Fields below are used during GC only @@ -85,13 +93,15 @@ typedef struct step_ { int mark; // mark (not copy)? (old gen only) int compact; // compact (not sweep)? (old gen only) - // During GC, if we are collecting this step, blocks and n_blocks + // During GC, if we are collecting this gen, blocks and n_blocks // are copied into the following two fields. After GC, these blocks // are freed. bdescr * old_blocks; // bdescr of first from-space block unsigned int n_old_blocks; // number of blocks in from-space unsigned int live_estimate; // for sweeping: estimate of live data + bdescr * saved_mut_list; + bdescr * part_blocks; // partially-full scanned blocks unsigned int n_part_blocks; // count of above @@ -101,32 +111,11 @@ typedef struct step_ { bdescr * bitmap; // bitmap for compacting collection StgTSO * old_threads; - -} step; - - -typedef struct generation_ { - unsigned int no; // generation number - step * steps; // steps - unsigned int n_steps; // number of steps - unsigned int max_blocks; // max blocks in step 0 - bdescr *mut_list; // mut objects in this gen (not G0) - - // stats information - unsigned int collections; - unsigned int par_collections; - unsigned int failed_promotions; - - // temporary use during GC: - bdescr *saved_mut_list; } generation; extern generation * generations; - extern generation * g0; extern generation * oldest_gen; -extern step * all_steps; -extern nat total_steps; /* ----------------------------------------------------------------------------- Generic allocation @@ -194,11 +183,11 @@ void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p); /* (needed when dynamic libraries are used). 
*/ extern rtsBool keepCAFs; -INLINE_HEADER void initBdescr(bdescr *bd, step *step) +INLINE_HEADER void initBdescr(bdescr *bd, generation *gen, generation *dest) { - bd->step = step; - bd->gen_no = step->gen_no; - bd->dest = step->to; + bd->gen = gen; + bd->gen_no = gen->no; + bd->dest = dest; } #endif /* RTS_STORAGE_GC_H */ diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h index d620bf1..e50b431 100644 --- a/includes/stg/Regs.h +++ b/includes/stg/Regs.h @@ -80,10 +80,10 @@ typedef struct StgRegTable_ { StgPtr rSpLim; StgPtr rHp; StgPtr rHpLim; - struct StgTSO_ *rCurrentTSO; - struct step_ *rNursery; - struct bdescr_ *rCurrentNursery; /* Hp/HpLim point into this block */ - struct bdescr_ *rCurrentAlloc; /* for allocation using allocate() */ + struct StgTSO_ * rCurrentTSO; + struct nursery_ * rNursery; + struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */ + struct bdescr_ * rCurrentAlloc; /* for allocation using allocate() */ StgWord rHpAlloc; /* number of *bytes* being allocated in heap */ StgWord rRet; // holds the return code of the thread } StgRegTable; diff --git a/rts/Arena.c b/rts/Arena.c index e636de4..6d7a65b 100644 --- a/rts/Arena.c +++ b/rts/Arena.c @@ -85,7 +85,7 @@ arenaAlloc( Arena *arena, size_t size ) arena_blocks += req_blocks; bd->gen_no = 0; - bd->step = NULL; + bd->gen = NULL; bd->dest = NULL; bd->flags = 0; bd->free = bd->start; diff --git a/rts/Printer.c b/rts/Printer.c index 1b6e57e..1b8a6dd 100644 --- a/rts/Printer.c +++ b/rts/Printer.c @@ -943,7 +943,7 @@ findPtrBlocks (StgPtr p, bdescr *bd, StgPtr arr[], int arr_size, int i) void findPtr(P_ p, int follow) { - nat s, g; + nat g; bdescr *bd; #if defined(__GNUC__) const int arr_size = 1024; @@ -955,13 +955,11 @@ findPtr(P_ p, int follow) searched = 0; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - bd = generations[g].steps[s].blocks; - i = findPtrBlocks(p,bd,arr,arr_size,i); - bd = generations[g].steps[s].large_objects; - i = findPtrBlocks(p,bd,arr,arr_size,i); - if (i >= arr_size) return; - } + bd = generations[g].blocks; + i = findPtrBlocks(p,bd,arr,arr_size,i); + bd = generations[g].large_objects; + i = findPtrBlocks(p,bd,arr,arr_size,i); + if (i >= arr_size) return; } if (follow && i == 1) { debugBelch("-->\n"); diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c index b9fc7b3..15337d4 100644 --- a/rts/ProfHeap.c +++ b/rts/ProfHeap.c @@ -1067,7 +1067,7 @@ heapCensusChain( Census *census, bdescr *bd ) void heapCensus( void ) { - nat g, s; + nat g; Census *census; census = &censuses[era]; @@ -1085,17 +1085,11 @@ heapCensus( void ) #endif // Traverse the heap, collecting the census info - if (RtsFlags.GcFlags.generations == 1) { - heapCensusChain( census, g0->steps[0].blocks ); - } else { - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - heapCensusChain( census, generations[g].steps[s].blocks ); - // Are we interested in large objects? might be - // confusing to include the stack in a heap profile. - heapCensusChain( census, generations[g].steps[s].large_objects ); - } - } + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + heapCensusChain( census, generations[g].blocks ); + // Are we interested in large objects? might be + // confusing to include the stack in a heap profile. 
+ heapCensusChain( census, generations[g].large_objects ); } // dump out the census info diff --git a/rts/Schedule.c b/rts/Schedule.c index be3b7cb..abd7bc4 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -1118,7 +1118,7 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t ) { bdescr *x; for (x = bd; x < bd + blocks; x++) { - initBdescr(x,cap->r.rNursery); + initBdescr(x,g0,g0); x->free = x->start; x->flags = 0; } @@ -1378,7 +1378,7 @@ scheduleDoGC (Capability *cap, Task *task USED_IF_THREADS, rtsBool force_major) if (sched_state < SCHED_INTERRUPTING && RtsFlags.ParFlags.parGcEnabled && N >= RtsFlags.ParFlags.parGcGen - && ! oldest_gen->steps[0].mark) + && ! oldest_gen->mark) { gc_type = PENDING_GC_PAR; } else { @@ -1580,7 +1580,7 @@ forkProcess(HsStablePtr *entry pid_t pid; StgTSO* t,*next; Capability *cap; - nat s; + nat g; #if defined(THREADED_RTS) if (RtsFlags.ParFlags.nNodes > 1) { @@ -1628,8 +1628,8 @@ forkProcess(HsStablePtr *entry // all Tasks, because they correspond to OS threads that are // now gone. - for (s = 0; s < total_steps; s++) { - for (t = all_steps[s].threads; t != END_TSO_QUEUE; t = next) { + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) { if (t->what_next == ThreadRelocated) { next = t->_link; } else { @@ -1655,8 +1655,8 @@ forkProcess(HsStablePtr *entry // Empty the threads lists. Otherwise, the garbage // collector may attempt to resurrect some of these threads. - for (s = 0; s < total_steps; s++) { - all_steps[s].threads = END_TSO_QUEUE; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + generations[g].threads = END_TSO_QUEUE; } // Wipe the task list, except the current Task. @@ -1710,19 +1710,19 @@ deleteAllThreads ( Capability *cap ) // NOTE: only safe to call if we own all capabilities. StgTSO* t, *next; - nat s; + nat g; debugTrace(DEBUG_sched,"deleting all threads"); - for (s = 0; s < total_steps; s++) { - for (t = all_steps[s].threads; t != END_TSO_QUEUE; t = next) { - if (t->what_next == ThreadRelocated) { - next = t->_link; - } else { - next = t->global_link; - deleteThread(cap,t); - } - } - } + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) { + if (t->what_next == ThreadRelocated) { + next = t->_link; + } else { + next = t->global_link; + deleteThread(cap,t); + } + } + } // The run queue now contains a bunch of ThreadKilled threads. 
We // must not throw these away: the main thread(s) will be in there @@ -2655,14 +2655,14 @@ resurrectThreads (StgTSO *threads) { StgTSO *tso, *next; Capability *cap; - step *step; + generation *gen; for (tso = threads; tso != END_TSO_QUEUE; tso = next) { next = tso->global_link; - step = Bdescr((P_)tso)->step; - tso->global_link = step->threads; - step->threads = tso; + gen = Bdescr((P_)tso)->gen; + tso->global_link = gen->threads; + gen->threads = tso; debugTrace(DEBUG_sched, "resurrecting thread %lu", (unsigned long)tso->id); @@ -2719,7 +2719,7 @@ performPendingThrowTos (StgTSO *threads) StgTSO *tso, *next; Capability *cap; Task *task, *saved_task;; - step *step; + generation *gen; task = myTask(); cap = task->cap; @@ -2727,9 +2727,9 @@ performPendingThrowTos (StgTSO *threads) for (tso = threads; tso != END_TSO_QUEUE; tso = next) { next = tso->global_link; - step = Bdescr((P_)tso)->step; - tso->global_link = step->threads; - step->threads = tso; + gen = Bdescr((P_)tso)->gen; + tso->global_link = gen->threads; + gen->threads = tso; debugTrace(DEBUG_sched, "performing blocked throwTo to thread %lu", (unsigned long)tso->id); diff --git a/rts/Stats.c b/rts/Stats.c index 6029745..b1061de 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -701,14 +701,12 @@ stat_exit(int alloc) #endif #if defined(THREADED_RTS) && defined(PROF_SPIN) { - nat g, s; + nat g; statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin); statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin); for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - statsPrintf("gen[%d].steps[%d].sync_large_objects: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_large_objects.spin); - } + statsPrintf("gen[%d].sync_large_objects: %"FMT_Word64"\n", g, generations[g].sync_large_objects.spin); } } #endif @@ -769,17 +767,17 @@ stat_exit(int alloc) void statDescribeGens(void) { - nat g, s, mut, lge; + nat g, mut, lge; lnat live, slop; lnat tot_live, tot_slop; bdescr *bd; - step *step; - + generation *gen; + debugBelch( -"-----------------------------------------------------------------\n" -" Gen Max Mut-list Step Blocks Large Live Slop\n" -" Blocks Bytes Objects \n" -"-----------------------------------------------------------------\n"); +"----------------------------------------------------------\n" +" Gen Max Mut-list Blocks Large Live Slop\n" +" Blocks Bytes Objects \n" +"----------------------------------------------------------\n"); tot_live = 0; tot_slop = 0; @@ -789,27 +787,23 @@ statDescribeGens(void) mut += (bd->free - bd->start) * sizeof(W_); } - debugBelch("%5d %7d %9d", g, generations[g].max_blocks, mut); + gen = &generations[g]; - for (s = 0; s < generations[g].n_steps; s++) { - step = &generations[g].steps[s]; - for (bd = step->large_objects, lge = 0; bd; bd = bd->link) { - lge++; - } - live = step->n_words + countOccupied(step->large_objects); - if (s != 0) { - debugBelch("%23s",""); + debugBelch("%5d %7d %9d", g, gen->max_blocks, mut); + + for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) { + lge++; } - slop = (step->n_blocks + step->n_large_blocks) * BLOCK_SIZE_W - live; - debugBelch("%6d %8d %8d %8ld %8ld\n", s, step->n_blocks, lge, + live = gen->n_words + countOccupied(gen->large_objects); + slop = (gen->n_blocks + gen->n_large_blocks) * BLOCK_SIZE_W - live; + debugBelch("%8d %8d %8ld %8ld\n", gen->n_blocks, lge, live*sizeof(W_), slop*sizeof(W_)); tot_live += live; tot_slop += slop; - } } - 
debugBelch("-----------------------------------------------------------------\n"); - debugBelch("%48s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); - debugBelch("-----------------------------------------------------------------\n"); + debugBelch("----------------------------------------------------------\n"); + debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); + debugBelch("----------------------------------------------------------\n"); debugBelch("\n"); } diff --git a/rts/Threads.c b/rts/Threads.c index 9867c1c..799cf90 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -102,8 +102,8 @@ createThread(Capability *cap, nat size) */ ACQUIRE_LOCK(&sched_mutex); tso->id = next_thread_id++; // while we have the mutex - tso->global_link = cap->r.rNursery->threads; - cap->r.rNursery->threads = tso; + tso->global_link = g0->threads; + g0->threads = tso; RELEASE_LOCK(&sched_mutex); // ToDo: report the stack size in the event? @@ -387,7 +387,7 @@ void printAllThreads(void) { StgTSO *t, *next; - nat i, s; + nat i, g; Capability *cap; # if defined(GRAN) @@ -415,8 +415,8 @@ printAllThreads(void) } debugBelch("other threads:\n"); - for (s = 0; s < total_steps; s++) { - for (t = all_steps[s].threads; t != END_TSO_QUEUE; t = next) { + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) { if (t->why_blocked != NotBlocked) { printThreadStatus(t); } diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c index 898624a..edc3762 100644 --- a/rts/sm/BlockAlloc.c +++ b/rts/sm/BlockAlloc.c @@ -58,7 +58,7 @@ static void initMBlock(void *mblock); The following fields are not used by the allocator: bd->flags bd->gen_no - bd->step + bd->gen bd->dest Exceptions: we don't maintain invariants for all the blocks within a @@ -470,7 +470,7 @@ freeGroup(bdescr *p) ASSERT(p->free != (P_)-1); p->free = (void *)-1; /* indicates that this block is free */ - p->step = NULL; + p->gen = NULL; p->gen_no = 0; /* fill the block group with garbage if sanity checking is on */ IF_DEBUG(sanity,memset(p->start, 0xaa, p->blocks * BLOCK_SIZE)); diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index c566aa0..0a695ca 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -875,7 +875,7 @@ update_fwd_compact( bdescr *blocks ) } static nat -update_bkwd_compact( step *stp ) +update_bkwd_compact( generation *gen ) { StgPtr p, free; #if 0 @@ -886,7 +886,7 @@ update_bkwd_compact( step *stp ) nat size, free_blocks; StgWord iptr; - bd = free_bd = stp->old_blocks; + bd = free_bd = gen->old_blocks; free = free_bd->start; free_blocks = 1; @@ -965,8 +965,8 @@ update_bkwd_compact( step *stp ) void compact(StgClosure *static_objects) { - nat g, s, blocks; - step *stp; + nat g, blocks; + generation *gen; // 1. thread the roots markCapabilities((evac_fn)thread_root, NULL); @@ -1000,8 +1000,8 @@ compact(StgClosure *static_objects) } // the global thread list - for (s = 0; s < total_steps; s++) { - thread((void *)&all_steps[s].threads); + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + thread((void *)&generations[g].threads); } // any threads resurrected during this GC @@ -1031,30 +1031,24 @@ compact(StgClosure *static_objects) // 2. 
update forward ptrs for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (g==0 && s ==0) continue; - stp = &generations[g].steps[s]; - debugTrace(DEBUG_gc, "update_fwd: %d.%d", - stp->gen->no, stp->no); - - update_fwd(stp->blocks); - update_fwd_large(stp->scavenged_large_objects); - if (g == RtsFlags.GcFlags.generations-1 && stp->old_blocks != NULL) { - debugTrace(DEBUG_gc, "update_fwd: %d.%d (compact)", - stp->gen->no, stp->no); - update_fwd_compact(stp->old_blocks); - } + gen = &generations[g]; + debugTrace(DEBUG_gc, "update_fwd: %d", g); + + update_fwd(gen->blocks); + update_fwd_large(gen->scavenged_large_objects); + if (g == RtsFlags.GcFlags.generations-1 && gen->old_blocks != NULL) { + debugTrace(DEBUG_gc, "update_fwd: %d (compact)", g); + update_fwd_compact(gen->old_blocks); } } // 3. update backward ptrs - stp = &oldest_gen->steps[0]; - if (stp->old_blocks != NULL) { - blocks = update_bkwd_compact(stp); + gen = oldest_gen; + if (gen->old_blocks != NULL) { + blocks = update_bkwd_compact(gen); debugTrace(DEBUG_gc, - "update_bkwd: %d.%d (compact, old: %d blocks, now %d blocks)", - stp->gen->no, stp->no, - stp->n_old_blocks, blocks); - stp->n_old_blocks = blocks; + "update_bkwd: %d (compact, old: %d blocks, now %d blocks)", + gen->no, gen->n_old_blocks, blocks); + gen->n_old_blocks = blocks; } } diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index 9836e51..93d5d58 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -51,28 +51,28 @@ STATIC_INLINE void evacuate_large(StgPtr p); -------------------------------------------------------------------------- */ STATIC_INLINE StgPtr -alloc_for_copy (nat size, step *stp) +alloc_for_copy (nat size, generation *gen) { StgPtr to; - step_workspace *ws; + gen_workspace *ws; /* Find out where we're going, using the handy "to" pointer in - * the step of the source object. If it turns out we need to + * the gen of the source object. If it turns out we need to * evacuate to an older generation, adjust it here (see comment * by evacuate()). */ - if (stp < gct->evac_step) { + if (gen < gct->evac_gen) { if (gct->eager_promotion) { - stp = gct->evac_step; + gen = gct->evac_gen; } else { gct->failed_to_evac = rtsTrue; } } - ws = &gct->steps[stp->abs_no]; - // this compiles to a single mem access to stp->abs_no only + ws = &gct->gens[gen->no]; + // this compiles to a single mem access to gen->abs_no only - /* chain a new block onto the to-space for the destination step if + /* chain a new block onto the to-space for the destination gen if * necessary. 
*/ to = ws->todo_free; @@ -91,12 +91,12 @@ alloc_for_copy (nat size, step *stp) STATIC_INLINE GNUC_ATTR_HOT void copy_tag(StgClosure **p, const StgInfoTable *info, - StgClosure *src, nat size, step *stp, StgWord tag) + StgClosure *src, nat size, generation *gen, StgWord tag) { StgPtr to, from; nat i; - to = alloc_for_copy(size,stp); + to = alloc_for_copy(size,gen); from = (StgPtr)src; to[0] = (W_)info; @@ -133,12 +133,12 @@ copy_tag(StgClosure **p, const StgInfoTable *info, #if defined(PARALLEL_GC) STATIC_INLINE void copy_tag_nolock(StgClosure **p, const StgInfoTable *info, - StgClosure *src, nat size, step *stp, StgWord tag) + StgClosure *src, nat size, generation *gen, StgWord tag) { StgPtr to, from; nat i; - to = alloc_for_copy(size,stp); + to = alloc_for_copy(size,gen); *p = TAG_CLOSURE(tag,(StgClosure*)to); src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to); @@ -165,7 +165,8 @@ copy_tag_nolock(StgClosure **p, const StgInfoTable *info, * used to optimise evacuation of BLACKHOLEs. */ static rtsBool -copyPart(StgClosure **p, StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) +copyPart(StgClosure **p, StgClosure *src, nat size_to_reserve, + nat size_to_copy, generation *gen) { StgPtr to, from; nat i; @@ -189,7 +190,7 @@ spin: info = (W_)src->header.info; #endif - to = alloc_for_copy(size_to_reserve, stp); + to = alloc_for_copy(size_to_reserve, gen); *p = (StgClosure *)to; from = (StgPtr)src; @@ -219,17 +220,17 @@ spin: /* Copy wrappers that don't tag the closure after copying */ STATIC_INLINE GNUC_ATTR_HOT void copy(StgClosure **p, const StgInfoTable *info, - StgClosure *src, nat size, step *stp) + StgClosure *src, nat size, generation *gen) { - copy_tag(p,info,src,size,stp,0); + copy_tag(p,info,src,size,gen,0); } /* ----------------------------------------------------------------------------- Evacuate a large object This just consists of removing the object from the (doubly-linked) - step->large_objects list, and linking it on to the (singly-linked) - step->new_large_objects list, from where it will be scavenged later. + gen->large_objects list, and linking it on to the (singly-linked) + gen->new_large_objects list, from where it will be scavenged later. Convention: bd->flags has BF_EVACUATED set for a large object that has been evacuated, or unset otherwise. @@ -239,22 +240,22 @@ STATIC_INLINE void evacuate_large(StgPtr p) { bdescr *bd = Bdescr(p); - step *stp, *new_stp; - step_workspace *ws; + generation *gen, *new_gen; + gen_workspace *ws; - stp = bd->step; - ACQUIRE_SPIN_LOCK(&stp->sync_large_objects); + gen = bd->gen; + ACQUIRE_SPIN_LOCK(&gen->sync_large_objects); // already evacuated? if (bd->flags & BF_EVACUATED) { /* Don't forget to set the gct->failed_to_evac flag if we didn't get * the desired destination (see comments in evacuate()). 
*/ - if (stp < gct->evac_step) { + if (gen < gct->evac_gen) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } - RELEASE_SPIN_LOCK(&stp->sync_large_objects); + RELEASE_SPIN_LOCK(&gen->sync_large_objects); return; } @@ -262,27 +263,27 @@ evacuate_large(StgPtr p) if (bd->u.back) { bd->u.back->link = bd->link; } else { // first object in the list - stp->large_objects = bd->link; + gen->large_objects = bd->link; } if (bd->link) { bd->link->u.back = bd->u.back; } - /* link it on to the evacuated large object list of the destination step + /* link it on to the evacuated large object list of the destination gen */ - new_stp = stp->to; - if (new_stp < gct->evac_step) { + new_gen = gen->to; + if (new_gen < gct->evac_gen) { if (gct->eager_promotion) { - new_stp = gct->evac_step; + new_gen = gct->evac_gen; } else { gct->failed_to_evac = rtsTrue; } } - ws = &gct->steps[new_stp->abs_no]; + ws = &gct->gens[new_gen->no]; bd->flags |= BF_EVACUATED; - initBdescr(bd, new_stp); + initBdescr(bd, new_gen, new_gen->to); // If this is a block of pinned objects, we don't have to scan // these objects, because they aren't allowed to contain any @@ -290,16 +291,16 @@ evacuate_large(StgPtr p) // them straight on the scavenged_large_objects list. if (bd->flags & BF_PINNED) { ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS); - if (new_stp != stp) { ACQUIRE_SPIN_LOCK(&new_stp->sync_large_objects); } - dbl_link_onto(bd, &new_stp->scavenged_large_objects); - new_stp->n_scavenged_large_blocks += bd->blocks; - if (new_stp != stp) { RELEASE_SPIN_LOCK(&new_stp->sync_large_objects); } + if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync_large_objects); } + dbl_link_onto(bd, &new_gen->scavenged_large_objects); + new_gen->n_scavenged_large_blocks += bd->blocks; + if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync_large_objects); } } else { bd->link = ws->todo_large_objects; ws->todo_large_objects = bd; } - RELEASE_SPIN_LOCK(&stp->sync_large_objects); + RELEASE_SPIN_LOCK(&gen->sync_large_objects); } /* ---------------------------------------------------------------------------- @@ -308,22 +309,22 @@ evacuate_large(StgPtr p) This is called (eventually) for every live object in the system. The caller to evacuate specifies a desired generation in the - gct->evac_step thread-local variable. The following conditions apply to + gct->evac_gen thread-local variable. The following conditions apply to evacuating an object which resides in generation M when we're collecting up to generation N - if M >= gct->evac_step + if M >= gct->evac_gen if M > N do nothing - else evac to step->to + else evac to gen->to - if M < gct->evac_step evac to gct->evac_step, step 0 + if M < gct->evac_gen evac to gct->evac_gen, step 0 if the object is already evacuated, then we check which generation it now resides in. - if M >= gct->evac_step do nothing - if M < gct->evac_step set gct->failed_to_evac flag to indicate that we - didn't manage to evacuate this object into gct->evac_step. + if M >= gct->evac_gen do nothing + if M < gct->evac_gen set gct->failed_to_evac flag to indicate that we + didn't manage to evacuate this object into gct->evac_gen. OPTIMISATION NOTES: @@ -348,7 +349,7 @@ REGPARM1 GNUC_ATTR_HOT void evacuate(StgClosure **p) { bdescr *bd = NULL; - step *stp; + generation *gen; StgClosure *q; const StgInfoTable *info; StgWord tag; @@ -473,7 +474,7 @@ loop: // We aren't copying this object, so we have to check // whether it is already in the target generation. (this is // the write barrier). 
- if (bd->step < gct->evac_step) { + if (bd->gen < gct->evac_gen) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } @@ -494,7 +495,7 @@ loop: return; } - /* If the object is in a step that we're compacting, then we + /* If the object is in a gen that we're compacting, then we * need to use an alternative evacuate procedure. */ if (!is_marked((P_)q,bd)) { @@ -504,13 +505,13 @@ loop: return; } - stp = bd->dest; + gen = bd->dest; info = q->header.info; if (IS_FORWARDING_PTR(info)) { /* Already evacuated, just return the forwarding address. - * HOWEVER: if the requested destination generation (gct->evac_step) is + * HOWEVER: if the requested destination generation (gct->evac_gen) is * older than the actual generation (because the object was * already evacuated to a younger generation) then we have to * set the gct->failed_to_evac flag to indicate that we couldn't @@ -521,14 +522,14 @@ loop: * shortcut it if either the required generation is 0, or the * current object (the EVACUATED) is in a high enough generation. * We know that an EVACUATED always points to an object in the - * same or an older generation. stp is the lowest step that the + * same or an older generation. gen is the lowest generation that the * current object would be evacuated to, so we only do the full - * check if stp is too low. + * check if gen is too low. */ StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info); *p = TAG_CLOSURE(tag,e); - if (stp < gct->evac_step) { // optimisation - if (Bdescr((P_)e)->step < gct->evac_step) { + if (gen < gct->evac_gen) { // optimisation + if (Bdescr((P_)e)->gen < gct->evac_gen) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } @@ -545,7 +546,7 @@ loop: case MUT_VAR_DIRTY: case MVAR_CLEAN: case MVAR_DIRTY: - copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),stp); + copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen); return; // For ints and chars of low value, save space by replacing references to @@ -557,7 +558,7 @@ loop: case CONSTR_0_1: { #if defined(__PIC__) && defined(mingw32_HOST_OS) - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,stp,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); #else StgWord w = (StgWord)q->payload[0]; if (info == Czh_con_info && @@ -574,7 +575,7 @@ loop: ); } else { - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,stp,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); } #endif return; @@ -583,25 +584,21 @@ loop: case FUN_0_1: case FUN_1_0: case CONSTR_1_0: - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,stp,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); return; case THUNK_1_0: case THUNK_0_1: - copy(p,info,q,sizeofW(StgThunk)+1,stp); + copy(p,info,q,sizeofW(StgThunk)+1,gen); return; case THUNK_1_1: case THUNK_2_0: case THUNK_0_2: #ifdef NO_PROMOTE_THUNKS - if (bd->gen_no == 0 && - bd->step->no != 0 && - bd->step->no == generations[bd->gen_no].n_steps-1) { - stp = bd->step; - } +#error bitrotted #endif - copy(p,info,q,sizeofW(StgThunk)+2,stp); + copy(p,info,q,sizeofW(StgThunk)+2,gen); return; case FUN_1_1: @@ -609,36 +606,36 @@ loop: case FUN_0_2: case CONSTR_1_1: case CONSTR_2_0: - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,stp,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen,tag); return; case CONSTR_0_2: - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,stp,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen,tag); return; case THUNK: - copy(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),stp); + 
copy(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen); return; case FUN: case IND_PERM: case IND_OLDGEN_PERM: case CONSTR: - copy_tag_nolock(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),stp,tag); + copy_tag_nolock(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen,tag); return; case WEAK: case STABLE_NAME: - copy_tag(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),stp,tag); + copy_tag(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen,tag); return; case BCO: - copy(p,info,q,bco_sizeW((StgBCO *)q),stp); + copy(p,info,q,bco_sizeW((StgBCO *)q),gen); return; case CAF_BLACKHOLE: case BLACKHOLE: - copyPart(p,q,BLACKHOLE_sizeW(),sizeofW(StgHeader),stp); + copyPart(p,q,BLACKHOLE_sizeW(),sizeofW(StgHeader),gen); return; case THUNK_SELECTOR: @@ -666,20 +663,20 @@ loop: barf("evacuate: stack frame at %p\n", q); case PAP: - copy(p,info,q,pap_sizeW((StgPAP*)q),stp); + copy(p,info,q,pap_sizeW((StgPAP*)q),gen); return; case AP: - copy(p,info,q,ap_sizeW((StgAP*)q),stp); + copy(p,info,q,ap_sizeW((StgAP*)q),gen); return; case AP_STACK: - copy(p,info,q,ap_stack_sizeW((StgAP_STACK*)q),stp); + copy(p,info,q,ap_stack_sizeW((StgAP_STACK*)q),gen); return; case ARR_WORDS: // just copy the block - copy(p,info,q,arr_words_sizeW((StgArrWords *)q),stp); + copy(p,info,q,arr_words_sizeW((StgArrWords *)q),gen); return; case MUT_ARR_PTRS_CLEAN: @@ -687,7 +684,7 @@ loop: case MUT_ARR_PTRS_FROZEN: case MUT_ARR_PTRS_FROZEN0: // just copy the block - copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),stp); + copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),gen); return; case TSO: @@ -710,7 +707,7 @@ loop: rtsBool mine; mine = copyPart(p,(StgClosure *)tso, tso_sizeW(tso), - sizeofW(StgTSO), stp); + sizeofW(StgTSO), gen); if (mine) { new_tso = (StgTSO *)*p; move_TSO(tso, new_tso); @@ -724,27 +721,27 @@ loop: } case TREC_HEADER: - copy(p,info,q,sizeofW(StgTRecHeader),stp); + copy(p,info,q,sizeofW(StgTRecHeader),gen); return; case TVAR_WATCH_QUEUE: - copy(p,info,q,sizeofW(StgTVarWatchQueue),stp); + copy(p,info,q,sizeofW(StgTVarWatchQueue),gen); return; case TVAR: - copy(p,info,q,sizeofW(StgTVar),stp); + copy(p,info,q,sizeofW(StgTVar),gen); return; case TREC_CHUNK: - copy(p,info,q,sizeofW(StgTRecChunk),stp); + copy(p,info,q,sizeofW(StgTRecChunk),gen); return; case ATOMIC_INVARIANT: - copy(p,info,q,sizeofW(StgAtomicInvariant),stp); + copy(p,info,q,sizeofW(StgAtomicInvariant),gen); return; case INVARIANT_CHECK_QUEUE: - copy(p,info,q,sizeofW(StgInvariantCheckQueue),stp); + copy(p,info,q,sizeofW(StgInvariantCheckQueue),gen); return; default: @@ -846,7 +843,7 @@ selector_chain: unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p); *q = (StgClosure *)p; // shortcut, behave as for: if (evac) evacuate(q); - if (evac && bd->step < gct->evac_step) { + if (evac && bd->gen < gct->evac_gen) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 6b7dc29..dc4c68f 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -101,7 +101,7 @@ rtsBool major_gc; /* Data used for allocation area sizing. 
*/ -static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC +static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC /* Mut-list stats */ #ifdef DEBUG @@ -116,7 +116,7 @@ nat mutlist_MUTVARS, gc_thread **gc_threads = NULL; #if !defined(THREADED_RTS) -StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)]; +StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]; #endif // Number of threads running in *this* GC. Affects how many @@ -174,10 +174,10 @@ GarbageCollect (rtsBool force_major_gc, Capability *cap) { bdescr *bd; - step *stp; + generation *gen; lnat live, allocated, max_copied, avg_copied, slop; gc_thread *saved_gct; - nat g, s, t, n; + nat g, t, n; // necessary if we stole a callee-saves register for gct: saved_gct = gct; @@ -195,8 +195,8 @@ GarbageCollect (rtsBool force_major_gc, } #endif - ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord)); - // otherwise adjust the padding in step_workspace. + ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord)); + // otherwise adjust the padding in gen_workspace. // tell the stats department that we've started a GC stat_startGC(); @@ -294,7 +294,7 @@ GarbageCollect (rtsBool force_major_gc, /* Allocate a mark stack if we're doing a major collection. */ - if (major_gc && oldest_gen->steps[0].mark) { + if (major_gc && oldest_gen->mark) { mark_stack_bd = allocBlock(); mark_stack_top_bd = mark_stack_bd; mark_stack_bd->link = NULL; @@ -335,7 +335,15 @@ SET_GCT(gc_threads[0]); // namely to reduce the likelihood of spurious old->new pointers. // for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { +#if defined(THREADED_RTS) + if (n_gc_threads > 1) { + scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]); + } else { + scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]); + } +#else scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]); +#endif freeChain_sync(generations[g].saved_mut_list); generations[g].saved_mut_list = NULL; @@ -347,18 +355,22 @@ SET_GCT(gc_threads[0]); // variable). if (n_gc_threads == 1) { for (n = 0; n < n_capabilities; n++) { +#if defined(THREADED_RTS) + scavenge_capability_mut_Lists1(&capabilities[n]); +#else scavenge_capability_mut_lists(&capabilities[n]); +#endif } } else { scavenge_capability_mut_lists(&capabilities[gct->thread_index]); } // follow roots from the CAF list (used by GHCi) - gct->evac_step = 0; + gct->evac_gen = 0; markCAFs(mark_root, gct); // follow all the roots that the application knows about. - gct->evac_step = 0; + gct->evac_gen = 0; markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads, rtsTrue/*prune sparks*/); @@ -421,32 +433,26 @@ SET_GCT(gc_threads[0]); // NO MORE EVACUATION AFTER THIS POINT! // Two-space collector: free the old to-space. 
- // g0s0->old_blocks is the old nursery - // g0s0->blocks is to-space from the previous GC + // g0->old_blocks is the old nursery + // g0->blocks is to-space from the previous GC if (RtsFlags.GcFlags.generations == 1) { - if (g0->steps[0].blocks != NULL) { - freeChain(g0->steps[0].blocks); - g0->steps[0].blocks = NULL; + if (g0->blocks != NULL) { + freeChain(g0->blocks); + g0->blocks = NULL; } } // For each workspace, in each thread, move the copied blocks to the step { gc_thread *thr; - step_workspace *ws; + gen_workspace *ws; bdescr *prev, *next; for (t = 0; t < n_gc_threads; t++) { thr = gc_threads[t]; - // not step 0 - if (RtsFlags.GcFlags.generations == 1) { - s = 0; - } else { - s = 1; - } - for (; s < total_steps; s++) { - ws = &thr->steps[s]; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + ws = &thr->gens[g]; // Push the final block if (ws->todo_bd) { @@ -458,14 +464,14 @@ SET_GCT(gc_threads[0]); prev = NULL; for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { - ws->step->n_words += bd->free - bd->start; + ws->gen->n_words += bd->free - bd->start; prev = bd; } if (prev != NULL) { - prev->link = ws->step->blocks; - ws->step->blocks = ws->scavd_list; + prev->link = ws->gen->blocks; + ws->gen->blocks = ws->scavd_list; } - ws->step->n_blocks += ws->n_scavd_blocks; + ws->gen->n_blocks += ws->n_scavd_blocks; } } @@ -475,14 +481,8 @@ SET_GCT(gc_threads[0]); for (t = 0; t < n_gc_threads; t++) { thr = gc_threads[t]; - // not step 0 - if (RtsFlags.GcFlags.generations == 1) { - s = 0; - } else { - s = 1; - } - for (; s < total_steps; s++) { - ws = &thr->steps[s]; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + ws = &thr->gens[g]; prev = NULL; for (bd = ws->part_list; bd != NULL; bd = next) { @@ -496,28 +496,28 @@ SET_GCT(gc_threads[0]); freeGroup(bd); ws->n_part_blocks--; } else { - ws->step->n_words += bd->free - bd->start; + ws->gen->n_words += bd->free - bd->start; prev = bd; } } if (prev != NULL) { - prev->link = ws->step->blocks; - ws->step->blocks = ws->part_list; + prev->link = ws->gen->blocks; + ws->gen->blocks = ws->part_list; } - ws->step->n_blocks += ws->n_part_blocks; + ws->gen->n_blocks += ws->n_part_blocks; - ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks); - ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words); + ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks); + ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words); } } } // Finally: compact or sweep the oldest generation. - if (major_gc && oldest_gen->steps[0].mark) { - if (oldest_gen->steps[0].compact) + if (major_gc && oldest_gen->mark) { + if (oldest_gen->compact) compact(gct->scavenged_static_objects); else - sweep(&oldest_gen->steps[0]); + sweep(oldest_gen); } /* run through all the generations/steps and tidy up @@ -575,101 +575,98 @@ SET_GCT(gc_threads[0]); mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS); } - for (s = 0; s < generations[g].n_steps; s++) { - bdescr *next, *prev; - stp = &generations[g].steps[s]; + bdescr *next, *prev; + gen = &generations[g]; - // for generations we collected... - if (g <= N) { + // for generations we collected... + if (g <= N) { /* free old memory and shift to-space into from-space for all * the collected steps (except the allocation area). These * freed blocks will probaby be quickly recycled. 
*/ - if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { - if (stp->mark) - { - // tack the new blocks on the end of the existing blocks - if (stp->old_blocks != NULL) { - - prev = NULL; - for (bd = stp->old_blocks; bd != NULL; bd = next) { - - next = bd->link; - - if (!(bd->flags & BF_MARKED)) - { - if (prev == NULL) { - stp->old_blocks = next; - } else { - prev->link = next; - } - freeGroup(bd); - stp->n_old_blocks--; - } - else - { - stp->n_words += bd->free - bd->start; - - // NB. this step might not be compacted next - // time, so reset the BF_MARKED flags. - // They are set before GC if we're going to - // compact. (search for BF_MARKED above). - bd->flags &= ~BF_MARKED; - - // between GCs, all blocks in the heap except - // for the nursery have the BF_EVACUATED flag set. - bd->flags |= BF_EVACUATED; - - prev = bd; + if (gen->mark) + { + // tack the new blocks on the end of the existing blocks + if (gen->old_blocks != NULL) { + + prev = NULL; + for (bd = gen->old_blocks; bd != NULL; bd = next) { + + next = bd->link; + + if (!(bd->flags & BF_MARKED)) + { + if (prev == NULL) { + gen->old_blocks = next; + } else { + prev->link = next; } - } - - if (prev != NULL) { - prev->link = stp->blocks; - stp->blocks = stp->old_blocks; + freeGroup(bd); + gen->n_old_blocks--; } - } - // add the new blocks to the block tally - stp->n_blocks += stp->n_old_blocks; - ASSERT(countBlocks(stp->blocks) == stp->n_blocks); - ASSERT(countOccupied(stp->blocks) == stp->n_words); - } - else // not copacted - { - freeChain(stp->old_blocks); - } - stp->old_blocks = NULL; - stp->n_old_blocks = 0; - } + else + { + gen->n_words += bd->free - bd->start; + + // NB. this step might not be compacted next + // time, so reset the BF_MARKED flags. + // They are set before GC if we're going to + // compact. (search for BF_MARKED above). + bd->flags &= ~BF_MARKED; + + // between GCs, all blocks in the heap except + // for the nursery have the BF_EVACUATED flag set. + bd->flags |= BF_EVACUATED; + + prev = bd; + } + } - /* LARGE OBJECTS. The current live large objects are chained on - * scavenged_large, having been moved during garbage - * collection from large_objects. Any objects left on the - * large_objects list are therefore dead, so we free them here. - */ - freeChain(stp->large_objects); - stp->large_objects = stp->scavenged_large_objects; - stp->n_large_blocks = stp->n_scavenged_large_blocks; - ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks); - } - else // for older generations... - { + if (prev != NULL) { + prev->link = gen->blocks; + gen->blocks = gen->old_blocks; + } + } + // add the new blocks to the block tally + gen->n_blocks += gen->n_old_blocks; + ASSERT(countBlocks(gen->blocks) == gen->n_blocks); + ASSERT(countOccupied(gen->blocks) == gen->n_words); + } + else // not copacted + { + freeChain(gen->old_blocks); + } + + gen->old_blocks = NULL; + gen->n_old_blocks = 0; + + /* LARGE OBJECTS. The current live large objects are chained on + * scavenged_large, having been moved during garbage + * collection from large_objects. Any objects left on the + * large_objects list are therefore dead, so we free them here. + */ + freeChain(gen->large_objects); + gen->large_objects = gen->scavenged_large_objects; + gen->n_large_blocks = gen->n_scavenged_large_blocks; + ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); + } + else // for generations > N + { /* For older generations, we need to append the * scavenged_large_object list (i.e. 
large objects that have been * promoted during this GC) to the large_object list for that step. */ - for (bd = stp->scavenged_large_objects; bd; bd = next) { - next = bd->link; - dbl_link_onto(bd, &stp->large_objects); + for (bd = gen->scavenged_large_objects; bd; bd = next) { + next = bd->link; + dbl_link_onto(bd, &gen->large_objects); } - + // add the new blocks we promoted during this GC - stp->n_large_blocks += stp->n_scavenged_large_blocks; - ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks); - } + gen->n_large_blocks += gen->n_scavenged_large_blocks; + ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); } - } + } // for all generations // update the max size of older generations after a major GC resize_generations(); @@ -679,14 +676,6 @@ SET_GCT(gc_threads[0]); // Free the small objects allocated via allocate(), since this will // all have been copied into G0S1 now. - if (RtsFlags.GcFlags.generations > 1) { - if (g0->steps[0].blocks != NULL) { - freeChain(g0->steps[0].blocks); - g0->steps[0].blocks = NULL; - } - g0->steps[0].n_blocks = 0; - g0->steps[0].n_words = 0; - } alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; // Start a new pinned_object_block @@ -703,12 +692,10 @@ SET_GCT(gc_threads[0]); // Free any bitmaps. for (g = 0; g <= N; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - stp = &generations[g].steps[s]; - if (stp->bitmap != NULL) { - freeGroup(stp->bitmap); - stp->bitmap = NULL; - } + gen = &generations[g]; + if (gen->bitmap != NULL) { + freeGroup(gen->bitmap); + gen->bitmap = NULL; } } @@ -814,7 +801,7 @@ static nat initialise_N (rtsBool force_major_gc) { int g; - nat s, blocks, blocks_total; + nat blocks, blocks_total; blocks = 0; blocks_total = 0; @@ -826,11 +813,10 @@ initialise_N (rtsBool force_major_gc) } for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) { - blocks = 0; - for (s = 0; s < generations[g].n_steps; s++) { - blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W; - blocks += generations[g].steps[s].n_large_blocks; - } + + blocks = generations[g].n_words / BLOCK_SIZE_W + + generations[g].n_large_blocks; + if (blocks >= generations[g].max_blocks) { N = stg_max(N,g); } @@ -857,8 +843,8 @@ initialise_N (rtsBool force_major_gc) static void new_gc_thread (nat n, gc_thread *t) { - nat s; - step_workspace *ws; + nat g; + gen_workspace *ws; #ifdef THREADED_RTS t->id = 0; @@ -879,11 +865,11 @@ new_gc_thread (nat n, gc_thread *t) t->papi_events = -1; #endif - for (s = 0; s < total_steps; s++) + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - ws = &t->steps[s]; - ws->step = &all_steps[s]; - ASSERT(s == ws->step->abs_no); + ws = &t->gens[g]; + ws->gen = &generations[g]; + ASSERT(g == ws->gen->no); ws->my_gct = t; ws->todo_bd = NULL; @@ -912,7 +898,8 @@ initGcThreads (void) for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { gc_threads[i] = - stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace), + stgMallocBytes(sizeof(gc_thread) + + RtsFlags.GcFlags.generations * sizeof(gen_workspace), "alloc_gc_threads"); new_gc_thread(i, gc_threads[i]); @@ -928,22 +915,22 @@ initGcThreads (void) void freeGcThreads (void) { - nat s; + nat g; if (gc_threads != NULL) { #if defined(THREADED_RTS) nat i; for (i = 0; i < n_capabilities; i++) { - for (s = 0; s < total_steps; s++) + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - freeWSDeque(gc_threads[i]->steps[s].todo_q); + freeWSDeque(gc_threads[i]->gens[g].todo_q); } stgFree (gc_threads[i]); } stgFree (gc_threads); #else - for (s = 0; s < total_steps; s++) + 
for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - freeWSDeque(gc_threads[0]->steps[s].todo_q); + freeWSDeque(gc_threads[0]->gens[g].todo_q); } stgFree (gc_threads); #endif @@ -976,8 +963,8 @@ dec_running (void) static rtsBool any_work (void) { - int s; - step_workspace *ws; + int g; + gen_workspace *ws; gct->any_work++; @@ -991,11 +978,8 @@ any_work (void) // Check for global work in any step. We don't need to check for // local work, because we have already exited scavenge_loop(), // which means there is no local work for this thread. - for (s = total_steps-1; s >= 0; s--) { - if (s == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } - ws = &gct->steps[s]; + for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) { + ws = &gct->gens[g]; if (ws->todo_large_objects) return rtsTrue; if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue; if (ws->todo_overflow) return rtsTrue; @@ -1007,8 +991,8 @@ any_work (void) // look for work to steal for (n = 0; n < n_gc_threads; n++) { if (n == gct->thread_index) continue; - for (s = total_steps-1; s >= 0; s--) { - ws = &gc_threads[n]->steps[s]; + for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) { + ws = &gc_threads[n]->gens[g]; if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue; } } @@ -1089,7 +1073,7 @@ gcWorkerThread (Capability *cap) #endif // Every thread evacuates some roots. - gct->evac_step = 0; + gct->evac_gen = 0; markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads, rtsTrue/*prune sparks*/); scavenge_capability_mut_lists(&capabilities[gct->thread_index]); @@ -1216,9 +1200,9 @@ releaseGCThreads (Capability *cap USED_IF_THREADS) static void init_collected_gen (nat g, nat n_threads) { - nat s, t, i; - step_workspace *ws; - step *stp; + nat t, i; + gen_workspace *ws; + generation *gen; bdescr *bd; // Throw away the current mutable list. Invariant: the mutable @@ -1233,118 +1217,96 @@ init_collected_gen (nat g, nat n_threads) } } - if (g == 0) { - for (i = 0; i < n_capabilities; i++) { - stp = &nurseries[i]; - stp->old_threads = stp->threads; - stp->threads = END_TSO_QUEUE; - } + gen = &generations[g]; + ASSERT(gen->no == g); + + // we'll construct a new list of threads in this step + // during GC, throw away the current list. + gen->old_threads = gen->threads; + gen->threads = END_TSO_QUEUE; + + // deprecate the existing blocks + gen->old_blocks = gen->blocks; + gen->n_old_blocks = gen->n_blocks; + gen->blocks = NULL; + gen->n_blocks = 0; + gen->n_words = 0; + gen->live_estimate = 0; + + // initialise the large object queues. + gen->scavenged_large_objects = NULL; + gen->n_scavenged_large_blocks = 0; + + // mark the small objects as from-space + for (bd = gen->old_blocks; bd; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; + } + + // mark the large objects as from-space + for (bd = gen->large_objects; bd; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; } - for (s = 0; s < generations[g].n_steps; s++) { - - // generation 0, step 0 doesn't need to-space, unless -G1 - if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } + // for a compacted generation, we need to allocate the bitmap + if (gen->mark) { + nat bitmap_size; // in bytes + bdescr *bitmap_bdescr; + StgWord *bitmap; - stp = &generations[g].steps[s]; - ASSERT(stp->gen_no == g); - - // we'll construct a new list of threads in this step - // during GC, throw away the current list. 
- stp->old_threads = stp->threads; - stp->threads = END_TSO_QUEUE; - - // deprecate the existing blocks - stp->old_blocks = stp->blocks; - stp->n_old_blocks = stp->n_blocks; - stp->blocks = NULL; - stp->n_blocks = 0; - stp->n_words = 0; - stp->live_estimate = 0; - - // initialise the large object queues. - stp->scavenged_large_objects = NULL; - stp->n_scavenged_large_blocks = 0; - - // mark the small objects as from-space - for (bd = stp->old_blocks; bd; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; - } - - // mark the large objects as from-space - for (bd = stp->large_objects; bd; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; - } - - // for a compacted step, we need to allocate the bitmap - if (stp->mark) { - nat bitmap_size; // in bytes - bdescr *bitmap_bdescr; - StgWord *bitmap; - - bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); - - if (bitmap_size > 0) { - bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size) - / BLOCK_SIZE); - stp->bitmap = bitmap_bdescr; - bitmap = bitmap_bdescr->start; - - debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p", - bitmap_size, bitmap); - - // don't forget to fill it with zeros! - memset(bitmap, 0, bitmap_size); + bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); + + if (bitmap_size > 0) { + bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size) + / BLOCK_SIZE); + gen->bitmap = bitmap_bdescr; + bitmap = bitmap_bdescr->start; + + debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p", + bitmap_size, bitmap); + + // don't forget to fill it with zeros! + memset(bitmap, 0, bitmap_size); + + // For each block in this step, point to its bitmap from the + // block descriptor. + for (bd=gen->old_blocks; bd != NULL; bd = bd->link) { + bd->u.bitmap = bitmap; + bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE); - // For each block in this step, point to its bitmap from the - // block descriptor. - for (bd=stp->old_blocks; bd != NULL; bd = bd->link) { - bd->u.bitmap = bitmap; - bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE); - - // Also at this point we set the BF_MARKED flag - // for this block. The invariant is that - // BF_MARKED is always unset, except during GC - // when it is set on those blocks which will be - // compacted. - if (!(bd->flags & BF_FRAGMENTED)) { - bd->flags |= BF_MARKED; - } - } - } - } + // Also at this point we set the BF_MARKED flag + // for this block. The invariant is that + // BF_MARKED is always unset, except during GC + // when it is set on those blocks which will be + // compacted. + if (!(bd->flags & BF_FRAGMENTED)) { + bd->flags |= BF_MARKED; + } + } + } } // For each GC thread, for each step, allocate a "todo" block to // store evacuated objects to be scavenged, and a block to store // evacuated objects that do not need to be scavenged. for (t = 0; t < n_threads; t++) { - for (s = 0; s < generations[g].n_steps; s++) { - - // we don't copy objects into g0s0, unless -G0 - if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue; - - ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; - - ws->todo_large_objects = NULL; - - ws->part_list = NULL; - ws->n_part_blocks = 0; - - // allocate the first to-space block; extra blocks will be - // chained on as necessary. 
- ws->todo_bd = NULL; - ASSERT(looksEmptyWSDeque(ws->todo_q)); - alloc_todo_block(ws,0); - - ws->todo_overflow = NULL; - ws->n_todo_overflow = 0; - - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - } + ws = &gc_threads[t]->gens[g]; + + ws->todo_large_objects = NULL; + + ws->part_list = NULL; + ws->n_part_blocks = 0; + + // allocate the first to-space block; extra blocks will be + // chained on as necessary. + ws->todo_bd = NULL; + ASSERT(looksEmptyWSDeque(ws->todo_q)); + alloc_todo_block(ws,0); + + ws->todo_overflow = NULL; + ws->n_todo_overflow = 0; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; } } @@ -1356,9 +1318,9 @@ init_collected_gen (nat g, nat n_threads) static void init_uncollected_gen (nat g, nat threads) { - nat s, t, n; - step_workspace *ws; - step *stp; + nat t, n; + gen_workspace *ws; + generation *gen; bdescr *bd; // save the current mutable lists for this generation, and @@ -1371,66 +1333,60 @@ init_uncollected_gen (nat g, nat threads) capabilities[n].mut_lists[g] = allocBlock(); } - for (s = 0; s < generations[g].n_steps; s++) { - stp = &generations[g].steps[s]; - stp->scavenged_large_objects = NULL; - stp->n_scavenged_large_blocks = 0; - } - - for (s = 0; s < generations[g].n_steps; s++) { - - stp = &generations[g].steps[s]; - - for (t = 0; t < threads; t++) { - ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; - - ASSERT(looksEmptyWSDeque(ws->todo_q)); - ws->todo_large_objects = NULL; - - ws->part_list = NULL; - ws->n_part_blocks = 0; + gen = &generations[g]; - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; + gen->scavenged_large_objects = NULL; + gen->n_scavenged_large_blocks = 0; - // If the block at the head of the list in this generation - // is less than 3/4 full, then use it as a todo block. - if (stp->blocks && isPartiallyFull(stp->blocks)) - { - ws->todo_bd = stp->blocks; - ws->todo_free = ws->todo_bd->free; - ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; - stp->blocks = stp->blocks->link; - stp->n_blocks -= 1; - stp->n_words -= ws->todo_bd->free - ws->todo_bd->start; - ws->todo_bd->link = NULL; - // we must scan from the current end point. - ws->todo_bd->u.scan = ws->todo_bd->free; - } - else - { - ws->todo_bd = NULL; - alloc_todo_block(ws,0); - } - } - - // deal out any more partial blocks to the threads' part_lists - t = 0; - while (stp->blocks && isPartiallyFull(stp->blocks)) + for (t = 0; t < threads; t++) { + ws = &gc_threads[t]->gens[g]; + + ASSERT(looksEmptyWSDeque(ws->todo_q)); + ws->todo_large_objects = NULL; + + ws->part_list = NULL; + ws->n_part_blocks = 0; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; + + // If the block at the head of the list in this generation + // is less than 3/4 full, then use it as a todo block. + if (gen->blocks && isPartiallyFull(gen->blocks)) + { + ws->todo_bd = gen->blocks; + ws->todo_free = ws->todo_bd->free; + ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; + gen->blocks = gen->blocks->link; + gen->n_blocks -= 1; + gen->n_words -= ws->todo_bd->free - ws->todo_bd->start; + ws->todo_bd->link = NULL; + // we must scan from the current end point. 
+ ws->todo_bd->u.scan = ws->todo_bd->free; + } + else { - bd = stp->blocks; - stp->blocks = bd->link; - ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; - bd->link = ws->part_list; - ws->part_list = bd; - ws->n_part_blocks += 1; - bd->u.scan = bd->free; - stp->n_blocks -= 1; - stp->n_words -= bd->free - bd->start; - t++; - if (t == n_gc_threads) t = 0; + ws->todo_bd = NULL; + alloc_todo_block(ws,0); } } + + // deal out any more partial blocks to the threads' part_lists + t = 0; + while (gen->blocks && isPartiallyFull(gen->blocks)) + { + bd = gen->blocks; + gen->blocks = bd->link; + ws = &gc_threads[t]->gens[g]; + bd->link = ws->part_list; + ws->part_list = bd; + ws->n_part_blocks += 1; + bd->u.scan = bd->free; + gen->n_blocks -= 1; + gen->n_words -= bd->free - bd->start; + t++; + if (t == n_gc_threads) t = 0; + } } /* ----------------------------------------------------------------------------- @@ -1444,7 +1400,7 @@ init_gc_thread (gc_thread *t) t->scavenged_static_objects = END_OF_STATIC_LIST; t->scan_bd = NULL; t->mut_lists = capabilities[t->thread_index].mut_lists; - t->evac_step = 0; + t->evac_gen = 0; t->failed_to_evac = rtsFalse; t->eager_promotion = rtsTrue; t->thunk_selector_depth = 0; @@ -1547,13 +1503,13 @@ resize_generations (void) nat gens = RtsFlags.GcFlags.generations; // live in the oldest generations - if (oldest_gen->steps[0].live_estimate != 0) { - words = oldest_gen->steps[0].live_estimate; + if (oldest_gen->live_estimate != 0) { + words = oldest_gen->live_estimate; } else { - words = oldest_gen->steps[0].n_words; + words = oldest_gen->n_words; } live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W + - oldest_gen->steps[0].n_large_blocks; + oldest_gen->n_large_blocks; // default max size for all generations except zero size = stg_max(live * RtsFlags.GcFlags.oldGenFactor, @@ -1572,19 +1528,19 @@ resize_generations (void) if (RtsFlags.GcFlags.generations > 1 && (RtsFlags.GcFlags.compact || (max > 0 && - oldest_gen->steps[0].n_blocks > + oldest_gen->n_blocks > (RtsFlags.GcFlags.compactThreshold * max) / 100))) { - oldest_gen->steps[0].mark = 1; - oldest_gen->steps[0].compact = 1; + oldest_gen->mark = 1; + oldest_gen->compact = 1; // debugBelch("compaction: on\n", live); } else { - oldest_gen->steps[0].mark = 0; - oldest_gen->steps[0].compact = 0; + oldest_gen->mark = 0; + oldest_gen->compact = 0; // debugBelch("compaction: off\n", live); } if (RtsFlags.GcFlags.sweep) { - oldest_gen->steps[0].mark = 1; + oldest_gen->mark = 1; } // if we're going to go over the maximum heap size, reduce the @@ -1600,7 +1556,7 @@ resize_generations (void) heapOverflow(); } - if (oldest_gen->steps[0].compact) { + if (oldest_gen->compact) { if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) { size = (max - min_alloc) / ((gens - 1) * 2 - 1); } @@ -1653,7 +1609,7 @@ resize_nursery (void) * performance we get from 3L bytes, reducing to the same * performance at 2L bytes. */ - blocks = generations[0].steps[0].n_blocks; + blocks = generations[0].n_blocks; if ( RtsFlags.GcFlags.maxHeapSize != 0 && blocks * RtsFlags.GcFlags.oldGenFactor * 2 > @@ -1697,7 +1653,7 @@ resize_nursery (void) /* Guess how much will be live in generation 0 step 0 next time. * A good approximation is obtained by finding the - * percentage of g0s0 that was live at the last minor GC. + * percentage of g0 that was live at the last minor GC. 
* * We have an accurate figure for the amount of copied data in * 'copied', but we must convert this to a number of blocks, with @@ -1706,7 +1662,7 @@ resize_nursery (void) */ if (N == 0) { - g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100) + g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100) / countNurseryBlocks(); } @@ -1717,14 +1673,14 @@ resize_nursery (void) * * Formula: suggested - needed * ---------------------------- - * 1 + g0s0_pcnt_kept/100 + * 1 + g0_pcnt_kept/100 * * where 'needed' is the amount of memory needed at the next - * collection for collecting all steps except g0s0. + * collection for collecting all gens except g0. */ blocks = (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) / - (100 + (long)g0s0_pcnt_kept); + (100 + (long)g0_pcnt_kept); if (blocks < (long)min_nursery) { blocks = min_nursery; diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h index ab5aca7..7d46232 100644 --- a/rts/sm/GCThread.h +++ b/rts/sm/GCThread.h @@ -30,18 +30,18 @@ BEGIN_RTS_PRIVATE thread-local data. We'll keep a pointer to this in a thread-local variable, or possibly in a register. - In the gc_thread structure is a step_workspace for each step. The - primary purpose of the step_workspace is to hold evacuated objects; + In the gc_thread structure is a gen_workspace for each generation. The + primary purpose of the gen_workspace is to hold evacuated objects; when an object is evacuated, it is copied to the "todo" block in - the thread's workspace for the appropriate step. When the todo - block is full, it is pushed to the global step->todos list, which + the thread's workspace for the appropriate generation. When the todo + block is full, it is pushed to the global gen->todos list, which is protected by a lock. (in fact we intervene a one-place buffer here to reduce contention). A thread repeatedly grabs a block of work from one of the - step->todos lists, scavenges it, and keeps the scavenged block on + gen->todos lists, scavenges it, and keeps the scavenged block on its own ws->scavd_list (this is to avoid unnecessary contention - returning the completed buffers back to the step: we can just + returning the completed buffers back to the generation: we can just collect them all later). When there is no global work to do, we start scavenging the todo @@ -57,7 +57,7 @@ BEGIN_RTS_PRIVATE scan_bd may still be the same as todo_bd, or it might be different: if enough objects were copied into this block that it filled up, then we will have allocated a new todo block, but *not* pushed the - old one to the step, because it is partially scanned. + old one to the generation, because it is partially scanned. The reason to leave scanning the todo blocks until last is that we want to deal with full blocks as far as possible. @@ -65,17 +65,18 @@ BEGIN_RTS_PRIVATE /* ----------------------------------------------------------------------------- - Step Workspace + Generation Workspace - A step workspace exists for each step for each GC thread. The GC - thread takes a block from the todos list of the step into the - scanbd and then scans it. Objects referred to by those in the scan - block are copied into the todo or scavd blocks of the relevant step. + A generation workspace exists for each generation for each GC + thread. The GC thread takes a block from the todos list of the + generation into the scanbd and then scans it. Objects referred to + by those in the scan block are copied into the todo or scavd blocks + of the relevant generation. 
------------------------------------------------------------------------- */ -typedef struct step_workspace_ { - step * step; // the step for this workspace +typedef struct gen_workspace_ { + generation * gen; // the gen for this workspace struct gc_thread_ * my_gct; // the gc_thread that contains this workspace // where objects to be scavenged go @@ -100,17 +101,17 @@ typedef struct step_workspace_ { StgWord pad[3]; -} step_workspace ATTRIBUTE_ALIGNED(64); -// align so that computing gct->steps[n] is a shift, not a multiply +} gen_workspace ATTRIBUTE_ALIGNED(64); +// align so that computing gct->gens[n] is a shift, not a multiply // fails if the size is <64, which is why we need the pad above /* ---------------------------------------------------------------------------- GC thread object - Every GC thread has one of these. It contains all the step specific - workspaces and other GC thread local information. At some later - point it maybe useful to move this other into the TLS store of the - GC threads + Every GC thread has one of these. It contains all the generation + specific workspaces and other GC thread local information. At some + later point it maybe useful to move this other into the TLS store + of the GC threads ------------------------------------------------------------------------- */ typedef struct gc_thread_ { @@ -145,7 +146,7 @@ typedef struct gc_thread_ { // -------------------- // evacuate flags - step *evac_step; // Youngest generation that objects + generation *evac_gen; // Youngest generation that objects // should be evacuated to in // evacuate(). (Logically an // argument to evacuate, but it's @@ -184,7 +185,7 @@ typedef struct gc_thread_ { // the gc_thread pointer to a workspace using only pointer // arithmetic, no memory access. This happens in the inner loop // of the GC, see Evac.c:alloc_for_copy(). 
- step_workspace steps[]; + gen_workspace gens[]; } gc_thread; diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c index 39a79f6..a544e83 100644 --- a/rts/sm/GCUtils.c +++ b/rts/sm/GCUtils.c @@ -87,12 +87,12 @@ freeChain_sync(bdescr *bd) -------------------------------------------------------------------------- */ bdescr * -grab_local_todo_block (step_workspace *ws) +grab_local_todo_block (gen_workspace *ws) { bdescr *bd; - step *stp; + generation *gen; - stp = ws->step; + gen = ws->gen; bd = ws->todo_overflow; if (bd != NULL) @@ -115,7 +115,7 @@ grab_local_todo_block (step_workspace *ws) #if defined(THREADED_RTS) bdescr * -steal_todo_block (nat s) +steal_todo_block (nat g) { nat n; bdescr *bd; @@ -123,7 +123,7 @@ steal_todo_block (nat s) // look for work to steal for (n = 0; n < n_gc_threads; n++) { if (n == gct->thread_index) continue; - bd = stealWSDeque(gc_threads[n]->steps[s].todo_q); + bd = stealWSDeque(gc_threads[n]->gens[g].todo_q); if (bd) { return bd; } @@ -133,11 +133,11 @@ steal_todo_block (nat s) #endif void -push_scanned_block (bdescr *bd, step_workspace *ws) +push_scanned_block (bdescr *bd, gen_workspace *ws) { ASSERT(bd != NULL); ASSERT(bd->link == NULL); - ASSERT(bd->step == ws->step); + ASSERT(bd->gen == ws->gen); ASSERT(bd->u.scan == bd->free); if (bd->start + bd->blocks * BLOCK_SIZE_W - bd->free > WORK_UNIT_WORDS) @@ -161,7 +161,7 @@ push_scanned_block (bdescr *bd, step_workspace *ws) } StgPtr -todo_block_full (nat size, step_workspace *ws) +todo_block_full (nat size, gen_workspace *ws) { StgPtr p; bdescr *bd; @@ -174,7 +174,7 @@ todo_block_full (nat size, step_workspace *ws) ASSERT(bd != NULL); ASSERT(bd->link == NULL); - ASSERT(bd->step == ws->step); + ASSERT(bd->gen == ws->gen); // If the global list is not empty, or there's not much work in // this block to push, and there's enough room in @@ -213,11 +213,11 @@ todo_block_full (nat size, step_workspace *ws) // Otherwise, push this block out to the global list. 
else { - step *stp; - stp = ws->step; + generation *gen; + gen = ws->gen; debugTrace(DEBUG_gc, "push todo block %p (%ld words), step %d, todo_q: %ld", bd->start, (unsigned long)(bd->free - bd->u.scan), - stp->abs_no, dequeElements(ws->todo_q)); + gen->no, dequeElements(ws->todo_q)); if (!pushWSDeque(ws->todo_q, bd)) { bd->link = ws->todo_overflow; @@ -239,7 +239,7 @@ todo_block_full (nat size, step_workspace *ws) } StgPtr -alloc_todo_block (step_workspace *ws, nat size) +alloc_todo_block (gen_workspace *ws, nat size) { bdescr *bd/*, *hd, *tl */; @@ -270,7 +270,7 @@ alloc_todo_block (step_workspace *ws, nat size) } else { bd = allocBlock_sync(); } - initBdescr(bd, ws->step); + initBdescr(bd, ws->gen, ws->gen->to); bd->flags = BF_EVACUATED; bd->u.scan = bd->free = bd->start; } @@ -282,8 +282,8 @@ alloc_todo_block (step_workspace *ws, nat size) ws->todo_lim = stg_min(bd->start + bd->blocks * BLOCK_SIZE_W, bd->free + stg_max(WORK_UNIT_WORDS,size)); - debugTrace(DEBUG_gc, "alloc new todo block %p for step %d", - bd->free, ws->step->abs_no); + debugTrace(DEBUG_gc, "alloc new todo block %p for gen %d", + bd->free, ws->gen->no); return ws->todo_free; } diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h index 7fafe51..1fbbe3c 100644 --- a/rts/sm/GCUtils.h +++ b/rts/sm/GCUtils.h @@ -19,11 +19,11 @@ BEGIN_RTS_PRIVATE bdescr *allocBlock_sync(void); void freeChain_sync(bdescr *bd); -void push_scanned_block (bdescr *bd, step_workspace *ws); -StgPtr todo_block_full (nat size, step_workspace *ws); -StgPtr alloc_todo_block (step_workspace *ws, nat size); +void push_scanned_block (bdescr *bd, gen_workspace *ws); +StgPtr todo_block_full (nat size, gen_workspace *ws); +StgPtr alloc_todo_block (gen_workspace *ws, nat size); -bdescr *grab_local_todo_block (step_workspace *ws); +bdescr *grab_local_todo_block (gen_workspace *ws); #if defined(THREADED_RTS) bdescr *steal_todo_block (nat s); #endif diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c index 2f5964f..9d8e8c0 100644 --- a/rts/sm/MarkWeak.c +++ b/rts/sm/MarkWeak.c @@ -83,8 +83,8 @@ StgTSO *resurrected_threads; // List of blocked threads found to have pending throwTos StgTSO *exception_threads; -static void resurrectUnreachableThreads (step *stp); -static rtsBool tidyThreadList (step *stp); +static void resurrectUnreachableThreads (generation *gen); +static rtsBool tidyThreadList (generation *gen); void initWeakForGC(void) @@ -113,7 +113,7 @@ traverseWeakPtrList(void) /* doesn't matter where we evacuate values/finalizers to, since * these pointers are treated as roots (iff the keys are alive). */ - gct->evac_step = 0; + gct->evac_gen = 0; last_w = &old_weak_ptr_list; for (w = old_weak_ptr_list; w != NULL; w = next_w) { @@ -191,19 +191,18 @@ traverseWeakPtrList(void) * become garbage, we wake them up and administer an exception. */ { - nat g, s, n; + nat g; // Traverse thread lists for generations we collected... - for (n = 0; n < n_capabilities; n++) { - if (tidyThreadList(&nurseries[n])) { - flag = rtsTrue; - } - } +// ToDo when we have one gen per capability: +// for (n = 0; n < n_capabilities; n++) { +// if (tidyThreadList(&nurseries[n])) { +// flag = rtsTrue; +// } +// } for (g = 0; g <= N; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (tidyThreadList(&generations[g].steps[s])) { - flag = rtsTrue; - } + if (tidyThreadList(&generations[g])) { + flag = rtsTrue; } } @@ -214,15 +213,9 @@ traverseWeakPtrList(void) /* And resurrect any threads which were about to become garbage. 
*/ { - nat g, s, n; - - for (n = 0; n < n_capabilities; n++) { - resurrectUnreachableThreads(&nurseries[n]); - } + nat g; for (g = 0; g <= N; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - resurrectUnreachableThreads(&generations[g].steps[s]); - } + resurrectUnreachableThreads(&generations[g]); } } @@ -251,11 +244,11 @@ traverseWeakPtrList(void) } } - static void resurrectUnreachableThreads (step *stp) + static void resurrectUnreachableThreads (generation *gen) { StgTSO *t, *tmp, *next; - for (t = stp->old_threads; t != END_TSO_QUEUE; t = next) { + for (t = gen->old_threads; t != END_TSO_QUEUE; t = next) { next = t->global_link; // ThreadFinished and ThreadComplete: we have to keep @@ -275,14 +268,14 @@ traverseWeakPtrList(void) } } -static rtsBool tidyThreadList (step *stp) +static rtsBool tidyThreadList (generation *gen) { StgTSO *t, *tmp, *next, **prev; rtsBool flag = rtsFalse; - prev = &stp->old_threads; + prev = &gen->old_threads; - for (t = stp->old_threads; t != END_TSO_QUEUE; t = next) { + for (t = gen->old_threads; t != END_TSO_QUEUE; t = next) { tmp = (StgTSO *)isAlive((StgClosure *)t); @@ -339,10 +332,10 @@ static rtsBool tidyThreadList (step *stp) *prev = next; // move this thread onto the correct threads list. - step *new_step; - new_step = Bdescr((P_)t)->step; - t->global_link = new_step->threads; - new_step->threads = t; + generation *new_gen; + new_gen = Bdescr((P_)t)->gen; + t->global_link = new_gen->threads; + new_gen->threads = t; } } diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index b6edba8..442fee1 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -569,10 +569,10 @@ void checkGlobalTSOList (rtsBool checkTSOs) { StgTSO *tso; - nat s; + nat g; - for (s = 0; s < total_steps; s++) { - for (tso=all_steps[s].threads; tso != END_TSO_QUEUE; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (tso=generations[g].threads; tso != END_TSO_QUEUE; tso = tso->global_link) { ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso)); ASSERT(get_itbl(tso)->type == TSO); @@ -673,20 +673,19 @@ checkStaticObjects ( StgClosure* static_objects ) /* Nursery sanity check */ void -checkNurserySanity( step *stp ) +checkNurserySanity (nursery *nursery) { bdescr *bd, *prev; nat blocks = 0; prev = NULL; - for (bd = stp->blocks; bd != NULL; bd = bd->link) { + for (bd = nursery->blocks; bd != NULL; bd = bd->link) { ASSERT(bd->u.back == prev); prev = bd; blocks += bd->blocks; } - ASSERT(blocks == stp->n_blocks); - ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks); + ASSERT(blocks == nursery->n_blocks); } @@ -694,26 +693,21 @@ checkNurserySanity( step *stp ) void checkSanity( rtsBool check_heap ) { - nat g, s; + nat g, n; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } - ASSERT(countBlocks(generations[g].steps[s].blocks) - == generations[g].steps[s].n_blocks); - ASSERT(countBlocks(generations[g].steps[s].large_objects) - == generations[g].steps[s].n_large_blocks); - if (check_heap) { - checkHeap(generations[g].steps[s].blocks); - } - checkLargeObjects(generations[g].steps[s].large_objects); + ASSERT(countBlocks(generations[g].blocks) + == generations[g].n_blocks); + ASSERT(countBlocks(generations[g].large_objects) + == generations[g].n_large_blocks); + if (check_heap) { + checkHeap(generations[g].blocks); } + checkLargeObjects(generations[g].large_objects); } - for (s = 0; s < n_capabilities; s++) { - checkNurserySanity(&nurseries[s]); + for (n = 0; n < 
n_capabilities; n++) { + checkNurserySanity(&nurseries[n]); } checkFreeListSanity(); @@ -741,21 +735,18 @@ checkSanity( rtsBool check_heap ) static void findMemoryLeak (void) { - nat g, s, i; + nat g, i; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { for (i = 0; i < n_capabilities; i++) { markBlocks(capabilities[i].mut_lists[g]); } markBlocks(generations[g].mut_list); - for (s = 0; s < generations[g].n_steps; s++) { - markBlocks(generations[g].steps[s].blocks); - markBlocks(generations[g].steps[s].large_objects); - } + markBlocks(generations[g].blocks); + markBlocks(generations[g].large_objects); } for (i = 0; i < n_capabilities; i++) { markBlocks(nurseries[i].blocks); - markBlocks(nurseries[i].large_objects); } #ifdef PROFILING @@ -800,19 +791,18 @@ void findSlop(bdescr *bd) } static lnat -stepBlocks (step *stp) +genBlocks (generation *gen) { - ASSERT(countBlocks(stp->blocks) == stp->n_blocks); - ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks); - return stp->n_blocks + stp->n_old_blocks + - countAllocdBlocks(stp->large_objects); + ASSERT(countBlocks(gen->blocks) == gen->n_blocks); + ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); + return gen->n_blocks + gen->n_old_blocks + + countAllocdBlocks(gen->large_objects); } void memInventory (rtsBool show) { - nat g, s, i; - step *stp; + nat g, i; lnat gen_blocks[RtsFlags.GcFlags.generations]; lnat nursery_blocks, retainer_blocks, arena_blocks, exec_blocks; @@ -827,15 +817,13 @@ memInventory (rtsBool show) gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]); } gen_blocks[g] += countAllocdBlocks(generations[g].mut_list); - for (s = 0; s < generations[g].n_steps; s++) { - stp = &generations[g].steps[s]; - gen_blocks[g] += stepBlocks(stp); - } + gen_blocks[g] += genBlocks(&generations[g]); } nursery_blocks = 0; for (i = 0; i < n_capabilities; i++) { - nursery_blocks += stepBlocks(&nurseries[i]); + ASSERT(countBlocks(nurseries[i].blocks) == nurseries[i].n_blocks); + nursery_blocks += nurseries[i].n_blocks; } retainer_blocks = 0; diff --git a/rts/sm/Sanity.h b/rts/sm/Sanity.h index 1156222..38a7289 100644 --- a/rts/sm/Sanity.h +++ b/rts/sm/Sanity.h @@ -22,7 +22,7 @@ BEGIN_RTS_PRIVATE /* debugging routines */ void checkSanity ( rtsBool check_heap ); -void checkNurserySanity ( step *stp ); +void checkNurserySanity ( nursery *nursery ); void checkHeap ( bdescr *bd ); void checkHeapChunk ( StgPtr start, StgPtr end ); void checkLargeObjects ( bdescr *bd ); diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index c799de7..b34aca6 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -38,6 +38,7 @@ static void scavenge_large_bitmap (StgPtr p, # define evacuate(a) evacuate1(a) # define recordMutableGen_GC(a,b) recordMutableGen(a,b) # define scavenge_loop(a) scavenge_loop1(a) +# define scavenge_block(a) scavenge_block1(a) # define scavenge_mutable_list(bd,g) scavenge_mutable_list1(bd,g) # define scavenge_capability_mut_lists(cap) scavenge_capability_mut_Lists1(cap) #endif @@ -300,11 +301,11 @@ scavenge_fun_srt(const StgInfoTable *info) /* ----------------------------------------------------------------------------- Scavenge a block from the given scan pointer up to bd->free. - evac_step is set by the caller to be either zero (for a step in a + evac_gen is set by the caller to be either zero (for a step in a generation < N) or G where G is the generation of the step being scavenged. 
- We sometimes temporarily change evac_step back to zero if we're + We sometimes temporarily change evac_gen back to zero if we're scavenging a mutable object where eager promotion isn't such a good idea. -------------------------------------------------------------------------- */ @@ -314,20 +315,20 @@ scavenge_block (bdescr *bd) { StgPtr p, q; StgInfoTable *info; - step *saved_evac_step; + generation *saved_evac_gen; rtsBool saved_eager_promotion; - step_workspace *ws; + gen_workspace *ws; - debugTrace(DEBUG_gc, "scavenging block %p (gen %d, step %d) @ %p", - bd->start, bd->gen_no, bd->step->no, bd->u.scan); + debugTrace(DEBUG_gc, "scavenging block %p (gen %d) @ %p", + bd->start, bd->gen_no, bd->u.scan); gct->scan_bd = bd; - gct->evac_step = bd->step; - saved_evac_step = gct->evac_step; + gct->evac_gen = bd->gen; + saved_evac_gen = gct->evac_gen; saved_eager_promotion = gct->eager_promotion; gct->failed_to_evac = rtsFalse; - ws = &gct->steps[bd->step->abs_no]; + ws = &gct->gens[bd->gen->no]; p = bd->u.scan; @@ -605,11 +606,11 @@ scavenge_block (bdescr *bd) case TVAR_WATCH_QUEUE: { StgTVarWatchQueue *wq = ((StgTVarWatchQueue *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&wq->closure); evacuate((StgClosure **)&wq->next_queue_entry); evacuate((StgClosure **)&wq->prev_queue_entry); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable p += sizeofW(StgTVarWatchQueue); break; @@ -618,10 +619,10 @@ scavenge_block (bdescr *bd) case TVAR: { StgTVar *tvar = ((StgTVar *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&tvar->current_value); evacuate((StgClosure **)&tvar->first_watch_queue_entry); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable p += sizeofW(StgTVar); break; @@ -630,11 +631,11 @@ scavenge_block (bdescr *bd) case TREC_HEADER: { StgTRecHeader *trec = ((StgTRecHeader *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&trec->enclosing_trec); evacuate((StgClosure **)&trec->current_chunk); evacuate((StgClosure **)&trec->invariants_to_check); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable p += sizeofW(StgTRecHeader); break; @@ -645,14 +646,14 @@ scavenge_block (bdescr *bd) StgWord i; StgTRecChunk *tc = ((StgTRecChunk *) p); TRecEntry *e = &(tc -> entries[0]); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&tc->prev_chunk); for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { evacuate((StgClosure **)&e->tvar); evacuate((StgClosure **)&e->expected_value); evacuate((StgClosure **)&e->new_value); } - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable p += sizeofW(StgTRecChunk); break; @@ -661,10 +662,10 @@ scavenge_block (bdescr *bd) case ATOMIC_INVARIANT: { StgAtomicInvariant *invariant = ((StgAtomicInvariant *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate(&invariant->code); evacuate((StgClosure **)&invariant->last_execution); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable p += sizeofW(StgAtomicInvariant); break; @@ -673,11 +674,11 @@ scavenge_block (bdescr *bd) case INVARIANT_CHECK_QUEUE: { StgInvariantCheckQueue *queue = ((StgInvariantCheckQueue *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&queue->invariant); evacuate((StgClosure **)&queue->my_execution); 
evacuate((StgClosure **)&queue->next_queue_entry); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable p += sizeofW(StgInvariantCheckQueue); break; @@ -736,10 +737,10 @@ scavenge_mark_stack(void) { StgPtr p, q; StgInfoTable *info; - step *saved_evac_step; + generation *saved_evac_gen; - gct->evac_step = &oldest_gen->steps[0]; - saved_evac_step = gct->evac_step; + gct->evac_gen = oldest_gen; + saved_evac_gen = gct->evac_gen; while ((p = pop_mark_stack())) { @@ -971,11 +972,11 @@ scavenge_mark_stack(void) case TVAR_WATCH_QUEUE: { StgTVarWatchQueue *wq = ((StgTVarWatchQueue *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&wq->closure); evacuate((StgClosure **)&wq->next_queue_entry); evacuate((StgClosure **)&wq->prev_queue_entry); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -983,10 +984,10 @@ scavenge_mark_stack(void) case TVAR: { StgTVar *tvar = ((StgTVar *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&tvar->current_value); evacuate((StgClosure **)&tvar->first_watch_queue_entry); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -996,14 +997,14 @@ scavenge_mark_stack(void) StgWord i; StgTRecChunk *tc = ((StgTRecChunk *) p); TRecEntry *e = &(tc -> entries[0]); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&tc->prev_chunk); for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { evacuate((StgClosure **)&e->tvar); evacuate((StgClosure **)&e->expected_value); evacuate((StgClosure **)&e->new_value); } - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -1011,11 +1012,11 @@ scavenge_mark_stack(void) case TREC_HEADER: { StgTRecHeader *trec = ((StgTRecHeader *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&trec->enclosing_trec); evacuate((StgClosure **)&trec->current_chunk); evacuate((StgClosure **)&trec->invariants_to_check); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -1023,10 +1024,10 @@ scavenge_mark_stack(void) case ATOMIC_INVARIANT: { StgAtomicInvariant *invariant = ((StgAtomicInvariant *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate(&invariant->code); evacuate((StgClosure **)&invariant->last_execution); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -1034,11 +1035,11 @@ scavenge_mark_stack(void) case INVARIANT_CHECK_QUEUE: { StgInvariantCheckQueue *queue = ((StgInvariantCheckQueue *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&queue->invariant); evacuate((StgClosure **)&queue->my_execution); evacuate((StgClosure **)&queue->next_queue_entry); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -1050,8 +1051,8 @@ scavenge_mark_stack(void) if (gct->failed_to_evac) { gct->failed_to_evac = rtsFalse; - if (gct->evac_step) { - recordMutableGen_GC((StgClosure *)q, gct->evac_step->gen_no); + if (gct->evac_gen) { + recordMutableGen_GC((StgClosure *)q, gct->evac_gen->no); } } } // while (p = pop_mark_stack()) @@ -1069,7 +1070,7 @@ static rtsBool scavenge_one(StgPtr p) { const StgInfoTable *info; - step *saved_evac_step = gct->evac_step; + generation *saved_evac_gen = 
gct->evac_gen; rtsBool no_luck; ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); @@ -1246,11 +1247,11 @@ scavenge_one(StgPtr p) case TVAR_WATCH_QUEUE: { StgTVarWatchQueue *wq = ((StgTVarWatchQueue *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&wq->closure); evacuate((StgClosure **)&wq->next_queue_entry); evacuate((StgClosure **)&wq->prev_queue_entry); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -1258,10 +1259,10 @@ scavenge_one(StgPtr p) case TVAR: { StgTVar *tvar = ((StgTVar *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&tvar->current_value); evacuate((StgClosure **)&tvar->first_watch_queue_entry); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -1269,11 +1270,11 @@ scavenge_one(StgPtr p) case TREC_HEADER: { StgTRecHeader *trec = ((StgTRecHeader *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&trec->enclosing_trec); evacuate((StgClosure **)&trec->current_chunk); evacuate((StgClosure **)&trec->invariants_to_check); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -1283,14 +1284,14 @@ scavenge_one(StgPtr p) StgWord i; StgTRecChunk *tc = ((StgTRecChunk *) p); TRecEntry *e = &(tc -> entries[0]); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&tc->prev_chunk); for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) { evacuate((StgClosure **)&e->tvar); evacuate((StgClosure **)&e->expected_value); evacuate((StgClosure **)&e->new_value); } - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -1298,10 +1299,10 @@ scavenge_one(StgPtr p) case ATOMIC_INVARIANT: { StgAtomicInvariant *invariant = ((StgAtomicInvariant *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate(&invariant->code); evacuate((StgClosure **)&invariant->last_execution); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -1309,11 +1310,11 @@ scavenge_one(StgPtr p) case INVARIANT_CHECK_QUEUE: { StgInvariantCheckQueue *queue = ((StgInvariantCheckQueue *) p); - gct->evac_step = 0; + gct->evac_gen = 0; evacuate((StgClosure **)&queue->invariant); evacuate((StgClosure **)&queue->my_execution); evacuate((StgClosure **)&queue->next_queue_entry); - gct->evac_step = saved_evac_step; + gct->evac_gen = saved_evac_gen; gct->failed_to_evac = rtsTrue; // mutable break; } @@ -1333,21 +1334,21 @@ scavenge_one(StgPtr p) * promoted */ { - StgPtr start = gen->steps[0].scan; - bdescr *start_bd = gen->steps[0].scan_bd; + StgPtr start = gen->scan; + bdescr *start_bd = gen->scan_bd; nat size = 0; - scavenge(&gen->steps[0]); - if (start_bd != gen->steps[0].scan_bd) { + scavenge(&gen); + if (start_bd != gen->scan_bd) { size += (P_)BLOCK_ROUND_UP(start) - start; start_bd = start_bd->link; - while (start_bd != gen->steps[0].scan_bd) { + while (start_bd != gen->scan_bd) { size += BLOCK_SIZE_W; start_bd = start_bd->link; } - size += gen->steps[0].scan - - (P_)BLOCK_ROUND_DOWN(gen->steps[0].scan); + size += gen->scan - + (P_)BLOCK_ROUND_DOWN(gen->scan); } else { - size = gen->steps[0].scan - start; + size = gen->scan - start; } debugBelch("evac IND_OLDGEN: %ld bytes", size * sizeof(W_)); } @@ -1376,7 +1377,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen) { StgPtr p, q; - gct->evac_step = &gen->steps[0]; 
+ gct->evac_gen = gen; for (; bd != NULL; bd = bd->link) { for (q = bd->start; q < bd->free; q++) { p = (StgPtr)*q; @@ -1479,7 +1480,7 @@ scavenge_static(void) /* Always evacuate straight to the oldest generation for static * objects */ - gct->evac_step = &oldest_gen->steps[0]; + gct->evac_gen = oldest_gen; /* keep going until we've scavenged all the objects on the linked list... */ @@ -1766,19 +1767,19 @@ scavenge_stack(StgPtr p, StgPtr stack_end) /*----------------------------------------------------------------------------- scavenge the large object list. - evac_step set by caller; similar games played with evac_step as with + evac_gen set by caller; similar games played with evac_gen as with scavenge() - see comment at the top of scavenge(). Most large - objects are (repeatedly) mutable, so most of the time evac_step will + objects are (repeatedly) mutable, so most of the time evac_gen will be zero. --------------------------------------------------------------------------- */ static void -scavenge_large (step_workspace *ws) +scavenge_large (gen_workspace *ws) { bdescr *bd; StgPtr p; - gct->evac_step = ws->step; + gct->evac_gen = ws->gen; bd = ws->todo_large_objects; @@ -1790,15 +1791,15 @@ scavenge_large (step_workspace *ws) // the front when evacuating. ws->todo_large_objects = bd->link; - ACQUIRE_SPIN_LOCK(&ws->step->sync_large_objects); - dbl_link_onto(bd, &ws->step->scavenged_large_objects); - ws->step->n_scavenged_large_blocks += bd->blocks; - RELEASE_SPIN_LOCK(&ws->step->sync_large_objects); + ACQUIRE_SPIN_LOCK(&ws->gen->sync_large_objects); + dbl_link_onto(bd, &ws->gen->scavenged_large_objects); + ws->gen->n_scavenged_large_blocks += bd->blocks; + RELEASE_SPIN_LOCK(&ws->gen->sync_large_objects); p = bd->start; if (scavenge_one(p)) { - if (ws->step->gen_no > 0) { - recordMutableGen_GC((StgClosure *)p, ws->step->gen_no); + if (ws->gen->no > 0) { + recordMutableGen_GC((StgClosure *)p, ws->gen->no); } } @@ -1810,7 +1811,7 @@ scavenge_large (step_workspace *ws) /* ---------------------------------------------------------------------------- Look for work to do. - We look for the oldest step that has either a todo block that can + We look for the oldest gen that has either a todo block that can be scanned, or a block of work on the global queue that we can scan. 
@@ -1829,8 +1830,8 @@ scavenge_large (step_workspace *ws) static rtsBool scavenge_find_work (void) { - int s; - step_workspace *ws; + int g; + gen_workspace *ws; rtsBool did_something, did_anything; bdescr *bd; @@ -1840,11 +1841,8 @@ scavenge_find_work (void) loop: did_something = rtsFalse; - for (s = total_steps-1; s >= 0; s--) { - if (s == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } - ws = &gct->steps[s]; + for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) { + ws = &gct->gens[g]; gct->scan_bd = NULL; @@ -1879,11 +1877,8 @@ loop: #if defined(THREADED_RTS) if (work_stealing) { // look for work to steal - for (s = total_steps-1; s >= 0; s--) { - if (s == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } - if ((bd = steal_todo_block(s)) != NULL) { + for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) { + if ((bd = steal_todo_block(g)) != NULL) { scavenge_block(bd); did_something = rtsTrue; break; diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 94f9e10..d9c7f86 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -49,12 +49,9 @@ generation *generations = NULL; /* all the generations */ generation *g0 = NULL; /* generation 0, for convenience */ generation *oldest_gen = NULL; /* oldest generation, for convenience */ -nat total_steps = 0; -step *all_steps = NULL; /* single array of steps */ - ullong total_allocated = 0; /* total memory allocated during run */ -step *nurseries = NULL; /* array of nurseries, size == n_capabilities */ +nursery *nurseries = NULL; /* array of nurseries, size == n_capabilities */ #ifdef THREADED_RTS /* @@ -67,37 +64,38 @@ Mutex sm_mutex; static void allocNurseries ( void ); static void -initStep (step *stp, int g, int s) +initGeneration (generation *gen, int g) { - stp->no = s; - stp->abs_no = RtsFlags.GcFlags.steps * g + s; - stp->blocks = NULL; - stp->n_blocks = 0; - stp->n_words = 0; - stp->live_estimate = 0; - stp->old_blocks = NULL; - stp->n_old_blocks = 0; - stp->gen = &generations[g]; - stp->gen_no = g; - stp->large_objects = NULL; - stp->n_large_blocks = 0; - stp->scavenged_large_objects = NULL; - stp->n_scavenged_large_blocks = 0; - stp->mark = 0; - stp->compact = 0; - stp->bitmap = NULL; + gen->no = g; + gen->collections = 0; + gen->par_collections = 0; + gen->failed_promotions = 0; + gen->max_blocks = 0; + gen->blocks = NULL; + gen->n_blocks = 0; + gen->n_words = 0; + gen->live_estimate = 0; + gen->old_blocks = NULL; + gen->n_old_blocks = 0; + gen->large_objects = NULL; + gen->n_large_blocks = 0; + gen->mut_list = allocBlock(); + gen->scavenged_large_objects = NULL; + gen->n_scavenged_large_blocks = 0; + gen->mark = 0; + gen->compact = 0; + gen->bitmap = NULL; #ifdef THREADED_RTS - initSpinLock(&stp->sync_large_objects); + initSpinLock(&gen->sync_large_objects); #endif - stp->threads = END_TSO_QUEUE; - stp->old_threads = END_TSO_QUEUE; + gen->threads = END_TSO_QUEUE; + gen->old_threads = END_TSO_QUEUE; } void initStorage( void ) { - nat g, s; - generation *gen; + nat g; if (generations != NULL) { // multi-init protection @@ -142,83 +140,30 @@ initStorage( void ) /* Initialise all generations */ for(g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen = &generations[g]; - gen->no = g; - gen->mut_list = allocBlock(); - gen->collections = 0; - gen->par_collections = 0; - gen->failed_promotions = 0; - gen->max_blocks = 0; + initGeneration(&generations[g], g); } /* A couple of convenience pointers */ g0 = &generations[0]; oldest_gen = &generations[RtsFlags.GcFlags.generations-1]; - /* allocate all the steps into an array. 
It is important that we do - it this way, because we need the invariant that two step pointers - can be directly compared to see which is the oldest. - Remember that the last generation has only one step. */ - total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps; - all_steps = stgMallocBytes(total_steps * sizeof(struct step_), - "initStorage: steps"); - - /* Allocate step structures in each generation */ - if (RtsFlags.GcFlags.generations > 1) { - /* Only for multiple-generations */ - - /* Oldest generation: one step */ - oldest_gen->n_steps = 1; - oldest_gen->steps = all_steps + (RtsFlags.GcFlags.generations - 1) - * RtsFlags.GcFlags.steps; - - /* set up all except the oldest generation with 2 steps */ - for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) { - generations[g].n_steps = RtsFlags.GcFlags.steps; - generations[g].steps = all_steps + g * RtsFlags.GcFlags.steps; - } - - } else { - /* single generation, i.e. a two-space collector */ - g0->n_steps = 1; - g0->steps = all_steps; - } - - nurseries = stgMallocBytes (n_capabilities * sizeof(struct step_), - "initStorage: nurseries"); - - /* Initialise all steps */ - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - initStep(&generations[g].steps[s], g, s); - } - } - - for (s = 0; s < n_capabilities; s++) { - initStep(&nurseries[s], 0, s); - } + nurseries = stgMallocBytes(n_capabilities * sizeof(struct nursery_), + "initStorage: nurseries"); /* Set up the destination pointers in each younger gen. step */ for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) { - for (s = 0; s < generations[g].n_steps-1; s++) { - generations[g].steps[s].to = &generations[g].steps[s+1]; - } - generations[g].steps[s].to = &generations[g+1].steps[0]; - } - oldest_gen->steps[0].to = &oldest_gen->steps[0]; - - for (s = 0; s < n_capabilities; s++) { - nurseries[s].to = generations[0].steps[0].to; + generations[g].to = &generations[g+1]; } + oldest_gen->to = oldest_gen; /* The oldest generation has one step. 
*/ if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) { if (RtsFlags.GcFlags.generations == 1) { errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled"); } else { - oldest_gen->steps[0].mark = 1; + oldest_gen->mark = 1; if (RtsFlags.GcFlags.compact) - oldest_gen->steps[0].compact = 1; + oldest_gen->compact = 1; } } @@ -264,7 +209,6 @@ exitStorage (void) void freeStorage (void) { - stgFree(all_steps); // frees all the steps stgFree(generations); freeAllMBlocks(); #if defined(THREADED_RTS) @@ -380,7 +324,7 @@ newDynCAF(StgClosure *caf) -------------------------------------------------------------------------- */ static bdescr * -allocNursery (step *stp, bdescr *tail, nat blocks) +allocNursery (bdescr *tail, nat blocks) { bdescr *bd; nat i; @@ -401,7 +345,7 @@ allocNursery (step *stp, bdescr *tail, nat blocks) if (tail != NULL) { tail->u.back = bd; } - initBdescr(bd, stp); + initBdescr(bd, g0, g0); bd->flags = 0; bd->free = bd->start; tail = bd; @@ -429,11 +373,9 @@ allocNurseries( void ) for (i = 0; i < n_capabilities; i++) { nurseries[i].blocks = - allocNursery(&nurseries[i], NULL, - RtsFlags.GcFlags.minAllocAreaSize); - nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize; - nurseries[i].old_blocks = NULL; - nurseries[i].n_old_blocks = 0; + allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize); + nurseries[i].n_blocks = + RtsFlags.GcFlags.minAllocAreaSize; } assignNurseriesToCapabilities(); } @@ -443,20 +385,14 @@ resetNurseries( void ) { nat i; bdescr *bd; - step *stp; for (i = 0; i < n_capabilities; i++) { - stp = &nurseries[i]; - for (bd = stp->blocks; bd; bd = bd->link) { + for (bd = nurseries[i].blocks; bd; bd = bd->link) { bd->free = bd->start; ASSERT(bd->gen_no == 0); - ASSERT(bd->step == stp); + ASSERT(bd->gen == g0); IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE)); } - // these large objects are dead, since we have just GC'd - freeChain(stp->large_objects); - stp->large_objects = NULL; - stp->n_large_blocks = 0; } assignNurseriesToCapabilities(); } @@ -469,24 +405,23 @@ countNurseryBlocks (void) for (i = 0; i < n_capabilities; i++) { blocks += nurseries[i].n_blocks; - blocks += nurseries[i].n_large_blocks; } return blocks; } static void -resizeNursery ( step *stp, nat blocks ) +resizeNursery ( nursery *nursery, nat blocks ) { bdescr *bd; nat nursery_blocks; - nursery_blocks = stp->n_blocks; + nursery_blocks = nursery->n_blocks; if (nursery_blocks == blocks) return; if (nursery_blocks < blocks) { debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks", blocks); - stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks); + nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks); } else { bdescr *next_bd; @@ -494,7 +429,7 @@ resizeNursery ( step *stp, nat blocks ) debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks", blocks); - bd = stp->blocks; + bd = nursery->blocks; while (nursery_blocks > blocks) { next_bd = bd->link; next_bd->u.back = NULL; @@ -502,16 +437,16 @@ resizeNursery ( step *stp, nat blocks ) freeGroup(bd); bd = next_bd; } - stp->blocks = bd; + nursery->blocks = bd; // might have gone just under, by freeing a large block, so make // up the difference. 
if (nursery_blocks < blocks) { - stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks); + nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks); } } - stp->n_blocks = blocks; - ASSERT(countBlocks(stp->blocks) == stp->n_blocks); + nursery->n_blocks = blocks; + ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks); } // @@ -566,26 +501,26 @@ splitLargeBlock (bdescr *bd, nat blocks) ACQUIRE_SM_LOCK; - ASSERT(countBlocks(bd->step->large_objects) == bd->step->n_large_blocks); + ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks); // subtract the original number of blocks from the counter first - bd->step->n_large_blocks -= bd->blocks; + bd->gen->n_large_blocks -= bd->blocks; new_bd = splitBlockGroup (bd, blocks); - initBdescr(new_bd, bd->step); + initBdescr(new_bd, bd->gen, bd->gen->to); new_bd->flags = BF_LARGE | (bd->flags & BF_EVACUATED); // if new_bd is in an old generation, we have to set BF_EVACUATED new_bd->free = bd->free; - dbl_link_onto(new_bd, &bd->step->large_objects); + dbl_link_onto(new_bd, &bd->gen->large_objects); ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W); // add the new number of blocks to the counter. Due to the gaps // for block descriptors, new_bd->blocks + bd->blocks might not be // equal to the original bd->blocks, which is why we do it this way. - bd->step->n_large_blocks += bd->blocks + new_bd->blocks; + bd->gen->n_large_blocks += bd->blocks + new_bd->blocks; - ASSERT(countBlocks(bd->step->large_objects) == bd->step->n_large_blocks); + ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks); RELEASE_SM_LOCK; @@ -610,7 +545,6 @@ allocate (Capability *cap, lnat n) { bdescr *bd; StgPtr p; - step *stp; if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; @@ -629,14 +563,12 @@ allocate (Capability *cap, lnat n) stg_exit(EXIT_HEAPOVERFLOW); } - stp = &nurseries[cap->no]; - ACQUIRE_SM_LOCK bd = allocGroup(req_blocks); + dbl_link_onto(bd, &g0->large_objects); + g0->n_large_blocks += bd->blocks; // might be larger than req_blocks RELEASE_SM_LOCK; - dbl_link_onto(bd, &stp->large_objects); - stp->n_large_blocks += bd->blocks; // might be larger than req_blocks - initBdescr(bd, stp); + initBdescr(bd, g0, g0); bd->flags = BF_LARGE; bd->free = bd->start + n; return bd->start; @@ -662,7 +594,7 @@ allocate (Capability *cap, lnat n) bd = allocBlock(); cap->r.rNursery->n_blocks++; RELEASE_SM_LOCK; - initBdescr(bd, cap->r.rNursery); + initBdescr(bd, g0, g0); bd->flags = 0; // If we had to allocate a new block, then we'll GC // pretty quickly now, because MAYBE_GC() will @@ -690,7 +622,7 @@ allocate (Capability *cap, lnat n) We allocate small pinned objects into a single block, allocating a new block when the current one overflows. The block is chained - onto the large_object_list of generation 0 step 0. + onto the large_object_list of generation 0. NOTE: The GC can't in general handle pinned objects. This interface is only safe to use for ByteArrays, which have no @@ -713,7 +645,6 @@ allocatePinned (Capability *cap, lnat n) { StgPtr p; bdescr *bd; - step *stp; // If the request is for a large object, then allocate() // will give us a pinned object anyway. @@ -731,13 +662,12 @@ allocatePinned (Capability *cap, lnat n) // If we don't have a block of pinned objects yet, or the current // one isn't large enough to hold the new object, allocate a new one. 
if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) { - ACQUIRE_SM_LOCK + ACQUIRE_SM_LOCK; cap->pinned_object_block = bd = allocBlock(); - RELEASE_SM_LOCK - stp = &nurseries[cap->no]; - dbl_link_onto(bd, &stp->large_objects); - stp->n_large_blocks++; - initBdescr(bd, stp); + dbl_link_onto(bd, &g0->large_objects); + g0->n_large_blocks++; + RELEASE_SM_LOCK; + initBdescr(bd, g0, g0); bd->flags = BF_PINNED | BF_LARGE; bd->free = bd->start; } @@ -861,30 +791,23 @@ calcAllocated( void ) /* Approximate the amount of live data in the heap. To be called just * after garbage collection (see GarbageCollect()). */ -lnat -calcLiveBlocks(void) +lnat calcLiveBlocks (void) { - nat g, s; + nat g; lnat live = 0; - step *stp; + generation *gen; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { /* approximate amount of live data (doesn't take into account slop * at end of each block). */ - if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } - stp = &generations[g].steps[s]; - live += stp->n_large_blocks + stp->n_blocks; - } + gen = &generations[g]; + live += gen->n_large_blocks + gen->n_blocks; } return live; } -lnat -countOccupied(bdescr *bd) +lnat countOccupied (bdescr *bd) { lnat words; @@ -898,20 +821,16 @@ countOccupied(bdescr *bd) // Return an accurate count of the live data in the heap, excluding // generation 0. -lnat -calcLiveWords(void) +lnat calcLiveWords (void) { - nat g, s; + nat g; lnat live; - step *stp; + generation *gen; live = 0; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) continue; - stp = &generations[g].steps[s]; - live += stp->n_words + countOccupied(stp->large_objects); - } + gen = &generations[g]; + live += gen->n_words + countOccupied(gen->large_objects); } return live; } @@ -919,44 +838,39 @@ calcLiveWords(void) /* Approximate the number of blocks that will be needed at the next * garbage collection. * - * Assume: all data currently live will remain live. Steps that will - * be collected next time will therefore need twice as many blocks - * since all the data will be copied. + * Assume: all data currently live will remain live. Generationss + * that will be collected next time will therefore need twice as many + * blocks since all the data will be copied. */ extern lnat calcNeeded(void) { lnat needed = 0; - nat g, s; - step *stp; + nat g; + generation *gen; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (g == 0 && s == 0) { continue; } - stp = &generations[g].steps[s]; - - // we need at least this much space - needed += stp->n_blocks + stp->n_large_blocks; - - // any additional space needed to collect this gen next time? - if (g == 0 || // always collect gen 0 - (generations[g].steps[0].n_blocks + - generations[g].steps[0].n_large_blocks - > generations[g].max_blocks)) { - // we will collect this gen next time - if (stp->mark) { - // bitmap: - needed += stp->n_blocks / BITS_IN(W_); - // mark stack: - needed += stp->n_blocks / 100; - } - if (stp->compact) { - continue; // no additional space needed for compaction - } else { - needed += stp->n_blocks; - } - } - } + gen = &generations[g]; + + // we need at least this much space + needed += gen->n_blocks + gen->n_large_blocks; + + // any additional space needed to collect this gen next time? 
+ if (g == 0 || // always collect gen 0 + (gen->n_blocks + gen->n_large_blocks > gen->max_blocks)) { + // we will collect this gen next time + if (gen->mark) { + // bitmap: + needed += gen->n_blocks / BITS_IN(W_); + // mark stack: + needed += gen->n_blocks / 100; + } + if (gen->compact) { + continue; // no additional space needed for compaction + } else { + needed += gen->n_blocks; + } + } } return needed; } diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index 02f5637..6dbccd8 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -29,7 +29,7 @@ INLINE_HEADER rtsBool doYouWantToGC( Capability *cap ) { return (cap->r.rCurrentNursery->link == NULL || - cap->r.rNursery->n_large_blocks >= alloc_blocks_lim); + g0->n_large_blocks >= alloc_blocks_lim); } /* for splitting blocks groups in two */ @@ -121,7 +121,7 @@ void dirty_MVAR(StgRegTable *reg, StgClosure *p); Nursery manipulation -------------------------------------------------------------------------- */ -extern step *nurseries; +extern nursery *nurseries; void resetNurseries ( void ); void resizeNurseries ( nat blocks ); diff --git a/rts/sm/Sweep.c b/rts/sm/Sweep.c index 0525f7e..acbf6f8 100644 --- a/rts/sm/Sweep.c +++ b/rts/sm/Sweep.c @@ -19,20 +19,20 @@ #include "Trace.h" void -sweep(step *step) +sweep(generation *gen) { bdescr *bd, *prev, *next; nat i; nat freed, resid, fragd, blocks, live; - ASSERT(countBlocks(step->old_blocks) == step->n_old_blocks); + ASSERT(countBlocks(gen->old_blocks) == gen->n_old_blocks); live = 0; // estimate of live data in this gen freed = 0; fragd = 0; blocks = 0; prev = NULL; - for (bd = step->old_blocks; bd != NULL; bd = next) + for (bd = gen->old_blocks; bd != NULL; bd = next) { next = bd->link; @@ -52,9 +52,9 @@ sweep(step *step) if (resid == 0) { freed++; - step->n_old_blocks--; + gen->n_old_blocks--; if (prev == NULL) { - step->old_blocks = next; + gen->old_blocks = next; } else { prev->link = next; } @@ -70,15 +70,15 @@ sweep(step *step) } } - step->live_estimate = live; + gen->live_estimate = live; debugTrace(DEBUG_gc, "sweeping: %d blocks, %d were copied, %d freed (%d%%), %d are fragmented, live estimate: %ld%%", - step->n_old_blocks + freed, - step->n_old_blocks - blocks + freed, + gen->n_old_blocks + freed, + gen->n_old_blocks - blocks + freed, freed, blocks == 0 ? 0 : (freed * 100) / blocks, fragd, (unsigned long)((blocks - freed) == 0 ? 0 : ((live / BLOCK_SIZE_W) * 100) / (blocks - freed))); - ASSERT(countBlocks(step->old_blocks) == step->n_old_blocks); + ASSERT(countBlocks(gen->old_blocks) == gen->n_old_blocks); } diff --git a/rts/sm/Sweep.h b/rts/sm/Sweep.h index f540cb3..4eacd11 100644 --- a/rts/sm/Sweep.h +++ b/rts/sm/Sweep.h @@ -14,6 +14,6 @@ #ifndef SM_SWEEP_H #define SM_SWEEP_H -RTS_PRIVATE void sweep(step *gen); +RTS_PRIVATE void sweep(generation *gen); #endif /* SM_SWEEP_H */ -- 1.7.10.4
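Below is a minimal, standalone sketch (not part of the patch, and using hypothetical, simplified names rather than the real RTS types) of the promotion chaining visible above: initStorage() now links each generation's `to` field to the next generation and makes the oldest generation point to itself, and initBdescr(bd, gen, gen->to) records both a block's generation and its destination, which is how a block "knows" where its live objects should be promoted without any step structures.

/* Illustrative sketch only, assuming simplified stand-in types. */
#include <stdio.h>

#define N_GENS 3

typedef struct gen_ {
    unsigned int no;
    struct gen_ *to;       /* destination generation for live objects */
} gen;

typedef struct block_ {
    gen *g;                /* generation this block belongs to */
    gen *dest;             /* where objects in this block get copied */
} block;

static gen gens[N_GENS];

static void init_gens(void)
{
    /* Mirrors initStorage(): generations[g].to = &generations[g+1],
     * and the oldest generation points to itself. */
    for (unsigned int i = 0; i < N_GENS; i++) {
        gens[i].no = i;
        gens[i].to = (i + 1 < N_GENS) ? &gens[i + 1] : &gens[i];
    }
}

/* Mirrors the initBdescr(bd, gen, gen->to) calls in the patch: a block
 * carries both its own generation and its promotion destination. */
static void init_block(block *bd, gen *g)
{
    bd->g = g;
    bd->dest = g->to;
}

int main(void)
{
    block b;
    init_gens();

    init_block(&b, &gens[0]);
    printf("block in gen %u promotes to gen %u\n", b.g->no, b.dest->no);

    init_block(&b, &gens[N_GENS - 1]);
    printf("block in gen %u promotes to gen %u (oldest points to itself)\n",
           b.g->no, b.dest->no);
    return 0;
}

Running the sketch prints that a gen-0 block promotes to gen 1 while an oldest-generation block promotes to itself, matching the oldest_gen->to = oldest_gen assignment the patch makes in initStorage().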