RTS tidyup sweep, first phase
[ghc-hetmet.git] / rts / sm / Scav.c
index b969de3..9ebd4c5 100644 (file)
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+
 #include "Storage.h"
-#include "MBlock.h"
 #include "GC.h"
 #include "GCThread.h"
 #include "GCUtils.h"
@@ -23,8 +23,8 @@
 #include "Scav.h"
 #include "Apply.h"
 #include "Trace.h"
-#include "LdvProfile.h"
 #include "Sanity.h"
+#include "Capability.h"
 
 static void scavenge_stack (StgPtr p, StgPtr stack_end);
 
@@ -32,6 +32,173 @@ static void scavenge_large_bitmap (StgPtr p,
                                   StgLargeBitmap *large_bitmap, 
                                   nat size );
 
+#if defined(THREADED_RTS) && !defined(PARALLEL_GC)
+# define evacuate(a) evacuate1(a)
+# define recordMutableGen_GC(a,b) recordMutableGen(a,b)
+# define scavenge_loop(a) scavenge_loop1(a)
+# define scavenge_mutable_list(bd,g) scavenge_mutable_list1(bd,g)
+# define scavenge_capability_mut_lists(cap) scavenge_capability_mut_Lists1(cap)
+#endif
+
+/* -----------------------------------------------------------------------------
+   Scavenge a TSO.
+   -------------------------------------------------------------------------- */
+
+STATIC_INLINE void
+scavenge_TSO_link (StgTSO *tso)
+{
+    // We don't always chase the link field: TSOs on the blackhole
+    // queue are not automatically alive, so the link field is a
+    // "weak" pointer in that case.
+    if (tso->why_blocked != BlockedOnBlackHole) {
+        evacuate((StgClosure **)&tso->_link);
+    }
+}
+
+static void
+scavengeTSO (StgTSO *tso)
+{
+    rtsBool saved_eager;
+
+    if (tso->what_next == ThreadRelocated) {
+        // the only way this can happen is if the old TSO was on the
+        // mutable list.  We might have other links to this defunct
+        // TSO, so we must update its link field.
+        evacuate((StgClosure**)&tso->_link);
+        return;
+    }
+
+    debugTrace(DEBUG_gc,"scavenging thread %d",(int)tso->id);
+
+    saved_eager = gct->eager_promotion;
+    gct->eager_promotion = rtsFalse;
+
+    if (   tso->why_blocked == BlockedOnMVar
+       || tso->why_blocked == BlockedOnBlackHole
+       || tso->why_blocked == BlockedOnException
+       ) {
+       evacuate(&tso->block_info.closure);
+    }
+    evacuate((StgClosure **)&tso->blocked_exceptions);
+    
+    // scavange current transaction record
+    evacuate((StgClosure **)&tso->trec);
+    
+    // scavenge this thread's stack 
+    scavenge_stack(tso->sp, &(tso->stack[tso->stack_size]));
+
+    if (gct->failed_to_evac) {
+        tso->flags |= TSO_DIRTY;
+        scavenge_TSO_link(tso);
+    } else {
+        tso->flags &= ~TSO_DIRTY;
+        scavenge_TSO_link(tso);
+        if (gct->failed_to_evac) {
+            tso->flags |= TSO_LINK_DIRTY;
+        } else {
+            tso->flags &= ~TSO_LINK_DIRTY;
+        }
+    }
+
+    gct->eager_promotion = saved_eager;
+}
+
+/* -----------------------------------------------------------------------------
+   Blocks of function args occur on the stack (at the top) and
+   in PAPs.
+   -------------------------------------------------------------------------- */
+
+STATIC_INLINE StgPtr
+scavenge_arg_block (StgFunInfoTable *fun_info, StgClosure **args)
+{
+    StgPtr p;
+    StgWord bitmap;
+    nat size;
+
+    p = (StgPtr)args;
+    switch (fun_info->f.fun_type) {
+    case ARG_GEN:
+       bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+       size = BITMAP_SIZE(fun_info->f.b.bitmap);
+       goto small_bitmap;
+    case ARG_GEN_BIG:
+       size = GET_FUN_LARGE_BITMAP(fun_info)->size;
+       scavenge_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
+       p += size;
+       break;
+    default:
+       bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+       size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
+    small_bitmap:
+       while (size > 0) {
+           if ((bitmap & 1) == 0) {
+               evacuate((StgClosure **)p);
+           }
+           p++;
+           bitmap = bitmap >> 1;
+           size--;
+       }
+       break;
+    }
+    return p;
+}
+
+STATIC_INLINE GNUC_ATTR_HOT StgPtr
+scavenge_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size)
+{
+    StgPtr p;
+    StgWord bitmap;
+    StgFunInfoTable *fun_info;
+    
+    fun_info = get_fun_itbl(UNTAG_CLOSURE(fun));
+    ASSERT(fun_info->i.type != PAP);
+    p = (StgPtr)payload;
+
+    switch (fun_info->f.fun_type) {
+    case ARG_GEN:
+       bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+       goto small_bitmap;
+    case ARG_GEN_BIG:
+       scavenge_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
+       p += size;
+       break;
+    case ARG_BCO:
+       scavenge_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), size);
+       p += size;
+       break;
+    default:
+       bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+    small_bitmap:
+       while (size > 0) {
+           if ((bitmap & 1) == 0) {
+               evacuate((StgClosure **)p);
+           }
+           p++;
+           bitmap = bitmap >> 1;
+           size--;
+       }
+       break;
+    }
+    return p;
+}
+
+STATIC_INLINE GNUC_ATTR_HOT StgPtr
+scavenge_PAP (StgPAP *pap)
+{
+    evacuate(&pap->fun);
+    return scavenge_PAP_payload (pap->fun, pap->payload, pap->n_args);
+}
+
+STATIC_INLINE StgPtr
+scavenge_AP (StgAP *ap)
+{
+    evacuate(&ap->fun);
+    return scavenge_PAP_payload (ap->fun, ap->payload, ap->n_args);
+}
+
+/* -----------------------------------------------------------------------------
+   Scavenge SRTs
+   -------------------------------------------------------------------------- */
 
 /* Similar to scavenge_large_bitmap(), but we don't write back the
  * pointers we get back from evacuate().
@@ -66,7 +233,7 @@ scavenge_large_srt_bitmap( StgLargeSRT *large_srt )
  * srt field in the info table.  That's ok, because we'll
  * never dereference it.
  */
-STATIC_INLINE void
+STATIC_INLINE GNUC_ATTR_HOT void
 scavenge_srt (StgClosure **srt, nat srt_bitmap)
 {
   nat bitmap;
@@ -106,7 +273,7 @@ scavenge_srt (StgClosure **srt, nat srt_bitmap)
 }
 
 
-STATIC_INLINE void
+STATIC_INLINE GNUC_ATTR_HOT void
 scavenge_thunk_srt(const StgInfoTable *info)
 {
     StgThunkInfoTable *thunk_info;
@@ -117,7 +284,7 @@ scavenge_thunk_srt(const StgInfoTable *info)
     scavenge_srt((StgClosure **)GET_SRT(thunk_info), thunk_info->i.srt_bitmap);
 }
 
-STATIC_INLINE void
+STATIC_INLINE GNUC_ATTR_HOT void
 scavenge_fun_srt(const StgInfoTable *info)
 {
     StgFunInfoTable *fun_info;
@@ -129,159 +296,431 @@ scavenge_fun_srt(const StgInfoTable *info)
 }
 
 /* -----------------------------------------------------------------------------
-   Scavenge a TSO.
+   Scavenge a block from the given scan pointer up to bd->free.
+
+   evac_step is set by the caller to be either zero (for a step in a
+   generation < N) or G where G is the generation of the step being
+   scavenged.  
+
+   We sometimes temporarily change evac_step back to zero if we're
+   scavenging a mutable object where eager promotion isn't such a good
+   idea.  
    -------------------------------------------------------------------------- */
 
-STATIC_INLINE void
-scavenge_TSO_link (StgTSO *tso)
+static GNUC_ATTR_HOT void
+scavenge_block (bdescr *bd)
 {
-    // We don't always chase the link field: TSOs on the blackhole
-    // queue are not automatically alive, so the link field is a
-    // "weak" pointer in that case.
-    if (tso->why_blocked != BlockedOnBlackHole) {
-        evacuate((StgClosure **)&tso->_link);
+  StgPtr p, q;
+  StgInfoTable *info;
+  step *saved_evac_step;
+  rtsBool saved_eager_promotion;
+  step_workspace *ws;
+
+  debugTrace(DEBUG_gc, "scavenging block %p (gen %d, step %d) @ %p",
+            bd->start, bd->gen_no, bd->step->no, bd->u.scan);
+
+  gct->scan_bd = bd;
+  gct->evac_step = bd->step;
+  saved_evac_step = gct->evac_step;
+  saved_eager_promotion = gct->eager_promotion;
+  gct->failed_to_evac = rtsFalse;
+
+  ws = &gct->steps[bd->step->abs_no];
+
+  p = bd->u.scan;
+  
+  // we might be evacuating into the very object that we're
+  // scavenging, so we have to check the real bd->free pointer each
+  // time around the loop.
+  while (p < bd->free || (bd == ws->todo_bd && p < ws->todo_free)) {
+
+      ASSERT(bd->link == NULL);
+    ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+    info = get_itbl((StgClosure *)p);
+    
+    ASSERT(gct->thunk_selector_depth == 0);
+
+    q = p;
+    switch (info->type) {
+
+    case MVAR_CLEAN:
+    case MVAR_DIRTY:
+    { 
+       StgMVar *mvar = ((StgMVar *)p);
+       gct->eager_promotion = rtsFalse;
+       evacuate((StgClosure **)&mvar->head);
+       evacuate((StgClosure **)&mvar->tail);
+       evacuate((StgClosure **)&mvar->value);
+       gct->eager_promotion = saved_eager_promotion;
+
+       if (gct->failed_to_evac) {
+           mvar->header.info = &stg_MVAR_DIRTY_info;
+       } else {
+           mvar->header.info = &stg_MVAR_CLEAN_info;
+       }
+       p += sizeofW(StgMVar);
+       break;
     }
-}
 
-static void
-scavengeTSO (StgTSO *tso)
-{
-    rtsBool saved_eager;
+    case FUN_2_0:
+       scavenge_fun_srt(info);
+       evacuate(&((StgClosure *)p)->payload[1]);
+       evacuate(&((StgClosure *)p)->payload[0]);
+       p += sizeofW(StgHeader) + 2;
+       break;
+
+    case THUNK_2_0:
+       scavenge_thunk_srt(info);
+       evacuate(&((StgThunk *)p)->payload[1]);
+       evacuate(&((StgThunk *)p)->payload[0]);
+       p += sizeofW(StgThunk) + 2;
+       break;
+
+    case CONSTR_2_0:
+       evacuate(&((StgClosure *)p)->payload[1]);
+       evacuate(&((StgClosure *)p)->payload[0]);
+       p += sizeofW(StgHeader) + 2;
+       break;
+       
+    case THUNK_1_0:
+       scavenge_thunk_srt(info);
+       evacuate(&((StgThunk *)p)->payload[0]);
+       p += sizeofW(StgThunk) + 1;
+       break;
+       
+    case FUN_1_0:
+       scavenge_fun_srt(info);
+    case CONSTR_1_0:
+       evacuate(&((StgClosure *)p)->payload[0]);
+       p += sizeofW(StgHeader) + 1;
+       break;
+       
+    case THUNK_0_1:
+       scavenge_thunk_srt(info);
+       p += sizeofW(StgThunk) + 1;
+       break;
+       
+    case FUN_0_1:
+       scavenge_fun_srt(info);
+    case CONSTR_0_1:
+       p += sizeofW(StgHeader) + 1;
+       break;
+       
+    case THUNK_0_2:
+       scavenge_thunk_srt(info);
+       p += sizeofW(StgThunk) + 2;
+       break;
+       
+    case FUN_0_2:
+       scavenge_fun_srt(info);
+    case CONSTR_0_2:
+       p += sizeofW(StgHeader) + 2;
+       break;
+       
+    case THUNK_1_1:
+       scavenge_thunk_srt(info);
+       evacuate(&((StgThunk *)p)->payload[0]);
+       p += sizeofW(StgThunk) + 2;
+       break;
+
+    case FUN_1_1:
+       scavenge_fun_srt(info);
+    case CONSTR_1_1:
+       evacuate(&((StgClosure *)p)->payload[0]);
+       p += sizeofW(StgHeader) + 2;
+       break;
+       
+    case FUN:
+       scavenge_fun_srt(info);
+       goto gen_obj;
+
+    case THUNK:
+    {
+       StgPtr end;
+
+       scavenge_thunk_srt(info);
+       end = (P_)((StgThunk *)p)->payload + info->layout.payload.ptrs;
+       for (p = (P_)((StgThunk *)p)->payload; p < end; p++) {
+           evacuate((StgClosure **)p);
+       }
+       p += info->layout.payload.nptrs;
+       break;
+    }
+       
+    gen_obj:
+    case CONSTR:
+    case WEAK:
+    case STABLE_NAME:
+    {
+       StgPtr end;
+
+       end = (P_)((StgClosure *)p)->payload + info->layout.payload.ptrs;
+       for (p = (P_)((StgClosure *)p)->payload; p < end; p++) {
+           evacuate((StgClosure **)p);
+       }
+       p += info->layout.payload.nptrs;
+       break;
+    }
+
+    case BCO: {
+       StgBCO *bco = (StgBCO *)p;
+       evacuate((StgClosure **)&bco->instrs);
+       evacuate((StgClosure **)&bco->literals);
+       evacuate((StgClosure **)&bco->ptrs);
+       p += bco_sizeW(bco);
+       break;
+    }
+
+    case IND_PERM:
+      if (bd->gen_no != 0) {
+#ifdef PROFILING
+        // @LDV profiling
+        // No need to call LDV_recordDead_FILL_SLOP_DYNAMIC() because an 
+        // IND_OLDGEN_PERM closure is larger than an IND_PERM closure.
+        LDV_recordDead((StgClosure *)p, sizeofW(StgInd));
+#endif        
+        // 
+        // Todo: maybe use SET_HDR() and remove LDV_RECORD_CREATE()?
+        //
+       SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info);
+
+        // We pretend that p has just been created.
+        LDV_RECORD_CREATE((StgClosure *)p);
+      }
+       // fall through 
+    case IND_OLDGEN_PERM:
+       evacuate(&((StgInd *)p)->indirectee);
+       p += sizeofW(StgInd);
+       break;
+
+    case MUT_VAR_CLEAN:
+    case MUT_VAR_DIRTY:
+       gct->eager_promotion = rtsFalse;
+       evacuate(&((StgMutVar *)p)->var);
+       gct->eager_promotion = saved_eager_promotion;
+
+       if (gct->failed_to_evac) {
+           ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info;
+       } else {
+           ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info;
+       }
+       p += sizeofW(StgMutVar);
+       break;
+
+    case CAF_BLACKHOLE:
+    case BLACKHOLE:
+       p += BLACKHOLE_sizeW();
+       break;
+
+    case THUNK_SELECTOR:
+    { 
+       StgSelector *s = (StgSelector *)p;
+       evacuate(&s->selectee);
+       p += THUNK_SELECTOR_sizeW();
+       break;
+    }
+
+    // A chunk of stack saved in a heap object
+    case AP_STACK:
+    {
+       StgAP_STACK *ap = (StgAP_STACK *)p;
+
+       evacuate(&ap->fun);
+       scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size);
+       p = (StgPtr)ap->payload + ap->size;
+       break;
+    }
+
+    case PAP:
+       p = scavenge_PAP((StgPAP *)p);
+       break;
+
+    case AP:
+       p = scavenge_AP((StgAP *)p);
+       break;
+
+    case ARR_WORDS:
+       // nothing to follow 
+       p += arr_words_sizeW((StgArrWords *)p);
+       break;
+
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
+       // follow everything 
+    {
+       StgPtr next;
+
+       // We don't eagerly promote objects pointed to by a mutable
+       // array, but if we find the array only points to objects in
+       // the same or an older generation, we mark it "clean" and
+       // avoid traversing it during minor GCs.
+       gct->eager_promotion = rtsFalse;
+       next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+       for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+           evacuate((StgClosure **)p);
+       }
+       gct->eager_promotion = saved_eager_promotion;
+
+       if (gct->failed_to_evac) {
+           ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info;
+       } else {
+           ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info;
+       }
 
-    if (tso->what_next == ThreadRelocated) {
-        // the only way this can happen is if the old TSO was on the
-        // mutable list.  We might have other links to this defunct
-        // TSO, so we must update its link field.
-        evacuate((StgClosure**)&tso->_link);
-        return;
+       gct->failed_to_evac = rtsTrue; // always put it on the mutable list.
+       break;
     }
 
-    saved_eager = gct->eager_promotion;
-    gct->eager_promotion = rtsFalse;
+    case MUT_ARR_PTRS_FROZEN:
+    case MUT_ARR_PTRS_FROZEN0:
+       // follow everything 
+    {
+       StgPtr next;
 
-    if (   tso->why_blocked == BlockedOnMVar
-       || tso->why_blocked == BlockedOnBlackHole
-       || tso->why_blocked == BlockedOnException
-       ) {
-       evacuate(&tso->block_info.closure);
+       next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+       for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) {
+           evacuate((StgClosure **)p);
+       }
+
+       // If we're going to put this object on the mutable list, then
+       // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that.
+       if (gct->failed_to_evac) {
+           ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info;
+       } else {
+           ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info;
+       }
+       break;
     }
-    evacuate((StgClosure **)&tso->blocked_exceptions);
-    
-    // scavange current transaction record
-    evacuate((StgClosure **)&tso->trec);
-    
-    // scavenge this thread's stack 
-    scavenge_stack(tso->sp, &(tso->stack[tso->stack_size]));
 
-    if (gct->failed_to_evac) {
-        tso->flags |= TSO_DIRTY;
-        scavenge_TSO_link(tso);
-    } else {
-        tso->flags &= ~TSO_DIRTY;
-        scavenge_TSO_link(tso);
-        if (gct->failed_to_evac) {
-            tso->flags |= TSO_LINK_DIRTY;
-        } else {
-            tso->flags &= ~TSO_LINK_DIRTY;
-        }
+    case TSO:
+    { 
+       StgTSO *tso = (StgTSO *)p;
+        scavengeTSO(tso);
+       p += tso_sizeW(tso);
+       break;
     }
 
-    gct->eager_promotion = saved_eager;
-}
+    case TVAR_WATCH_QUEUE:
+      {
+       StgTVarWatchQueue *wq = ((StgTVarWatchQueue *) p);
+       gct->evac_step = 0;
+       evacuate((StgClosure **)&wq->closure);
+       evacuate((StgClosure **)&wq->next_queue_entry);
+       evacuate((StgClosure **)&wq->prev_queue_entry);
+       gct->evac_step = saved_evac_step;
+       gct->failed_to_evac = rtsTrue; // mutable
+       p += sizeofW(StgTVarWatchQueue);
+       break;
+      }
 
-/* -----------------------------------------------------------------------------
-   Blocks of function args occur on the stack (at the top) and
-   in PAPs.
-   -------------------------------------------------------------------------- */
+    case TVAR:
+      {
+       StgTVar *tvar = ((StgTVar *) p);
+       gct->evac_step = 0;
+       evacuate((StgClosure **)&tvar->current_value);
+       evacuate((StgClosure **)&tvar->first_watch_queue_entry);
+       gct->evac_step = saved_evac_step;
+       gct->failed_to_evac = rtsTrue; // mutable
+       p += sizeofW(StgTVar);
+       break;
+      }
 
-STATIC_INLINE StgPtr
-scavenge_arg_block (StgFunInfoTable *fun_info, StgClosure **args)
-{
-    StgPtr p;
-    StgWord bitmap;
-    nat size;
+    case TREC_HEADER:
+      {
+        StgTRecHeader *trec = ((StgTRecHeader *) p);
+        gct->evac_step = 0;
+       evacuate((StgClosure **)&trec->enclosing_trec);
+       evacuate((StgClosure **)&trec->current_chunk);
+       evacuate((StgClosure **)&trec->invariants_to_check);
+       gct->evac_step = saved_evac_step;
+       gct->failed_to_evac = rtsTrue; // mutable
+       p += sizeofW(StgTRecHeader);
+        break;
+      }
 
-    p = (StgPtr)args;
-    switch (fun_info->f.fun_type) {
-    case ARG_GEN:
-       bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
-       size = BITMAP_SIZE(fun_info->f.b.bitmap);
-       goto small_bitmap;
-    case ARG_GEN_BIG:
-       size = GET_FUN_LARGE_BITMAP(fun_info)->size;
-       scavenge_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
-       p += size;
-       break;
-    default:
-       bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
-       size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
-    small_bitmap:
-       while (size > 0) {
-           if ((bitmap & 1) == 0) {
-               evacuate((StgClosure **)p);
-           }
-           p++;
-           bitmap = bitmap >> 1;
-           size--;
+    case TREC_CHUNK:
+      {
+       StgWord i;
+       StgTRecChunk *tc = ((StgTRecChunk *) p);
+       TRecEntry *e = &(tc -> entries[0]);
+       gct->evac_step = 0;
+       evacuate((StgClosure **)&tc->prev_chunk);
+       for (i = 0; i < tc -> next_entry_idx; i ++, e++ ) {
+         evacuate((StgClosure **)&e->tvar);
+         evacuate((StgClosure **)&e->expected_value);
+         evacuate((StgClosure **)&e->new_value);
        }
+       gct->evac_step = saved_evac_step;
+       gct->failed_to_evac = rtsTrue; // mutable
+       p += sizeofW(StgTRecChunk);
        break;
-    }
-    return p;
-}
+      }
 
-STATIC_INLINE StgPtr
-scavenge_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size)
-{
-    StgPtr p;
-    StgWord bitmap;
-    StgFunInfoTable *fun_info;
-    
-    fun_info = get_fun_itbl(UNTAG_CLOSURE(fun));
-    ASSERT(fun_info->i.type != PAP);
-    p = (StgPtr)payload;
+    case ATOMIC_INVARIANT:
+      {
+        StgAtomicInvariant *invariant = ((StgAtomicInvariant *) p);
+        gct->evac_step = 0;
+       evacuate(&invariant->code);
+       evacuate((StgClosure **)&invariant->last_execution);
+       gct->evac_step = saved_evac_step;
+       gct->failed_to_evac = rtsTrue; // mutable
+       p += sizeofW(StgAtomicInvariant);
+        break;
+      }
+
+    case INVARIANT_CHECK_QUEUE:
+      {
+        StgInvariantCheckQueue *queue = ((StgInvariantCheckQueue *) p);
+        gct->evac_step = 0;
+       evacuate((StgClosure **)&queue->invariant);
+       evacuate((StgClosure **)&queue->my_execution);
+       evacuate((StgClosure **)&queue->next_queue_entry);
+       gct->evac_step = saved_evac_step;
+       gct->failed_to_evac = rtsTrue; // mutable
+       p += sizeofW(StgInvariantCheckQueue);
+        break;
+      }
 
-    switch (fun_info->f.fun_type) {
-    case ARG_GEN:
-       bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
-       goto small_bitmap;
-    case ARG_GEN_BIG:
-       scavenge_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), size);
-       p += size;
-       break;
-    case ARG_BCO:
-       scavenge_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), size);
-       p += size;
-       break;
     default:
-       bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
-    small_bitmap:
-       while (size > 0) {
-           if ((bitmap & 1) == 0) {
-               evacuate((StgClosure **)p);
-           }
-           p++;
-           bitmap = bitmap >> 1;
-           size--;
+       barf("scavenge: unimplemented/strange closure type %d @ %p", 
+            info->type, p);
+    }
+
+    /*
+     * We need to record the current object on the mutable list if
+     *  (a) It is actually mutable, or 
+     *  (b) It contains pointers to a younger generation.
+     * Case (b) arises if we didn't manage to promote everything that
+     * the current object points to into the current generation.
+     */
+    if (gct->failed_to_evac) {
+       gct->failed_to_evac = rtsFalse;
+       if (bd->gen_no > 0) {
+           recordMutableGen_GC((StgClosure *)q, bd->gen_no);
        }
-       break;
     }
-    return p;
-}
+  }
 
-STATIC_INLINE StgPtr
-scavenge_PAP (StgPAP *pap)
-{
-    evacuate(&pap->fun);
-    return scavenge_PAP_payload (pap->fun, pap->payload, pap->n_args);
-}
+  if (p > bd->free)  {
+      gct->copied += ws->todo_free - bd->free;
+      bd->free = p;
+  }
 
-STATIC_INLINE StgPtr
-scavenge_AP (StgAP *ap)
-{
-    evacuate(&ap->fun);
-    return scavenge_PAP_payload (ap->fun, ap->payload, ap->n_args);
-}
+  debugTrace(DEBUG_gc, "   scavenged %ld bytes",
+             (unsigned long)((bd->free - bd->u.scan) * sizeof(W_)));
+
+  // update stats: this is a block that has been scavenged
+  gct->scanned += bd->free - bd->u.scan;
+  bd->u.scan = bd->free;
 
+  if (bd != ws->todo_bd) {
+      // we're not going to evac any more objects into
+      // this block, so push it now.
+      push_scanned_block(bd, ws);
+  }
+
+  gct->scan_bd = NULL;
+}
 /* -----------------------------------------------------------------------------
    Scavenge everything on the mark stack.
 
@@ -308,7 +747,7 @@ linear_scan:
        info = get_itbl((StgClosure *)p);
        
        q = p;
-        switch (((volatile StgWord *)info)[1] & 0xffff) {
+        switch (info->type) {
            
         case MVAR_CLEAN:
         case MVAR_DIRTY:
@@ -444,8 +883,6 @@ linear_scan:
        }
 
        case CAF_BLACKHOLE:
-       case SE_CAF_BLACKHOLE:
-       case SE_BLACKHOLE:
        case BLACKHOLE:
        case ARR_WORDS:
            break;
@@ -614,7 +1051,7 @@ linear_scan:
        if (gct->failed_to_evac) {
            gct->failed_to_evac = rtsFalse;
            if (gct->evac_step) {
-               recordMutableGen_GC((StgClosure *)q, gct->evac_step->gen);
+               recordMutableGen_GC((StgClosure *)q, gct->evac_step->gen_no);
            }
        }
        
@@ -760,8 +1197,6 @@ scavenge_one(StgPtr p)
     }
 
     case CAF_BLACKHOLE:
-    case SE_CAF_BLACKHOLE:
-    case SE_BLACKHOLE:
     case BLACKHOLE:
        break;
        
@@ -928,20 +1363,7 @@ scavenge_one(StgPtr p)
     case IND_OLDGEN:
     case IND_OLDGEN_PERM:
     case IND_STATIC:
-    {
-       /* Careful here: a THUNK can be on the mutable list because
-        * it contains pointers to young gen objects.  If such a thunk
-        * is updated, the IND_OLDGEN will be added to the mutable
-        * list again, and we'll scavenge it twice.  evacuate()
-        * doesn't check whether the object has already been
-        * evacuated, so we perform that check here.
-        */
-       StgClosure *q = ((StgInd *)p)->indirectee;
-       if (HEAP_ALLOCED(q) && Bdescr((StgPtr)q)->flags & BF_EVACUATED) {
-           break;
-       }
        evacuate(&((StgInd *)p)->indirectee);
-    }
 
 #if 0 && defined(DEBUG)
       if (RtsFlags.DebugFlags.gc) 
@@ -988,13 +1410,10 @@ scavenge_one(StgPtr p)
    -------------------------------------------------------------------------- */
 
 void
-scavenge_mutable_list(generation *gen)
+scavenge_mutable_list(bdescr *bd, generation *gen)
 {
-    bdescr *bd;
     StgPtr p, q;
 
-    bd = gen->saved_mut_list;
-
     gct->evac_step = &gen->steps[0];
     for (; bd != NULL; bd = bd->link) {
        for (q = bd->start; q < bd->free; q++) {
@@ -1025,12 +1444,12 @@ scavenge_mutable_list(generation *gen)
            // definitely doesn't point into a young generation.
            // Clean objects don't need to be scavenged.  Some clean
            // objects (MUT_VAR_CLEAN) are not kept on the mutable
-           // list at all; others, such as MUT_ARR_PTRS_CLEAN and
-           // TSO, are always on the mutable list.
+           // list at all; others, such as MUT_ARR_PTRS_CLEAN
+           // are always on the mutable list.
            //
            switch (get_itbl((StgClosure *)p)->type) {
            case MUT_ARR_PTRS_CLEAN:
-               recordMutableGen_GC((StgClosure *)p,gen);
+               recordMutableGen_GC((StgClosure *)p,gen->no);
                continue;
            case TSO: {
                StgTSO *tso = (StgTSO *)p;
@@ -1041,7 +1460,7 @@ scavenge_mutable_list(generation *gen)
 
                     scavenge_TSO_link(tso);
                     if (gct->failed_to_evac) {
-                        recordMutableGen_GC((StgClosure *)p,gen);
+                        recordMutableGen_GC((StgClosure *)p,gen->no);
                         gct->failed_to_evac = rtsFalse;
                     } else {
                         tso->flags &= ~TSO_LINK_DIRTY;
@@ -1056,14 +1475,28 @@ scavenge_mutable_list(generation *gen)
            if (scavenge_one(p)) {
                // didn't manage to promote everything, so put the
                // object back on the list.
-               recordMutableGen_GC((StgClosure *)p,gen);
+               recordMutableGen_GC((StgClosure *)p,gen->no);
            }
        }
     }
+}
+
+void
+scavenge_capability_mut_lists (Capability *cap)
+{
+    nat g;
 
-    // free the old mut_list
-    freeChain_sync(gen->saved_mut_list);
-    gen->saved_mut_list = NULL;
+    /* Mutable lists from each generation > N
+     * we want to *scavenge* these roots, not evacuate them: they're not
+     * going to move in this GC.
+     * Also do them in reverse generation order, for the usual reason:
+     * namely to reduce the likelihood of spurious old->new pointers.
+     */
+    for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
+        scavenge_mutable_list(cap->saved_mut_lists[g], &generations[g]);
+        freeChain_sync(cap->saved_mut_lists[g]);
+        cap->saved_mut_lists[g] = NULL;
+    }
 }
 
 /* -----------------------------------------------------------------------------
@@ -1129,7 +1562,7 @@ scavenge_static(void)
         */
        if (gct->failed_to_evac) {
          gct->failed_to_evac = rtsFalse;
-         recordMutableGen_GC((StgClosure *)p,oldest_gen);
+         recordMutableGen_GC((StgClosure *)p,oldest_gen->no);
        }
        break;
       }
@@ -1249,19 +1682,34 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
        // the indirection into an IND_PERM, so that evacuate will
        // copy the indirection into the old generation instead of
        // discarding it.
+        //
+        // Note [upd-black-hole]
+        // One slight hiccup is that the THUNK_SELECTOR machinery can
+        // overwrite the updatee with an IND.  In parallel GC, this
+        // could even be happening concurrently, so we can't check for
+        // the IND.  Fortunately if we assume that blackholing is
+        // happening (either lazy or eager), then we can be sure that
+        // the updatee is never a THUNK_SELECTOR and we're ok.
+        // NB. this is a new invariant: blackholing is not optional.
     {
         nat type;
-        type = get_itbl(((StgUpdateFrame *)p)->updatee)->type;
-       if (type == IND) {
-           ((StgUpdateFrame *)p)->updatee->header.info = 
-               (StgInfoTable *)&stg_IND_PERM_info;
-       } else if (type == IND_OLDGEN) {
-           ((StgUpdateFrame *)p)->updatee->header.info = 
-               (StgInfoTable *)&stg_IND_OLDGEN_PERM_info;
-        }            
-       evacuate(&((StgUpdateFrame *)p)->updatee);
-       p += sizeofW(StgUpdateFrame);
-       continue;
+        const StgInfoTable *i;
+        StgClosure *updatee;
+
+        updatee = ((StgUpdateFrame *)p)->updatee;
+        i = updatee->header.info;
+        if (!IS_FORWARDING_PTR(i)) {
+            type = get_itbl(updatee)->type;
+            if (type == IND) {
+                updatee->header.info = &stg_IND_PERM_info;
+            } else if (type == IND_OLDGEN) {
+                updatee->header.info = &stg_IND_OLDGEN_PERM_info;
+            }            
+        }
+        evacuate(&((StgUpdateFrame *)p)->updatee);
+        ASSERT(GET_CLOSURE_TAG(((StgUpdateFrame *)p)->updatee) == 0);
+        p += sizeofW(StgUpdateFrame);
+        continue;
     }
 
       // small bitmap (< 32 entries, or 64 on a 64-bit machine) 
@@ -1388,7 +1836,7 @@ scavenge_large (step_workspace *ws)
        p = bd->start;
        if (scavenge_one(p)) {
            if (ws->step->gen_no > 0) {
-               recordMutableGen_GC((StgClosure *)p, ws->step->gen);
+               recordMutableGen_GC((StgClosure *)p, ws->step->gen_no);
            }
        }
 
@@ -1398,15 +1846,6 @@ scavenge_large (step_workspace *ws)
 }
 
 /* ----------------------------------------------------------------------------
-   Scavenge a block
-   ------------------------------------------------------------------------- */
-
-#define PARALLEL_GC
-#include "Scav.c-inc"
-#undef PARALLEL_GC
-#include "Scav.c-inc"
-
-/* ----------------------------------------------------------------------------
    Look for work to do.
 
    We look for the oldest step that has either a todo block that can
@@ -1451,11 +1890,7 @@ loop:
         // scavenge everything up to the free pointer.
         if (ws->todo_bd->u.scan < ws->todo_free)
         {
-            if (n_gc_threads == 1) {
-                scavenge_block1(ws->todo_bd);
-            } else {
-                scavenge_block(ws->todo_bd);
-            }
+            scavenge_block(ws->todo_bd);
             did_something = rtsTrue;
             break;
         }
@@ -1467,12 +1902,8 @@ loop:
             break;
         }
 
-        if ((bd = grab_todo_block(ws)) != NULL) {
-            if (n_gc_threads == 1) {
-                scavenge_block1(bd);
-            } else {
-                scavenge_block(bd);
-            }
+        if ((bd = grab_local_todo_block(ws)) != NULL) {
+            scavenge_block(bd);
             did_something = rtsTrue;
             break;
         }
@@ -1482,6 +1913,28 @@ loop:
         did_anything = rtsTrue;
         goto loop;
     }
+
+#if defined(THREADED_RTS)
+    if (work_stealing) {
+        // look for work to steal
+        for (s = total_steps-1; s >= 0; s--) {
+            if (s == 0 && RtsFlags.GcFlags.generations > 1) { 
+                continue; 
+            }
+            if ((bd = steal_todo_block(s)) != NULL) {
+                scavenge_block(bd);
+                did_something = rtsTrue;
+                break;
+            }
+        }
+
+        if (did_something) {
+            did_anything = rtsTrue;
+            goto loop;
+        }
+    }
+#endif
+
     // only return when there is no more work to do
 
     return did_anything;
@@ -1522,35 +1975,3 @@ loop:
     if (work_to_do) goto loop;
 }
 
-rtsBool
-any_work (void)
-{
-    int s;
-    step_workspace *ws;
-
-    gct->any_work++;
-
-    write_barrier();
-
-    // scavenge objects in compacted generation
-    if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
-       (mark_stack_bdescr != NULL && !mark_stack_empty())) {
-       return rtsTrue;
-    }
-    
-    // Check for global work in any step.  We don't need to check for
-    // local work, because we have already exited scavenge_loop(),
-    // which means there is no local work for this thread.
-    for (s = total_steps-1; s >= 0; s--) {
-        if (s == 0 && RtsFlags.GcFlags.generations > 1) { 
-            continue; 
-        }
-        ws = &gct->steps[s];
-        if (ws->todo_large_objects) return rtsTrue;
-        if (ws->step->todos) return rtsTrue;
-    }
-
-    gct->no_work++;
-
-    return rtsFalse;
-}