[project @ 2006-01-17 16:03:47 by simonmar]
[ghc-hetmet.git] / ghc / rts / RetainerProfile.c
index f811d73..8217f26 100644 (file)
@@ -1,5 +1,4 @@
 /* -----------------------------------------------------------------------------
- * $Id: RetainerProfile.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 2001
  * Author: Sungwoo Park
@@ -10,6 +9,13 @@
 
 #ifdef PROFILING
 
+// Turn off inlining when debugging - it obfuscates things
+#ifdef DEBUG
+#define INLINE
+#else
+#define INLINE inline
+#endif
+
 #include "Rts.h"
 #include "RtsUtils.h"
 #include "RetainerProfile.h"
 #include "Schedule.h"
 #include "Printer.h"
 #include "Storage.h"
-#include "StoragePriv.h"
 #include "RtsFlags.h"
 #include "Weak.h"
 #include "Sanity.h"
 #include "Profiling.h"
 #include "Stats.h"
 #include "BlockAlloc.h"
-#include "Itimer.h"
-#include "Proftimer.h"
 #include "ProfHeap.h"
+#include "Apply.h"
 
 /*
   Note: what to change in order to plug-in a new retainer profiling scheme?
@@ -54,20 +58,14 @@ static nat timesAnyObjectVisited; // number of times any objects are visited
   pointer. See retainerSetOf().
  */
 
-// extract the retainer set field from c
-#define RSET(c)   ((c)->header.prof.hp.rs)
-
-static StgWord flip = 0;     // flip bit
+StgWord flip = 0;     // flip bit
                       // must be 0 if DEBUG_RETAINER is on (for static closures)
 
-#define isRetainerSetFieldValid(c) \
-  ((((StgWord)(c)->header.prof.hp.rs & 1) ^ flip) == 0)
-
 #define setRetainerSetToNull(c)   \
   (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
 
-static void retainStack(StgClosure *, StgClosure *, StgClosure *, StgPtr, StgPtr);
-static void retainClosure(StgClosure *, StgClosure *, StgClosure *);
+static void retainStack(StgClosure *, retainer, StgPtr, StgPtr);
+static void retainClosure(StgClosure *, StgClosure *, retainer);
 #ifdef DEBUG_RETAINER
 static void belongToHeap(StgPtr p);
 #endif
@@ -108,6 +106,7 @@ typedef enum {
     posTypeStep,
     posTypePtrs,
     posTypeSRT,
+    posTypeLargeSRT,
 } nextPosType;
 
 typedef union {
@@ -130,8 +129,15 @@ typedef union {
     // SRT
     struct {
        StgClosure **srt;
-       StgClosure **srt_end;
+       StgWord    srt_bitmap;
     } srt;
+
+    // Large SRT
+    struct {
+       StgLargeSRT *srt;
+       StgWord offset;
+    } large_srt;
+       
 } nextPos;
 
 typedef struct {
@@ -141,7 +147,7 @@ typedef struct {
 
 typedef struct {
     StgClosure *c;
-    StgClosure *c_child_r;
+    retainer c_child_r;
     stackPos info;
 } stackElement;
 
@@ -162,7 +168,7 @@ typedef struct {
     the topmost element on the previous block group so as to satisfy
     the invariants described above.
  */
-bdescr *firstStack = NULL;
+static bdescr *firstStack = NULL;
 static bdescr *currentStack;
 static stackElement *stackBottom, *stackTop, *stackLimit;
 
@@ -198,7 +204,7 @@ static int stackSize, maxStackSize;
  * Invariants:
  *  currentStack->link == s.
  * -------------------------------------------------------------------------- */
-static inline void
+static INLINE void
 newStackBlock( bdescr *bd )
 {
     currentStack = bd;
@@ -213,7 +219,7 @@ newStackBlock( bdescr *bd )
  * Invariants:
  *   s->link == currentStack.
  * -------------------------------------------------------------------------- */
-static inline void
+static INLINE void
 returnToOldStack( bdescr *bd )
 {
     currentStack = bd;
@@ -255,17 +261,34 @@ closeTraverseStack( void )
 /* -----------------------------------------------------------------------------
  * Returns rtsTrue if the whole stack is empty.
  * -------------------------------------------------------------------------- */
-static inline rtsBool
+static INLINE rtsBool
 isEmptyRetainerStack( void )
 {
     return (firstStack == currentStack) && stackTop == stackLimit;
 }
 
 /* -----------------------------------------------------------------------------
+ * Returns size of stack
+ * -------------------------------------------------------------------------- */
+#ifdef DEBUG
+lnat
+retainerStackBlocks( void )
+{
+    bdescr* bd;
+    lnat res = 0;
+
+    for (bd = firstStack; bd != NULL; bd = bd->link) 
+      res += bd->blocks;
+
+    return res;
+}
+#endif
+
+/* -----------------------------------------------------------------------------
  * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
  * i.e., if the current stack chunk is empty.
  * -------------------------------------------------------------------------- */
-static inline rtsBool
+static INLINE rtsBool
 isOnBoundary( void )
 {
     return stackTop == currentStackBoundary;
@@ -276,7 +299,7 @@ isOnBoundary( void )
  * Invariants:
  *   payload[] begins with ptrs pointers followed by non-pointers.
  * -------------------------------------------------------------------------- */
-static inline void
+static INLINE void
 init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
 {
     info->type              = posTypePtrs;
@@ -288,7 +311,7 @@ init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
 /* -----------------------------------------------------------------------------
  * Find the next object from *info.
  * -------------------------------------------------------------------------- */
-static inline StgClosure *
+static INLINE StgClosure *
 find_ptrs( stackPos *info )
 {
     if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
@@ -301,35 +324,92 @@ find_ptrs( stackPos *info )
 /* -----------------------------------------------------------------------------
  *  Initializes *info from SRT information stored in *infoTable.
  * -------------------------------------------------------------------------- */
-static inline void
-init_srt( stackPos *info, StgInfoTable *infoTable )
+static INLINE void
+init_srt_fun( stackPos *info, StgFunInfoTable *infoTable )
+{
+    if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
+       info->type = posTypeLargeSRT;
+       info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable);
+       info->next.large_srt.offset = 0;
+    } else {
+       info->type = posTypeSRT;
+       info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable);
+       info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
+    }
+}
+
+static INLINE void
+init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable )
 {
-    info->type = posTypeSRT;
-    info->next.srt.srt = (StgClosure **)(infoTable->srt);
-    info->next.srt.srt_end = info->next.srt.srt + infoTable->srt_len;
+    if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) {
+       info->type = posTypeLargeSRT;
+       info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable);
+       info->next.large_srt.offset = 0;
+    } else {
+       info->type = posTypeSRT;
+       info->next.srt.srt = (StgClosure **)GET_SRT(infoTable);
+       info->next.srt.srt_bitmap = infoTable->i.srt_bitmap;
+    }
 }
 
 /* -----------------------------------------------------------------------------
  * Find the next object from *info.
  * -------------------------------------------------------------------------- */
-static inline StgClosure *
+static INLINE StgClosure *
 find_srt( stackPos *info )
 {
     StgClosure *c;
+    StgWord bitmap;
 
-    if (info->next.srt.srt < info->next.srt.srt_end) {
-       // See scavenge_srt() in GC.c for details.
+    if (info->type == posTypeSRT) {
+       // Small SRT bitmap
+       bitmap = info->next.srt.srt_bitmap;
+       while (bitmap != 0) {
+           if ((bitmap & 1) != 0) {
 #ifdef ENABLE_WIN32_DLL_SUPPORT
-       if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
-           c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
-       else
-           c = *(info->next.srt.srt);
+               
+               if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
+                   c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
+               else
+                   c = *(info->next.srt.srt);
 #else
-       c = *(info->next.srt.srt);
+               c = *(info->next.srt.srt);
 #endif
-       info->next.srt.srt++;
-       return c;
-    } else {
+               bitmap = bitmap >> 1;
+               info->next.srt.srt++;
+               info->next.srt.srt_bitmap = bitmap;
+               return c;
+           }
+           bitmap = bitmap >> 1;
+           info->next.srt.srt++;
+       }
+       // bitmap is now zero...
+       return NULL;
+    }
+    else {
+       // Large SRT bitmap
+       nat i = info->next.large_srt.offset;
+       StgWord bitmap;
+
+       // Follow the pattern from GC.c:scavenge_large_srt_bitmap().
+       bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
+       bitmap = bitmap >> (i % BITS_IN(StgWord));
+       while (i < info->next.large_srt.srt->l.size) {
+           if ((bitmap & 1) != 0) {
+               c = ((StgClosure **)info->next.large_srt.srt->srt)[i];
+               i++;
+               info->next.large_srt.offset = i;
+               return c;
+           }
+           i++;
+           if (i % BITS_IN(W_) == 0) {
+               bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)];
+           } else {
+               bitmap = bitmap >> 1;
+           }
+       }
+       // reached the end of this bitmap.
+       info->next.large_srt.offset = i;
        return NULL;
     }
 }
@@ -345,23 +425,22 @@ find_srt( stackPos *info )
 
  *  Invariants:
  *     *c_child_r is the most recent retainer of *c's children.
- *     *c is not any of TSO, PAP, or AP_UPD, which means that
+ *     *c is not any of TSO, AP, PAP, AP_STACK, which means that
  *        there cannot be any stack objects.
  *  Note: SRTs are considered to  be children as well.
  * -------------------------------------------------------------------------- */
-static inline void
-push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
+static INLINE void
+push( StgClosure *c, retainer c_child_r, StgClosure **first_child )
 {
     stackElement se;
     bdescr *nbd;      // Next Block Descriptor
 
 #ifdef DEBUG_RETAINER
-    // fprintf(stderr, "push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+    // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
 #endif
 
     ASSERT(get_itbl(c)->type != TSO);
-    ASSERT(get_itbl(c)->type != PAP);
-    ASSERT(get_itbl(c)->type != AP_UPD);
+    ASSERT(get_itbl(c)->type != AP_STACK);
 
     //
     // fill in se
@@ -385,21 +464,15 @@ push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
 
        // one child (fixed), no SRT
     case MUT_VAR:
-    case MUT_CONS:
        *first_child = ((StgMutVar *)c)->var;
        return;
-    case BLACKHOLE_BQ:
-       // blocking_queue must be TSO and the head of a linked list of TSOs.
-       // Shoule it be a child? Seems to be yes.
-       *first_child = (StgClosure *)((StgBlockingQueue *)c)->blocking_queue;
-       return;
     case THUNK_SELECTOR:
        *first_child = ((StgSelector *)c)->selectee;
        return;
     case IND_PERM:
     case IND_OLDGEN_PERM:
     case IND_OLDGEN:
-       *first_child = ((StgIndOldGen *)c)->indirectee;
+       *first_child = ((StgInd *)c)->indirectee;
        return;
     case CONSTR_1_0:
     case CONSTR_1_1:
@@ -437,7 +510,6 @@ push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
 
        // layout.payload.ptrs, no SRT
     case CONSTR:
-    case FOREIGN:
     case STABLE_NAME:
     case BCO:
     case CONSTR_STATIC:
@@ -449,8 +521,10 @@ push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
        break;
 
        // StgMutArrPtr.ptrs, no SRT
-    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
     case MUT_ARR_PTRS_FROZEN:
+    case MUT_ARR_PTRS_FROZEN0:
        init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
                  (StgPtr)(((StgMutArrPtrs *)c)->payload));
        *first_child = find_ptrs(&se.info);
@@ -461,35 +535,56 @@ push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
     // layout.payload.ptrs, SRT
     case FUN:           // *c is a heap object.
     case FUN_2_0:
+       init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
+       *first_child = find_ptrs(&se.info);
+       if (*first_child == NULL)
+           // no child from ptrs, so check SRT
+           goto fun_srt_only;
+       break;
+
     case THUNK:
     case THUNK_2_0:
-       init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
+       init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, 
+                 (StgPtr)((StgThunk *)c)->payload);
        *first_child = find_ptrs(&se.info);
        if (*first_child == NULL)
            // no child from ptrs, so check SRT
-           goto srt_only;
+           goto thunk_srt_only;
        break;
 
        // 1 fixed child, SRT
     case FUN_1_0:
     case FUN_1_1:
+       *first_child = c->payload[0];
+       ASSERT(*first_child != NULL);
+       init_srt_fun(&se.info, get_fun_itbl(c));
+       break;
+
     case THUNK_1_0:
     case THUNK_1_1:
-       *first_child = c->payload[0];
+       *first_child = ((StgThunk *)c)->payload[0];
        ASSERT(*first_child != NULL);
-       init_srt(&se.info, get_itbl(c));
+       init_srt_thunk(&se.info, get_thunk_itbl(c));
        break;
 
-    // SRT only
-    case THUNK_STATIC:
     case FUN_STATIC:      // *c is a heap object.
-       ASSERT(get_itbl(c)->srt_len != 0);
+       ASSERT(get_itbl(c)->srt_bitmap != 0);
     case FUN_0_1:
     case FUN_0_2:
+    fun_srt_only:
+        init_srt_fun(&se.info, get_fun_itbl(c));
+       *first_child = find_srt(&se.info);
+       if (*first_child == NULL)
+           return;     // no child
+       break;
+
+    // SRT only
+    case THUNK_STATIC:
+       ASSERT(get_itbl(c)->srt_bitmap != 0);
     case THUNK_0_1:
     case THUNK_0_2:
-    srt_only:
-        init_srt(&se.info, get_itbl(c));
+    thunk_srt_only:
+        init_srt_thunk(&se.info, get_thunk_itbl(c));
        *first_child = find_srt(&se.info);
        if (*first_child == NULL)
            return;     // no child
@@ -497,7 +592,8 @@ push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
 
        // cannot appear
     case PAP:
-    case AP_UPD:
+    case AP:
+    case AP_STACK:
     case TSO:
     case IND_STATIC:
     case CONSTR_INTLIKE:
@@ -507,7 +603,6 @@ push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
     case UPDATE_FRAME:
     case CATCH_FRAME:
     case STOP_FRAME:
-    case SEQ_FRAME:
     case RET_DYN:
     case RET_BCO:
     case RET_SMALL:
@@ -530,7 +625,7 @@ push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
 
     if (stackTop - 1 < stackBottom) {
 #ifdef DEBUG_RETAINER
-       // fprintf(stderr, "push() to the next stack.\n");
+       // debugBelch("push() to the next stack.\n");
 #endif
        // currentStack->free is updated when the active stack is switched
        // to the next stack.
@@ -559,7 +654,7 @@ push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
     stackSize++;
     if (stackSize > maxStackSize) maxStackSize = stackSize;
     // ASSERT(stackSize >= 0);
-    // fprintf(stderr, "stackSize = %d\n", stackSize);
+    // debugBelch("stackSize = %d\n", stackSize);
 #endif
 }
 
@@ -573,7 +668,7 @@ push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
  *    executed at the end of popOff() in necessary. Since popOff() is
  *    likely to be executed quite often while popOffReal() is not, we
  *    separate popOffReal() from popOff(), which is declared as an
- *    inline function (for the sake of execution speed).  popOffReal()
+ *    INLINE function (for the sake of execution speed).  popOffReal()
  *    is called only within popOff() and nowhere else.
  * -------------------------------------------------------------------------- */
 static void
@@ -582,7 +677,7 @@ popOffReal(void)
     bdescr *pbd;    // Previous Block Descriptor
 
 #ifdef DEBUG_RETAINER
-    // fprintf(stderr, "pop() to the previous stack.\n");
+    // debugBelch("pop() to the previous stack.\n");
 #endif
 
     ASSERT(stackTop + 1 == stackLimit);
@@ -597,7 +692,7 @@ popOffReal(void)
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
          ASSERT(stackSize >= 0);
-         fprintf(stderr, "stackSize = %d\n", stackSize);
+         debugBelch("stackSize = %d\n", stackSize);
        */
 #endif
        return;
@@ -618,15 +713,15 @@ popOffReal(void)
     if (stackSize > maxStackSize) maxStackSize = stackSize;
     /*
       ASSERT(stackSize >= 0);
-      fprintf(stderr, "stackSize = %d\n", stackSize);
+      debugBelch("stackSize = %d\n", stackSize);
     */
 #endif
 }
 
-static inline void
+static INLINE void
 popOff(void) {
 #ifdef DEBUG_RETAINER
-    // fprintf(stderr, "\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+    // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
 #endif
 
     ASSERT(stackTop != stackLimit);
@@ -640,7 +735,7 @@ popOff(void) {
        if (stackSize > maxStackSize) maxStackSize = stackSize;
        /*
          ASSERT(stackSize >= 0);
-         fprintf(stderr, "stackSize = %d\n", stackSize);
+         debugBelch("stackSize = %d\n", stackSize);
        */
 #endif
        return;
@@ -665,13 +760,13 @@ popOff(void) {
  *    It is okay to call this function even when the current stack chunk
  *    is empty.
  * -------------------------------------------------------------------------- */
-static inline void
-pop( StgClosure **c, StgClosure **cp, StgClosure **r )
+static INLINE void
+pop( StgClosure **c, StgClosure **cp, retainer *r )
 {
     stackElement *se;
 
 #ifdef DEBUG_RETAINER
-    // fprintf(stderr, "pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+    // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
 #endif
 
     do {
@@ -722,13 +817,14 @@ pop( StgClosure **c, StgClosure **cp, StgClosure **r )
            return;
 
        case CONSTR:
-       case FOREIGN:
        case STABLE_NAME:
        case BCO:
        case CONSTR_STATIC:
            // StgMutArrPtr.ptrs, no SRT
-       case MUT_ARR_PTRS:
+       case MUT_ARR_PTRS_CLEAN:
+       case MUT_ARR_PTRS_DIRTY:
        case MUT_ARR_PTRS_FROZEN:
+       case MUT_ARR_PTRS_FROZEN0:
            *c = find_ptrs(&se->info);
            if (*c == NULL) {
                popOff();
@@ -741,6 +837,17 @@ pop( StgClosure **c, StgClosure **cp, StgClosure **r )
            // layout.payload.ptrs, SRT
        case FUN:         // always a heap object
        case FUN_2_0:
+           if (se->info.type == posTypePtrs) {
+               *c = find_ptrs(&se->info);
+               if (*c != NULL) {
+                   *cp = se->c;
+                   *r = se->c_child_r;
+                   return;
+               }
+               init_srt_fun(&se->info, get_fun_itbl(se->c));
+           }
+           goto do_srt;
+
        case THUNK:
        case THUNK_2_0:
            if (se->info.type == posTypePtrs) {
@@ -750,11 +857,12 @@ pop( StgClosure **c, StgClosure **cp, StgClosure **r )
                    *r = se->c_child_r;
                    return;
                }
-               init_srt(&se->info, get_itbl(se->c));
+               init_srt_thunk(&se->info, get_thunk_itbl(se->c));
            }
-           // fall through
+           goto do_srt;
 
            // SRT
+       do_srt:
        case THUNK_STATIC:
        case FUN_STATIC:
        case FUN_0_1:
@@ -784,8 +892,6 @@ pop( StgClosure **c, StgClosure **cp, StgClosure **r )
        case ARR_WORDS:
            // one child (fixed), no SRT
        case MUT_VAR:
-       case MUT_CONS:
-       case BLACKHOLE_BQ:
        case THUNK_SELECTOR:
        case IND_PERM:
        case IND_OLDGEN_PERM:
@@ -793,7 +899,8 @@ pop( StgClosure **c, StgClosure **cp, StgClosure **r )
        case CONSTR_1_1:
            // cannot appear
        case PAP:
-       case AP_UPD:
+       case AP:
+       case AP_STACK:
        case TSO:
        case IND_STATIC:
        case CONSTR_INTLIKE:
@@ -804,7 +911,6 @@ pop( StgClosure **c, StgClosure **cp, StgClosure **r )
        case UPDATE_FRAME:
        case CATCH_FRAME:
        case STOP_FRAME:
-       case SEQ_FRAME:
        case RET_BCO:
        case RET_SMALL:
        case RET_VEC_SMALL:
@@ -862,7 +968,7 @@ endRetainerProfiling( void )
  *    We have to perform an XOR (^) operation each time a closure is examined.
  *    The reason is that we do not know when a closure is visited last.
  * -------------------------------------------------------------------------- */
-static inline void
+static INLINE void
 maybeInitRetainerSet( StgClosure *c )
 {
     if (!isRetainerSetFieldValid(c)) {
@@ -870,167 +976,12 @@ maybeInitRetainerSet( StgClosure *c )
     }
 }
 
-static inline RetainerSet *
-retainerSetOf( StgClosure *c )
-{
-    ASSERT( isRetainerSetFieldValid(c) );
-    // StgWord has the same size as pointers, so the following type
-    // casting is okay.
-    return (RetainerSet *)((StgWord)RSET(c) ^ flip);
-}
-
-/* -----------------------------------------------------------------------------
- *  Returns the cost of the closure *c, e.g., the amount of heap memory
- *  allocated to *c. Static objects cost 0.
- *  The cost includes even the words allocated for profiling purpose.
- *  Cf. costPure().
- * -------------------------------------------------------------------------- */
-static inline nat
-cost( StgClosure *c )
-{
-    StgInfoTable *info;
-
-    info = get_itbl(c);
-    switch (info->type) {
-    case TSO:
-       return tso_sizeW((StgTSO *)c);
-
-    case THUNK:
-    case THUNK_1_0:
-    case THUNK_0_1:
-    case THUNK_2_0:
-    case THUNK_1_1:
-    case THUNK_0_2:
-       return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
-
-       // static objects
-    case CONSTR_STATIC:
-    case FUN_STATIC:
-    case THUNK_STATIC:
-       return 0;
-
-    case MVAR:
-       return sizeofW(StgMVar);
-
-    case MUT_ARR_PTRS:
-    case MUT_ARR_PTRS_FROZEN:
-       return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
-
-    case AP_UPD:
-    case PAP:
-       return pap_sizeW((StgPAP *)c);
-
-    case ARR_WORDS:
-       return arr_words_sizeW((StgArrWords *)c);
-
-    case CONSTR:
-    case CONSTR_1_0:
-    case CONSTR_0_1:
-    case CONSTR_2_0:
-    case CONSTR_1_1:
-    case CONSTR_0_2:
-    case FUN:
-    case FUN_1_0:
-    case FUN_0_1:
-    case FUN_2_0:
-    case FUN_1_1:
-    case FUN_0_2:
-    case WEAK:
-    case MUT_VAR:
-    case MUT_CONS:
-    case CAF_BLACKHOLE:
-    case BLACKHOLE:
-    case SE_BLACKHOLE:
-    case SE_CAF_BLACKHOLE:
-    case BLACKHOLE_BQ:
-    case IND_PERM:
-    case IND_OLDGEN:
-    case IND_OLDGEN_PERM:
-    case FOREIGN:
-    case BCO:
-    case STABLE_NAME:
-       return sizeW_fromITBL(info);
-
-    case THUNK_SELECTOR:
-       return sizeofW(StgHeader) + MIN_UPD_SIZE;
-
-       /*
-         Error case
-       */
-       // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
-    case IND_STATIC:
-       // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
-       // cannot be *c, *cp, *r in the retainer profiling loop.
-    case CONSTR_INTLIKE:
-    case CONSTR_CHARLIKE:
-    case CONSTR_NOCAF_STATIC:
-       // Stack objects are invalid because they are never treated as
-       // legal objects during retainer profiling.
-    case UPDATE_FRAME:
-    case CATCH_FRAME:
-    case STOP_FRAME:
-    case SEQ_FRAME:
-    case RET_DYN:
-    case RET_BCO:
-    case RET_SMALL:
-    case RET_VEC_SMALL:
-    case RET_BIG:
-    case RET_VEC_BIG:
-       // other cases
-    case IND:
-    case BLOCKED_FETCH:
-    case FETCH_ME:
-    case FETCH_ME_BQ:
-    case RBH:
-    case REMOTE_REF:
-    case EVACUATED:
-    case INVALID_OBJECT:
-    default:
-       barf("Invalid object in cost(): %d", get_itbl(c)->type);
-    }
-}
-
-/* -----------------------------------------------------------------------------
- *  Returns the pure cost of the closure *c, i.e., the size of memory
- *  allocated for this object without profiling.
- *  Note & Todo:
- *    costPure() subtracts the overhead incurred by profiling for all types
- *    of objects except TSO. Even though the overhead in the TSO object
- *    itself is taken into account, the additional costs due to larger
- *    stack objects (with unnecessary retainer profiling fields) is not
- *    considered. Still, costPure() should result in an accurate estimate
- *    of heap use because stacks in TSO objects are allocated in large blocks.
- *    If we get rid of the (currently unused) retainer profiling field in
- *    all stack objects, the result will be accurate.
- * ------------------------------------------------------------------------- */
-static inline nat
-costPure( StgClosure *c )
-{
-    nat cst;
-
-    if (!closureSatisfiesConstraints(c)) {
-       return 0;
-    }
-
-    cst = cost(c);
-
-    ASSERT(cst == 0 || cst - sizeofW(StgProfHeader) > 0);
-
-    if (cst > 0) {
-       return cst - sizeofW(StgProfHeader);
-    } else {
-       return 0;
-    }
-}
-
 /* -----------------------------------------------------------------------------
  * Returns rtsTrue if *c is a retainer.
  * -------------------------------------------------------------------------- */
-static inline rtsBool
+static INLINE rtsBool
 isRetainer( StgClosure *c )
 {
-    if (get_itbl(c)->prof.closure_desc != NULL && !strcmp(get_itbl(c)->prof.closure_desc,"PCS")) { return rtsTrue; }
-
     switch (get_itbl(c)->type) {
        //
        //  True case
@@ -1041,9 +992,10 @@ isRetainer( StgClosure *c )
        // mutable objects
     case MVAR:
     case MUT_VAR:
-    case MUT_CONS:
-    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
     case MUT_ARR_PTRS_FROZEN:
+    case MUT_ARR_PTRS_FROZEN0:
 
        // thunks are retainers.
     case THUNK:
@@ -1053,7 +1005,8 @@ isRetainer( StgClosure *c )
     case THUNK_1_1:
     case THUNK_0_2:
     case THUNK_SELECTOR:
-    case AP_UPD:
+    case AP:
+    case AP_STACK:
 
        // Static thunks, or CAFS, are obviously retainers.
     case THUNK_STATIC:
@@ -1088,7 +1041,6 @@ isRetainer( StgClosure *c )
     case BLACKHOLE:
     case SE_BLACKHOLE:
     case SE_CAF_BLACKHOLE:
-    case BLACKHOLE_BQ:
        // indirection
     case IND_PERM:
     case IND_OLDGEN_PERM:
@@ -1097,7 +1049,6 @@ isRetainer( StgClosure *c )
     case CONSTR_STATIC:
     case FUN_STATIC:
        // misc
-    case FOREIGN:
     case STABLE_NAME:
     case BCO:
     case ARR_WORDS:
@@ -1118,7 +1069,6 @@ isRetainer( StgClosure *c )
     case UPDATE_FRAME:
     case CATCH_FRAME:
     case STOP_FRAME:
-    case SEQ_FRAME:
     case RET_DYN:
     case RET_BCO:
     case RET_SMALL:
@@ -1149,12 +1099,12 @@ isRetainer( StgClosure *c )
  *    Depending on the definition of this function, the maintenance of retainer
  *    sets can be made easier. If most retainer sets are likely to be created
  *    again across garbage collections, refreshAllRetainerSet() in
- *    RetainerSet.c can simply set the cost field of each retainer set.
+ *    RetainerSet.c can simply do nothing.
  *    If this is not the case, we can free all the retainer sets and
  *    re-initialize the hash table.
  *    See refreshAllRetainerSet() in RetainerSet.c.
  * -------------------------------------------------------------------------- */
-static inline retainer
+static INLINE retainer
 getRetainerFrom( StgClosure *c )
 {
     ASSERT(isRetainer(c));
@@ -1178,20 +1128,117 @@ getRetainerFrom( StgClosure *c )
  *    c != NULL
  *    s != NULL
  * -------------------------------------------------------------------------- */
-static inline void
-associate( StgClosure *c, RetainerSet *rsOfc, RetainerSet *s )
+static INLINE void
+associate( StgClosure *c, RetainerSet *s )
 {
-    nat cost_c;
-
-    cost_c = costPure(c);             // not cost(c)
-    if (rsOfc != NULL) {
-       ASSERT(rsOfc->cost >= cost_c);
-       rsOfc->cost -= cost_c;
-    }
     // StgWord has the same size as pointers, so the following type
     // casting is okay.
     RSET(c) = (RetainerSet *)((StgWord)s | flip);
-    s->cost += cost_c;
+}
+
+/* -----------------------------------------------------------------------------
+   Call retainClosure for each of the closures covered by a large bitmap.
+   -------------------------------------------------------------------------- */
+
+static void
+retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size,
+                    StgClosure *c, retainer c_child_r)
+{
+    nat i, b;
+    StgWord bitmap;
+    
+    b = 0;
+    bitmap = large_bitmap->bitmap[b];
+    for (i = 0; i < size; ) {
+       if ((bitmap & 1) == 0) {
+           retainClosure((StgClosure *)*p, c, c_child_r);
+       }
+       i++;
+       p++;
+       if (i % BITS_IN(W_) == 0) {
+           b++;
+           bitmap = large_bitmap->bitmap[b];
+       } else {
+           bitmap = bitmap >> 1;
+       }
+    }
+}
+
+static INLINE StgPtr
+retain_small_bitmap (StgPtr p, nat size, StgWord bitmap,
+                    StgClosure *c, retainer c_child_r)
+{
+    while (size > 0) {
+       if ((bitmap & 1) == 0) {
+           retainClosure((StgClosure *)*p, c, c_child_r);
+       }
+       p++;
+       bitmap = bitmap >> 1;
+       size--;
+    }
+    return p;
+}
+
+/* -----------------------------------------------------------------------------
+ * Call retainClosure for each of the closures in an SRT.
+ * ------------------------------------------------------------------------- */
+
+static void
+retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r)
+{
+    nat i, b, size;
+    StgWord bitmap;
+    StgClosure **p;
+    
+    b = 0;
+    p = (StgClosure **)srt->srt;
+    size   = srt->l.size;
+    bitmap = srt->l.bitmap[b];
+    for (i = 0; i < size; ) {
+       if ((bitmap & 1) != 0) {
+           retainClosure((StgClosure *)*p, c, c_child_r);
+       }
+       i++;
+       p++;
+       if (i % BITS_IN(W_) == 0) {
+           b++;
+           bitmap = srt->l.bitmap[b];
+       } else {
+           bitmap = bitmap >> 1;
+       }
+    }
+}
+
+static INLINE void
+retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r)
+{
+  nat bitmap;
+  StgClosure **p;
+
+  bitmap = srt_bitmap;
+  p = srt;
+
+  if (bitmap == (StgHalfWord)(-1)) {  
+      retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r );
+      return;
+  }
+
+  while (bitmap != 0) {
+      if ((bitmap & 1) != 0) {
+#ifdef ENABLE_WIN32_DLL_SUPPORT
+         if ( (unsigned long)(*srt) & 0x1 ) {
+             retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1)), 
+                           c, c_child_r);
+         } else {
+             retainClosure(*srt,c,c_child_r);
+         }
+#else
+         retainClosure(*srt,c,c_child_r);
+#endif
+      }
+      p++;
+      bitmap = bitmap >> 1;
+  }
 }
 
 /* -----------------------------------------------------------------------------
@@ -1200,9 +1247,7 @@ associate( StgClosure *c, RetainerSet *rsOfc, RetainerSet *s )
  *  respectively. Treat stackOptionalFun as another child of *c if it is
  *  not NULL.
  *  Invariants:
- *    *c is one of the following: TSO, PAP, and AP_UPD.
- *    If *c is AP_UPD or PAP, stackOptionalFun is not NULL. Otherwise,
- *    it is NULL.
+ *    *c is one of the following: TSO, AP_STACK.
  *    If *c is TSO, c == c_child_r.
  *    stackStart < stackEnd.
  *    RSET(c) and RSET(c_child_r) are valid, i.e., their
@@ -1215,14 +1260,14 @@ associate( StgClosure *c, RetainerSet *rsOfc, RetainerSet *s )
  *    retainClosure() is invoked instead of evacuate().
  * -------------------------------------------------------------------------- */
 static void
-retainStack( StgClosure *c, StgClosure *c_child_r,
-            StgClosure *stackOptionalFun, StgPtr stackStart,
-            StgPtr stackEnd )
+retainStack( StgClosure *c, retainer c_child_r,
+            StgPtr stackStart, StgPtr stackEnd )
 {
     stackElement *oldStackBoundary;
-    StgPtr p, q;
-    StgInfoTable *info;
+    StgPtr p;
+    StgRetInfoTable *info;
     StgWord32 bitmap;
+    nat size;
 
 #ifdef DEBUG_RETAINER
     cStackSize++;
@@ -1239,65 +1284,19 @@ retainStack( StgClosure *c, StgClosure *c_child_r,
     currentStackBoundary = stackTop;
 
 #ifdef DEBUG_RETAINER
-    // fprintf(stderr, "retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
+    // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
 #endif
 
-    if (stackOptionalFun != NULL) {
-       ASSERT(get_itbl(c)->type == AP_UPD || get_itbl(c)->type == PAP);
-       retainClosure(stackOptionalFun, c, c_child_r);
-    } else {
-       ASSERT(get_itbl(c)->type == TSO);
-       ASSERT(((StgTSO *)c)->what_next != ThreadRelocated &&
-              ((StgTSO *)c)->what_next != ThreadComplete &&
-              ((StgTSO *)c)->what_next != ThreadKilled);
-    }
-
+    ASSERT(get_itbl(c)->type != TSO || 
+          (((StgTSO *)c)->what_next != ThreadRelocated &&
+           ((StgTSO *)c)->what_next != ThreadComplete &&
+           ((StgTSO *)c)->what_next != ThreadKilled));
+    
     p = stackStart;
     while (p < stackEnd) {
-       q = *(StgPtr *)p;
+       info = get_ret_itbl((StgClosure *)p);
 
-    //
-    // Note & Todo:
-    //   The correctness of retainer profiling is subject to the
-    //   correctness of the two macros IS_ARG_TAG() and
-    //   LOOKS_LIKE_GHC_INFO(). Since LOOKS_LIKE_GHC_INFO() is a bit
-    //   precarious macro, so I believe that the current
-    //   implementation may not be quite safe. Also, scavenge_stack()
-    //   in GC.c also exploits this macro in order to identify shallow
-    //   pointers.  I am not sure whether scavenge_stack() takes
-    //   further measurements to discern real shallow pointers.
-    //
-    //   I think this can be a serious problem if a stack chunk
-    //   contains some word which looks like a pointer but is
-    //   actually, say, a word constituting a floating number.
-    //
-
-       // skip tagged words
-       if (IS_ARG_TAG((StgWord)q)) {
-           p += 1 + ARG_SIZE(q);
-           continue;
-       }
-
-       // check if *p is a shallow closure pointer
-       if (!LOOKS_LIKE_GHC_INFO(q)) {
-           retainClosure((StgClosure *)q, c, c_child_r);
-           p++;
-           continue;
-       }
-
-       // regular stack objects
-       info = get_itbl((StgClosure *)p);
-       switch(info->type) {
-       case RET_DYN:
-           bitmap = ((StgRetDyn *)p)->liveness;
-           p = ((StgRetDyn *)p)->payload;
-           goto small_bitmap;
-
-           // FUN and FUN_STATIC keep only their info pointer.
-       case FUN:
-       case FUN_STATIC:
-           p++;
-           goto follow_srt;
+       switch(info->i.type) {
 
        case UPDATE_FRAME:
            retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
@@ -1306,77 +1305,102 @@ retainStack( StgClosure *c, StgClosure *c_child_r,
 
        case STOP_FRAME:
        case CATCH_FRAME:
-       case SEQ_FRAME:
-       case RET_BCO:
        case RET_SMALL:
        case RET_VEC_SMALL:
-           bitmap = info->layout.bitmap;
+           bitmap = BITMAP_BITS(info->i.layout.bitmap);
+           size   = BITMAP_SIZE(info->i.layout.bitmap);
            p++;
-       small_bitmap:
-           while (bitmap != 0) {
-               if ((bitmap & 1) == 0)
-                   retainClosure((StgClosure *)*p, c, c_child_r);
-               p++;
-               bitmap = bitmap >> 1;
-           }
+           p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
+
        follow_srt:
-           {
-               StgClosure **srt, **srt_end;
+           retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r);
+           continue;
 
-               srt = (StgClosure **)(info->srt);
-               srt_end = srt + info->srt_len;
-               for (; srt < srt_end; srt++) {
-                   // See scavenge_srt() in GC.c for details.
-#ifdef ENABLE_WIN32_DLL_SUPPORT
-                   if ((unsigned long)(*srt) & 0x1)
-                       retainClosure(*(StgClosure **)(((unsigned long)*srt & ~0x1)), c, c_child_r);
-                   else
-                       retainClosure(*srt, c, c_child_r);
-#else
-                   retainClosure(*srt, c, c_child_r);
-#endif
-               }
-           }
+       case RET_BCO: {
+           StgBCO *bco;
+           
+           p++;
+           retainClosure((StgClosure *)*p, c, c_child_r);
+           bco = (StgBCO *)*p;
+           p++;
+           size = BCO_BITMAP_SIZE(bco);
+           retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r);
+           p += size;
            continue;
+       }
 
+           // large bitmap (> 32 entries, or > 64 on a 64-bit machine) 
        case RET_BIG:
        case RET_VEC_BIG:
-       {
-           StgPtr q;
-           StgLargeBitmap *large_bitmap;
-           nat i;
-
-           large_bitmap = info->layout.large_bitmap;
+           size = GET_LARGE_BITMAP(&info->i)->size;
            p++;
+           retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i),
+                               size, c, c_child_r);
+           p += size;
+           // and don't forget to follow the SRT 
+           goto follow_srt;
 
-           for (i = 0; i < large_bitmap->size; i++) {
-               bitmap = large_bitmap->bitmap[i];
-               q = p + sizeofW(StgWord) * 8;
-               while (bitmap != 0) {
-                   if ((bitmap & 1) == 0)
-                       retainClosure((StgClosure *)*p, c, c_child_r);
-                   p++;
-                   bitmap = bitmap >> 1;
-               }
-               if (i + 1 < large_bitmap->size) {
-                   while (p < q) {
-                       retainClosure((StgClosure *)*p, c, c_child_r);
-                       p++;
-                   }
-               }
+           // Dynamic bitmap: the mask is stored on the stack 
+       case RET_DYN: {
+           StgWord dyn;
+           dyn = ((StgRetDyn *)p)->liveness;
+
+           // traverse the bitmap first
+           bitmap = RET_DYN_LIVENESS(dyn);
+           p      = (P_)&((StgRetDyn *)p)->payload[0];
+           size   = RET_DYN_BITMAP_SIZE;
+           p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
+           
+           // skip over the non-ptr words
+           p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE;
+           
+           // follow the ptr words
+           for (size = RET_DYN_PTRS(dyn); size > 0; size--) {
+               retainClosure((StgClosure *)*p, c, c_child_r);
+               p++;
            }
+           continue;
        }
-       goto follow_srt;
+
+       case RET_FUN: {
+           StgRetFun *ret_fun = (StgRetFun *)p;
+           StgFunInfoTable *fun_info;
+           
+           retainClosure(ret_fun->fun, c, c_child_r);
+           fun_info = get_fun_itbl(ret_fun->fun);
+           
+           p = (P_)&ret_fun->payload;
+           switch (fun_info->f.fun_type) {
+           case ARG_GEN:
+               bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+               size = BITMAP_SIZE(fun_info->f.b.bitmap);
+               p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
+               break;
+           case ARG_GEN_BIG:
+               size = GET_FUN_LARGE_BITMAP(fun_info)->size;
+               retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), 
+                                   size, c, c_child_r);
+               p += size;
+               break;
+           default:
+               bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+               size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
+               p = retain_small_bitmap(p, size, bitmap, c, c_child_r);
+               break;
+           }
+           goto follow_srt;
+       }
+
        default:
            barf("Invalid object found in retainStack(): %d",
-                (int)(info->type));
+                (int)(info->i.type));
        }
     }
 
     // restore currentStackBoundary
     currentStackBoundary = oldStackBoundary;
 #ifdef DEBUG_RETAINER
-    // fprintf(stderr, "retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
+    // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
 #endif
 
 #ifdef DEBUG_RETAINER
@@ -1384,6 +1408,48 @@ retainStack( StgClosure *c, StgClosure *c_child_r,
 #endif
 }
 
+/* ----------------------------------------------------------------------------
+ * Call retainClosure for each of the children of a PAP/AP
+ * ------------------------------------------------------------------------- */
+
+static INLINE StgPtr
+retain_PAP_payload (StgClosure *pap,  retainer c_child_r, StgClosure *fun, 
+                   StgClosure** payload, StgWord n_args)
+{
+    StgPtr p;
+    StgWord bitmap;
+    StgFunInfoTable *fun_info;
+
+    retainClosure(fun, pap, c_child_r);
+    fun_info = get_fun_itbl(fun);
+    ASSERT(fun_info->i.type != PAP);
+
+    p = (StgPtr)payload;
+
+    switch (fun_info->f.fun_type) {
+    case ARG_GEN:
+       bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+       p = retain_small_bitmap(p, n_args, bitmap, 
+                               pap, c_child_r);
+       break;
+    case ARG_GEN_BIG:
+       retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info),
+                           n_args, pap, c_child_r);
+       p += n_args;
+       break;
+    case ARG_BCO:
+       retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun),
+                           n_args, pap, c_child_r);
+       p += n_args;
+       break;
+    default:
+       bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+       p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r);
+       break;
+    }
+    return p;
+}
+
 /* -----------------------------------------------------------------------------
  *  Compute the retainer set of *c0 and all its desecents by traversing.
  *  *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
@@ -1398,19 +1464,19 @@ retainStack( StgClosure *c, StgClosure *c_child_r,
  *    its descendants.
  *  Note:
  *    stackTop must be the same at the beginning and the exit of this function.
- *    *c0 can be TSO (as well as PAP and AP_UPD).
+ *    *c0 can be TSO (as well as AP_STACK).
  * -------------------------------------------------------------------------- */
 static void
-retainClosure( StgClosure *c0, StgClosure *cp0, StgClosure *r0 )
+retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
 {
     // c = Current closure
     // cp = Current closure's Parent
     // r = current closures' most recent Retainer
     // c_child_r = current closure's children's most recent retainer
     // first_child = first child of c
-    StgClosure *c, *cp, *r, *c_child_r, *first_child;
+    StgClosure *c, *cp, *first_child;
     RetainerSet *s, *retainerSetOfc;
-    retainer R_r;
+    retainer r, c_child_r;
     StgWord typeOfc;
 
 #ifdef DEBUG_RETAINER
@@ -1419,7 +1485,7 @@ retainClosure( StgClosure *c0, StgClosure *cp0, StgClosure *r0 )
 
 #ifdef DEBUG_RETAINER
     // oldStackTop = stackTop;
-    // fprintf(stderr, "retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
+    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
 #endif
 
     // (c, cp, r) = (c0, cp0, r0)
@@ -1429,18 +1495,18 @@ retainClosure( StgClosure *c0, StgClosure *cp0, StgClosure *r0 )
     goto inner_loop;
 
 loop:
-    //fprintf(stderr, "loop");
+    //debugBelch("loop");
     // pop to (c, cp, r);
     pop(&c, &cp, &r);
 
     if (c == NULL) {
 #ifdef DEBUG_RETAINER
-       // fprintf(stderr, "retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
+       // debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
 #endif
        return;
     }
 
-    //fprintf(stderr, "inner_loop");
+    //debugBelch("inner_loop");
 
 inner_loop:
     // c  = current closure under consideration,
@@ -1482,13 +1548,13 @@ inner_loop:
        if (((StgTSO *)c)->what_next == ThreadComplete ||
            ((StgTSO *)c)->what_next == ThreadKilled) {
 #ifdef DEBUG_RETAINER
-           fprintf(stderr, "ThreadComplete or ThreadKilled encountered in retainClosure()\n");
+           debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
 #endif
            goto loop;
        }
        if (((StgTSO *)c)->what_next == ThreadRelocated) {
 #ifdef DEBUG_RETAINER
-           fprintf(stderr, "ThreadRelocated encountered in retainClosure()\n");
+           debugBelch("ThreadRelocated encountered in retainClosure()\n");
 #endif
            c = (StgClosure *)((StgTSO *)c)->link;
            goto inner_loop;
@@ -1510,7 +1576,7 @@ inner_loop:
        goto loop;
     case THUNK_STATIC:
     case FUN_STATIC:
-       if (get_itbl(c)->srt_len == 0) {
+       if (get_itbl(c)->srt_bitmap == 0) {
            // No need to compute the retainer set; no dynamic objects
            // are reachable from *c.
            //
@@ -1558,7 +1624,6 @@ inner_loop:
        s = retainerSetOf(cp);
 
     // (c, cp, r, s) is available.
-    R_r = getRetainerFrom(r);
 
     // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
     if (retainerSetOfc == NULL) {
@@ -1566,31 +1631,31 @@ inner_loop:
        numObjectVisited++;
 
        if (s == NULL)
-           associate(c, NULL, singleton(R_r));
+           associate(c, singleton(r));
        else
            // s is actually the retainer set of *c!
-           associate(c, NULL, s);
+           associate(c, s);
 
        // compute c_child_r
-       c_child_r = isRetainer(c) ? c : r;
+       c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
     } else {
        // This is not the first visit to *c.
-       if (isMember(R_r, retainerSetOfc))
+       if (isMember(r, retainerSetOfc))
            goto loop;          // no need to process child
 
        if (s == NULL)
-           associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
+           associate(c, addElement(r, retainerSetOfc));
        else {
            // s is not NULL and cp is not a retainer. This means that
            // each time *cp is visited, so is *c. Thus, if s has
            // exactly one more element in its retainer set than c, s
            // is also the new retainer set for *c.
            if (s->num == retainerSetOfc->num + 1) {
-               associate(c, retainerSetOfc, s);
+               associate(c, s);
            }
            // Otherwise, just add R_r to the current retainer set of *c.
            else {
-               associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
+               associate(c, addElement(r, retainerSetOfc));
            }
        }
 
@@ -1606,27 +1671,36 @@ inner_loop:
 
     // process child
 
-    if (typeOfc == TSO) {
+    // Special case closures: we process these all in one go rather
+    // than attempting to save the current position, because doing so
+    // would be hard.
+    switch (typeOfc) {
+    case TSO:
        retainStack(c, c_child_r,
-                   NULL,
                    ((StgTSO *)c)->sp,
                    ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
-       // no more children
        goto loop;
-    } else if (typeOfc == PAP) {
-       retainStack(c, c_child_r,
-                   ((StgPAP *)c)->fun,
-                   (StgPtr)((StgPAP *)c)->payload,
-                   (StgPtr)((StgPAP *)c)->payload + ((StgPAP *)c)->n_args);
-       // no more children
+
+    case PAP:
+    {
+       StgPAP *pap = (StgPAP *)c;
+       retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
        goto loop;
-    } else if (typeOfc == AP_UPD) {
+    }
+
+    case AP:
+    {
+       StgAP *ap = (StgAP *)c;
+       retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
+       goto loop;
+    }
+
+    case AP_STACK:
+       retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
        retainStack(c, c_child_r,
-                   ((StgAP_UPD *)c)->fun,
-                   (StgPtr)((StgAP_UPD *)c)->payload,
-                   (StgPtr)((StgAP_UPD *)c)->payload +
-                            ((StgAP_UPD *)c)->n_args);
-       // no more children
+                   (StgPtr)((StgAP_STACK *)c)->payload,
+                   (StgPtr)((StgAP_STACK *)c)->payload +
+                            ((StgAP_STACK *)c)->size);
        goto loop;
     }
 
@@ -1657,7 +1731,11 @@ retainRoot( StgClosure **tl )
     ASSERT(isEmptyRetainerStack());
     currentStackBoundary = stackTop;
 
-    retainClosure(*tl, *tl, *tl);
+    if (isRetainer(*tl)) {
+       retainClosure(*tl, *tl, getRetainerFrom(*tl));
+    } else {
+       retainClosure(*tl, *tl, CCS_SYSTEM);
+    }
 
     // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
     // *tl might be a TSO which is ThreadComplete, in which
@@ -1673,7 +1751,8 @@ computeRetainerSet( void )
     StgWeak *weak;
     RetainerSet *rtl;
     nat g;
-    StgMutClosure *ml;
+    StgPtr ml;
+    bdescr *bd;
 #ifdef DEBUG_RETAINER
     RetainerSet tmpRetainerSet;
 #endif
@@ -1689,85 +1768,51 @@ computeRetainerSet( void )
        // retainRoot((StgClosure *)weak);
        retainRoot((StgClosure **)&weak);
 
+    // Consider roots from the stable ptr table.
+    markStablePtrTable(retainRoot);
+
     // The following code resets the rs field of each unvisited mutable
     // object (computing sumOfNewCostExtra and updating costArray[] when
     // debugging retainer profiler).
     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-       ASSERT(g != 0 ||
-              (generations[g].mut_list == END_MUT_LIST &&
-               generations[g].mut_once_list == END_MUT_LIST));
+        ASSERT(g != 0 || (generations[g].mut_list == NULL));
 
-       // Todo:
-       // I think traversing through mut_list is unnecessary.
-       // Think about removing this part.
-       for (ml = generations[g].mut_list; ml != END_MUT_LIST;
-            ml = ml->mut_link) {
-
-           maybeInitRetainerSet((StgClosure *)ml);
-           rtl = retainerSetOf((StgClosure *)ml);
-
-#ifdef DEBUG_RETAINER
-           if (rtl == NULL) {
-               // first visit to *ml
-               // This is a violation of the interface rule!
-               RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
-
-               switch (get_itbl((StgClosure *)ml)->type) {
-               case IND_STATIC:
-                   // no cost involved
-                   break;
-               case CONSTR_INTLIKE:
-               case CONSTR_CHARLIKE:
-               case CONSTR_NOCAF_STATIC:
-               case CONSTR_STATIC:
-               case THUNK_STATIC:
-               case FUN_STATIC:
-                   barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
-                   break;
-               default:
-                   // dynamic objects
-                   costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
-                   sumOfNewCostExtra += cost((StgClosure *)ml);
-                   break;
-               }
-           }
-#endif
-       }
-
-       // Traversing through mut_once_list is, in contrast, necessary
+       // Traversing through mut_list is necessary
        // because we can find MUT_VAR objects which have not been
        // visited during retainer profiling.
-       for (ml = generations[g].mut_once_list; ml != END_MUT_LIST;
-            ml = ml->mut_link) {
+       for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
+           for (ml = bd->start; ml < bd->free; ml++) {
+
+               maybeInitRetainerSet((StgClosure *)*ml);
+               rtl = retainerSetOf((StgClosure *)*ml);
 
-           maybeInitRetainerSet((StgClosure *)ml);
-           rtl = retainerSetOf((StgClosure *)ml);
 #ifdef DEBUG_RETAINER
-           if (rtl == NULL) {
-               // first visit to *ml
-               // This is a violation of the interface rule!
-               RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
-
-               switch (get_itbl((StgClosure *)ml)->type) {
-               case IND_STATIC:
-                   // no cost involved
-                   break;
-               case CONSTR_INTLIKE:
-               case CONSTR_CHARLIKE:
-               case CONSTR_NOCAF_STATIC:
-               case CONSTR_STATIC:
-               case THUNK_STATIC:
-               case FUN_STATIC:
-                   barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
-                   break;
-               default:
-                   // dynamic objects
-                   costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
-                   sumOfNewCostExtra += cost((StgClosure *)ml);
-                   break;
+               if (rtl == NULL) {
+                   // first visit to *ml
+                   // This is a violation of the interface rule!
+                   RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
+                   
+                   switch (get_itbl((StgClosure *)ml)->type) {
+                   case IND_STATIC:
+                       // no cost involved
+                       break;
+                   case CONSTR_INTLIKE:
+                   case CONSTR_CHARLIKE:
+                   case CONSTR_NOCAF_STATIC:
+                   case CONSTR_STATIC:
+                   case THUNK_STATIC:
+                   case FUN_STATIC:
+                       barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
+                       break;
+                   default:
+                       // dynamic objects
+                       costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
+                       sumOfNewCostExtra += cost((StgClosure *)ml);
+                       break;
+                   }
                }
-           }
 #endif
+           }
        }
     }
 }
@@ -1809,19 +1854,19 @@ resetStaticObjectForRetainerProfiling( void )
            // Since we do not compute the retainer set of any
            // IND_STATIC object, we don't have to reset its retainer
            // field.
-           p = IND_STATIC_LINK(p);
+           p = (StgClosure*)*IND_STATIC_LINK(p);
            break;
        case THUNK_STATIC:
            maybeInitRetainerSet(p);
-           p = THUNK_STATIC_LINK(p);
+           p = (StgClosure*)*THUNK_STATIC_LINK(p);
            break;
        case FUN_STATIC:
            maybeInitRetainerSet(p);
-           p = FUN_STATIC_LINK(p);
+           p = (StgClosure*)*FUN_STATIC_LINK(p);
            break;
        case CONSTR_STATIC:
            maybeInitRetainerSet(p);
-           p = STATIC_LINK(get_itbl(p), p);
+           p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
            break;
        default:
            barf("resetStaticObjectForRetainerProfiling: %p (%s)",
@@ -1830,7 +1875,7 @@ resetStaticObjectForRetainerProfiling( void )
        }
     }
 #ifdef DEBUG_RETAINER
-    // fprintf(stderr, "count in scavenged_static_objects = %d\n", count);
+    // debugBelch("count in scavenged_static_objects = %d\n", count);
 #endif
 }
 
@@ -1846,32 +1891,31 @@ resetStaticObjectForRetainerProfiling( void )
 void
 retainerProfile(void)
 {
-  nat allCost, numSet;
 #ifdef DEBUG_RETAINER
   nat i;
   nat totalHeapSize;        // total raw heap size (computed by linear scanning)
 #endif
 
 #ifdef DEBUG_RETAINER
-  fprintf(stderr, " < retainerProfile() invoked : %d>\n", retainerGeneration);
+  debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration);
 #endif
 
   stat_startRP();
 
   // We haven't flipped the bit yet.
 #ifdef DEBUG_RETAINER
-  fprintf(stderr, "Before traversing:\n");
+  debugBelch("Before traversing:\n");
   sumOfCostLinear = 0;
   for (i = 0;i < N_CLOSURE_TYPES; i++)
     costArrayLinear[i] = 0;
   totalHeapSize = checkHeapSanityForRetainerProfiling();
 
-  fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
+  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
   /*
-  fprintf(stderr, "costArrayLinear[] = ");
+  debugBelch("costArrayLinear[] = ");
   for (i = 0;i < N_CLOSURE_TYPES; i++)
-    fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
-  fprintf(stderr, "\n");
+    debugBelch("[%u:%u] ", i, costArrayLinear[i]);
+  debugBelch("\n");
   */
 
   ASSERT(sumOfCostLinear == totalHeapSize);
@@ -1879,7 +1923,7 @@ retainerProfile(void)
 /*
 #define pcostArrayLinear(index) \
   if (costArrayLinear[index] > 0) \
-    fprintf(stderr, "costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
+    debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
   pcostArrayLinear(THUNK_STATIC);
   pcostArrayLinear(FUN_STATIC);
   pcostArrayLinear(CONSTR_STATIC);
@@ -1902,7 +1946,7 @@ retainerProfile(void)
   timesAnyObjectVisited = 0;
 
 #ifdef DEBUG_RETAINER
-  fprintf(stderr, "During traversing:\n");
+  debugBelch("During traversing:\n");
   sumOfNewCost = 0;
   sumOfNewCostExtra = 0;
   for (i = 0;i < N_CLOSURE_TYPES; i++)
@@ -1923,16 +1967,14 @@ retainerProfile(void)
 #endif
   computeRetainerSet();
 
-  outputRetainerSet(hp_file, &allCost, &numSet);
-
 #ifdef DEBUG_RETAINER
-  fprintf(stderr, "After traversing:\n");
+  debugBelch("After traversing:\n");
   sumOfCostLinear = 0;
   for (i = 0;i < N_CLOSURE_TYPES; i++)
     costArrayLinear[i] = 0;
   totalHeapSize = checkHeapSanityForRetainerProfiling();
 
-  fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
+  debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
   ASSERT(sumOfCostLinear == totalHeapSize);
 
   // now, compare the two results
@@ -1943,22 +1985,22 @@ retainerProfile(void)
         1) Dead weak pointers, whose type is CONSTR. These objects are not
            reachable from any roots.
   */
-  fprintf(stderr, "Comparison:\n");
-  fprintf(stderr, "\tcostArrayLinear[] (must be empty) = ");
+  debugBelch("Comparison:\n");
+  debugBelch("\tcostArrayLinear[] (must be empty) = ");
   for (i = 0;i < N_CLOSURE_TYPES; i++)
     if (costArray[i] != costArrayLinear[i])
       // nothing should be printed except MUT_VAR after major GCs
-      fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
-  fprintf(stderr, "\n");
+      debugBelch("[%u:%u] ", i, costArrayLinear[i]);
+  debugBelch("\n");
 
-  fprintf(stderr, "\tsumOfNewCost = %u\n", sumOfNewCost);
-  fprintf(stderr, "\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
-  fprintf(stderr, "\tcostArray[] (must be empty) = ");
+  debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost);
+  debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
+  debugBelch("\tcostArray[] (must be empty) = ");
   for (i = 0;i < N_CLOSURE_TYPES; i++)
     if (costArray[i] != costArrayLinear[i])
       // nothing should be printed except MUT_VAR after major GCs
-      fprintf(stderr, "[%u:%u] ", i, costArray[i]);
-  fprintf(stderr, "\n");
+      debugBelch("[%u:%u] ", i, costArray[i]);
+  debugBelch("\n");
 
   // only for major garbage collection
   ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
@@ -1978,8 +2020,7 @@ retainerProfile(void)
 #ifdef DEBUG_RETAINER
     maxCStackSize, maxStackSize,
 #endif
-    (double)timesAnyObjectVisited / numObjectVisited,
-    allCost, numSet);
+    (double)timesAnyObjectVisited / numObjectVisited);
 }
 
 /* -----------------------------------------------------------------------------
@@ -1989,7 +2030,7 @@ retainerProfile(void)
 #ifdef DEBUG_RETAINER
 
 #define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
-        ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1))) && \
+        ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \
         ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
 
 static nat
@@ -2005,17 +2046,17 @@ sanityCheckHeapClosure( StgClosure *c )
        if (get_itbl(c)->type == CONSTR &&
            !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
            !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
-           fprintf(stderr, "\tUnvisited dead weak pointer object found: c = %p\n", c);
+           debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c);
            costArray[get_itbl(c)->type] += cost(c);
            sumOfNewCost += cost(c);
        } else
-           fprintf(stderr,
+           debugBelch(
                    "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
                    flip, c, get_itbl(c)->type,
                    get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
                    RSET(c));
     } else {
-       // fprintf(stderr, "sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
+       // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
     }
 
     info = get_itbl(c);
@@ -2034,14 +2075,19 @@ sanityCheckHeapClosure( StgClosure *c )
     case MVAR:
        return sizeofW(StgMVar);
 
-    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
     case MUT_ARR_PTRS_FROZEN:
+    case MUT_ARR_PTRS_FROZEN0:
        return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
 
-    case AP_UPD:
+    case AP:
     case PAP:
        return pap_sizeW((StgPAP *)c);
 
+    case AP:
+       return ap_stack_sizeW((StgAP_STACK *)c);
+
     case ARR_WORDS:
        return arr_words_sizeW((StgArrWords *)c);
 
@@ -2059,16 +2105,13 @@ sanityCheckHeapClosure( StgClosure *c )
     case FUN_0_2:
     case WEAK:
     case MUT_VAR:
-    case MUT_CONS:
     case CAF_BLACKHOLE:
     case BLACKHOLE:
     case SE_BLACKHOLE:
     case SE_CAF_BLACKHOLE:
-    case BLACKHOLE_BQ:
     case IND_PERM:
     case IND_OLDGEN:
     case IND_OLDGEN_PERM:
-    case FOREIGN:
     case BCO:
     case STABLE_NAME:
        return sizeW_fromITBL(info);
@@ -2089,7 +2132,6 @@ sanityCheckHeapClosure( StgClosure *c )
     case UPDATE_FRAME:
     case CATCH_FRAME:
     case STOP_FRAME:
-    case SEQ_FRAME:
     case RET_DYN:
     case RET_BCO:
     case RET_SMALL:
@@ -2202,12 +2244,12 @@ checkHeapSanityForRetainerProfiling( void )
     nat costSum, g, s;
 
     costSum = 0;
-    fprintf(stderr, "START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+    debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
     if (RtsFlags.GcFlags.generations == 1) {
        costSum += heapCheck(g0s0->to_blocks);
-       fprintf(stderr, "heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+       debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
        costSum += chainCheck(g0s0->large_objects);
-       fprintf(stderr, "chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+       debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
     } else {
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        for (s = 0; s < generations[g].n_steps; s++) {
@@ -2220,14 +2262,14 @@ checkHeapSanityForRetainerProfiling( void )
            */
            if (g == 0 && s == 0) {
                costSum += smallObjectPoolCheck();
-               fprintf(stderr, "smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+               debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
                costSum += chainCheck(generations[g].steps[s].large_objects);
-               fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+               debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
            } else {
                costSum += heapCheck(generations[g].steps[s].blocks);
-               fprintf(stderr, "heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+               debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
                costSum += chainCheck(generations[g].steps[s].large_objects);
-               fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+               debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
            }
        }
     }
@@ -2251,7 +2293,7 @@ findPointer(StgPtr p)
                    if (*q == (StgWord)p) {
                        r = q;
                        while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
-                       fprintf(stderr, "Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
+                       debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
                        // return;
                    }
                }
@@ -2263,7 +2305,7 @@ findPointer(StgPtr p)
                    if (*q == (StgWord)p) {
                        r = q;
                        while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
-                       fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, r);
+                       debugBelch("Found in gen[%d], large_objects: %p\n", g, r);
                        // return;
                    }
                }
@@ -2284,20 +2326,20 @@ belongToHeap(StgPtr p)
            bd = generations[g].steps[s].blocks;
            for (; bd; bd = bd->link) {
                if (bd->start <= p && p < bd->free) {
-                   fprintf(stderr, "Belongs to gen[%d], step[%d]", g, s);
+                   debugBelch("Belongs to gen[%d], step[%d]", g, s);
                    return;
                }
            }
            bd = generations[g].steps[s].large_objects;
            for (; bd; bd = bd->link) {
                if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
-                   fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, bd->start);
+                   debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start);
                    return;
                }
            }
        }
     }
 }
-#endif // DEBUG_RETAINER
+#endif /* DEBUG_RETAINER */
 
 #endif /* PROFILING */