[project @ 2004-11-10 02:13:12 by wolfgang]
[ghc-hetmet.git] / ghc / rts / ProfHeap.c
index 21e83e5..932c069 100644 (file)
@@ -1,7 +1,6 @@
 /* -----------------------------------------------------------------------------
- * $Id: ProfHeap.c,v 1.27 2001/11/26 16:54:21 simonmar Exp $
  *
- * (c) The GHC Team, 1998-2000
+ * (c) The GHC Team, 1998-2003
  *
  * Support for heap profiling
  *
 #include "RetainerProfile.h"
 #include "LdvProfile.h"
 #include "Arena.h"
-
-#ifdef DEBUG_HEAP_PROF
 #include "Printer.h"
-static void fprint_data(FILE *fp);
-#endif
+
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
 
 /* -----------------------------------------------------------------------------
  * era stores the current time period.  It is the same as the
@@ -41,33 +40,48 @@ static void fprint_data(FILE *fp);
  * RESTRICTION:
  *   era must be no longer than LDV_SHIFT (15 or 30) bits.
  * Invariants:
- *   era is initialized to 0 in initHeapProfiling().
+ *   era is initialized to 1 in initHeapProfiling().
  *
  * max_era is initialized to 2^LDV_SHIFT in initHeapProfiling().
  * When era reaches max_era, the profiling stops because a closure can
  * store only up to (max_era - 1) as its creation or last use time.
  * -------------------------------------------------------------------------- */
-nat era;
+unsigned int era;
 static nat max_era;
 
 /* -----------------------------------------------------------------------------
-   counters
-   -------------------------------------------------------------------------- */
+ * Counters
+ *
+ * For most heap profiles each closure identity gets a simple count
+ * of live words in the heap at each census.  However, if we're
+ * selecting by biography, then we have to keep the various
+ * lag/drag/void counters for each identity.
+ * -------------------------------------------------------------------------- */
 typedef struct _counter {
     void *identity;
     union {
        nat resid;
        struct {
            int prim;     // total size of 'inherently used' closures
-           int unused;   // total size of 'never used' closures
+           int not_used; // total size of 'never used' closures
            int used;     // total size of 'used at least once' closures
-           int void_new;  // current total size of 'destroyed without being used' closures
-           int drag_new;  // current total size of 'used at least once and waiting to die'
+           int void_total;  // current total size of 'destroyed without being used' closures
+           int drag_total;  // current total size of 'used at least once and waiting to die'
        } ldv;
     } c;
     struct _counter *next;
 } counter;
 
+STATIC_INLINE void
+initLDVCtr( counter *ctr )
+{
+    ctr->c.ldv.prim = 0;
+    ctr->c.ldv.not_used = 0;
+    ctr->c.ldv.used = 0;
+    ctr->c.ldv.void_total = 0;
+    ctr->c.ldv.drag_total = 0;
+}
+
 typedef struct {
     double      time;    // the time in MUT time when the census is made
     HashTable * hash;
@@ -82,21 +96,133 @@ typedef struct {
     int       drag_total;
 } Census;
 
-Census *censuses = NULL;
-nat n_censuses = 0;
+static Census *censuses = NULL;
+static nat n_censuses = 0;
+
+#ifdef PROFILING
+static void aggregateCensusInfo( void );
+#endif
+
+static void dumpCensus( Census *census );
+
+/* -----------------------------------------------------------------------------
+   Closure Type Profiling;
+
+   PROBABLY TOTALLY OUT OF DATE -- ToDo (SDM)
+   -------------------------------------------------------------------------- */
+
+#ifdef DEBUG_HEAP_PROF
+static char *type_names[] = {
+      "INVALID_OBJECT"
+    , "CONSTR"
+    , "CONSTR_INTLIKE"
+    , "CONSTR_CHARLIKE"
+    , "CONSTR_STATIC"
+    , "CONSTR_NOCAF_STATIC"
+
+    , "FUN"
+    , "FUN_STATIC"
+
+    , "THUNK"
+    , "THUNK_STATIC"
+    , "THUNK_SELECTOR"
+
+    , "BCO"
+    , "AP_STACK"
+    , "AP"
+
+    , "PAP"
+
+    , "IND"
+    , "IND_OLDGEN"
+    , "IND_PERM"
+    , "IND_OLDGEN_PERM"
+    , "IND_STATIC"
+
+    , "RET_BCO"
+    , "RET_SMALL"
+    , "RET_VEC_SMALL"
+    , "RET_BIG"
+    , "RET_VEC_BIG"
+    , "RET_DYN"
+    , "UPDATE_FRAME"
+    , "CATCH_FRAME"
+    , "STOP_FRAME"
+
+    , "BLACKHOLE"
+    , "BLACKHOLE_BQ"
+    , "MVAR"
+
+    , "ARR_WORDS"
+
+    , "MUT_ARR_PTRS"
+    , "MUT_ARR_PTRS_FROZEN"
+    , "MUT_VAR"
+
+    , "WEAK"
+    , "FOREIGN"
+  
+    , "TSO"
+
+    , "BLOCKED_FETCH"
+    , "FETCH_ME"
+
+    , "EVACUATED"
+};
+
+#endif /* DEBUG_HEAP_PROF */
+
+/* -----------------------------------------------------------------------------
+ * Find the "closure identity", which is a unique pointer reresenting
+ * the band to which this closure's heap space is attributed in the
+ * heap profile.
+ * ------------------------------------------------------------------------- */
+STATIC_INLINE void *
+closureIdentity( StgClosure *p )
+{
+    switch (RtsFlags.ProfFlags.doHeapProfile) {
+
+#ifdef PROFILING
+    case HEAP_BY_CCS:
+       return p->header.prof.ccs;
+    case HEAP_BY_MOD:
+       return p->header.prof.ccs->cc->module;
+    case HEAP_BY_DESCR:
+       return get_itbl(p)->prof.closure_desc;
+    case HEAP_BY_TYPE:
+       return get_itbl(p)->prof.closure_type;
+    case HEAP_BY_RETAINER:
+       // AFAIK, the only closures in the heap which might not have a
+       // valid retainer set are DEAD_WEAK closures.
+       if (isRetainerSetFieldValid(p))
+           return retainerSetOf(p);
+       else
+           return NULL;
+
+#else // DEBUG
+    case HEAP_BY_INFOPTR:
+       return (void *)((StgClosure *)p)->header.info; 
+    case HEAP_BY_CLOSURE_TYPE:
+       return type_names[get_itbl(p)->type];
+
+#endif
+    default:
+       barf("closureIdentity");
+    }
+}
 
 /* --------------------------------------------------------------------------
  * Profiling type predicates
  * ----------------------------------------------------------------------- */
 #ifdef PROFILING
-static inline rtsBool
+STATIC_INLINE rtsBool
 doingLDVProfiling( void )
 {
     return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV 
            || RtsFlags.ProfFlags.bioSelector != NULL);
 }
 
-static inline rtsBool
+STATIC_INLINE rtsBool
 doingRetainerProfiling( void )
 {
     return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
@@ -117,20 +243,59 @@ doingRetainerProfiling( void )
 void
 LDV_recordDead( StgClosure *c, nat size )
 {
+    void *id;
+    nat t;
+    counter *ctr;
+
     if (era > 0 && closureSatisfiesConstraints(c)) {
-       nat t;
        size -= sizeofW(StgProfHeader);
+       ASSERT(LDVW(c) != 0);
        if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
            t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
            if (t < era) {
-               censuses[t].void_total   += (int)size;
-               censuses[era].void_total -= (int)size;
+               if (RtsFlags.ProfFlags.bioSelector == NULL) {
+                   censuses[t].void_total   += (int)size;
+                   censuses[era].void_total -= (int)size;
+               } else {
+                   id = closureIdentity(c);
+                   ctr = lookupHashTable(censuses[t].hash, (StgWord)id);
+                   ASSERT( ctr != NULL );
+                   ctr->c.ldv.void_total += (int)size;
+                   ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
+                   if (ctr == NULL) {
+                       ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
+                       initLDVCtr(ctr);
+                       insertHashTable(censuses[era].hash, (StgWord)id, ctr);
+                       ctr->identity = id;
+                       ctr->next = censuses[era].ctrs;
+                       censuses[era].ctrs = ctr;
+                   }
+                   ctr->c.ldv.void_total -= (int)size;
+               }
            }
        } else {
            t = LDVW((c)) & LDV_LAST_MASK;
            if (t + 1 < era) {
-               censuses[t + 1].drag_total += size;
-               censuses[era].drag_total   -= size;
+               if (RtsFlags.ProfFlags.bioSelector == NULL) {
+                   censuses[t+1].drag_total += size;
+                   censuses[era].drag_total -= size;
+               } else {
+                   void *id;
+                   id = closureIdentity(c);
+                   ctr = lookupHashTable(censuses[t+1].hash, (StgWord)id);
+                   ASSERT( ctr != NULL );
+                   ctr->c.ldv.drag_total += (int)size;
+                   ctr = lookupHashTable(censuses[era].hash, (StgWord)id);
+                   if (ctr == NULL) {
+                       ctr = arenaAlloc(censuses[era].arena, sizeof(counter));
+                       initLDVCtr(ctr);
+                       insertHashTable(censuses[era].hash, (StgWord)id, ctr);
+                       ctr->identity = id;
+                       ctr->next = censuses[era].ctrs;
+                       censuses[era].ctrs = ctr;
+                   }
+                   ctr->c.ldv.drag_total -= (int)size;
+               }
            }
        }
     }
@@ -140,14 +305,18 @@ LDV_recordDead( StgClosure *c, nat size )
 /* --------------------------------------------------------------------------
  * Initialize censuses[era];
  * ----------------------------------------------------------------------- */
-static inline void
-initEra(void)
+STATIC_INLINE void
+initEra(Census *census)
 {
-    censuses[era].not_used = 0;
-    censuses[era].used     = 0;
-    censuses[era].prim     = 0;
-    censuses[era].void_total = 0;
-    censuses[era].drag_total = 0;
+    census->hash  = allocHashTable();
+    census->ctrs  = NULL;
+    census->arena = newArena();
+
+    census->not_used   = 0;
+    census->used       = 0;
+    census->prim       = 0;
+    census->void_total = 0;
+    census->drag_total = 0;
 }
 
 /* --------------------------------------------------------------------------
@@ -162,7 +331,8 @@ nextEra( void )
        era++;
 
        if (era == max_era) {
-           barf("maximum number of censuses reached; use +RTS -i to reduce");
+           errorBelch("maximum number of censuses reached; use +RTS -i to reduce");
+           stg_exit(EXIT_FAILURE);
        }
        
        if (era == n_censuses) {
@@ -172,14 +342,17 @@ nextEra( void )
        }
     }
 #endif // PROFILING
-       
-    initEra();
+
+    initEra( &censuses[era] );
 }
 
-/* -------------------------------------------------------------------------- */
+/* -----------------------------------------------------------------------------
+ * DEBUG heap profiling, by info table
+ * -------------------------------------------------------------------------- */
 
 #ifdef DEBUG_HEAP_PROF
 FILE *hp_file;
+static char *hp_filename;
 
 void initProfiling1( void )
 {
@@ -187,6 +360,20 @@ void initProfiling1( void )
 
 void initProfiling2( void )
 {
+  if (RtsFlags.ProfFlags.doHeapProfile) {
+    /* Initialise the log file name */
+    hp_filename = stgMallocBytes(strlen(prog_name) + 6, "hpFileName");
+    sprintf(hp_filename, "%s.hp", prog_name);
+    
+    /* open the log file */
+    if ((hp_file = fopen(hp_filename, "w")) == NULL) {
+      debugBelch("Can't open profiling report file %s\n", 
+             hp_filename);
+      RtsFlags.ProfFlags.doHeapProfile = 0;
+      return;
+    }
+  }
+  
   initHeapProfiling();
 }
 
@@ -196,6 +383,19 @@ void endProfiling( void )
 }
 #endif /* DEBUG_HEAP_PROF */
 
+static void
+printSample(rtsBool beginSample, StgDouble sampleValue)
+{
+    StgDouble fractionalPart, integralPart;
+    fractionalPart = modf(sampleValue, &integralPart);
+    fprintf(hp_file, "%s %d.%02d\n",
+            (beginSample ? "BEGIN_SAMPLE" : "END_SAMPLE"),
+            (int)integralPart, (int)(fractionalPart * 100 + 0.5));
+}
+
+/* --------------------------------------------------------------------------
+ * Initialize the heap profilier
+ * ----------------------------------------------------------------------- */
 nat
 initHeapProfiling(void)
 {
@@ -203,6 +403,13 @@ initHeapProfiling(void)
         return 0;
     }
 
+#ifdef PROFILING
+    if (doingLDVProfiling() && doingRetainerProfiling()) {
+       errorBelch("cannot mix -hb and -hr");
+       stg_exit(1);
+    }
+#endif
+
     // we only count eras if we're doing LDV profiling.  Otherwise era
     // is fixed at zero.
 #ifdef PROFILING
@@ -224,17 +431,19 @@ initHeapProfiling(void)
     n_censuses = 32;
     censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");
 
-    fprintf(hp_file, "JOB \"%s", prog_argv[0]);
+    initEra( &censuses[era] );
+
+    /* initProfilingLogFile(); */
+    fprintf(hp_file, "JOB \"%s", prog_name);
 
 #ifdef PROFILING
     {
        int count;
        for(count = 1; count < prog_argc; count++)
            fprintf(hp_file, " %s", prog_argv[count]);
-       fprintf(hp_file, " +RTS ");
+       fprintf(hp_file, " +RTS");
        for(count = 0; count < rts_argc; count++)
-           fprintf(hp_file, "%s ", rts_argv[count]);
-       fprintf(hp_file, "\n");
+           fprintf(hp_file, " %s", rts_argv[count]);
     }
 #endif /* PROFILING */
 
@@ -245,11 +454,11 @@ initHeapProfiling(void)
     fprintf(hp_file, "SAMPLE_UNIT \"seconds\"\n");
     fprintf(hp_file, "VALUE_UNIT \"bytes\"\n");
 
-    fprintf(hp_file, "BEGIN_SAMPLE 0.00\n");
-    fprintf(hp_file, "END_SAMPLE 0.00\n");
+    printSample(rtsTrue, 0);
+    printSample(rtsFalse, 0);
 
 #ifdef DEBUG_HEAP_PROF
-    DEBUG_LoadSymbols(prog_argv[0]);
+    DEBUG_LoadSymbols(prog_name);
 #endif
 
 #ifdef PROFILING
@@ -277,130 +486,43 @@ endHeapProfiling(void)
 #endif
 
 #ifdef PROFILING
-  // Note: 
-  //   We do not need to perform a major garbage collection because all the
-  //   closures created since the last census will not affect the profiling
-  //   statistics anyhow.
-  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) 
-    LdvCensusKillAll();
-#endif
-
-#ifdef PROFILING
-    // At last... we can output the census info for LDV profiling
-    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+    if (doingLDVProfiling()) {
        nat t;
-       int sumVoidNew, sumDragNew;
-
-       // Now we compute void_total and drag_total for each census
-       sumVoidNew = 0;
-       sumDragNew = 0;
-       for (t = 1; t < era; t++) { // note: start at 1, not 0
-           sumVoidNew += censuses[t].void_total;
-           sumDragNew += censuses[t].drag_total;
-           censuses[t].void_total = sumVoidNew;
-           censuses[t].drag_total = sumDragNew;
-           ASSERT( censuses[t].void_total < censuses[t].not_used );
-           ASSERT( censuses[t].drag_total < censuses[t].used );
-       }
-       
-       for (t = 1; t < era; t++) { // note: start at 1, not 0
-           fprintf(hp_file, "MARK %f\n", censuses[t].time);
-           fprintf(hp_file, "BEGIN_SAMPLE %f\n", censuses[t].time);
-           fprintf(hp_file, "VOID\t%u\n", censuses[t].void_total * sizeof(W_));
-           fprintf(hp_file, "LAG\t%u\n", 
-                   (censuses[t].not_used - censuses[t].void_total) * sizeof(W_));
-           fprintf(hp_file, "USE\t%u\n", 
-                   (censuses[t].used - censuses[t].drag_total) * sizeof(W_));
-           fprintf(hp_file, "INHERENT_USE\t%u\n", 
-                   censuses[t].prim * sizeof(W_));
-           fprintf(hp_file, "DRAG\t%u\n", censuses[t].drag_total * sizeof(W_));
-           fprintf(hp_file, "END_SAMPLE %f\n", censuses[t].time);
+       LdvCensusKillAll();
+       aggregateCensusInfo();
+       for (t = 1; t < era; t++) {
+           dumpCensus( &censuses[t] );
        }
     }
 #endif
 
     seconds = mut_user_time();
-    fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", seconds);
-    fprintf(hp_file, "END_SAMPLE %0.2f\n", seconds);
+    printSample(rtsTrue, seconds);
+    printSample(rtsFalse, seconds);
     fclose(hp_file);
 }
 
-#ifdef DEBUG_HEAP_PROF
-/* -----------------------------------------------------------------------------
-   Closure Type Profiling;
-
-   PROBABLY TOTALLY OUT OF DATE -- ToDo (SDM)
-   -------------------------------------------------------------------------- */
-
-static char *type_names[] = {
-      "INVALID_OBJECT"
-    , "CONSTR"
-    , "CONSTR_INTLIKE"
-    , "CONSTR_CHARLIKE"
-    , "CONSTR_STATIC"
-    , "CONSTR_NOCAF_STATIC"
-
-    , "FUN"
-    , "FUN_STATIC"
-
-    , "THUNK"
-    , "THUNK_STATIC"
-    , "THUNK_SELECTOR"
-
-    , "BCO"
-    , "AP_UPD"
-
-    , "PAP"
-
-    , "IND"
-    , "IND_OLDGEN"
-    , "IND_PERM"
-    , "IND_OLDGEN_PERM"
-    , "IND_STATIC"
-
-    , "RET_BCO"
-    , "RET_SMALL"
-    , "RET_VEC_SMALL"
-    , "RET_BIG"
-    , "RET_VEC_BIG"
-    , "RET_DYN"
-    , "UPDATE_FRAME"
-    , "CATCH_FRAME"
-    , "STOP_FRAME"
-    , "SEQ_FRAME"
-
-    , "BLACKHOLE"
-    , "BLACKHOLE_BQ"
-    , "MVAR"
 
-    , "ARR_WORDS"
 
-    , "MUT_ARR_PTRS"
-    , "MUT_ARR_PTRS_FROZEN"
-    , "MUT_VAR"
-
-    , "WEAK"
-    , "FOREIGN"
-  
-    , "TSO"
-
-    , "BLOCKED_FETCH"
-    , "FETCH_ME"
-
-    , "EVACUATED"
-};
-
-#endif /* DEBUG_HEAP_PROF */
+#ifdef PROFILING
+static size_t
+buf_append(char *p, const char *q, char *end)
+{
+    int m;
 
+    for (m = 0; p < end; p++, q++, m++) {
+       *p = *q;
+       if (*q == '\0') { break; }
+    }
+    return m;
+}
 
-#ifdef PROFILING
 static void
 fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
 {
-    char buf[max_length+1];
+    char buf[max_length+1], *p, *buf_end;
     nat next_offset = 0;
     nat written;
-    char *template;
 
     // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
     if (ccs == CCS_MAIN) {
@@ -408,6 +530,11 @@ fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
        return;
     }
 
+    fprintf(fp, "(%d)", ccs->ccsID);
+
+    p = buf;
+    buf_end = buf + max_length + 1;
+
     // keep printing components of the stack until we run out of space
     // in the buffer.  If we run out of space, end with "...".
     for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {
@@ -415,21 +542,16 @@ fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
        // CAF cost centres print as M.CAF, but we leave the module
        // name out of all the others to save space.
        if (!strcmp(ccs->cc->label,"CAF")) {
-           written = snprintf(buf+next_offset, 
-                              (int)max_length-3-(int)next_offset,
-                              "%s.CAF", ccs->cc->module);
+           p += buf_append(p, ccs->cc->module, buf_end);
+           p += buf_append(p, ".CAF", buf_end);
        } else {
            if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
-               template = "%s/";
-           } else {
-               template = "%s";
+               p += buf_append(p, "/", buf_end);
            }
-           written = snprintf(buf+next_offset, 
-                              (int)max_length-3-(int)next_offset,
-                              template, ccs->cc->label);
+           p += buf_append(p, ccs->cc->label, buf_end);
        }
-
-       if (next_offset+written >= max_length-4) {
+       
+       if (p >= buf_end) {
            sprintf(buf+max_length-4, "...");
            break;
        } else {
@@ -438,12 +560,13 @@ fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
     }
     fprintf(fp, "%s", buf);
 }
+#endif // PROFILING
 
-static rtsBool
-str_matches_selector( char* str, char* sel )
+rtsBool
+strMatchesSelector( char* str, char* sel )
 {
    char* p;
-   // fprintf(stderr, "str_matches_selector %s %s\n", str, sel);
+   // debugBelch("str_matches_selector %s %s\n", str, sel);
    while (1) {
        // Compare str against wherever we've got to in sel.
        p = str;
@@ -463,48 +586,151 @@ str_matches_selector( char* str, char* sel )
    }
 }
 
-// Figure out whether a closure should be counted in this census, by
-// testing against all the specified constraints.
+/* -----------------------------------------------------------------------------
+ * Figure out whether a closure should be counted in this census, by
+ * testing against all the specified constraints.
+ * -------------------------------------------------------------------------- */
 rtsBool
 closureSatisfiesConstraints( StgClosure* p )
 {
+#ifdef DEBUG_HEAP_PROF
+    (void)p;   /* keep gcc -Wall happy */
+    return rtsTrue;
+#else
    rtsBool b;
-   if (RtsFlags.ProfFlags.modSelector) {
-       b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->module,
-                                RtsFlags.ProfFlags.modSelector );
-       if (!b) return rtsFalse;
+
+   // The CCS has a selected field to indicate whether this closure is
+   // deselected by not being mentioned in the module, CC, or CCS
+   // selectors.
+   if (!p->header.prof.ccs->selected) {
+       return rtsFalse;
    }
+
    if (RtsFlags.ProfFlags.descrSelector) {
-       b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_desc,
+       b = strMatchesSelector( (get_itbl((StgClosure *)p))->prof.closure_desc,
                                 RtsFlags.ProfFlags.descrSelector );
        if (!b) return rtsFalse;
    }
    if (RtsFlags.ProfFlags.typeSelector) {
-       b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_type,
+       b = strMatchesSelector( (get_itbl((StgClosure *)p))->prof.closure_type,
                                 RtsFlags.ProfFlags.typeSelector );
        if (!b) return rtsFalse;
    }
-   if (RtsFlags.ProfFlags.ccSelector) {
-       b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->label,
-                                RtsFlags.ProfFlags.ccSelector );
-       if (!b) return rtsFalse;
-   }
    if (RtsFlags.ProfFlags.retainerSelector) {
        RetainerSet *rs;
        nat i;
-       rs = retainerSetOf((StgClosure *)p);
-       if (rs != NULL) {
-          for (i = 0; i < rs->num; i++) {
-              b = str_matches_selector( rs->element[i]->cc->label,
-                                        RtsFlags.ProfFlags.retainerSelector );
-              if (b) return rtsTrue;
+       // We must check that the retainer set is valid here.  One
+       // reason it might not be valid is if this closure is a
+       // a newly deceased weak pointer (i.e. a DEAD_WEAK), since
+       // these aren't reached by the retainer profiler's traversal.
+       if (isRetainerSetFieldValid((StgClosure *)p)) {
+          rs = retainerSetOf((StgClosure *)p);
+          if (rs != NULL) {
+              for (i = 0; i < rs->num; i++) {
+                  b = strMatchesSelector( rs->element[i]->cc->label,
+                                          RtsFlags.ProfFlags.retainerSelector );
+                  if (b) return rtsTrue;
+              }
           }
        }
        return rtsFalse;
    }
    return rtsTrue;
-}
 #endif /* PROFILING */
+}
+
+/* -----------------------------------------------------------------------------
+ * Aggregate the heap census info for biographical profiling
+ * -------------------------------------------------------------------------- */
+#ifdef PROFILING
+static void
+aggregateCensusInfo( void )
+{
+    HashTable *acc;
+    nat t;
+    counter *c, *d, *ctrs;
+    Arena *arena;
+
+    if (!doingLDVProfiling()) return;
+
+    // Aggregate the LDV counters when displaying by biography.
+    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+       int void_total, drag_total;
+
+       // Now we compute void_total and drag_total for each census
+       void_total = 0;
+       drag_total = 0;
+       for (t = 1; t < era; t++) { // note: start at 1, not 0
+           void_total += censuses[t].void_total;
+           drag_total += censuses[t].drag_total;
+           censuses[t].void_total = void_total;
+           censuses[t].drag_total = drag_total;
+           ASSERT( censuses[t].void_total <= censuses[t].not_used );
+           ASSERT( censuses[t].drag_total <= censuses[t].used );
+       }
+       
+       return;
+    }
+
+    // otherwise... we're doing a heap profile that is restricted to
+    // some combination of lag, drag, void or use.  We've kept all the
+    // census info for all censuses so far, but we still need to
+    // aggregate the counters forwards.
+
+    arena = newArena();
+    acc = allocHashTable();
+    ctrs = NULL;
+
+    for (t = 1; t < era; t++) {
+
+       // first look through all the counters we're aggregating
+       for (c = ctrs; c != NULL; c = c->next) {
+           // if one of the totals is non-zero, then this closure
+           // type must be present in the heap at this census time...
+           d = lookupHashTable(censuses[t].hash, (StgWord)c->identity);
+
+           if (d == NULL) {
+               // if this closure identity isn't present in the
+               // census for this time period, then our running
+               // totals *must* be zero.
+               ASSERT(c->c.ldv.void_total == 0 && c->c.ldv.drag_total == 0);
+
+               // debugCCS(c->identity);
+               // debugBelch(" census=%d void_total=%d drag_total=%d\n",
+               //         t, c->c.ldv.void_total, c->c.ldv.drag_total);
+           } else {
+               d->c.ldv.void_total += c->c.ldv.void_total;
+               d->c.ldv.drag_total += c->c.ldv.drag_total;
+               c->c.ldv.void_total =  d->c.ldv.void_total;
+               c->c.ldv.drag_total =  d->c.ldv.drag_total;
+
+               ASSERT( c->c.ldv.void_total >= 0 );
+               ASSERT( c->c.ldv.drag_total >= 0 );
+           }
+       }
+
+       // now look through the counters in this census to find new ones
+       for (c = censuses[t].ctrs; c != NULL; c = c->next) {
+           d = lookupHashTable(acc, (StgWord)c->identity);
+           if (d == NULL) {
+               d = arenaAlloc( arena, sizeof(counter) );
+               initLDVCtr(d);
+               insertHashTable( acc, (StgWord)c->identity, d );
+               d->identity = c->identity;
+               d->next = ctrs;
+               ctrs = d;
+               d->c.ldv.void_total = c->c.ldv.void_total;
+               d->c.ldv.drag_total = c->c.ldv.drag_total;
+           }
+           ASSERT( c->c.ldv.void_total >= 0 );
+           ASSERT( c->c.ldv.drag_total >= 0 );
+       }
+    }
+
+    freeHashTable(acc, NULL);
+    arenaFree(arena);
+}
+#endif
 
 /* -----------------------------------------------------------------------------
  * Print out the results of a heap census.
@@ -513,24 +739,56 @@ static void
 dumpCensus( Census *census )
 {
     counter *ctr;
+    int count;
+
+    printSample(rtsTrue, census->time);
 
 #ifdef PROFILING
-    // We can't generate any info for LDV profiling until
-    // the end of the run...
-    if (doingLDVProfiling()) { return; }
+    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+       fprintf(hp_file, "VOID\t%u\n", census->void_total * sizeof(W_));
+       fprintf(hp_file, "LAG\t%u\n", 
+               (census->not_used - census->void_total) * sizeof(W_));
+       fprintf(hp_file, "USE\t%u\n", 
+               (census->used - census->drag_total) * sizeof(W_));
+       fprintf(hp_file, "INHERENT_USE\t%u\n", 
+               census->prim * sizeof(W_));
+       fprintf(hp_file, "DRAG\t%u\n", census->drag_total *
+               sizeof(W_));
+       printSample(rtsFalse, census->time);
+       return;
+    }
 #endif
 
-    fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", census->time);
-
     for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
 
+#ifdef PROFILING
+       if (RtsFlags.ProfFlags.bioSelector != NULL) {
+           count = 0;
+           if (strMatchesSelector("lag", RtsFlags.ProfFlags.bioSelector))
+               count += ctr->c.ldv.not_used - ctr->c.ldv.void_total;
+           if (strMatchesSelector("drag", RtsFlags.ProfFlags.bioSelector))
+               count += ctr->c.ldv.drag_total;
+           if (strMatchesSelector("void", RtsFlags.ProfFlags.bioSelector))
+               count += ctr->c.ldv.void_total;
+           if (strMatchesSelector("use", RtsFlags.ProfFlags.bioSelector))
+               count += ctr->c.ldv.used - ctr->c.ldv.drag_total;
+       } else
+#endif
+       {
+           count = ctr->c.resid;
+       }
+
+       ASSERT( count >= 0 );
+
+       if (count == 0) continue;
+
 #ifdef DEBUG_HEAP_PROF
        switch (RtsFlags.ProfFlags.doHeapProfile) {
        case HEAP_BY_INFOPTR:
-           fprint_data(hp_file);
+           fprintf(hp_file, "%s", lookupGHCName(ctr->identity));
            break;
        case HEAP_BY_CLOSURE_TYPE:
-           fprint_closure_types(hp_file);
+           fprintf(hp_file, "%s", (char *)ctr->identity);
            break;
        }
 #endif
@@ -538,7 +796,7 @@ dumpCensus( Census *census )
 #ifdef PROFILING
        switch (RtsFlags.ProfFlags.doHeapProfile) {
        case HEAP_BY_CCS:
-           fprint_ccs(hp_file, (CostCentreStack *)ctr->identity, 30);
+           fprint_ccs(hp_file, (CostCentreStack *)ctr->identity, 25);
            break;
        case HEAP_BY_MOD:
        case HEAP_BY_DESCR:
@@ -549,6 +807,12 @@ dumpCensus( Census *census )
        {
            RetainerSet *rs = (RetainerSet *)ctr->identity;
 
+           // it might be the distinguished retainer set rs_MANY:
+           if (rs == &rs_MANY) {
+               fprintf(hp_file, "MANY");
+               break;
+           }
+
            // Mark this retainer set by negating its id, because it
            // has appeared in at least one census.  We print the
            // values of all such retainer sets into the log file at
@@ -567,10 +831,10 @@ dumpCensus( Census *census )
        }
 #endif
 
-       fprintf(hp_file, "\t%d\n", ctr->c.resid * sizeof(W_));
+       fprintf(hp_file, "\t%d\n", count * sizeof(W_));
     }
 
-    fprintf(hp_file, "END_SAMPLE %0.2f\n", census->time);
+    printSample(rtsFalse, census->time);
 }
 
 /* -----------------------------------------------------------------------------
@@ -588,6 +852,15 @@ heapCensusChain( Census *census, bdescr *bd )
     rtsBool prim;
 
     for (; bd != NULL; bd = bd->link) {
+
+       // HACK: ignore pinned blocks, because they contain gaps.
+       // It's not clear exactly what we'd like to do here, since we
+       // can't tell which objects in the block are actually alive.
+       // Perhaps the whole block should be counted as SYSTEM memory.
+       if (bd->flags & BF_PINNED) {
+           continue;
+       }
+
        p = bd->start;
        while (p < bd->free) {
            info = get_itbl((StgClosure *)p);
@@ -599,6 +872,7 @@ heapCensusChain( Census *census, bdescr *bd )
            case FUN:
            case THUNK:
            case IND_PERM:
+           case IND_OLDGEN:
            case IND_OLDGEN_PERM:
            case CAF_BLACKHOLE:
            case SE_CAF_BLACKHOLE:
@@ -624,6 +898,10 @@ heapCensusChain( Census *census, bdescr *bd )
                break;
                
            case BCO:
+               prim = rtsTrue;
+               size = bco_sizeW((StgBCO *)p);
+               break;
+
            case MVAR:
            case WEAK:
            case FOREIGN:
@@ -640,10 +918,14 @@ heapCensusChain( Census *census, bdescr *bd )
                size = sizeofW(StgHeader) + MIN_UPD_SIZE;
                break;
 
+           case AP:
            case PAP:
-           case AP_UPD:
                size = pap_sizeW((StgPAP *)p);
                break;
+
+           case AP_STACK:
+               size = ap_stack_sizeW((StgAP_STACK *)p);
+               break;
                
            case ARR_WORDS:
                prim = rtsTrue;
@@ -658,9 +940,20 @@ heapCensusChain( Census *census, bdescr *bd )
                
            case TSO:
                prim = rtsTrue;
+#ifdef DEBUG_HEAP_PROF
                size = tso_sizeW((StgTSO *)p);
                break;
-               
+#else
+               if (RtsFlags.ProfFlags.includeTSOs) {
+                   size = tso_sizeW((StgTSO *)p);
+                   break;
+               } else {
+                   // Skip this TSO and move on to the next object
+                   p += tso_sizeW((StgTSO *)p);
+                   continue;
+               }
+#endif
+
            default:
                barf("heapCensus");
            }
@@ -669,67 +962,64 @@ heapCensusChain( Census *census, bdescr *bd )
 
 #ifdef DEBUG_HEAP_PROF
            real_size = size;
-           switch (RtsFlags.ProfFlags.doHeapProfile) {
-           case HEAP_BY_INFOPTR:
-               identity = (void *)((StgClosure *)p)->header.info; 
-               break;
-           case HEAP_BY_CLOSURE_TYPE:
-               identity = type_names[info->type];
-               break;
-           default:
-               barf("heapCensus; doHeapProfile");
-           }
-#endif
-           
-#ifdef PROFILING
+#else
            // subtract the profiling overhead
            real_size = size - sizeofW(StgProfHeader);
+#endif
 
            if (closureSatisfiesConstraints((StgClosure*)p)) {
-               switch (RtsFlags.ProfFlags.doHeapProfile) {
-               case HEAP_BY_CCS:
-                   identity = ((StgClosure *)p)->header.prof.ccs;
-                   break;
-               case HEAP_BY_MOD:
-                   identity = ((StgClosure *)p)->header.prof.ccs->cc->module;
-                   break;
-               case HEAP_BY_DESCR:
-                   identity = (get_itbl((StgClosure *)p))->prof.closure_desc;
-                   break;
-               case HEAP_BY_TYPE:
-                   identity = (get_itbl((StgClosure *)p))->prof.closure_type;
-                   break;
-               case HEAP_BY_RETAINER:
-                   identity = retainerSetOf((StgClosure *)p);
-                   break;
-               case HEAP_BY_LDV:
+#ifdef PROFILING
+               if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
                    if (prim)
                        census->prim += real_size;
                    else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
                        census->not_used += real_size;
                    else
                        census->used += real_size;
-                   // NOTE: don't break here.  We're not using the
-                   // hash table.
-                   p += size;
-                   continue;
-               default:
-                   barf("heapCensus; doHeapProfile");
-               }
-           }
+               } else
 #endif
+               {
+                   identity = closureIdentity((StgClosure *)p);
 
-           if (identity != NULL) {
-               ctr = lookupHashTable( census->hash, (StgWord)identity );
-               if (ctr != NULL) {
-                   ctr->c.resid += real_size;
-               } else {
-                   ctr = arenaAlloc( census->arena, sizeof(counter) );
-                   insertHashTable( census->hash, (StgWord)identity, ctr );
-                   ctr->c.resid = real_size;
-                   ctr->identity = identity;
-                   ctr->next = census->ctrs;
-                   census->ctrs = ctr;
+                   if (identity != NULL) {
+                       ctr = lookupHashTable( census->hash, (StgWord)identity );
+                       if (ctr != NULL) {
+#ifdef PROFILING
+                           if (RtsFlags.ProfFlags.bioSelector != NULL) {
+                               if (prim)
+                                   ctr->c.ldv.prim += real_size;
+                               else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+                                   ctr->c.ldv.not_used += real_size;
+                               else
+                                   ctr->c.ldv.used += real_size;
+                           } else
+#endif
+                           {
+                               ctr->c.resid += real_size;
+                           }
+                       } else {
+                           ctr = arenaAlloc( census->arena, sizeof(counter) );
+                           initLDVCtr(ctr);
+                           insertHashTable( census->hash, (StgWord)identity, ctr );
+                           ctr->identity = identity;
+                           ctr->next = census->ctrs;
+                           census->ctrs = ctr;
+
+#ifdef PROFILING
+                           if (RtsFlags.ProfFlags.bioSelector != NULL) {
+                               if (prim)
+                                   ctr->c.ldv.prim = real_size;
+                               else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+                                   ctr->c.ldv.not_used = real_size;
+                               else
+                                   ctr->c.ldv.used = real_size;
+                           } else
+#endif
+                           {
+                               ctr->c.resid = real_size;
+                           }
+                       }
+                   }
                }
            }
 
@@ -744,13 +1034,8 @@ heapCensus( void )
   nat g, s;
   Census *census;
 
-  stat_startHeapCensus();
-
   census = &censuses[era];
   census->time  = mut_user_time();
-  census->hash  = allocHashTable();
-  census->ctrs  = NULL;
-  census->arena = newArena();
     
   // calculate retainer sets if necessary
 #ifdef PROFILING
@@ -759,8 +1044,18 @@ heapCensus( void )
   }
 #endif
 
-  // traverse the heap, collecting the census info
+#ifdef PROFILING
+  stat_startHeapCensus();
+#endif
+
+  // Traverse the heap, collecting the census info
+
+  // First the small_alloc_list: we have to fix the free pointer at
+  // the end by calling tidyAllocatedLists() first.
+  tidyAllocateLists();
   heapCensusChain( census, small_alloc_list );
+
+  // Now traverse the heap in each generation/step.
   if (RtsFlags.GcFlags.generations == 1) {
       heapCensusChain( census, g0s0->to_blocks );
   } else {
@@ -769,22 +1064,40 @@ heapCensus( void )
              heapCensusChain( census, generations[g].steps[s].blocks );
              // Are we interested in large objects?  might be
              // confusing to include the stack in a heap profile.
-             // heapCensusChain( census, generations[g].steps[s].large_objects );
+             heapCensusChain( census, generations[g].steps[s].large_objects );
          }
       }
   }
 
   // dump out the census info
-  dumpCensus( census );
+#ifdef PROFILING
+    // We can't generate any info for LDV profiling until
+    // the end of the run...
+    if (!doingLDVProfiling())
+       dumpCensus( census );
+#else
+    dumpCensus( census );
+#endif
+
 
-  // free our storage
-  freeHashTable(census->hash, NULL/* don't free the elements */);
-  arenaFree(census->arena);
+  // free our storage, unless we're keeping all the census info for
+  // future restriction by biography.
+#ifdef PROFILING
+  if (RtsFlags.ProfFlags.bioSelector == NULL)
+#endif
+  {
+      freeHashTable( census->hash, NULL/* don't free the elements */ );
+      arenaFree( census->arena );
+      census->hash = NULL;
+      census->arena = NULL;
+  }
 
   // we're into the next time period now
   nextEra();
 
+#ifdef PROFILING
   stat_endHeapCensus();
+#endif
 }    
 
 #endif /* PROFILING || DEBUG_HEAP_PROF */