[project @ 2001-11-26 16:54:21 by simonmar]
[ghc-hetmet.git] / ghc / rts / ProfHeap.c
index fc4f421..21e83e5 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: ProfHeap.c,v 1.26 2001/11/22 14:25:12 simonmar Exp $
+ * $Id: ProfHeap.c,v 1.27 2001/11/26 16:54:21 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
 #include "StrHash.h"
 #include "RetainerProfile.h"
 #include "LdvProfile.h"
+#include "Arena.h"
 
 #ifdef DEBUG_HEAP_PROF
 #include "Printer.h"
-static void initSymbolHash(void);
-static void clear_table_data(void);
 static void fprint_data(FILE *fp);
 #endif
 
 /* -----------------------------------------------------------------------------
- * Hash tables.
+ * era stores the current time period.  It is the same as the
+ * number of censuses that have been performed.
  *
- * For profiling by module, constructor or closure type we need to be
- * able to get from a string describing the category to a structure
- * containing the counters for that category.  The strings aren't
- * unique (although gcc will do a fairly good job of commoning them up
- * where possible), so we have a many->one mapping.
- *
- * We represent the many->one mapping with a hash table.  In order to
- * find the unique counter associated with a string the first time we
- * encounter a particular string, we need another hash table, mapping
- * hashed strings to buckets of counters.  The string is hashed, then
- * the bucket is searched for an existing counter for the same
- * string. 
+ * RESTRICTION:
+ *   era must be no longer than LDV_SHIFT (15 or 30) bits.
+ * Invariants:
+ *   era is initialized to 0 in initHeapProfiling().
  *
+ * max_era is initialized to 2^LDV_SHIFT in initHeapProfiling().
+ * When era reaches max_era, the profiling stops because a closure can
+ * store only up to (max_era - 1) as its creation or last use time.
  * -------------------------------------------------------------------------- */
+nat era;
+static nat max_era;
 
-#ifdef PROFILING
-typedef struct _ctr {
-    const char *str;
-    unsigned long mem_resid;
-    struct _ctr *next;
-    struct _ctr *next_bucket;
-} prof_ctr;
-
-/* Linked list of all existing ctr structs */
-prof_ctr *all_ctrs;
-
-/* Hash table mapping (char *) -> (struct _ctr) */
-HashTable *str_to_ctr;
-
-/* Hash table mapping hash_t (hashed string) -> (struct _ctr) */
-HashTable *hashstr_to_ctrs;
+/* -----------------------------------------------------------------------------
+   counters
+   -------------------------------------------------------------------------- */
+typedef struct _counter {
+    void *identity;
+    union {
+       nat resid;
+       struct {
+           int prim;     // total size of 'inherently used' closures
+           int unused;   // total size of 'never used' closures
+           int used;     // total size of 'used at least once' closures
+           int void_new;  // current total size of 'destroyed without being used' closures
+           int drag_new;  // current total size of 'used at least once and waiting to die'
+       } ldv;
+    } c;
+    struct _counter *next;
+} counter;
 
-static void
-initHashTables( void )
+typedef struct {
+    double      time;    // the time in MUT time when the census is made
+    HashTable * hash;
+    counter   * ctrs;
+    Arena     * arena;
+
+    // for LDV profiling, when just displaying by LDV
+    int       prim;
+    int       not_used;
+    int       used;
+    int       void_total;
+    int       drag_total;
+} Census;
+
+Census *censuses = NULL;
+nat n_censuses = 0;
+
+/* --------------------------------------------------------------------------
+ * Profiling type predicates
+ * ----------------------------------------------------------------------- */
+#ifdef PROFILING
+static inline rtsBool
+doingLDVProfiling( void )
 {
-    str_to_ctr      = allocHashTable();
-    hashstr_to_ctrs = allocHashTable();
-    all_ctrs = NULL;
+    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV 
+           || RtsFlags.ProfFlags.bioSelector != NULL);
 }
 
-static prof_ctr *
-strToCtr(const char *str)
+static inline rtsBool
+doingRetainerProfiling( void )
 {
-    prof_ctr *ctr;
-
-    ctr = lookupHashTable( str_to_ctr, (W_)str );
-
-    if (ctr != NULL) { return ctr; }
-
-    else {
-       hash_t str_hash = hash_str((char *)str);
-       prof_ctr *prev;
+    return (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER
+           || RtsFlags.ProfFlags.retainerSelector != NULL);
+}
+#endif // PROFILING
 
-       ctr = lookupHashTable( hashstr_to_ctrs, (W_)str_hash );
-       prev = NULL;
+// Precesses a closure 'c' being destroyed whose size is 'size'.
+// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
+// such as TSO; they should not be involved in computing dragNew or voidNew.
+// 
+// Even though era is checked in both LdvCensusForDead() and 
+// LdvCensusKillAll(), we still need to make sure that era is > 0 because 
+// LDV_recordDead() may be called from elsewhere in the runtime system. E.g., 
+// when a thunk is replaced by an indirection object.
 
-       for (; ctr != NULL; prev = ctr, ctr = ctr->next_bucket ) {
-           if (!strcmp(ctr->str, str)) {
-               insertHashTable( str_to_ctr, (W_)str, ctr );
-#ifdef DEBUG_CTR
-               fprintf(stderr,"strToCtr: existing ctr for `%s'\n",str);
-#endif
-               return ctr;
+#ifdef PROFILING
+void
+LDV_recordDead( StgClosure *c, nat size )
+{
+    if (era > 0 && closureSatisfiesConstraints(c)) {
+       nat t;
+       size -= sizeofW(StgProfHeader);
+       if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
+           t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
+           if (t < era) {
+               censuses[t].void_total   += (int)size;
+               censuses[era].void_total -= (int)size;
            }
-       }
-
-       ctr = stgMallocBytes(sizeof(prof_ctr), "strToCtr");
-       ctr->mem_resid = 0;
-       ctr->str = str;
-       ctr->next_bucket = NULL;
-       ctr->next = all_ctrs;
-       all_ctrs = ctr;
-
-#ifdef DEBUG_CTR
-       fprintf(stderr,"strToCtr: new ctr for `%s'\n",str);
-#endif
-
-       if (prev != NULL) {
-           prev->next_bucket = ctr;
        } else {
-           insertHashTable( hashstr_to_ctrs, str_hash, ctr );
+           t = LDVW((c)) & LDV_LAST_MASK;
+           if (t + 1 < era) {
+               censuses[t + 1].drag_total += size;
+               censuses[era].drag_total   -= size;
+           }
        }
-       insertHashTable( str_to_ctr, (W_)str, ctr);
-       return ctr;
     }
 }
+#endif
 
-static void
-clearCtrResid( void )
+/* --------------------------------------------------------------------------
+ * Initialize censuses[era];
+ * ----------------------------------------------------------------------- */
+static inline void
+initEra(void)
 {
-    prof_ctr *ctr;
-    
-    for (ctr = all_ctrs; ctr != NULL; ctr = ctr->next) {
-       ctr->mem_resid = 0;
-    }
+    censuses[era].not_used = 0;
+    censuses[era].used     = 0;
+    censuses[era].prim     = 0;
+    censuses[era].void_total = 0;
+    censuses[era].drag_total = 0;
 }
 
+/* --------------------------------------------------------------------------
+ * Increases era by 1 and initialize census[era].
+ * Reallocates gi[] and increases its size if needed.
+ * ----------------------------------------------------------------------- */
 static void
-reportCtrResid(FILE *fp)
+nextEra( void )
 {
-    prof_ctr *ctr;
-    
-    for (ctr = all_ctrs; ctr != NULL; ctr = ctr->next) {
-       if (ctr->mem_resid != 0) {
-           fprintf(fp,"   %s %ld\n", ctr->str, ctr->mem_resid * sizeof(W_));
+#ifdef PROFILING
+    if (doingLDVProfiling()) { 
+       era++;
+
+       if (era == max_era) {
+           barf("maximum number of censuses reached; use +RTS -i to reduce");
+       }
+       
+       if (era == n_censuses) {
+           n_censuses *= 2;
+           censuses = stgReallocBytes(censuses, sizeof(Census) * n_censuses,
+                                      "nextEra");
        }
     }
+#endif // PROFILING
+       
+    initEra();
 }
-#endif /* PROFILING */
 
 /* -------------------------------------------------------------------------- */
 
@@ -175,6 +203,27 @@ initHeapProfiling(void)
         return 0;
     }
 
+    // we only count eras if we're doing LDV profiling.  Otherwise era
+    // is fixed at zero.
+#ifdef PROFILING
+    if (doingLDVProfiling()) {
+       era = 1;
+    } else
+#endif
+    {
+       era = 0;
+    }
+
+    {   // max_era = 2^LDV_SHIFT
+       nat p;
+       max_era = 1;
+       for (p = 0; p < LDV_SHIFT; p++)
+           max_era *= 2;
+    }
+
+    n_censuses = 32;
+    censuses = stgMallocBytes(sizeof(Census) * n_censuses, "initHeapProfiling");
+
     fprintf(hp_file, "JOB \"%s", prog_argv[0]);
 
 #ifdef PROFILING
@@ -201,11 +250,12 @@ initHeapProfiling(void)
 
 #ifdef DEBUG_HEAP_PROF
     DEBUG_LoadSymbols(prog_argv[0]);
-    initSymbolHash();
 #endif
 
 #ifdef PROFILING
-    initHashTables();
+    if (doingRetainerProfiling()) {
+       initRetainerProfiling();
+    }
 #endif
 
     return 0;
@@ -221,120 +271,67 @@ endHeapProfiling(void)
     }
 
 #ifdef PROFILING
-    switch (RtsFlags.ProfFlags.doHeapProfile) {
-    case HEAP_BY_RETAINER:
+    if (doingRetainerProfiling()) {
        endRetainerProfiling();
-       break;
-    case HEAP_BY_LDV:
-       endLdvProfiling();
-       break;
     }
 #endif
 
-    seconds = mut_user_time();
-    fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", seconds);
-    fprintf(hp_file, "END_SAMPLE %0.2f\n", seconds);
-    fclose(hp_file);
-}
-
-#ifdef DEBUG_HEAP_PROF
-/* -----------------------------------------------------------------------------
-   Hash table for symbols.
-   -------------------------------------------------------------------------- */
-
-typedef struct {
-    const char *name;
-    void *ptr;
-    nat data;
-} symbol_info;
-
-#define SYMBOL_HASH_SIZE 0x3fff
-
-symbol_info symbol_hash[SYMBOL_HASH_SIZE];
-
-static inline nat
-hash(void *ptr)
-{
-    return ((W_)ptr)>>4 & 0x3fff;
-}
-
-static void
-initSymbolHash(void)
-{
-    nat i;
-
-    for (i=0; i < SYMBOL_HASH_SIZE; i++) {
-       symbol_hash[i].ptr = NULL;
-    }
-}
-
-static nat
-lookup_symbol(void *addr)
-{
-    nat orig_bucket = hash(addr);
-    nat bucket;
+#ifdef PROFILING
+  // Note: 
+  //   We do not need to perform a major garbage collection because all the
+  //   closures created since the last census will not affect the profiling
+  //   statistics anyhow.
+  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) 
+    LdvCensusKillAll();
+#endif
 
-    bucket = orig_bucket;
-    while (bucket < SYMBOL_HASH_SIZE && symbol_hash[bucket].ptr != NULL) {
-       if (symbol_hash[bucket].ptr == addr) {
-           return bucket;
-       }
-       bucket++;
-    }
-    if (bucket == SYMBOL_HASH_SIZE) {
-       bucket = 0;
-       while (bucket < orig_bucket && symbol_hash[bucket].ptr != NULL) {
-           if (symbol_hash[bucket].ptr == addr) {
-               return bucket;
-           }
-           bucket++;
-       }
-       if (bucket == orig_bucket) {
-           barf("out of symbol table space");
+#ifdef PROFILING
+    // At last... we can output the census info for LDV profiling
+    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+       nat t;
+       int sumVoidNew, sumDragNew;
+
+       // Now we compute void_total and drag_total for each census
+       sumVoidNew = 0;
+       sumDragNew = 0;
+       for (t = 1; t < era; t++) { // note: start at 1, not 0
+           sumVoidNew += censuses[t].void_total;
+           sumDragNew += censuses[t].drag_total;
+           censuses[t].void_total = sumVoidNew;
+           censuses[t].drag_total = sumDragNew;
+           ASSERT( censuses[t].void_total < censuses[t].not_used );
+           ASSERT( censuses[t].drag_total < censuses[t].used );
        }
-    }
-    
-    symbol_hash[bucket].ptr  = addr;
-    lookupGHCName(addr,&symbol_hash[bucket].name);
-    symbol_hash[bucket].data = 0;
-    return bucket;
-}
-
-static void
-clear_table_data(void)
-{
-    nat i;
-
-    for (i = 0; i < SYMBOL_HASH_SIZE; i++) {
-       symbol_hash[i].data = 0;
-    }
-}
-
-static void
-fprint_data(FILE *fp)
-{
-    nat i;
-    for (i = 0; i < SYMBOL_HASH_SIZE; i++) {
-       if (symbol_hash[i].data > 0) {
-           fprintf(fp, "   %s %lu\n", symbol_hash[i].name, (unsigned long)symbol_hash[i].data);
+       
+       for (t = 1; t < era; t++) { // note: start at 1, not 0
+           fprintf(hp_file, "MARK %f\n", censuses[t].time);
+           fprintf(hp_file, "BEGIN_SAMPLE %f\n", censuses[t].time);
+           fprintf(hp_file, "VOID\t%u\n", censuses[t].void_total * sizeof(W_));
+           fprintf(hp_file, "LAG\t%u\n", 
+                   (censuses[t].not_used - censuses[t].void_total) * sizeof(W_));
+           fprintf(hp_file, "USE\t%u\n", 
+                   (censuses[t].used - censuses[t].drag_total) * sizeof(W_));
+           fprintf(hp_file, "INHERENT_USE\t%u\n", 
+                   censuses[t].prim * sizeof(W_));
+           fprintf(hp_file, "DRAG\t%u\n", censuses[t].drag_total * sizeof(W_));
+           fprintf(hp_file, "END_SAMPLE %f\n", censuses[t].time);
        }
     }
-}
+#endif
 
-static inline void
-add_data(void *addr, nat data)
-{
-    symbol_hash[lookup_symbol(addr)].data += data;
+    seconds = mut_user_time();
+    fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", seconds);
+    fprintf(hp_file, "END_SAMPLE %0.2f\n", seconds);
+    fclose(hp_file);
 }
 
+#ifdef DEBUG_HEAP_PROF
 /* -----------------------------------------------------------------------------
    Closure Type Profiling;
 
    PROBABLY TOTALLY OUT OF DATE -- ToDo (SDM)
    -------------------------------------------------------------------------- */
 
-static nat closure_types[N_CLOSURE_TYPES];
-
 static char *type_names[] = {
       "INVALID_OBJECT"
     , "CONSTR"
@@ -393,37 +390,11 @@ static char *type_names[] = {
     , "EVACUATED"
 };
 
-static void 
-fprint_closure_types(FILE *fp)
-{
-  nat i;
-
-  for (i = 0; i < N_CLOSURE_TYPES; i++) {
-    if (closure_types[i]) {
-      fprintf(fp, "   %s %lu\n", type_names[i], (unsigned long)closure_types[i]);
-    }
-  }
-}
-
 #endif /* DEBUG_HEAP_PROF */
 
 
 #ifdef PROFILING
 static void
-clearCCSResid(CostCentreStack *ccs)
-{
-  IndexTable *i;
-
-  ccs->mem_resid = 0;
-
-  for (i = ccs->indexTable; i != 0; i = i->next) {
-    if (!i->back_edge) {
-      clearCCSResid(i->ccs);
-    }
-  }
-}
-
-static void
 fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
 {
     char buf[max_length+1];
@@ -468,25 +439,6 @@ fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
     fprintf(fp, "%s", buf);
 }
 
-static void
-reportCCSResid(FILE *fp, CostCentreStack *ccs)
-{
-  IndexTable *i;
-
-  if (ccs->mem_resid != 0) {
-    fprintf(fp,"   ");
-    // print as much of the CCS as possible in 20 chars, ending with "..."
-    fprint_ccs(fp,ccs,30);
-    fprintf(fp," %ld\n", ccs->mem_resid * sizeof(W_));
-  }
-
-  for (i = ccs->indexTable; i != 0; i = i->next) {
-    if (!i->back_edge) {
-      reportCCSResid(fp,i->ccs);
-    }
-  }
-}
-
 static rtsBool
 str_matches_selector( char* str, char* sel )
 {
@@ -537,32 +489,113 @@ closureSatisfiesConstraints( StgClosure* p )
                                 RtsFlags.ProfFlags.ccSelector );
        if (!b) return rtsFalse;
    }
+   if (RtsFlags.ProfFlags.retainerSelector) {
+       RetainerSet *rs;
+       nat i;
+       rs = retainerSetOf((StgClosure *)p);
+       if (rs != NULL) {
+          for (i = 0; i < rs->num; i++) {
+              b = str_matches_selector( rs->element[i]->cc->label,
+                                        RtsFlags.ProfFlags.retainerSelector );
+              if (b) return rtsTrue;
+          }
+       }
+       return rtsFalse;
+   }
    return rtsTrue;
 }
 #endif /* PROFILING */
 
 /* -----------------------------------------------------------------------------
+ * Print out the results of a heap census.
+ * -------------------------------------------------------------------------- */
+static void
+dumpCensus( Census *census )
+{
+    counter *ctr;
+
+#ifdef PROFILING
+    // We can't generate any info for LDV profiling until
+    // the end of the run...
+    if (doingLDVProfiling()) { return; }
+#endif
+
+    fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", census->time);
+
+    for (ctr = census->ctrs; ctr != NULL; ctr = ctr->next) {
+
+#ifdef DEBUG_HEAP_PROF
+       switch (RtsFlags.ProfFlags.doHeapProfile) {
+       case HEAP_BY_INFOPTR:
+           fprint_data(hp_file);
+           break;
+       case HEAP_BY_CLOSURE_TYPE:
+           fprint_closure_types(hp_file);
+           break;
+       }
+#endif
+       
+#ifdef PROFILING
+       switch (RtsFlags.ProfFlags.doHeapProfile) {
+       case HEAP_BY_CCS:
+           fprint_ccs(hp_file, (CostCentreStack *)ctr->identity, 30);
+           break;
+       case HEAP_BY_MOD:
+       case HEAP_BY_DESCR:
+       case HEAP_BY_TYPE:
+           fprintf(hp_file, "%s", (char *)ctr->identity);
+           break;
+       case HEAP_BY_RETAINER:
+       {
+           RetainerSet *rs = (RetainerSet *)ctr->identity;
+
+           // Mark this retainer set by negating its id, because it
+           // has appeared in at least one census.  We print the
+           // values of all such retainer sets into the log file at
+           // the end.  A retainer set may exist but not feature in
+           // any censuses if it arose as the intermediate retainer
+           // set for some closure during retainer set calculation.
+           if (rs->id > 0)
+               rs->id = -(rs->id);
+
+           // report in the unit of bytes: * sizeof(StgWord)
+           printRetainerSetShort(hp_file, rs);
+           break;
+       }
+       default:
+           barf("dumpCensus; doHeapProfile");
+       }
+#endif
+
+       fprintf(hp_file, "\t%d\n", ctr->c.resid * sizeof(W_));
+    }
+
+    fprintf(hp_file, "END_SAMPLE %0.2f\n", census->time);
+}
+
+/* -----------------------------------------------------------------------------
  * Code to perform a heap census.
  * -------------------------------------------------------------------------- */
 static void
-heapCensusChain( bdescr *bd )
+heapCensusChain( Census *census, bdescr *bd )
 {
     StgPtr p;
     StgInfoTable *info;
+    void *identity;
     nat size;
-#ifdef PROFILING
+    counter *ctr;
     nat real_size;
-#endif
+    rtsBool prim;
 
     for (; bd != NULL; bd = bd->link) {
        p = bd->start;
        while (p < bd->free) {
            info = get_itbl((StgClosure *)p);
+           prim = rtsFalse;
            
            switch (info->type) {
 
            case CONSTR:
-           case BCO:
            case FUN:
            case THUNK:
            case IND_PERM:
@@ -572,12 +605,6 @@ heapCensusChain( bdescr *bd )
            case SE_BLACKHOLE:
            case BLACKHOLE:
            case BLACKHOLE_BQ:
-           case WEAK:
-           case FOREIGN:
-           case STABLE_NAME:
-           case MVAR:
-           case MUT_VAR:
-           case MUT_CONS:
            case CONSTR_INTLIKE:
            case CONSTR_CHARLIKE:
            case FUN_1_0:
@@ -596,6 +623,17 @@ heapCensusChain( bdescr *bd )
                size = sizeW_fromITBL(info);
                break;
                
+           case BCO:
+           case MVAR:
+           case WEAK:
+           case FOREIGN:
+           case STABLE_NAME:
+           case MUT_VAR:
+           case MUT_CONS:
+               prim = rtsTrue;
+               size = sizeW_fromITBL(info);
+               break;
+
            case THUNK_1_0:             /* ToDo - shouldn't be here */
            case THUNK_0_1:             /* "  ditto  " */
            case THUNK_SELECTOR:
@@ -608,15 +646,18 @@ heapCensusChain( bdescr *bd )
                break;
                
            case ARR_WORDS:
+               prim = rtsTrue;
                size = arr_words_sizeW(stgCast(StgArrWords*,p));
                break;
                
            case MUT_ARR_PTRS:
            case MUT_ARR_PTRS_FROZEN:
+               prim = rtsTrue;
                size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
                break;
                
            case TSO:
+               prim = rtsTrue;
                size = tso_sizeW((StgTSO *)p);
                break;
                
@@ -624,14 +665,19 @@ heapCensusChain( bdescr *bd )
                barf("heapCensus");
            }
            
+           identity = NULL;
+
 #ifdef DEBUG_HEAP_PROF
+           real_size = size;
            switch (RtsFlags.ProfFlags.doHeapProfile) {
            case HEAP_BY_INFOPTR:
-               add_data((void *)(*p), size * sizeof(W_));
+               identity = (void *)((StgClosure *)p)->header.info; 
                break;
            case HEAP_BY_CLOSURE_TYPE:
-               closure_types[info->type] += size * sizeof(W_);
+               identity = type_names[info->type];
                break;
+           default:
+               barf("heapCensus; doHeapProfile");
            }
 #endif
            
@@ -642,25 +688,51 @@ heapCensusChain( bdescr *bd )
            if (closureSatisfiesConstraints((StgClosure*)p)) {
                switch (RtsFlags.ProfFlags.doHeapProfile) {
                case HEAP_BY_CCS:
-                   ((StgClosure *)p)->header.prof.ccs->mem_resid += real_size;
+                   identity = ((StgClosure *)p)->header.prof.ccs;
                    break;
                case HEAP_BY_MOD:
-                   strToCtr(((StgClosure *)p)->header.prof.ccs->cc->module)
-                       ->mem_resid += real_size;
+                   identity = ((StgClosure *)p)->header.prof.ccs->cc->module;
                    break;
                case HEAP_BY_DESCR:
-                   strToCtr(get_itbl(((StgClosure *)p))->prof.closure_desc)->mem_resid 
-                       += real_size;
+                   identity = (get_itbl((StgClosure *)p))->prof.closure_desc;
                    break;
                case HEAP_BY_TYPE:
-                   strToCtr(get_itbl(((StgClosure *)p))->prof.closure_type)->mem_resid
-                       += real_size;
+                   identity = (get_itbl((StgClosure *)p))->prof.closure_type;
+                   break;
+               case HEAP_BY_RETAINER:
+                   identity = retainerSetOf((StgClosure *)p);
                    break;
+               case HEAP_BY_LDV:
+                   if (prim)
+                       census->prim += real_size;
+                   else if ((LDVW(p) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+                       census->not_used += real_size;
+                   else
+                       census->used += real_size;
+                   // NOTE: don't break here.  We're not using the
+                   // hash table.
+                   p += size;
+                   continue;
                default:
                    barf("heapCensus; doHeapProfile");
                }
            }
 #endif
+
+           if (identity != NULL) {
+               ctr = lookupHashTable( census->hash, (StgWord)identity );
+               if (ctr != NULL) {
+                   ctr->c.resid += real_size;
+               } else {
+                   ctr = arenaAlloc( census->arena, sizeof(counter) );
+                   insertHashTable( census->hash, (StgWord)identity, ctr );
+                   ctr->c.resid = real_size;
+                   ctr->identity = identity;
+                   ctr->next = census->ctrs;
+                   census->ctrs = ctr;
+               }
+           }
+
            p += size;
        }
     }
@@ -669,83 +741,50 @@ heapCensusChain( bdescr *bd )
 void
 heapCensus( void )
 {
-  StgDouble time;
   nat g, s;
-    
-#ifdef DEBUG_HEAP_PROF
-  switch (RtsFlags.ProfFlags.doHeapProfile) {
-  case HEAP_BY_INFOPTR:
-    clear_table_data();
-    break;
-  case HEAP_BY_CLOSURE_TYPE:
-#if 0
-#   error fix me      
-    memset(closure_types, 0, N_CLOSURE_TYPES * sizeof(nat));
-#endif
-    break;
-  default:
-    return;
-  }
-#endif
+  Census *census;
+
+  stat_startHeapCensus();
 
+  census = &censuses[era];
+  census->time  = mut_user_time();
+  census->hash  = allocHashTable();
+  census->ctrs  = NULL;
+  census->arena = newArena();
+    
+  // calculate retainer sets if necessary
 #ifdef PROFILING
-  switch (RtsFlags.ProfFlags.doHeapProfile) {
-  case NO_HEAP_PROFILING:
-      return;
-  case HEAP_BY_CCS:
-      /* zero all the residency counters */
-      clearCCSResid(CCS_MAIN);
-      break;
-  case HEAP_BY_MOD:
-  case HEAP_BY_DESCR:
-  case HEAP_BY_TYPE:
-      clearCtrResid();
-      break;
-  default:
-      barf("heapCensus; doHeapProfile");
+  if (doingRetainerProfiling()) {
+      retainerProfile();
   }
 #endif
 
-  time = mut_user_time_during_GC();  
-  fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", time);
-
+  // traverse the heap, collecting the census info
+  heapCensusChain( census, small_alloc_list );
   if (RtsFlags.GcFlags.generations == 1) {
-      heapCensusChain( g0s0->to_blocks );
+      heapCensusChain( census, g0s0->to_blocks );
   } else {
       for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
          for (s = 0; s < generations[g].n_steps; s++) {
-             heapCensusChain( generations[g].steps[s].blocks );
+             heapCensusChain( census, generations[g].steps[s].blocks );
+             // Are we interested in large objects?  might be
+             // confusing to include the stack in a heap profile.
+             // heapCensusChain( census, generations[g].steps[s].large_objects );
          }
       }
   }
 
-#ifdef DEBUG_HEAP_PROF
-  switch (RtsFlags.ProfFlags.doHeapProfile) {
-  case HEAP_BY_INFOPTR:
-      fprint_data(hp_file);
-      break;
-  case HEAP_BY_CLOSURE_TYPE:
-      fprint_closure_types(hp_file);
-      break;
-  }
-#endif
-    
-#ifdef PROFILING
-  switch (RtsFlags.ProfFlags.doHeapProfile) {
-  case HEAP_BY_CCS:
-      reportCCSResid(hp_file,CCS_MAIN);
-      break;
-  case HEAP_BY_MOD:
-  case HEAP_BY_DESCR:
-  case HEAP_BY_TYPE:
-      reportCtrResid(hp_file);
-      break;
-  default:
-      barf("heapCensus; doHeapProfile");
-  }
-#endif
+  // dump out the census info
+  dumpCensus( census );
+
+  // free our storage
+  freeHashTable(census->hash, NULL/* don't free the elements */);
+  arenaFree(census->arena);
+
+  // we're into the next time period now
+  nextEra();
 
-  fprintf(hp_file, "END_SAMPLE %0.2f\n", time);
+  stat_endHeapCensus();
 }    
 
 #endif /* PROFILING || DEBUG_HEAP_PROF */