[project @ 2001-11-22 14:25:11 by simonmar]
author     simonmar <unknown>
           Thu, 22 Nov 2001 14:25:13 +0000 (14:25 +0000)
committer  simonmar <unknown>
           Thu, 22 Nov 2001 14:25:13 +0000 (14:25 +0000)
Retainer Profiling / Lag-drag-void profiling.

This is mostly work by Sungwoo Park, who spent a summer internship at
MSR Cambridge this year implementing these two types of heap profiling
in GHC.

Relative to Sungwoo's original work, I've made some improvements to
the code:

   - it's now possible to apply constraints to retainer and LDV profiles
     in the same way as we do for other types of heap profile (e.g.
     +RTS -hc{foo,bar} -hR -RTS gives you a retainer profile that considers
     only closures with cost centres 'foo' and 'bar'); see the examples
     just after this list.

   - the heap-profile timer implementation is cleaned up.

   - heap profiling no longer has to be run in a two-space heap.

   - general cleanup of the code and application of the SDM C coding
     style guidelines.
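
   For example (the program name below is illustrative; the program must
   be built with profiling support):

      ./myprog +RTS -hR -RTS                # retainer profile
      ./myprog +RTS -hc{foo,bar} -hR -RTS   # retainer profile restricted to
                                            # cost centres 'foo' and 'bar'
      ./myprog +RTS -hL -RTS                # lag/drag/void (LDV) profile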

Profiling will be a little slower and require more space than before,
mainly because closures have an extra header word to support either
retainer profiling or LDV profiling (you can't do both at the same
time).

We've used the new profiling tools on GHC itself, with moderate
success.  Fixes for some space leaks in GHC to follow...

39 files changed:
ghc/includes/ClosureMacros.h
ghc/includes/Closures.h
ghc/includes/Stg.h
ghc/includes/StgLdvProf.h [new file with mode: 0644]
ghc/includes/StgMacros.h
ghc/includes/StgProf.h
ghc/includes/StgRetainerProf.h [new file with mode: 0644]
ghc/includes/Updates.h
ghc/rts/Exception.hc
ghc/rts/GC.c
ghc/rts/HeapStackCheck.hc
ghc/rts/Itimer.c
ghc/rts/Itimer.h
ghc/rts/LdvProfile.c [new file with mode: 0644]
ghc/rts/LdvProfile.h [new file with mode: 0644]
ghc/rts/PrimOps.hc
ghc/rts/ProfHeap.c
ghc/rts/ProfHeap.h
ghc/rts/Profiling.c
ghc/rts/Profiling.h
ghc/rts/Proftimer.c
ghc/rts/Proftimer.h
ghc/rts/RetainerProfile.c [new file with mode: 0644]
ghc/rts/RetainerProfile.h [new file with mode: 0644]
ghc/rts/RetainerSet.c [new file with mode: 0644]
ghc/rts/RetainerSet.h [new file with mode: 0644]
ghc/rts/RtsFlags.c
ghc/rts/RtsStartup.c
ghc/rts/Schedule.c
ghc/rts/Schedule.h
ghc/rts/Stats.c
ghc/rts/Stats.h
ghc/rts/StgMiscClosures.hc
ghc/rts/StgStartup.hc
ghc/rts/StgStdThunks.hc
ghc/rts/Storage.c
ghc/rts/Storage.h
ghc/rts/Updates.hc
ghc/rts/Weak.c

index 0690981..70470e9 100644 (file)
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
- * $Id: ClosureMacros.h,v 1.32 2001/02/06 11:41:04 rrt Exp $
+ * $Id: ClosureMacros.h,v 1.33 2001/11/22 14:25:11 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -79,8 +79,39 @@ static __inline__ StgFunPtr get_entry(const StgInfoTable *itbl) {
    -------------------------------------------------------------------------- */
 
 #ifdef PROFILING
-#define SET_PROF_HDR(c,ccs_)           (c)->header.prof.ccs = ccs_
-#define SET_STATIC_PROF_HDR(ccs_)      prof : { ccs : ccs_ },
+#ifdef DEBUG_RETAINER
+/* 
+  For the sake of debugging, we take the safest approach for the moment. This
+  is also useful for checking the sanity of the heap before retainer profiling begins.
+  flip is defined in RetainerProfile.c, and declared as extern in RetainerProfile.h.
+  Note: the functions that build Haskell objects from C datatypes, i.e.,
+  all rts_mk???() functions in RtsAPI.c, must be changed as well.
+ */
+extern StgWord flip;
+#define SET_PROF_HDR(c,ccs_)            \
+        ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = (retainerSet *)((StgWord)NULL | flip))
+#else
+/*
+  For retainer profiling only: we do not have to set (c)->header.prof.hp.rs to
+  NULL | flip (flip is defined in RetainerProfile.c) because even when flip
+  is 1, rs is invalid and will be initialized to NULL | flip later when 
+  the closure *c is visited.
+ */
+/*
+#define SET_PROF_HDR(c,ccs_)            \
+        ((c)->header.prof.ccs = ccs_, (c)->header.prof.hp.rs = NULL)
+ */
+/*
+  The following macro works for both retainer profiling and LDV profiling:
+  for retainer profiling, ldvTime remains 0, so rs fields are initialized to 0.
+  See the invariants on ldvTime.
+ */
+#define SET_PROF_HDR(c,ccs_)            \
+        ((c)->header.prof.ccs = ccs_,   \
+        LDV_recordCreate((c)))
+#endif  // DEBUG_RETAINER
+#define SET_STATIC_PROF_HDR(ccs_)       \
+        prof : { ccs : ccs_, hp : { rs : NULL } },
 #else
 #define SET_PROF_HDR(c,ccs)
 #define SET_STATIC_PROF_HDR(ccs)
@@ -109,6 +140,7 @@ static __inline__ StgFunPtr get_entry(const StgInfoTable *itbl) {
 #define SET_TICKY_HDR(c,stuff)
 #define SET_STATIC_TICKY_HDR(stuff)
 #endif
+
 #define SET_HDR(c,info,ccs)                            \
    {                                                   \
        SET_INFO(c,info);                               \
index 5f0b570..0f413b5 100644 (file)
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
- * $Id: Closures.h,v 1.28 2001/10/03 13:57:42 simonmar Exp $
+ * $Id: Closures.h,v 1.29 2001/11/22 14:25:11 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
    -------------------------------------------------------------------------- */
 
 typedef struct {
-   CostCentreStack *ccs;
+  CostCentreStack *ccs;
+  union {
+    RetainerSet *rs;          // Retainer Set
+    StgWord ldvw;             // Lag/Drag/Void Word
+  } hp;
 } StgProfHeader;
 
 /* -----------------------------------------------------------------------------
index 9fc9c77..9d302a7 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Stg.h,v 1.39 2001/10/27 21:44:54 sof Exp $
+ * $Id: Stg.h,v 1.40 2001/11/22 14:25:11 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -150,6 +150,8 @@ typedef StgWord64       LW_;
 
 /* Profiling information */
 #include "StgProf.h"
+#include "StgRetainerProf.h"
+#include "StgLdvProf.h"
 
 /* Storage format definitions */
 #include "Closures.h"
diff --git a/ghc/includes/StgLdvProf.h b/ghc/includes/StgLdvProf.h
new file mode 100644 (file)
index 0000000..7ece731
--- /dev/null
@@ -0,0 +1,132 @@
+/* -----------------------------------------------------------------------------
+ * $Id: StgLdvProf.h,v 1.1 2001/11/22 14:25:11 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef STGLDVPROF_H
+#define STGLDVPROF_H
+
+#ifdef PROFILING
+
+// Engine
+
+// declared in LdvProfile.c
+extern nat ldvTime;
+
+// LdvGenInfo stores the statistics for one specific census. 
+typedef struct {
+  double time;    // the MUT time at which the corresponding census is made
+
+  // We employ int instead of nat, for some values may be negative temporarily,
+  // e.g., dragNew.
+
+  // computed at each census
+  int inherentlyUsed;   // total size of 'inherently used' closures
+  int notUsed;          // total size of 'never used' closures
+  int used;             // total size of 'used at least once' closures
+
+  /*
+    voidNew and dragNew are updated when a closure is destroyed.
+    For instance, when a 'never used' closure of size s and creation time 
+    t is destroyed at time u, voidNew of eras t through u - 1 is increased
+    by s. 
+    Likewise, when a 'used at least once' closure of size s and last use time
+    t is destroyed at time u, dragNew of eras t + 1 through u - 1 is increased
+    by s.
+    In our implementation, voidNew and dragNew are computed indirectly: instead
+    of updating voidNew or dragNew of all intervening eras, we update that
+    of the end two eras (one is increased and the other is decreased). 
+   */
+  int voidNew;  // current total size of 'destroyed without being used' closures
+  int dragNew;  // current total size of 'used at least once and waiting to die'
+                // closures
+
+  // computed post-mortem
+  int voidTotal;  // total size of closures in 'void' state
+  // lagTotal == notUsed - voidTotal    // in 'lag' state
+  int dragTotal;  // total size of closures in 'drag' state 
+  // useTotal == used - dragTotal       // in 'use' state
+} LdvGenInfo;
+
+extern LdvGenInfo *gi;
+
+// retrieves the LDV word from closure c
+#define LDVW(c)                 (((StgClosure *)(c))->header.prof.hp.ldvw)
+
+/*
+  An LDV word is divided into 3 parts: state bits (LDV_STATE_MASK), creation 
+  time bits (LDV_CREATE_MASK), and last use time bits (LDV_LAST_MASK). 
+ */
+#if SIZEOF_VOID_P == 8
+#define LDV_SHIFT               30
+#define LDV_STATE_MASK          0x1000000000000000
+#define LDV_CREATE_MASK         0x0FFFFFFFC0000000
+#define LDV_LAST_MASK           0x000000003FFFFFFF
+#define LDV_STATE_CREATE        0x0000000000000000
+#define LDV_STATE_USE           0x1000000000000000
+#else
+#define LDV_SHIFT               15
+#define LDV_STATE_MASK          0x40000000 
+#define LDV_CREATE_MASK         0x3FFF8000
+#define LDV_LAST_MASK           0x00007FFF
+#define LDV_STATE_CREATE        0x00000000
+#define LDV_STATE_USE           0x40000000
+#endif  // SIZEOF_VOID_P
+
+// Stores the creation time for closure c. 
+// This macro is called at the very moment of closure creation.
+//
+// NOTE: this initializes LDVW(c) to zero, which ensures that there
+// is no conflict between retainer profiling and LDV profiling,
+// because retainer profiling also expects LDVW(c) to be initialised
+// to zero.
+#define LDV_recordCreate(c)   \
+  LDVW((c)) = (ldvTime << LDV_SHIFT) | LDV_STATE_CREATE
+
+// Stores the last use time for closure c.
+// This macro *must* be called whenever a closure is used, that is, it is 
+// entered.
+#define LDV_recordUse(c)                                \
+  {                                                     \
+    if (ldvTime > 0)                                    \
+      LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |       \
+                  ldvTime |                             \
+                  LDV_STATE_USE;                        \
+  }
+
+// Creates a 0-filled slop of size 'howManyBackwards' backwards from the
+// address 'from'. 
+//
+// Invoked when: 
+//   1) Hp is incremented and exceeds HpLim (in Updates.hc).
+//   2) copypart() is called (in GC.c).
+#define FILL_SLOP(from, howManyBackwards)    \
+  if (ldvTime > 0) {                                    \
+    int i;                                              \
+    for (i = 0;i < (howManyBackwards); i++)             \
+      ((StgWord *)(from))[-i] = 0;                      \
+  }
+
+// Informs the LDV profiler that closure c has just been evacuated.
+// Evacuated objects are no longer needed, so we just store the original size in
+// the LDV field.
+#define SET_EVACUAEE_FOR_LDV(c, size)   \
+    LDVW((c)) = (size)
+
+// Macros called when a closure is entered. 
+// The closure is not an 'inherently used' one.
+// The closure is not IND or IND_OLDGEN because neither is considered for LDV
+// profiling.
+#define LDV_ENTER(c)            LDV_recordUse((c))
+
+#else  // !PROFILING
+
+#define LDV_ENTER(c)            
+
+#endif // PROFILING
+#endif // STGLDVPROF_H
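
As an illustration (this program is not part of the patch, and the era numbers
are invented), the following standalone C snippet mirrors what
LDV_recordCreate() and LDV_recordUse() above do to the 32-bit LDV word:
creation era in bits 15-29, last-use era in bits 0-14, state in bit 30.

    #include <stdio.h>

    /* 32-bit layout from StgLdvProf.h above */
    #define LDV_SHIFT        15
    #define LDV_STATE_MASK   0x40000000u
    #define LDV_CREATE_MASK  0x3FFF8000u
    #define LDV_LAST_MASK    0x00007FFFu
    #define LDV_STATE_CREATE 0x00000000u
    #define LDV_STATE_USE    0x40000000u

    int main(void)
    {
        /* closure created in era 3: mirrors LDV_recordCreate() */
        unsigned int ldvw = (3u << LDV_SHIFT) | LDV_STATE_CREATE;

        /* closure entered in era 7: mirrors LDV_recordUse() */
        ldvw = (ldvw & LDV_CREATE_MASK) | 7u | LDV_STATE_USE;

        printf("creation era: %u\n", (ldvw & LDV_CREATE_MASK) >> LDV_SHIFT); /* 3 */
        printf("last use era: %u\n", ldvw & LDV_LAST_MASK);                  /* 7 */
        printf("state == USE: %d\n", (ldvw & LDV_STATE_MASK) == LDV_STATE_USE); /* 1 */
        return 0;
    }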
index da3a425..6bd5887 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: StgMacros.h,v 1.41 2001/11/08 16:37:54 simonmar Exp $
+ * $Id: StgMacros.h,v 1.42 2001/11/22 14:25:11 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -144,7 +144,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
            tag_assts                                           \
            (r) = (P_)ret;                                      \
            JMP_(stg_chk_##layout);                             \
-       }
+       }                                                       
 
 #define HP_STK_CHK(stk_headroom,hp_headroom,ret,r,layout,tag_assts) \
         DO_GRAN_ALLOCATE(hp_headroom)                              \
@@ -153,7 +153,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
            tag_assts                                           \
            (r) = (P_)ret;                                      \
            JMP_(stg_chk_##layout);                             \
-       }
+       }                                                       
 
 /* -----------------------------------------------------------------------------
    A Heap Check in a case alternative are much simpler: everything is
@@ -186,7 +186,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
             HpAlloc = (headroom);                              \
             tag_assts                                          \
            JMP_(stg_gc_enter_##ptrs);                          \
-       }
+       }                                                       
 
 #define HP_CHK_SEQ_NP(headroom,ptrs,tag_assts)                 \
         DO_GRAN_ALLOCATE(headroom)                              \
@@ -194,7 +194,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
             HpAlloc = (headroom);                              \
             tag_assts                                          \
            JMP_(stg_gc_seq_##ptrs);                            \
-       }
+       }                                                       
 
 #define HP_STK_CHK_NP(stk_headroom, hp_headroom, ptrs, tag_assts) \
         DO_GRAN_ALLOCATE(hp_headroom)                              \
@@ -202,7 +202,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
             HpAlloc = (hp_headroom);                           \
             tag_assts                                          \
            JMP_(stg_gc_enter_##ptrs);                          \
-       }
+       }                                                       
 
 
 /* Heap checks for branches of a primitive case / unboxed tuple return */
@@ -214,7 +214,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
             HpAlloc = (headroom);                              \
             tag_assts                                          \
            JMP_(lbl);                                          \
-       }
+       }                                                       
 
 #define HP_CHK_NOREGS(headroom,tag_assts) \
     GEN_HP_CHK_ALT(headroom,stg_gc_noregs,tag_assts);
@@ -298,7 +298,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }
        R9.w = (W_)LIVENESS_MASK(liveness);             \
         R10.w = (W_)reentry;                           \
         JMP_(stg_gen_chk);                             \
-   }
+    }                                                       
 
 #define HP_CHK_GEN_TICKY(headroom,liveness,reentry,tag_assts)  \
    HP_CHK_GEN(headroom,liveness,reentry,tag_assts);            \
@@ -435,12 +435,29 @@ EXTINFO_RTS(stg_gen_chk_info);
        }                                                       \
         SET_INFO(R1.cl,&stg_BLACKHOLE_info)
 #  else
+#   ifndef PROFILING
 #    define UPD_BH_UPDATABLE(info)             \
         TICK_UPD_BH_UPDATABLE();               \
         SET_INFO(R1.cl,&stg_BLACKHOLE_info)
 #    define UPD_BH_SINGLE_ENTRY(info)          \
         TICK_UPD_BH_SINGLE_ENTRY();            \
         SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info)
+#   else
+// An object is replaced by a blackhole, so we fill the slop with zeros.
+// 
+// Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+// 
+#    define UPD_BH_UPDATABLE(info)             \
+        TICK_UPD_BH_UPDATABLE();               \
+        LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl);               \
+        SET_INFO(R1.cl,&stg_BLACKHOLE_info);    \
+        LDV_recordCreate(R1.cl)
+#    define UPD_BH_SINGLE_ENTRY(info)          \
+        TICK_UPD_BH_SINGLE_ENTRY();            \
+        LDV_recordDead_FILL_SLOP_DYNAMIC(R1.cl);               \
+        SET_INFO(R1.cl,&stg_SE_BLACKHOLE_info); \
+        LDV_recordCreate(R1.cl)
+#   endif /* PROFILING */
 #  endif
 #else /* !EAGER_BLACKHOLING */
 #  define UPD_BH_UPDATABLE(thunk)    /* nothing */
index 2c89d94..825c846 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: StgProf.h,v 1.13 2001/10/18 13:46:47 simonmar Exp $
+ * $Id: StgProf.h,v 1.14 2001/11/22 14:25:11 simonmar Exp $
  *
  * (c) The GHC Team, 1998
  *
@@ -349,9 +349,6 @@ extern CostCentreStack *CCS_LIST;         /* registered CCS list */
 #define ENTER_CCS_PAP_CL(closure)  \
         ENTER_CCS_PAP((closure)->header.prof.ccs)
 
- /* temp EW */
-#define STATIC_CCS_REF(ccs) (ccs)
-
 /* -----------------------------------------------------------------------------
    When not profiling, these macros do nothing...
    -------------------------------------------------------------------------- */
diff --git a/ghc/includes/StgRetainerProf.h b/ghc/includes/StgRetainerProf.h
new file mode 100644 (file)
index 0000000..2b77772
--- /dev/null
@@ -0,0 +1,75 @@
+/* -----------------------------------------------------------------------------
+ * $Id: StgRetainerProf.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ *
+ * Retainer profiling
+ * ---------------------------------------------------------------------------*/
+
+#ifndef STGRETAINERPROF_H
+#define STGRETAINERPROF_H
+
+/*
+  Type 'retainer' defines the retainer identity.
+
+  Invariant:
+    1. The retainer identity of a given retainer cannot change during 
+    program execution, no matter where it is actually stored.
+    For instance, the memory address of a retainer cannot be used as
+    its retainer identity because its location may change during garbage
+    collections.
+    2. Type 'retainer' must come with comparison operations as well as
+    an equality operation. That is, <, >, and == must be supported -
+    this is necessary to store retainers in a sorted order in retainer sets.
+    Therefore, you cannot use a huge structure type as 'retainer', for instance.
+
+  We illustrate three possibilities of defining 'retainer identity'.
+  Choose one of the following three compiler directives:
+
+   Retainer scheme 1 (RETAINER_SCHEME_INFO) : retainer = info table
+   Retainer scheme 2 (RETAINER_SCHEME_CCS)  : retainer = cost centre stack
+   Retainer scheme 3 (RETAINER_SCHEME_CC)   : retainer = cost centre
+*/
+
+// #define RETAINER_SCHEME_INFO
+#define RETAINER_SCHEME_CCS
+// #define RETAINER_SCHEME_CC
+
+#ifdef RETAINER_SCHEME_INFO
+struct _StgInfoTable;
+typedef struct _StgInfoTable *retainer;
+#endif
+
+#ifdef RETAINER_SCHEME_CCS
+typedef CostCentreStack *retainer;
+#endif
+
+#ifdef RETAINER_SCHEME_CC
+typedef CostCentre *retainer;
+#endif
+
+/*
+  Type 'retainerSet' defines an abstract datatype for sets of retainers.  
+
+  Invariants:
+    A retainer set stores its elements in increasing order (in element[] array).
+ */
+
+typedef struct _RetainerSet {
+  nat num;                      // number of elements
+  nat cost;                     // cost associated with this retainer set
+  StgWord hashKey;              // hash key for this retainer set
+  struct _RetainerSet *link;    // link to the next retainer set in the bucket
+  int id;   // unique id of this retainer set (used when printing)
+            // Its absolute value is interpreted as its true id; if id is
+            // negative, it indicates that this retainer set has had a positive
+            // cost after some retainer profiling.
+  retainer element[0];          // elements of this retainer set
+  // do not put anything below here!
+} RetainerSet;
+
+//
+// retainerSet - interface: see rts/RetainerSet.h
+//
+
+#endif /* STGRETAINERPROF_H */
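
The element[] array above is kept in increasing order, presumably so that
retainer sets can be compared and searched cheaply. As a hypothetical,
self-contained sketch (this is not the code in rts/RetainerSet.c; all names
here are made up), a membership test over such a sorted array can be a plain
binary search:

    #include <stdint.h>
    #include <stdio.h>

    typedef void *retainer;   /* stands in for CostCentreStack*, CostCentre*, etc. */

    /* Binary search over a retainer array kept in increasing order,
       as the element[] invariant above requires. */
    static int
    contains( retainer *element, unsigned int num, retainer r )
    {
        unsigned int lo = 0, hi = num;              /* half-open range [lo,hi) */
        while (lo < hi) {
            unsigned int mid = lo + (hi - lo) / 2;
            if (element[mid] == r)                        return 1;
            if ((uintptr_t)element[mid] < (uintptr_t)r)   lo = mid + 1;
            else                                          hi = mid;
        }
        return 0;
    }

    int main(void)
    {
        /* a toy "retainer set", already sorted */
        retainer rs[] = { (retainer)0x10, (retainer)0x20, (retainer)0x30 };
        printf("%d %d\n", contains(rs, 3, (retainer)0x20),    /* 1 */
                          contains(rs, 3, (retainer)0x25));   /* 0 */
        return 0;
    }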
index d203324..3b2461b 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Updates.h,v 1.25 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: Updates.h,v 1.26 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -178,7 +178,9 @@ extern void awakenBlockedQueue(StgTSO *q);
    ------------------------------------------------------------------------- */
 
 #if defined(PROFILING)
-#define PUSH_STD_CCCS(frame) frame->header.prof.ccs = CCCS
+// Setting frame->header.prof.hp.rs = NULL (or frame->header.prof.hp.ldvw = 0) is
+// unnecessary, because the field is never used anyway.
+#define PUSH_STD_CCCS(frame) (frame->header.prof.ccs = CCCS)
 #else
 #define PUSH_STD_CCCS(frame)
 #endif
index f7b5887..8cb24e9 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Exception.hc,v 1.21 2001/08/17 14:44:54 simonmar Exp $
+ * $Id: Exception.hc,v 1.22 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
@@ -260,8 +260,8 @@ CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_5_entry,RET_VEC(Sp[SP_OFF],5));
 CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_6_entry,RET_VEC(Sp[SP_OFF],6));
 CATCH_FRAME_ENTRY_TEMPLATE(stg_catch_frame_7_entry,RET_VEC(Sp[SP_OFF],7));
 
-#ifdef PROFILING
-#define CATCH_FRAME_BITMAP 7
+#if defined(PROFILING)
+#define CATCH_FRAME_BITMAP 15
 #else
 #define CATCH_FRAME_BITMAP 3
 #endif
@@ -355,7 +355,7 @@ FN_(raisezh_fast)
      * the info was only displayed for an *uncaught* exception.
      */
     if (RtsFlags.ProfFlags.showCCSOnException) {
-      STGCALL2(print_ccs,stderr,CCCS);
+      STGCALL2(fprintCCS,stderr,CCCS);
     }
 #endif
 
@@ -365,8 +365,18 @@ FN_(raisezh_fast)
      * is the exception raise.  It is used to overwrite all the
      * thunks which are currently under evaluataion.
      */
+    /*    
+    // @LDV profiling
+    // stg_raise_info has THUNK as its closure type. Since a THUNK takes at least
+    // MIN_UPD_SIZE words in its payload, MIN_UPD_SIZE is more appropriate than 1.
+    // It seems that 1 does not cause any problem unless profiling is performed.
+    // However, when LDV profiling goes on, we need to linearly scan the small object pool,
+    // where raise_closure is stored, so we should use MIN_UPD_SIZE.
     raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
                                               sizeofW(StgClosure)+1);
+     */
+    raise_closure = (StgClosure *)RET_STGCALL1(P_,allocate,
+                                              sizeofW(StgClosure)+MIN_UPD_SIZE);
     SET_HDR(raise_closure, &stg_raise_info, CCCS);
     raise_closure->payload[0] = R1.cl;
 
index 3ecde2b..8a9e2ac 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: GC.c,v 1.126 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: GC.c,v 1.127 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team 1998-1999
  *
@@ -42,6 +42,9 @@
 #include "FrontPanel.h"
 #endif
 
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
+
 /* STATIC OBJECT LIST.
  *
  * During GC:
@@ -602,6 +605,13 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
       }
   }
 
+#ifdef PROFILING
+  // We call processHeapClosureForDead() on every closure destroyed during
+  // the current garbage collection, so we invoke LdvCensusForDead().
+  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV)
+    LdvCensusForDead(N);
+#endif
+
   // NO MORE EVACUATION AFTER THIS POINT!
   // Finally: compaction of the oldest generation.
   if (major_gc && oldest_gen->steps[0].is_compacted) {
@@ -933,6 +943,12 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
   if (major_gc) { gcCAFs(); }
 #endif
   
+#ifdef PROFILING
+  // resetStaticObjectForRetainerProfiling() must be called before
+  // zeroing below.
+  resetStaticObjectForRetainerProfiling();
+#endif
+
   // zero the scavenged static object list 
   if (major_gc) {
     zero_static_object_list(scavenged_static_objects);
@@ -963,7 +979,6 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc )
 
   // restore enclosing cost centre 
 #ifdef PROFILING
-  heapCensus();
   CCCS = prev_CCS;
 #endif
 
@@ -1271,6 +1286,10 @@ static __inline__ StgClosure *
 copy(StgClosure *src, nat size, step *stp)
 {
   P_ to, from, dest;
+#ifdef PROFILING
+  // @LDV profiling
+  nat size_org = size;
+#endif
 
   TICK_GC_WORDS_COPIED(size);
   /* Find out where we're going, using the handy "to" pointer in 
@@ -1300,6 +1319,12 @@ copy(StgClosure *src, nat size, step *stp)
   dest = stp->hp;
   stp->hp = to;
   upd_evacuee(src,(StgClosure *)dest);
+#ifdef PROFILING
+  // @LDV profiling
+  // We store the size of the just evacuated object in the LDV word so that
+  // the profiler can guess the position of the next object later.
+  SET_EVACUAEE_FOR_LDV(src, size_org);
+#endif
   return (StgClosure *)dest;
 }
 
@@ -1309,10 +1334,14 @@ copy(StgClosure *src, nat size, step *stp)
  */
 
 
-static __inline__ StgClosure *
+static StgClosure *
 copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
 {
   P_ dest, to, from;
+#ifdef PROFILING
+  // @LDV profiling
+  nat size_to_copy_org = size_to_copy;
+#endif
 
   TICK_GC_WORDS_COPIED(size_to_copy);
   if (stp->gen_no < evac_gen) {
@@ -1334,6 +1363,17 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
   dest = stp->hp;
   stp->hp += size_to_reserve;
   upd_evacuee(src,(StgClosure *)dest);
+#ifdef PROFILING
+  // @LDV profiling
+  // We store the size of the just evacuated object in the LDV word so that
+  // the profiler can guess the position of the next object later.
+  // size_to_copy_org is wrong because the closure already occupies size_to_reserve
+  // words.
+  SET_EVACUAEE_FOR_LDV(src, size_to_reserve);
+  // fill the slop
+  if (size_to_reserve - size_to_copy_org > 0)
+    FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org)); 
+#endif
   return (StgClosure *)dest;
 }
 
@@ -2162,9 +2202,23 @@ scavenge(step *stp)
     }
 
     case IND_PERM:
-       if (stp->gen_no != 0) {
-           SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info);
-       }
+      if (stp->gen->no != 0) {
+#ifdef PROFILING
+        // @LDV profiling
+        // No need to call LDV_recordDead_FILL_SLOP_DYNAMIC() because an 
+        // IND_OLDGEN_PERM closure is larger than an IND_PERM closure.
+        LDV_recordDead((StgClosure *)p, sizeofW(StgInd));
+#endif        
+        // 
+        // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+        //
+       SET_INFO(((StgClosure *)p), &stg_IND_OLDGEN_PERM_info);
+#ifdef PROFILING
+        // @LDV profiling
+        // We pretend that p has just been created.
+        LDV_recordCreate((StgClosure *)p);
+#endif
+      }
        // fall through 
     case IND_OLDGEN_PERM:
        ((StgIndOldGen *)p)->indirectee = 
@@ -3590,7 +3644,17 @@ threadLazyBlackHole(StgTSO *tso)
 #if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
         belch("Unexpected lazy BHing required at 0x%04x",(int)bh);
 #endif
+#ifdef PROFILING
+        // @LDV profiling
+        // We pretend that bh is now dead.
+        LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+#endif
        SET_INFO(bh,&stg_BLACKHOLE_info);
+#ifdef PROFILING
+        // @LDV profiling
+        // We pretend that bh has just been created.
+        LDV_recordCreate(bh);
+#endif
       }
 
       update_frame = update_frame->link;
@@ -3832,7 +3896,20 @@ threadSqueezeStack(StgTSO *tso)
              }
          }
 #endif
+#ifdef PROFILING
+          // @LDV profiling
+          // We pretend that bh is now dead.
+          LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+#endif
+          // 
+          // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+          // 
          SET_INFO(bh,&stg_BLACKHOLE_info);
+#ifdef PROFILING
+          // @LDV profiling
+          // We pretend that bh has just been created.
+          LDV_recordCreate(bh);
+#endif
        }
       }
 
index 5fa5f10..52a9985 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: HeapStackCheck.hc,v 1.18 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: HeapStackCheck.hc,v 1.19 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -47,7 +47,6 @@
  * ThreadRunGHC thread.
  */
 
-
 #define GC_GENERIC                                     \
   if (Hp > HpLim) {                                    \
     Hp -= HpAlloc;                                     \
index 2ec3ea9..d1821f6 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Itimer.c,v 1.25 2001/11/21 20:55:10 sof Exp $
+ * $Id: Itimer.c,v 1.26 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1995-1999
  *
@@ -142,6 +142,8 @@ initialize_virtual_timer(nat ms)
     }
   }
 
+  initProfTimer();
+
   return 0;
 }
  
@@ -158,6 +160,10 @@ initialize_virtual_timer(nat ms)
 
     timestamp = getourtimeofday();
 
+#ifdef PROFILING
+    initProfTimer();
+#endif
+
     it.it_value.tv_sec = ms / 1000;
     it.it_value.tv_usec = 1000 * (ms - (1000 * it.it_value.tv_sec));
     it.it_interval = it.it_value;
@@ -178,6 +184,8 @@ initialize_virtual_timer(nat ms)
 
     timestamp = getourtimeofday();
 
+    initProfTimer();
+
     se.sigev_notify = SIGEV_SIGNAL;
     se.sigev_signo = SIGVTALRM;
     se.sigev_value.sival_int = SIGVTALRM;
index f3a185a..9de549c 100644 (file)
@@ -1,7 +1,7 @@
 /* -----------------------------------------------------------------------------
- * $Id: Itimer.h,v 1.8 2001/11/21 20:55:10 sof Exp $
+ * $Id: Itimer.h,v 1.9 2001/11/22 14:25:12 simonmar Exp $
  *
- * (c) The GHC Team 1998-1999
+ * (c) The GHC Team 1998-2001
  *
  * Interval timer for profiling and pre-emptive scheduling.
  *
  */
 #define CS_MIN_MILLISECS TICK_MILLISECS       /* milliseconds per slice */
  
-extern rtsBool do_prof_ticks;  /* profiling ticks on/off */
-
-/* Total number of ticks since startup */
-extern lnat total_ticks;
-
 int  initialize_virtual_timer  ( nat ms );
 int  install_vtalrm_handler    ( void );
 void block_vtalrm_signal       ( void );
diff --git a/ghc/rts/LdvProfile.c b/ghc/rts/LdvProfile.c
new file mode 100644 (file)
index 0000000..59a758f
--- /dev/null
@@ -0,0 +1,857 @@
+/* -----------------------------------------------------------------------------
+ * $Id: LdvProfile.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+#include "Stg.h"
+#include "Rts.h"
+#include "LdvProfile.h"
+#include "RtsFlags.h"
+#include "Itimer.h"
+#include "Proftimer.h"
+#include "Profiling.h"
+#include "Stats.h"
+#include "Storage.h"
+#include "RtsUtils.h"
+#include "Schedule.h"
+
+/*
+  ldvTime stores the current LDV time, that is, the current era.  It
+  is one larger than the number of times LDV profiling has been
+  performed, i.e.,
+  ldvTime - 1 == the number of times LDV profiling was executed
+              == the number of censuses made so far.
+  RESTRICTION:
+    ldvTime must fit in LDV_SHIFT (15 or 30) bits.
+  Invariants:
+    LDV profiling is turned off if ldvTime is 0.
+    LDV profiling is turned on if ldvTime is > 0.
+    ldvTime is initialized to 1 in initLdvProfiling().
+    If LDV profiling is not performed, ldvTime must remain 0 (e.g., when we
+    are doing retainer profiling).
+  ldvTime is set to 1 in initLdvProfiling().
+  ldvTime is set back to 0 in shutdownHaskell().
+  In between, ldvTime is incremented.
+*/
+nat ldvTime = 0;
+#
+// ldvTimeSave is set in LdvCensusKillAll(), and stores the final number of
+// times that LDV profiling was performed.
+static nat ldvTimeSave;
+
+// gi[] stores the statistics obtained at each heap census.
+// gi[0] is not used. See initLdvProfiling().
+LdvGenInfo *gi;
+
+#define giINCREMENT   32      // allocation unit for gi[]
+static nat giLength;          // current length of gi[]
+
+// giMax is initialized to 2^LDV_SHIFT in initLdvProfiling().
+// When ldvTime reaches giMax, the profiling stops because a closure can
+// store only up to (giMax - 1) as its creation or last use time.
+static nat giMax;
+
+/* --------------------------------------------------------------------------
+ * Fills in the slop when a *dynamic* closure changes its type.
+ * First calls LDV_recordDead() to declare the closure is dead, and then
+ * fills in the slop.
+ * 
+ *  Invoked when:
+ *    1) blackholing, UPD_BH_UPDATABLE() and UPD_BH_SINGLE_ENTRY (in
+ *      includes/StgMacros.h), threadLazyBlackHole() and 
+ *      threadSqueezeStack() (in GC.c).
+ *    2) updating with indirection closures, updateWithIndirection() 
+ *      and updateWithPermIndirection() (in Storage.h).
+ * 
+ *  LDV_recordDead_FILL_SLOP_DYNAMIC() is not called on 'inherently used' 
+ *  closures such as TSO. It is not called on PAP because PAP is not updatable.
+ *  ----------------------------------------------------------------------- */
+void 
+LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p )
+{
+    if (ldvTime > 0) {
+       StgInfoTable *inf = get_itbl((p));
+       nat nw, i;
+       switch (inf->type) {
+       case THUNK_1_0:
+       case THUNK_0_1:
+       case THUNK_2_0:
+       case THUNK_1_1:
+       case THUNK_0_2:
+       case THUNK_SELECTOR:
+           nw = MIN_UPD_SIZE;
+           break;
+       case THUNK:
+           nw = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
+           if (nw < MIN_UPD_SIZE)
+               nw = MIN_UPD_SIZE;
+           break;
+       case AP_UPD:
+           nw = sizeofW(StgPAP) - sizeofW(StgHeader) + ((StgPAP *)p)->n_args;
+           break;
+       case CAF_BLACKHOLE:
+       case BLACKHOLE:
+       case SE_BLACKHOLE:
+       case SE_CAF_BLACKHOLE:
+           nw = inf->layout.payload.ptrs + inf->layout.payload.nptrs;
+           break;
+       default:
+           barf("Unexpected closure type %u in LDV_recordDead_FILL_SLOP_DYNAMIC()", inf->type);
+           break;
+       }
+       LDV_recordDead((StgClosure *)(p), nw + sizeofW(StgHeader));
+       for (i = 0; i < nw; i++) {
+           ((StgClosure *)(p))->payload[i] = 0;
+       }
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Initialize gi[ldvTime].
+ * ----------------------------------------------------------------------- */
+static inline void
+giInitForCurrentEra(void)
+{
+    gi[ldvTime].notUsed = 0;
+    gi[ldvTime].inherentlyUsed = 0;
+    gi[ldvTime].used = 0;
+
+    gi[ldvTime].voidNew = 0;
+    gi[ldvTime].dragNew = 0;
+}
+
+/* --------------------------------------------------------------------------
+ * Increases ldvTime by 1 and initialize gi[ldvTime].
+ * Reallocates gi[] and increases its size if needed.
+ * ----------------------------------------------------------------------- */
+static void
+incrementLdvTime( void )
+{
+    ldvTime++;
+
+    if (ldvTime == giMax) {
+       fprintf(stderr,
+               "Lag/Drag/Void profiling limit %u reached. "
+               "Please increase the profiling interval with -L option.\n",
+               giLength);
+       barf("Current profiling interval = %f seconds",
+            (float)RtsFlags.ProfFlags.profileInterval / 1000.0 );
+    }
+
+    if (ldvTime % giINCREMENT == 0) {
+       gi = stgReallocBytes(gi, sizeof(LdvGenInfo) * (giLength + giINCREMENT),
+                             "incrementLdvTime");
+       giLength += giINCREMENT;
+    }
+
+    // What a stupid bug I struggled against for such a long time! I
+    // placed giInitForCurrentEra() before the above reallocation part,
+    // and it cost me three hours!
+    giInitForCurrentEra();
+}
+
+/* --------------------------------------------------------------------------
+ * Initialization code for LDV profiling.
+ * ----------------------------------------------------------------------- */
+void
+initLdvProfiling( void )
+{
+    nat p;
+
+    gi = stgMallocBytes(sizeof(LdvGenInfo) * giINCREMENT, "initLdvProfiling");
+    giLength = giINCREMENT;
+
+    ldvTime = 1;              // turn on LDV profiling.
+    giInitForCurrentEra();
+
+    // giMax = 2^LDV_SHIFT
+    giMax = 1;
+    for (p = 0; p < LDV_SHIFT; p++)
+       giMax *= 2;
+}
+
+/* --------------------------------------------------------------------------
+ * This function must be called before prof_file is fclose()d.
+ * hp_file is still open at this point; see endHeapProfiling() in ProfHeap.c.
+ * ----------------------------------------------------------------------- */
+void
+endLdvProfiling( void )
+{
+    nat t;
+    int sumVoidNew, sumDragNew;
+
+    // Now we compute voidTotal and dragTotal of each LdvGenInfo structure.
+    sumVoidNew = 0;
+    sumDragNew = 0;
+    for (t = 0; t < ldvTimeSave; t++) {
+       sumVoidNew += gi[t].voidNew;
+       sumDragNew += gi[t].dragNew;
+       gi[t].voidTotal = sumVoidNew;
+       gi[t].dragTotal = sumDragNew;
+    }
+
+    // t = 0 is skipped (ldvTime == 0 indicates that LDV profiling is
+    // turned off).
+    for (t = 1; t < ldvTimeSave; t++) {
+       fprintf(hp_file, "MARK %f\n", gi[t].time);
+       fprintf(hp_file, "BEGIN_SAMPLE %f\n", gi[t].time);
+       fprintf(hp_file, "VOID\t%u\n", gi[t].voidTotal * sizeof(StgWord));
+       fprintf(hp_file, "LAG\t%u\n", (gi[t].notUsed - gi[t].voidTotal) * sizeof(StgWord));
+       fprintf(hp_file, "USE\t%u\n", (gi[t].used - gi[t].dragTotal) * sizeof(StgWord));
+       fprintf(hp_file, "INHERENT_USE\t%u\n", gi[t].inherentlyUsed * sizeof(StgWord));
+       fprintf(hp_file, "DRAG\t%u\n", gi[t].dragTotal * sizeof(StgWord));
+       fprintf(hp_file, "END_SAMPLE %f\n", gi[t].time);
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Print the statistics.
+ * This function is called after each retainer profiling.
+ * ----------------------------------------------------------------------- */
+static void
+outputLdvSet( void )
+{
+}
+
+/* --------------------------------------------------------------------------
+ * This function is eventually called on every object in the heap
+ * during a census.  Any census is initiated immediately after a major
+ * garbage collection, and we exploit this fact in the implementation.
+ * If c is an 'inherently used' closure, gi[ldvTime].inherentlyUsed is
+ * updated.  If c is an ordinary closure, either gi[ldvTime].notUsed or
+ * gi[ldvTime].used is updated.
+ * ----------------------------------------------------------------------- */
+static inline nat
+processHeapClosure(StgClosure *c)
+{
+    nat size;
+    StgInfoTable *info;
+
+    info = get_itbl(c);
+
+    ASSERT(
+       ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= ldvTime &&
+       ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0
+        );
+    ASSERT(
+       ((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) ||
+       (
+           (LDVW(c) & LDV_LAST_MASK) <= ldvTime &&
+           (LDVW(c) & LDV_LAST_MASK) > 0
+           )
+       );
+
+    switch (info->type) {
+       /*
+         'inherently used' cases: add to gi[ldvTime].inherentlyUsed
+       */
+
+    case TSO:
+       size = tso_sizeW((StgTSO *)c);
+       goto inherently_used;
+
+    case MVAR:
+       size = sizeofW(StgMVar);
+       goto inherently_used;
+
+    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_FROZEN:
+       size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
+       goto inherently_used;
+
+    case ARR_WORDS:
+       size = arr_words_sizeW((StgArrWords *)c);
+       goto inherently_used;
+
+    case WEAK:
+    case MUT_VAR:
+    case MUT_CONS:
+    case FOREIGN:
+    case BCO:
+    case STABLE_NAME:
+       size = sizeW_fromITBL(info);
+       goto inherently_used;
+
+       /*
+         ordinary cases: add to gi[ldvTime].notUsed if c is not being used.
+         add to gi[ldvTime].used if c is being used.
+       */
+    case THUNK:
+       size = stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
+       break;
+
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_2_0:
+    case THUNK_1_1:
+    case THUNK_0_2:
+    case THUNK_SELECTOR:
+       size = sizeofW(StgHeader) + MIN_UPD_SIZE;
+       break;
+
+    case AP_UPD:
+    case PAP:
+       size = pap_sizeW((StgPAP *)c);
+       break;
+
+    case CONSTR:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_2_0:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+
+    case FUN:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_2_0:
+    case FUN_1_1:
+    case FUN_0_2:
+
+    case BLACKHOLE_BQ:
+    case BLACKHOLE:
+    case SE_BLACKHOLE:
+    case CAF_BLACKHOLE:
+    case SE_CAF_BLACKHOLE:
+       size = sizeW_fromITBL(info);
+       break;
+
+    case IND_PERM:
+       size = sizeofW(StgInd);
+       break;
+
+    case IND_OLDGEN_PERM:
+       size = sizeofW(StgIndOldGen);
+       break;
+
+       /*
+         Error case
+       */
+    case IND:           // IND cannot appear after major GCs.
+    case IND_OLDGEN:    // IND_OLDGEN cannot appear after major GCs.
+    case EVACUATED:     // EVACUATED is encountered only during GCs.
+       // static objects
+    case IND_STATIC:
+    case CONSTR_STATIC:
+    case FUN_STATIC:
+    case THUNK_STATIC:
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+    case CONSTR_NOCAF_STATIC:
+       // stack objects
+    case UPDATE_FRAME:
+    case CATCH_FRAME:
+    case STOP_FRAME:
+    case SEQ_FRAME:
+    case RET_DYN:
+    case RET_BCO:
+    case RET_SMALL:
+    case RET_VEC_SMALL:
+    case RET_BIG:
+    case RET_VEC_BIG:
+       // others
+    case BLOCKED_FETCH:
+    case FETCH_ME:
+    case FETCH_ME_BQ:
+    case RBH:
+    case REMOTE_REF:
+    case INVALID_OBJECT:
+    default:
+       barf("Invalid object in processHeapClosure(): %d", info->type);
+       return 0;
+    }
+
+    /*
+       ordinary cases:
+       We can compute either gi[ldvTime].notUsed or gi[ldvTime].used; the other
+       can be computed from the total sum of costs.
+       At the moment, we choose to compute gi[ldvTime].notUsed, which seems to
+       be smaller than gi[ldvTime].used.
+    */
+
+    // ignore closures that don't satisfy our constraints.
+    if (closureSatisfiesConstraints(c)) {
+       if ((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE)
+           gi[ldvTime].notUsed += size - sizeofW(StgProfHeader);
+       else
+           gi[ldvTime].used += size - sizeofW(StgProfHeader);
+    }
+    return size;
+
+inherently_used:
+    // ignore closures that don't satisfy our constraints.
+    if (closureSatisfiesConstraints(c)) {
+       gi[ldvTime].inherentlyUsed += size - sizeofW(StgProfHeader);
+    }
+    return size;
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosure() on every closure in the heap blocks
+ * begining at bd during a census.
+ * ----------------------------------------------------------------------- */
+static void
+processHeap( bdescr *bd )
+{
+    StgPtr p;
+    nat size;
+
+    while (bd != NULL) {
+       p = bd->start;
+       while (p < bd->free) {
+           size = processHeapClosure((StgClosure *)p);
+           p += size;
+           while (p < bd->free && !*p)   // skip slop
+               p++;
+       }
+       ASSERT(p == bd->free);
+       bd = bd->link;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosure() on every closure in the small object pool
+ * during a census.
+ * ----------------------------------------------------------------------- */
+static void
+processSmallObjectPool( void )
+{
+    bdescr *bd;
+    StgPtr p;
+    nat size;
+
+    bd = small_alloc_list;
+
+    // first block
+    if (bd == NULL)
+       return;
+
+    p = bd->start;
+    while (p < alloc_Hp) {
+       size = processHeapClosure((StgClosure *)p);
+       p += size;
+       while (p < alloc_Hp && !*p)     // skip slop
+           p++;
+    }
+    ASSERT(p == alloc_Hp);
+
+    bd = bd->link;
+    while (bd != NULL) {
+       p = bd->start;
+       while (p < bd->free) {
+           size = processHeapClosure((StgClosure *)p);
+           p += size;
+           while (p < bd->free && !*p)    // skip slop
+               p++;
+       }
+       ASSERT(p == bd->free);
+       bd = bd->link;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosure() on every (large) closure in the object
+ * chain beginning at bd during a census.
+ * ----------------------------------------------------------------------- */
+static void
+processChain( bdescr *bd )
+{
+    while (bd != NULL) {
+       // bd->free - bd->start is not an accurate measurement of the
+       // object size.  Actually it is always zero, so we compute its
+       // size explicitly.
+       processHeapClosure((StgClosure *)bd->start);
+       bd = bd->link;
+  }
+}
+
+/* --------------------------------------------------------------------------
+ * Starts a census for LDV profiling.
+ * Invariants:
+ *   Any call to LdvCensus() is preceded by a major garbage collection.
+ * ----------------------------------------------------------------------- */
+void
+LdvCensus( void )
+{
+    nat g, s;
+
+    // ldvTime == 0 means that LDV profiling is currently turned off.
+    if (ldvTime == 0)
+       return;
+
+    stat_startLDV();
+    //
+    // Todo: when we perform LDV profiling, the Haskell mutator time seems to
+    //       be affected by the -S or -s runtime option. For instance, the
+    //       following two options should result in nearly same
+    //       profiling outputs, but the second run (without -Sstderr
+    //       option) spends almost twice as long in the Haskell
+    //       mutator as the first run:
+    //
+    //       1) +RTS -Sstderr -hL -RTS
+    //       2) +RTS -hL -RTS
+    //
+    //       This is quite a subtle bug because this weird phenomenon is not
+    //       observed in retainer profiling, yet mut_user_time_during_LDV() is
+    //       completely orthogonal to mut_user_time_during_RP(). However, the
+    //       overall shapes of the resultant graphs are almost the same.
+    //
+    gi[ldvTime].time = mut_user_time_during_LDV();
+    if (RtsFlags.GcFlags.generations == 1) {
+       //
+       // Todo: support LDV for two-space garbage collection.
+       //
+       barf("Lag/Drag/Void profiling not supported with -G1");
+    } else {
+       for (g = 0; g < RtsFlags.GcFlags.generations; g++)
+           for (s = 0; s < generations[g].n_steps; s++) {
+               if (g == 0 && s == 0) {
+                   // after a major GC, the nursery must be empty,
+                   // and no need to call processNursery().
+                   ASSERT(MainCapability.r.rNursery->start ==
+                          MainCapability.r.rNursery->free);
+                   processSmallObjectPool();
+                   processChain(generations[g].steps[s].large_objects);
+               } else{
+                   processHeap(generations[g].steps[s].blocks);
+                   processChain(generations[g].steps[s].large_objects);
+               }
+           }
+    }
+    outputLdvSet();   // output to hp_file
+    stat_endLDV();    // output to prof_file
+
+    incrementLdvTime();
+}
+
+/* --------------------------------------------------------------------------
+ * This function is called eventually on every object destroyed during
+ * a garbage collection, whether it is a major garbage collection or
+ * not.  If c is an 'inherently used' closure, nothing happens.  If c
+ * is an ordinary closure, LDV_recordDead() is called on c with its
+ * proper size which excludes the profiling header portion in the
+ * closure.  Returns the size of the closure, including the profiling
+ * header portion, so that the caller can find the next closure.
+ * ----------------------------------------------------------------------- */
+static inline nat
+processHeapClosureForDead( StgClosure *c )
+{
+    nat size;
+    StgInfoTable *info;
+
+    info = get_itbl(c);
+
+    if (info->type != EVACUATED) {
+       ASSERT(((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) <= ldvTime &&
+              ((LDVW(c) & LDV_CREATE_MASK) >> LDV_SHIFT) > 0);
+       ASSERT(((LDVW(c) & LDV_STATE_MASK) == LDV_STATE_CREATE) ||
+              (
+                  (LDVW(c) & LDV_LAST_MASK) <= ldvTime &&
+                  (LDVW(c) & LDV_LAST_MASK) > 0
+                  ));
+    }
+
+    switch (info->type) {
+       /*
+         'inherently used' cases: do nothing.
+       */
+
+    case TSO:
+       size = tso_sizeW((StgTSO *)c);
+       return size;
+
+    case MVAR:
+       size = sizeofW(StgMVar);
+       return size;
+
+    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_FROZEN:
+       size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
+       return size;
+
+    case ARR_WORDS:
+       size = arr_words_sizeW((StgArrWords *)c);
+       return size;
+
+    case WEAK:
+    case MUT_VAR:
+    case MUT_CONS:
+    case FOREIGN:
+    case BCO:
+    case STABLE_NAME:
+       size = sizeW_fromITBL(info);
+       return size;
+
+       /*
+         ordinary cases: call LDV_recordDead().
+       */
+
+    case THUNK:
+       size = stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
+       break;
+
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_2_0:
+    case THUNK_1_1:
+    case THUNK_0_2:
+    case THUNK_SELECTOR:
+       size = sizeofW(StgHeader) + MIN_UPD_SIZE;
+       break;
+
+    case AP_UPD:
+    case PAP:
+       size = pap_sizeW((StgPAP *)c);
+       break;
+
+    case CONSTR:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_2_0:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+
+    case FUN:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_2_0:
+    case FUN_1_1:
+    case FUN_0_2:
+
+    case BLACKHOLE_BQ:
+    case BLACKHOLE:
+    case SE_BLACKHOLE:
+    case CAF_BLACKHOLE:
+    case SE_CAF_BLACKHOLE:
+       size = sizeW_fromITBL(info);
+       break;
+
+    case IND_PERM:
+       size = sizeofW(StgInd);
+       break;
+
+    case IND_OLDGEN_PERM:
+       size = sizeofW(StgIndOldGen);
+       break;
+
+       /*
+         'Ignore' cases
+       */
+       // Why can we ignore IND/IND_OLDGEN closures? We assume that
+       // any census is preceded by a major garbage collection, which
+       // IND/IND_OLDGEN closures cannot survive. Therefore there is
+       // no point in considering IND/IND_OLDGEN closures here,
+       // because they will perish before the next census
+       // anyway.
+    case IND:
+       size = sizeofW(StgInd);
+       return size;
+
+    case IND_OLDGEN:
+       size = sizeofW(StgIndOldGen);
+       return size;
+
+    case EVACUATED:
+       // The size of the evacuated closure is currently stored in
+       // the LDV field.  See SET_EVACUAEE_FOR_LDV() in
+       // includes/StgLdvProf.h.
+       return LDVW(c);
+
+       /*
+         Error case
+       */
+       // static objects
+    case IND_STATIC:
+    case CONSTR_STATIC:
+    case FUN_STATIC:
+    case THUNK_STATIC:
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+    case CONSTR_NOCAF_STATIC:
+       // stack objects
+    case UPDATE_FRAME:
+    case CATCH_FRAME:
+    case STOP_FRAME:
+    case SEQ_FRAME:
+    case RET_DYN:
+    case RET_BCO:
+    case RET_SMALL:
+    case RET_VEC_SMALL:
+    case RET_BIG:
+    case RET_VEC_BIG:
+       // others
+    case BLOCKED_FETCH:
+    case FETCH_ME:
+    case FETCH_ME_BQ:
+    case RBH:
+    case REMOTE_REF:
+    case INVALID_OBJECT:
+    default:
+       barf("Invalid object in processHeapClosureForDead(): %d", info->type);
+       return 0;
+    }
+
+    // Found a dead closure: record its size
+    LDV_recordDead(c, size);
+    return size;
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closure in the
+ * heap blocks starting at bd.
+ * ----------------------------------------------------------------------- */
+static void
+processHeapForDead( bdescr *bd )
+{
+    StgPtr p;
+
+    while (bd != NULL) {
+       p = bd->start;
+       while (p < bd->free) {
+           p += processHeapClosureForDead((StgClosure *)p);
+           while (p < bd->free && !*p)   // skip slop
+               p++;
+       }
+       ASSERT(p == bd->free);
+       bd = bd->link;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closure in the nursery.
+ * ----------------------------------------------------------------------- */
+static void
+processNurseryForDead( void )
+{
+    StgPtr p, bdLimit;
+    bdescr *bd;
+
+    bd = MainCapability.r.rNursery;
+    while (bd->start < bd->free) {
+       p = bd->start;
+       bdLimit = bd->start + BLOCK_SIZE_W;
+       while (p < bd->free && p < bdLimit) {
+           p += processHeapClosureForDead((StgClosure *)p);
+           while (p < bd->free && p < bdLimit && !*p)  // skip slop
+               p++;
+       }
+       bd = bd->link;
+       if (bd == NULL)
+           break;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closure in the
+ * small object pool.
+ * ----------------------------------------------------------------------- */
+static void
+processSmallObjectPoolForDead( void )
+{
+    bdescr *bd;
+    StgPtr p;
+
+    bd = small_alloc_list;
+
+    // first block
+    if (bd == NULL)
+       return;
+
+    p = bd->start;
+    while (p < alloc_Hp) {
+       p += processHeapClosureForDead((StgClosure *)p);
+       while (p < alloc_Hp && !*p)     // skip slop
+           p++;
+    }
+    ASSERT(p == alloc_Hp);
+
+    bd = bd->link;
+    while (bd != NULL) {
+       p = bd->start;
+       while (p < bd->free) {
+           p += processHeapClosureForDead((StgClosure *)p);
+           while (p < bd->free && !*p)    // skip slop
+               p++;
+       }
+       ASSERT(p == bd->free);
+       bd = bd->link;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Calls processHeapClosureForDead() on every *dead* closure in the closure
+ * chain.
+ * ----------------------------------------------------------------------- */
+static void
+processChainForDead( bdescr *bd )
+{
+    // Any object still in the chain is dead!
+    while (bd != NULL) {
+       processHeapClosureForDead((StgClosure *)bd->start);
+       bd = bd->link;
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Starts a census for *dead* closures, and calls
+ * processHeapClosureForDead() on every closure that died in the
+ * current garbage collection.  This function is called from the
+ * garbage collector right before tidying up, when all dead closures
+ * are still stored in the heap and easy to identify.  Generations 0
+ * through N have just been garbage collected.
+ * ----------------------------------------------------------------------- */
+void
+LdvCensusForDead( nat N )
+{
+    nat g, s;
+
+    // ldvTime == 0 means that LDV profiling is currently turned off.
+    if (ldvTime == 0)
+       return;
+
+    if (RtsFlags.GcFlags.generations == 1) {
+       //
+       // Todo: support LDV for two-space garbage collection.
+       //
+       barf("Lag/Drag/Void profiling not supported with -G1");
+    } else {
+       for (g = 0; g <= N; g++)
+           for (s = 0; s < generations[g].n_steps; s++) {
+               if (g == 0 && s == 0) {
+                   processSmallObjectPoolForDead();
+                   processNurseryForDead();
+                   processChainForDead(generations[g].steps[s].large_objects);
+               } else{
+                   processHeapForDead(generations[g].steps[s].blocks);
+                   processChainForDead(generations[g].steps[s].large_objects);
+               }
+           }
+    }
+}
+
+/* --------------------------------------------------------------------------
+ * Regard any closure in the current heap as dead or moribund and update
+ * LDV statistics accordingly.
+ * Called from shutdownHaskell() in RtsStartup.c.
+ * Also, stops LDV profiling by resetting ldvTime to 0.
+ * ----------------------------------------------------------------------- */
+void
+LdvCensusKillAll( void )
+{
+    LdvCensusForDead(RtsFlags.GcFlags.generations - 1);
+
+    // record the time when LDV profiling stops.
+    ldvTimeSave = ldvTime;
+
+    // and, stops LDV profiling.
+    ldvTime = 0;
+}
+
+#endif /* PROFILING */
diff --git a/ghc/rts/LdvProfile.h b/ghc/rts/LdvProfile.h
new file mode 100644 (file)
index 0000000..b722fbc
--- /dev/null
@@ -0,0 +1,63 @@
+/* -----------------------------------------------------------------------------
+ * $Id: LdvProfile.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef LDVPROFILE_H
+#define LDVPROFILE_H
+
+#ifdef PROFILING
+
+#include "ProfHeap.h"
+
+void  LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p );
+
+// Processes a closure 'c' being destroyed whose size is 'size'.
+// Make sure that LDV_recordDead() is not invoked on 'inherently used' closures
+// such as TSO; they should not be involved in computing dragNew or voidNew.
+// 
+// Note: ldvTime is 0 if LDV profiling is turned off.
+//       ldvTime is > 0 if LDV profiling is turned on.
+//       size does not include StgProfHeader.
+//
+// Even though ldvTime is checked in both LdvCensusForDead() and 
+// LdvCensusKillAll(), we still need to make sure that ldvTime is > 0 because 
+// LDV_recordDead() may be called from elsewhere in the runtime system. E.g., 
+// when a thunk is replaced by an indirection object.
+
+static inline void
+LDV_recordDead( StgClosure *c, nat size )
+{
+    if (ldvTime > 0 && closureSatisfiesConstraints(c)) {
+       nat t;
+       size -= sizeofW(StgProfHeader);
+       if ((LDVW((c)) & LDV_STATE_MASK) == LDV_STATE_CREATE) {
+           t = (LDVW((c)) & LDV_CREATE_MASK) >> LDV_SHIFT;
+           if (t < ldvTime) {
+               gi[t].voidNew += (int)size;
+               gi[ldvTime].voidNew -= (int)size;
+           }
+       } else {
+           t = LDVW((c)) & LDV_LAST_MASK;
+           if (t + 1 < ldvTime) {
+               gi[t + 1].dragNew += size;
+               gi[ldvTime].dragNew -= size;
+           }
+       }
+    }
+}
+
+extern void initLdvProfiling ( void );
+extern void endLdvProfiling  ( void );
+extern void LdvCensus        ( void );
+extern void LdvCensusForDead ( nat );
+extern void LdvCensusKillAll ( void );
+
+#endif /* PROFILING */
+
+#endif /* LDVPROFILE_H */
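
LDV_recordDead() above does its lag/drag/void accounting incrementally: when a never-used closure dies, its size is added to the void counter of the census at which it was created and subtracted from the counter of the current census, so a later pass can recover per-census void residency by summing the deltas (drag is handled the same way, starting from the census after the last use). The sketch below is a simplified stand-alone model of that bookkeeping, not GHC code; the census array, its size, and the final summation step are assumptions for illustration only.

#include <stdio.h>

#define N_CENSUS 8

static int voidNew[N_CENSUS];           /* per-census deltas, like gi[].voidNew */

/* A never-used closure of 'size' words, created at census 'created'
 * and found dead at census 'died'. */
static void recordDeadVoid(int created, int died, int size)
{
    voidNew[created] += size;           /* void from its creation census ...   */
    voidNew[died]    -= size;           /* ... up to (not including) its death */
}

int main(void)
{
    int t, running = 0;

    recordDeadVoid(2, 6, 5);            /* 5 words, void during censuses 2..5 */
    recordDeadVoid(3, 5, 2);            /* 2 words, void during censuses 3..4 */

    for (t = 0; t < N_CENSUS; t++) {    /* integrate the deltas */
        running += voidNew[t];
        printf("census %d: %d void words\n", t, running);
    }
    return 0;
}
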
index d36c18e..2036768 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: PrimOps.hc,v 1.84 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: PrimOps.hc,v 1.85 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
@@ -420,7 +420,25 @@ FN_(finalizzeWeakzh_fast)
   }
 
   /* kill it */
+#ifdef PROFILING
+  // @LDV profiling
+  // A weak pointer is inherently used, so we do not need to call
+  // LDV_recordDead_FILL_SLOP_DYNAMIC():
+  //    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)w);
+  // or, LDV_recordDead():
+  //    LDV_recordDead((StgClosure *)w, sizeofW(StgWeak) - sizeofW(StgProfHeader));
+  // Furthermore, when PROFILING is turned on, dead weak pointers are exactly as 
+  // large as weak pointers, so there is no need to fill the slop, either.
+  // See stg_DEAD_WEAK_info in StgMiscClosures.hc.
+#endif
+  //
+  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+  //
   w->header.info = &stg_DEAD_WEAK_info;
+#ifdef PROFILING
+  // @LDV profiling
+  LDV_recordCreate((StgClosure *)w);
+#endif
   f = ((StgWeak *)w)->finalizer;
   w->link = ((StgWeak *)w)->link;
 
index 5597792..fc4f421 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: ProfHeap.c,v 1.25 2001/08/14 13:40:09 sewardj Exp $
+ * $Id: ProfHeap.c,v 1.26 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
@@ -25,6 +25,8 @@
 #include "Stats.h"
 #include "Hash.h"
 #include "StrHash.h"
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
 
 #ifdef DEBUG_HEAP_PROF
 #include "Printer.h"
@@ -95,7 +97,7 @@ strToCtr(const char *str)
        for (; ctr != NULL; prev = ctr, ctr = ctr->next_bucket ) {
            if (!strcmp(ctr->str, str)) {
                insertHashTable( str_to_ctr, (W_)str, ctr );
-#ifdef DEBUG
+#ifdef DEBUG_CTR
                fprintf(stderr,"strToCtr: existing ctr for `%s'\n",str);
 #endif
                return ctr;
@@ -109,7 +111,7 @@ strToCtr(const char *str)
        ctr->next = all_ctrs;
        all_ctrs = ctr;
 
-#ifdef DEBUG
+#ifdef DEBUG_CTR
        fprintf(stderr,"strToCtr: new ctr for `%s'\n",str);
 #endif
 
@@ -175,23 +177,17 @@ initHeapProfiling(void)
 
     fprintf(hp_file, "JOB \"%s", prog_argv[0]);
 
-#   ifdef PROFILING
-    switch (RtsFlags.ProfFlags.doHeapProfile) {
-       case HEAP_BY_CCS:   fprintf(hp_file, " -h%c", CCchar); break;
-       case HEAP_BY_MOD:   fprintf(hp_file, " -h%c", MODchar); break;
-       case HEAP_BY_DESCR: fprintf(hp_file, " -h%c", DESCRchar); break;
-       case HEAP_BY_TYPE:  fprintf(hp_file, " -h%c", TYPEchar); break;
-       default: /* nothing */
+#ifdef PROFILING
+    {
+       int count;
+       for(count = 1; count < prog_argc; count++)
+           fprintf(hp_file, " %s", prog_argv[count]);
+       fprintf(hp_file, " +RTS ");
+       for(count = 0; count < rts_argc; count++)
+           fprintf(hp_file, "%s ", rts_argv[count]);
+       fprintf(hp_file, "\n");
     }
-    if (RtsFlags.ProfFlags.ccSelector)
-       fprintf(hp_file, " -hc{%s}", RtsFlags.ProfFlags.ccSelector);
-    if (RtsFlags.ProfFlags.modSelector)
-       fprintf(hp_file, " -hm{%s}", RtsFlags.ProfFlags.modSelector);
-    if (RtsFlags.ProfFlags.descrSelector)
-       fprintf(hp_file, " -hd{%s}", RtsFlags.ProfFlags.descrSelector);
-    if (RtsFlags.ProfFlags.typeSelector)
-       fprintf(hp_file, " -hy{%s}", RtsFlags.ProfFlags.typeSelector);
-#   endif /* PROFILING */
+#endif /* PROFILING */
 
     fprintf(hp_file, "\"\n" );
 
@@ -224,6 +220,17 @@ endHeapProfiling(void)
         return;
     }
 
+#ifdef PROFILING
+    switch (RtsFlags.ProfFlags.doHeapProfile) {
+    case HEAP_BY_RETAINER:
+       endRetainerProfiling();
+       break;
+    case HEAP_BY_LDV:
+       endLdvProfiling();
+       break;
+    }
+#endif
+
     seconds = mut_user_time();
     fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", seconds);
     fprintf(hp_file, "END_SAMPLE %0.2f\n", seconds);
@@ -417,24 +424,48 @@ clearCCSResid(CostCentreStack *ccs)
 }
 
 static void
-fprint_ccs(FILE *fp, CostCentreStack *ccs, nat components)
+fprint_ccs(FILE *fp, CostCentreStack *ccs, nat max_length)
 {
-  CostCentre *cc;
-  CostCentreStack *prev;
+    char buf[max_length+1];
+    nat next_offset = 0;
+    nat written;
+    char *template;
+
+    // MAIN on its own gets printed as "MAIN", otherwise we ignore MAIN.
+    if (ccs == CCS_MAIN) {
+       fprintf(fp, "MAIN");
+       return;
+    }
 
-  cc = ccs->cc;
-  prev = ccs->prevStack;
+    // keep printing components of the stack until we run out of space
+    // in the buffer.  If we run out of space, end with "...".
+    for (; ccs != NULL && ccs != CCS_MAIN; ccs = ccs->prevStack) {
 
-  if (prev == NULL
-      || prev->cc->is_caf != CC_IS_BORING
-      || components == 1) { 
-    fprintf(fp,"%s",cc->label);
-    return; 
+       // CAF cost centres print as M.CAF, but we leave the module
+       // name out of all the others to save space.
+       if (!strcmp(ccs->cc->label,"CAF")) {
+           written = snprintf(buf+next_offset, 
+                              (int)max_length-3-(int)next_offset,
+                              "%s.CAF", ccs->cc->module);
+       } else {
+           if (ccs->prevStack != NULL && ccs->prevStack != CCS_MAIN) {
+               template = "%s/";
+           } else {
+               template = "%s";
+           }
+           written = snprintf(buf+next_offset, 
+                              (int)max_length-3-(int)next_offset,
+                              template, ccs->cc->label);
+       }
 
-  } else {
-    fprint_ccs(fp, ccs->prevStack,components-1);
-    fprintf(fp,"/%s",cc->label);
-  }
+       if (next_offset+written >= max_length-4) {
+           sprintf(buf+max_length-4, "...");
+           break;
+       } else {
+           next_offset += written;
+       }
+    }
+    fprintf(fp, "%s", buf);
 }
 
 static void
@@ -444,7 +475,8 @@ reportCCSResid(FILE *fp, CostCentreStack *ccs)
 
   if (ccs->mem_resid != 0) {
     fprintf(fp,"   ");
-    fprint_ccs(fp,ccs,2/*print 2 components only*/);
+    // print as much of the CCS as possible in 30 chars, ending with "..."
+    fprint_ccs(fp,ccs,30);
     fprintf(fp," %ld\n", ccs->mem_resid * sizeof(W_));
   }
 
@@ -455,75 +487,190 @@ reportCCSResid(FILE *fp, CostCentreStack *ccs)
   }
 }
 
-static
-rtsBool str_matches_selector ( char* str, char* sel )
+static rtsBool
+str_matches_selector( char* str, char* sel )
 {
    char* p;
-   /* fprintf(stderr, "str_matches_selector %s %s\n", str, sel); */
+   // fprintf(stderr, "str_matches_selector %s %s\n", str, sel);
    while (1) {
-      /* Compare str against wherever we've got to in sel. */
-      p = str;
-      while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
-         p++; sel++;
-      }
-      /* Match if all of str used and have reached the end of a sel
-         fragment. */
-      if (*p == '\0' && (*sel == ',' || *sel == '\0'))
-         return rtsTrue;
-
-      /* No match.  Advance sel to the start of the next elem. */
-      while (*sel != ',' && *sel != '\0') sel++;
-      if (*sel == ',') sel++;
-
-      /* Run out of sel ?? */
-      if (*sel == '\0') return rtsFalse;
+       // Compare str against wherever we've got to in sel.
+       p = str;
+       while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
+          p++; sel++;
+       }
+       // Match if all of str used and have reached the end of a sel fragment.
+       if (*p == '\0' && (*sel == ',' || *sel == '\0'))
+          return rtsTrue;
+       
+       // No match.  Advance sel to the start of the next elem.
+       while (*sel != ',' && *sel != '\0') sel++;
+       if (*sel == ',') sel++;
+       
+       // Run out of sel?
+       if (*sel == '\0') return rtsFalse;
    }
 }
 
-/* Figure out whether a closure should be counted in this census, by
-   testing against all the specified constraints. */
-static
-rtsBool satisfies_constraints ( StgClosure* p )
+// Figure out whether a closure should be counted in this census, by
+// testing against all the specified constraints.
+rtsBool
+closureSatisfiesConstraints( StgClosure* p )
 {
    rtsBool b;
    if (RtsFlags.ProfFlags.modSelector) {
-      b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->module,
-                                RtsFlags.ProfFlags.modSelector );
-      if (!b) return rtsFalse;
+       b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->module,
+                                RtsFlags.ProfFlags.modSelector );
+       if (!b) return rtsFalse;
    }
    if (RtsFlags.ProfFlags.descrSelector) {
-      b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_desc,
-                                RtsFlags.ProfFlags.descrSelector );
-      if (!b) return rtsFalse;
+       b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_desc,
+                                RtsFlags.ProfFlags.descrSelector );
+       if (!b) return rtsFalse;
    }
    if (RtsFlags.ProfFlags.typeSelector) {
-      b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_type,
+       b = str_matches_selector( (get_itbl((StgClosure *)p))->prof.closure_type,
                                 RtsFlags.ProfFlags.typeSelector );
-      if (!b) return rtsFalse;
+       if (!b) return rtsFalse;
    }
    if (RtsFlags.ProfFlags.ccSelector) {
-      b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->label,
-                                RtsFlags.ProfFlags.ccSelector );
-      if (!b) return rtsFalse;
+       b = str_matches_selector( ((StgClosure *)p)->header.prof.ccs->cc->label,
+                                RtsFlags.ProfFlags.ccSelector );
+       if (!b) return rtsFalse;
    }
    return rtsTrue;
 }
 #endif /* PROFILING */
 
+/* -----------------------------------------------------------------------------
+ * Code to perform a heap census.
+ * -------------------------------------------------------------------------- */
+static void
+heapCensusChain( bdescr *bd )
+{
+    StgPtr p;
+    StgInfoTable *info;
+    nat size;
+#ifdef PROFILING
+    nat real_size;
+#endif
 
-static double time_of_last_heapCensus = 0.0;
+    for (; bd != NULL; bd = bd->link) {
+       p = bd->start;
+       while (p < bd->free) {
+           info = get_itbl((StgClosure *)p);
+           
+           switch (info->type) {
+
+           case CONSTR:
+           case BCO:
+           case FUN:
+           case THUNK:
+           case IND_PERM:
+           case IND_OLDGEN_PERM:
+           case CAF_BLACKHOLE:
+           case SE_CAF_BLACKHOLE:
+           case SE_BLACKHOLE:
+           case BLACKHOLE:
+           case BLACKHOLE_BQ:
+           case WEAK:
+           case FOREIGN:
+           case STABLE_NAME:
+           case MVAR:
+           case MUT_VAR:
+           case MUT_CONS:
+           case CONSTR_INTLIKE:
+           case CONSTR_CHARLIKE:
+           case FUN_1_0:
+           case FUN_0_1:
+           case FUN_1_1:
+           case FUN_0_2:
+           case FUN_2_0:
+           case THUNK_1_1:
+           case THUNK_0_2:
+           case THUNK_2_0:
+           case CONSTR_1_0:
+           case CONSTR_0_1:
+           case CONSTR_1_1:
+           case CONSTR_0_2:
+           case CONSTR_2_0:
+               size = sizeW_fromITBL(info);
+               break;
+               
+           case THUNK_1_0:             /* ToDo - shouldn't be here */
+           case THUNK_0_1:             /* "  ditto  " */
+           case THUNK_SELECTOR:
+               size = sizeofW(StgHeader) + MIN_UPD_SIZE;
+               break;
+
+           case PAP:
+           case AP_UPD:
+               size = pap_sizeW((StgPAP *)p);
+               break;
+               
+           case ARR_WORDS:
+               size = arr_words_sizeW(stgCast(StgArrWords*,p));
+               break;
+               
+           case MUT_ARR_PTRS:
+           case MUT_ARR_PTRS_FROZEN:
+               size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
+               break;
+               
+           case TSO:
+               size = tso_sizeW((StgTSO *)p);
+               break;
+               
+           default:
+               barf("heapCensus");
+           }
+           
+#ifdef DEBUG_HEAP_PROF
+           switch (RtsFlags.ProfFlags.doHeapProfile) {
+           case HEAP_BY_INFOPTR:
+               add_data((void *)(*p), size * sizeof(W_));
+               break;
+           case HEAP_BY_CLOSURE_TYPE:
+               closure_types[info->type] += size * sizeof(W_);
+               break;
+           }
+#endif
+           
+#ifdef PROFILING
+           // subtract the profiling overhead
+           real_size = size - sizeofW(StgProfHeader);
+
+           if (closureSatisfiesConstraints((StgClosure*)p)) {
+               switch (RtsFlags.ProfFlags.doHeapProfile) {
+               case HEAP_BY_CCS:
+                   ((StgClosure *)p)->header.prof.ccs->mem_resid += real_size;
+                   break;
+               case HEAP_BY_MOD:
+                   strToCtr(((StgClosure *)p)->header.prof.ccs->cc->module)
+                       ->mem_resid += real_size;
+                   break;
+               case HEAP_BY_DESCR:
+                   strToCtr(get_itbl(((StgClosure *)p))->prof.closure_desc)->mem_resid 
+                       += real_size;
+                   break;
+               case HEAP_BY_TYPE:
+                   strToCtr(get_itbl(((StgClosure *)p))->prof.closure_type)->mem_resid
+                       += real_size;
+                   break;
+               default:
+                   barf("heapCensus; doHeapProfile");
+               }
+           }
+#endif
+           p += size;
+       }
+    }
+}
 
 void
-heapCensus(void)
+heapCensus( void )
 {
-  bdescr *bd;
-  const StgInfoTable *info;
   StgDouble time;
-  nat size;
-  StgPtr p;
-#ifdef PROFILING
-  nat elapsed;
-#endif
+  nat g, s;
     
 #ifdef DEBUG_HEAP_PROF
   switch (RtsFlags.ProfFlags.doHeapProfile) {
@@ -542,21 +689,6 @@ heapCensus(void)
 #endif
 
 #ifdef PROFILING
-  /*
-   * We only continue iff we've waited long enough,
-   * otherwise, we just dont do the census.
-   */
-
-  time = mut_user_time_during_GC();  
-  elapsed = (time - time_of_last_heapCensus) * 1000;
-  if (elapsed < RtsFlags.ProfFlags.profileFrequency) {
-      return;
-    }
-  time_of_last_heapCensus = time;
-#endif
-
-
-#ifdef PROFILING
   switch (RtsFlags.ProfFlags.doHeapProfile) {
   case NO_HEAP_PROFILING:
       return;
@@ -574,136 +706,27 @@ heapCensus(void)
   }
 #endif
 
-  /* Only do heap profiling in a two-space heap */
-  ASSERT(RtsFlags.GcFlags.generations == 1);
-  bd = g0s0->to_blocks;
-
+  time = mut_user_time_during_GC();  
   fprintf(hp_file, "BEGIN_SAMPLE %0.2f\n", time);
-  
-  while (bd != NULL) {
-    p = bd->start;
-    while (p < bd->free) {
-      info = get_itbl((StgClosure *)p);
-
-      switch (info->type) {
-       
-      case CONSTR:
-       if (((StgClosure *)p)->header.info == &stg_DEAD_WEAK_info
-           && !(LOOKS_LIKE_GHC_INFO(*(p + sizeW_fromITBL(info))))) {
-           size = sizeofW(StgWeak);
-           break;
-       }
-       /* else, fall through... */
-
-      case BCO:
-      case FUN:
-      case THUNK:
-      case IND_PERM:
-      case IND_OLDGEN_PERM:
-      case CAF_BLACKHOLE:
-      case SE_CAF_BLACKHOLE:
-      case SE_BLACKHOLE:
-      case BLACKHOLE:
-      case BLACKHOLE_BQ:
-      case WEAK:
-      case FOREIGN:
-      case STABLE_NAME:
-      case MVAR:
-      case MUT_VAR:
-      case CONSTR_INTLIKE:
-      case CONSTR_CHARLIKE:
-      case FUN_1_0:
-      case FUN_0_1:
-      case FUN_1_1:
-      case FUN_0_2:
-      case FUN_2_0:
-      case THUNK_1_1:
-      case THUNK_0_2:
-      case THUNK_2_0:
-      case CONSTR_1_0:
-      case CONSTR_0_1:
-      case CONSTR_1_1:
-      case CONSTR_0_2:
-      case CONSTR_2_0:
-       size = sizeW_fromITBL(info);
-       break;
-       
-      case THUNK_1_0:          /* ToDo - shouldn't be here */
-      case THUNK_0_1:          /* "  ditto  " */
-      case THUNK_SELECTOR:
-       size = sizeofW(StgHeader) + MIN_UPD_SIZE;
-       break;
-       
-      case AP_UPD: /* we can treat this as being the same as a PAP */
-      case PAP:
-       size = pap_sizeW((StgPAP *)p);
-       break;
-       
-      case ARR_WORDS:
-       size = arr_words_sizeW(stgCast(StgArrWords*,p));
-       break;
-       
-      case MUT_ARR_PTRS:
-      case MUT_ARR_PTRS_FROZEN:
-       size = mut_arr_ptrs_sizeW((StgMutArrPtrs *)p);
-       break;
-       
-      case TSO:
-       size = tso_sizeW((StgTSO *)p);
-       break;
-       
-      default:
-       barf("heapCensus");
-      }
-
-#ifdef DEBUG_HEAP_PROF
-      switch (RtsFlags.ProfFlags.doHeapProfile) {
-      case HEAP_BY_INFOPTR:
-       add_data((void *)(*p), size * sizeof(W_));
-       break;
-      case HEAP_BY_CLOSURE_TYPE:
-       closure_types[info->type] += size * sizeof(W_);
-       break;
-      }
-#endif
 
-#     ifdef PROFILING
-      if (satisfies_constraints((StgClosure*)p)) {
-         switch (RtsFlags.ProfFlags.doHeapProfile) {
-            case HEAP_BY_CCS:
-              ((StgClosure *)p)->header.prof.ccs->mem_resid += size;
-               break;
-            case HEAP_BY_MOD:
-               strToCtr(((StgClosure *)p)->header.prof.ccs->cc->module)
-                  ->mem_resid += size;
-               break;
-            case HEAP_BY_DESCR:
-               strToCtr(get_itbl(((StgClosure *)p))->prof.closure_desc)->mem_resid 
-                  += size;
-               break;
-            case HEAP_BY_TYPE:
-               strToCtr(get_itbl(((StgClosure *)p))->prof.closure_type)->mem_resid
-                  += size;
-               break;
-            default:
-               barf("heapCensus; doHeapProfile");
-         }
+  if (RtsFlags.GcFlags.generations == 1) {
+      heapCensusChain( g0s0->to_blocks );
+  } else {
+      for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+         for (s = 0; s < generations[g].n_steps; s++) {
+             heapCensusChain( generations[g].steps[s].blocks );
+         }
       }
-#     endif
-
-      p += size;
-    }
-    bd = bd->link;
   }
 
 #ifdef DEBUG_HEAP_PROF
   switch (RtsFlags.ProfFlags.doHeapProfile) {
   case HEAP_BY_INFOPTR:
-    fprint_data(hp_file);
-    break;
+      fprint_data(hp_file);
+      break;
   case HEAP_BY_CLOSURE_TYPE:
-    fprint_closure_types(hp_file);
-    break;
+      fprint_closure_types(hp_file);
+      break;
   }
 #endif
     
index 270dc55..852a828 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: ProfHeap.h,v 1.1 1999/09/15 13:46:29 simonmar Exp $
+ * $Id: ProfHeap.h,v 1.2 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -8,6 +8,7 @@
  * ---------------------------------------------------------------------------*/
 
 
-void heapCensus(void);
-extern nat initHeapProfiling(void);
-void endHeapProfiling(void);
+extern void    heapCensus( void );
+extern nat     initHeapProfiling( void );
+extern void    endHeapProfiling( void );
+extern rtsBool closureSatisfiesConstraints( StgClosure* p );
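
closureSatisfiesConstraints() declared above decides whether a closure is counted in a census by matching its cost-centre label, module, closure description, and closure type against the comma-separated selectors supplied with -hc{...}, -hm{...}, -hd{...} and -hy{...}; the element-by-element comparison is done by str_matches_selector() in ProfHeap.c. A minimal stand-alone sketch of that matching loop follows; the function name and the test strings are invented for illustration.

#include <stdio.h>

/* Returns 1 if str equals one of the comma-separated names in sel. */
static int matchesSelector(const char *str, const char *sel)
{
    while (1) {
        /* compare str against the current element of sel */
        const char *p = str;
        while (*p != '\0' && *sel != ',' && *sel != '\0' && *p == *sel) {
            p++; sel++;
        }
        /* match only if all of str was used and a whole element was consumed */
        if (*p == '\0' && (*sel == ',' || *sel == '\0'))
            return 1;
        /* no match: skip to the start of the next element */
        while (*sel != ',' && *sel != '\0') sel++;
        if (*sel == ',') sel++;
        if (*sel == '\0') return 0;     /* ran out of elements */
    }
}

int main(void)
{
    printf("%d\n", matchesSelector("bar",  "foo,bar"));   /* 1 */
    printf("%d\n", matchesSelector("barf", "foo,bar"));   /* 0 */
    printf("%d\n", matchesSelector("ba",   "foo,bar"));   /* 0 */
    return 0;
}
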
index a8cf7a4..fc863e9 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Profiling.c,v 1.24 2001/10/18 14:41:01 simonmar Exp $
+ * $Id: Profiling.c,v 1.25 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
@@ -19,6 +19,8 @@
 #include "Itimer.h"
 #include "ProfHeap.h"
 #include "Arena.h"
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
 
 /*
  * Profiling allocation arena.
@@ -144,9 +146,6 @@ static  IndexTable *      AddToIndexTable ( IndexTable *, CostCentreStack *,
 
 
 
-#ifdef DEBUG
-static    void printCCS            ( CostCentreStack *ccs );
-#endif
 static    void initTimeProfiling   ( void );
 static    void initProfilingLogFile( void );
 
@@ -195,6 +194,15 @@ initProfiling1 (void)
   /* cost centres are registered by the per-module 
    * initialisation code now... 
    */
+
+  switch (RtsFlags.ProfFlags.doHeapProfile) {
+  case HEAP_BY_RETAINER:
+      initRetainerProfiling();
+      break;
+  case HEAP_BY_LDV:
+      initLdvProfiling();
+      break;
+  }
 }
 
 void
@@ -242,6 +250,13 @@ initProfilingLogFile(void)
     if ((prof_file = fopen(prof_filename, "w")) == NULL) {
        fprintf(stderr, "Can't open profiling report file %s\n", prof_filename);
        RtsFlags.CcFlags.doCostCentres = 0;
+        // @retainer profiling
+        // @LDV profiling
+        // The following line was added by Sung; retainer/LDV profiling may need
+        // two output files, i.e., <program>.prof/hp.
+        if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER ||
+            RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV)
+            RtsFlags.ProfFlags.doHeapProfile = 0;
        return;
     }
 
@@ -328,7 +343,7 @@ PushCostCentre ( CostCentreStack *ccs, CostCentre *cc )
 {
   IF_DEBUG(prof, 
           fprintf(stderr,"Pushing %s on ", cc->label);
-          printCCS(ccs);
+          fprintCCS(stderr,ccs);
           fprintf(stderr,"\n"));
   return PushCostCentre(ccs,cc);
 }
@@ -390,9 +405,9 @@ AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 )
   IF_DEBUG(prof, 
           if (ccs1 != ccs2) {
             fprintf(stderr,"Appending ");
-            printCCS(ccs1);
+            fprintCCS(stderr,ccs1);
             fprintf(stderr," to ");
-            printCCS(ccs2);
+            fprintCCS(stderr,ccs2);
             fprintf(stderr,"\n");});
   return AppendCCS(ccs1,ccs2);
 }
@@ -688,7 +703,11 @@ report_ccs_profiling( void )
     fprint_header();
     reportCCS(pruneCCSTree(CCS_MAIN), 0);
 
-    fclose(prof_file);
+    // @retainer profiling
+    // @LDV profiling
+    // Now, prof_file is closed in shutdownHaskell() because this file
+    // is also used for retainer/LDV profiling. See shutdownHaskell().
+    // fclose(prof_file);
 }
 
 static void 
@@ -862,39 +881,16 @@ reportCCS_XML(CostCentreStack *ccs)
 }
 
 void
-print_ccs (FILE *fp, CostCentreStack *ccs)
-{
-  if (ccs == CCCS) {
-    fprintf(fp, "Cost-Centre Stack: ");
-  }
-  
-  if (ccs != CCS_MAIN)
-    {
-      print_ccs(fp, ccs->prevStack);
-      fprintf(fp, "->[%s,%s]", ccs->cc->label, ccs->cc->module);
-    } else {
-      fprintf(fp, "[%s,%s]", ccs->cc->label, ccs->cc->module);
-    }
-
-  if (ccs == CCCS) {
-    fprintf(fp, "\n");
-  }
-}
-
-
-#ifdef DEBUG
-static void
-printCCS ( CostCentreStack *ccs )
+fprintCCS( FILE *f, CostCentreStack *ccs )
 {
-  fprintf(stderr,"<");
-  for (; ccs; ccs = ccs->prevStack ) {
-    fprintf(stderr,ccs->cc->label);
-    if (ccs->prevStack) {
-      fprintf(stderr,",");
-    }
+  fprintf(f,"<");
+  for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) {
+      fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label);
+      if (ccs->prevStack && ccs->prevStack != CCS_MAIN) {
+         fprintf(f,",");
+      }
   }
-  fprintf(stderr,">");
+  fprintf(f,">");
 }
-#endif
 
 #endif /* PROFILING */
index d75d661..52db2da 100644 (file)
@@ -1,5 +1,6 @@
 /* -----------------------------------------------------------------------------
- * $Id: Profiling.h,v 1.2 2000/04/19 12:42:48 simonmar Exp $
+
+ * $Id: Profiling.h,v 1.3 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
@@ -30,4 +31,9 @@ void print_ccs (FILE *, CostCentreStack *);
 
 extern rtsBool time_profiling;
 
+extern lnat total_prof_ticks;
+
+extern void fprintCCS( FILE *f, CostCentreStack *ccs );
+
+
 #endif
index 42766d3..390dd69 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Proftimer.c,v 1.7 2001/08/14 13:40:09 sewardj Exp $
+ * $Id: Proftimer.c,v 1.8 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
 #if defined (PROFILING)
 
 #include "PosixSource.h"
+
 #include "Rts.h"
 #include "Profiling.h"
 #include "Itimer.h"
 #include "Proftimer.h"
+#include "RtsFlags.h"
+
+static rtsBool do_prof_ticks = rtsFalse;       // enable profiling ticks
+static rtsBool do_heap_prof_ticks = rtsFalse;  // enable heap profiling ticks
 
-rtsBool do_prof_ticks = rtsFalse;       /* enable profiling ticks */
+// Number of ticks until next heap census
+static int ticks_to_heap_profile;
+
+// Time for a heap profile on the next context switch
+rtsBool performHeapProfile;
 
 void
-stopProfTimer(void)
-{                              /* Stops time profile */
+stopProfTimer( void )
+{
     if (time_profiling) {
        do_prof_ticks = rtsFalse;
     }
-};
+}
 
 void
-startProfTimer(void)
-{                              /* Starts time profile */
+startProfTimer( void )
+{
     if (time_profiling) {
        do_prof_ticks = rtsTrue;
     }
-};
+}
+
+void
+stopHeapProfTimer( void )
+{
+    do_heap_prof_ticks = rtsFalse;
+}
+
+void
+startHeapProfTimer( void )
+{
+    if (RtsFlags.ProfFlags.doHeapProfile) {
+       do_heap_prof_ticks = rtsTrue;
+    }
+}
+
+void
+initProfTimer( void )
+{
+    performHeapProfile = rtsFalse;
+
+    RtsFlags.ProfFlags.profileIntervalTicks = 
+       RtsFlags.ProfFlags.profileInterval / TICK_MILLISECS;
+
+    ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks;
+
+    startHeapProfTimer();
+}
+    
 
 void
 handleProfTick(void)
@@ -39,5 +76,14 @@ handleProfTick(void)
     if (do_prof_ticks) {
        CCS_TICK(CCCS);
     }
+
+    if (do_heap_prof_ticks) {
+       ticks_to_heap_profile--;
+       if (ticks_to_heap_profile <= 0) {
+           ticks_to_heap_profile = RtsFlags.ProfFlags.profileIntervalTicks;
+           performHeapProfile = rtsTrue;
+       }
+    }
 }
+
 #endif /* PROFILING */
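
The heap-profile timer above is just a countdown over profiling ticks: initProfTimer() converts the requested interval from milliseconds into a number of ticks, and handleProfTick() decrements the count, re-arms it, and sets performHeapProfile so that the scheduler runs the census at the next convenient point. Below is a stripped-down stand-alone sketch of the same countdown; the tick length and interval are made-up constants.

#include <stdio.h>

#define TICK_MS          20     /* assumed length of one profiling tick (ms) */
#define PROFILE_INTERVAL 100    /* assumed heap-profile interval (ms)        */

static int ticksToCensus;
static int performCensus = 0;   /* plays the role of performHeapProfile      */

static void initTimer(void)
{
    ticksToCensus = PROFILE_INTERVAL / TICK_MS;     /* 5 ticks per census */
}

static void handleTick(void)
{
    if (--ticksToCensus <= 0) {
        ticksToCensus = PROFILE_INTERVAL / TICK_MS; /* re-arm the countdown */
        performCensus = 1;      /* the scheduler would run the census now   */
    }
}

int main(void)
{
    int t;

    initTimer();
    for (t = 1; t <= 12; t++) {
        handleTick();
        if (performCensus) {
            printf("heap census at tick %d\n", t);
            performCensus = 0;
        }
    }
    return 0;
}
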
index 231f8da..1ddfc50 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Proftimer.h,v 1.5 2000/04/03 15:54:50 simonmar Exp $
+ * $Id: Proftimer.h,v 1.6 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998
  *
@@ -7,10 +7,12 @@
  *
  * ---------------------------------------------------------------------------*/
 
-extern rtsBool do_prof_ticks;
-extern lnat total_prof_ticks;
+extern void initProfTimer      ( void );
+extern void handleProfTick     ( void );
 
-extern void initProfTimer(nat ms);
-extern void stopProfTimer(void);
-extern void startProfTimer(void);
-extern void handleProfTick(void);
+extern void stopProfTimer      ( void );
+extern void startProfTimer     ( void );
+extern void stopHeapProfTimer  ( void );
+extern void startHeapProfTimer ( void );
+
+extern rtsBool performHeapProfile;
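
RetainerProfile.c (below) avoids clearing every closure's retainer-set field between profiling runs by tagging the field with the current value of a global 'flip' bit: a field whose low bit disagrees with flip is known to be stale, and XOR-ing with flip recovers the real pointer. The following stand-alone sketch, not GHC code, demonstrates the trick on an ordinary struct; like the real code it assumes retainer-set pointers are word-aligned, so their low bit is free.

#include <stdio.h>
#include <stdint.h>

static uintptr_t flip = 0;              /* toggled once per profiling pass */

typedef struct {
    uintptr_t rs;                       /* plays the role of header.prof.hp.rs */
} Obj;

static int  fieldValid(const Obj *o) { return ((o->rs & 1) ^ flip) == 0; }
static void setNull(Obj *o)          { o->rs = (uintptr_t)0 | flip; }
static void setRS(Obj *o, void *p)   { o->rs = (uintptr_t)p | flip; }
static void *getRS(const Obj *o)     { return (void *)(o->rs ^ flip); }

int main(void)
{
    static int someSet;                 /* stands in for a retainer set */
    Obj o = { 0 };                      /* rs == 0 is valid while flip == 0 */

    setRS(&o, &someSet);
    printf("pass 0: valid=%d rs=%p\n", fieldValid(&o), getRS(&o));

    flip ^= 1;                          /* a new pass begins */
    printf("pass 1: valid=%d (field is stale)\n", fieldValid(&o));
    if (!fieldValid(&o))
        setNull(&o);                    /* reset lazily, like maybeInitRetainerSet() */
    printf("pass 1: valid=%d rs=%p\n", fieldValid(&o), getRS(&o));
    return 0;
}
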
diff --git a/ghc/rts/RetainerProfile.c b/ghc/rts/RetainerProfile.c
new file mode 100644 (file)
index 0000000..f811d73
--- /dev/null
@@ -0,0 +1,2303 @@
+/* -----------------------------------------------------------------------------
+ * $Id: RetainerProfile.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+#include "Rts.h"
+#include "RtsUtils.h"
+#include "RetainerProfile.h"
+#include "RetainerSet.h"
+#include "Schedule.h"
+#include "Printer.h"
+#include "Storage.h"
+#include "StoragePriv.h"
+#include "RtsFlags.h"
+#include "Weak.h"
+#include "Sanity.h"
+#include "Profiling.h"
+#include "Stats.h"
+#include "BlockAlloc.h"
+#include "Itimer.h"
+#include "Proftimer.h"
+#include "ProfHeap.h"
+
+/*
+  Note: what to change in order to plug in a new retainer profiling scheme?
+    (1) type retainer in ../includes/StgRetainerProf.h
+    (2) retainer function R(), i.e., getRetainerFrom()
+    (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
+        in RetainerSet.h, if needed.
+    (4) printRetainer() and printRetainerSetShort() in RetainerSet.c.
+ */
+
+/* -----------------------------------------------------------------------------
+ * Declarations...
+ * -------------------------------------------------------------------------- */
+
+static nat retainerGeneration; // generation
+
+static nat numObjectVisited;   // total number of objects visited
+static nat timesAnyObjectVisited; // number of times any objects are visited
+
+/*
+  The rs field in the profile header of any object points to its retainer
+  set in an indirect way: if flip is 0, it points to the retainer set;
+  if flip is 1, it points to the next byte after the retainer set (even
+  for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual
+  pointer. See retainerSetOf().
+ */
+
+// extract the retainer set field from c
+#define RSET(c)   ((c)->header.prof.hp.rs)
+
+static StgWord flip = 0;     // flip bit
+                      // must be 0 if DEBUG_RETAINER is on (for static closures)
+
+#define isRetainerSetFieldValid(c) \
+  ((((StgWord)(c)->header.prof.hp.rs & 1) ^ flip) == 0)
+
+#define setRetainerSetToNull(c)   \
+  (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip)
+
+static void retainStack(StgClosure *, StgClosure *, StgClosure *, StgPtr, StgPtr);
+static void retainClosure(StgClosure *, StgClosure *, StgClosure *);
+#ifdef DEBUG_RETAINER
+static void belongToHeap(StgPtr p);
+#endif
+
+#ifdef DEBUG_RETAINER
+/*
+  cStackSize records how many times retainStack() has been invoked recursively,
+  that is, the number of activation records for retainStack() on the C stack.
+  maxCStackSize records its max value.
+  Invariants:
+    cStackSize <= maxCStackSize
+ */
+static nat cStackSize, maxCStackSize;
+
+static nat sumOfNewCost;       // sum of the cost of each object, computed
+                               // when the object is first visited
+static nat sumOfNewCostExtra;   // for those objects not visited during
+                                // retainer profiling, e.g., MUT_VAR
+static nat costArray[N_CLOSURE_TYPES];
+
+nat sumOfCostLinear;           // sum of the costs of all objects, computed
+                               // when linearly traversing the heap after
+                               // retainer profiling
+nat costArrayLinear[N_CLOSURE_TYPES];
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Retainer stack - header
+ *   Note:
+ *     Although the retainer stack implementation could be separated
+ *     from the retainer profiling engine, there does not seem to be
+ *     any advantage in doing that; the retainer stack is an integral part
+ *     of the retainer profiling engine and cannot be used elsewhere at
+ *     all.
+ * -------------------------------------------------------------------------- */
+
+typedef enum {
+    posTypeStep,
+    posTypePtrs,
+    posTypeSRT,
+} nextPosType;
+
+typedef union {
+    // fixed layout or layout specified by a field in the closure
+    StgWord step;
+
+    // layout.payload
+    struct {
+    // See StgClosureInfo in InfoTables.h
+#if SIZEOF_VOID_P == 8
+       StgWord32 pos;
+       StgWord32 ptrs;
+#else
+       StgWord16 pos;
+       StgWord16 ptrs;
+#endif
+       StgPtr payload;
+    } ptrs;
+
+    // SRT
+    struct {
+       StgClosure **srt;
+       StgClosure **srt_end;
+    } srt;
+} nextPos;
+
+typedef struct {
+    nextPosType type;
+    nextPos next;
+} stackPos;
+
+typedef struct {
+    StgClosure *c;
+    StgClosure *c_child_r;
+    stackPos info;
+} stackElement;
+
+/*
+  Invariants:
+    firstStack points to the first block group.
+    currentStack points to the block group currently being used.
+    currentStack->free == stackLimit.
+    stackTop points to the topmost byte in the stack of currentStack.
+    Unless the whole stack is empty, stackTop must point to the topmost
+    object (or byte) in the whole stack. Thus, it is only when the whole stack
+    is empty that stackTop == stackLimit (not during the execution of push()
+    and pop()).
+    stackBottom == currentStack->start.
+    stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks.
+  Note:
+    When a current stack becomes empty, stackTop is set to point to
+    the topmost element on the previous block group so as to satisfy
+    the invariants described above.
+ */
+bdescr *firstStack = NULL;
+static bdescr *currentStack;
+static stackElement *stackBottom, *stackTop, *stackLimit;
+
+/*
+  currentStackBoundary is used to mark the current stack chunk.
+  If stackTop == currentStackBoundary, it means that the current stack chunk
+  is empty. It is the responsibility of the user to keep currentStackBoundary
+  valid all the time if it is to be employed.
+ */
+static stackElement *currentStackBoundary;
+
+/*
+  stackSize records the current size of the stack.
+  maxStackSize records its high water mark.
+  Invariants:
+    stackSize <= maxStackSize
+  Note:
+    stackSize is just an estimate of the depth of the graph. The reason
+    is that some heap objects have only a single child and may not result
+    in a new element being pushed onto the stack. Therefore, at the end of
+    retainer profiling, maxStackSize + maxCStackSize is some value no greater
+    than the actual depth of the graph.
+ */
+#ifdef DEBUG_RETAINER
+static int stackSize, maxStackSize;
+#endif
+
+// number of blocks allocated for one stack
+#define BLOCKS_IN_STACK 1
+
+/* -----------------------------------------------------------------------------
+ * Add a new block group to the stack.
+ * Invariants:
+ *  currentStack->link == bd.
+ * -------------------------------------------------------------------------- */
+static inline void
+newStackBlock( bdescr *bd )
+{
+    currentStack = bd;
+    stackTop     = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
+    stackBottom  = (stackElement *)bd->start;
+    stackLimit   = (stackElement *)stackTop;
+    bd->free     = (StgPtr)stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ * Return to the previous block group.
+ * Invariants:
+ *   bd->link == currentStack.
+ * -------------------------------------------------------------------------- */
+static inline void
+returnToOldStack( bdescr *bd )
+{
+    currentStack = bd;
+    stackTop = (stackElement *)bd->free;
+    stackBottom = (stackElement *)bd->start;
+    stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
+    bd->free = (StgPtr)stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ *  Initializes the traverse stack.
+ * -------------------------------------------------------------------------- */
+static void
+initializeTraverseStack( void )
+{
+    if (firstStack != NULL) {
+       freeChain(firstStack);
+    }
+
+    firstStack = allocGroup(BLOCKS_IN_STACK);
+    firstStack->link = NULL;
+    firstStack->u.back = NULL;
+
+    newStackBlock(firstStack);
+}
+
+/* -----------------------------------------------------------------------------
+ * Frees all the block groups in the traverse stack.
+ * Invariants:
+ *   firstStack != NULL
+ * -------------------------------------------------------------------------- */
+static void
+closeTraverseStack( void )
+{
+    freeChain(firstStack);
+    firstStack = NULL;
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns rtsTrue if the whole stack is empty.
+ * -------------------------------------------------------------------------- */
+static inline rtsBool
+isEmptyRetainerStack( void )
+{
+    return (firstStack == currentStack) && stackTop == stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns rtsTrue if stackTop is at the stack boundary of the current stack,
+ * i.e., if the current stack chunk is empty.
+ * -------------------------------------------------------------------------- */
+static inline rtsBool
+isOnBoundary( void )
+{
+    return stackTop == currentStackBoundary;
+}
+
+/* -----------------------------------------------------------------------------
+ * Initializes *info from ptrs and payload.
+ * Invariants:
+ *   payload[] begins with ptrs pointers followed by non-pointers.
+ * -------------------------------------------------------------------------- */
+static inline void
+init_ptrs( stackPos *info, nat ptrs, StgPtr payload )
+{
+    info->type              = posTypePtrs;
+    info->next.ptrs.pos     = 0;
+    info->next.ptrs.ptrs    = ptrs;
+    info->next.ptrs.payload = payload;
+}
+
+/* -----------------------------------------------------------------------------
+ * Find the next object from *info.
+ * -------------------------------------------------------------------------- */
+static inline StgClosure *
+find_ptrs( stackPos *info )
+{
+    if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
+       return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
+    } else {
+       return NULL;
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ *  Initializes *info from SRT information stored in *infoTable.
+ * -------------------------------------------------------------------------- */
+static inline void
+init_srt( stackPos *info, StgInfoTable *infoTable )
+{
+    info->type = posTypeSRT;
+    info->next.srt.srt = (StgClosure **)(infoTable->srt);
+    info->next.srt.srt_end = info->next.srt.srt + infoTable->srt_len;
+}
+
+/* -----------------------------------------------------------------------------
+ * Find the next object from *info.
+ * -------------------------------------------------------------------------- */
+static inline StgClosure *
+find_srt( stackPos *info )
+{
+    StgClosure *c;
+
+    if (info->next.srt.srt < info->next.srt.srt_end) {
+       // See scavenge_srt() in GC.c for details.
+#ifdef ENABLE_WIN32_DLL_SUPPORT
+       if ((unsigned long)(*(info->next.srt.srt)) & 0x1)
+           c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1);
+       else
+           c = *(info->next.srt.srt);
+#else
+       c = *(info->next.srt.srt);
+#endif
+       info->next.srt.srt++;
+       return c;
+    } else {
+       return NULL;
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ *  push() pushes a stackElement representing the next child of *c
+ *  onto the traverse stack. If *c has no child, *first_child is set
+ *  to NULL and nothing is pushed onto the stack. If *c has only one
+ *  child, *first_child is set to that child and nothing is pushed onto
+ *  the stack.  If *c has two or more children, *first_child is set
+ *  to the first child and a stackElement representing the remaining
+ *  children is pushed onto the stack.
+ *
+ *  Invariants:
+ *     *c_child_r is the most recent retainer of *c's children.
+ *     *c is not any of TSO, PAP, or AP_UPD, which means that
+ *        there cannot be any stack objects.
+ *  Note: SRTs are considered to be children as well.
+ * -------------------------------------------------------------------------- */
+static inline void
+push( StgClosure *c, StgClosure *c_child_r, StgClosure **first_child )
+{
+    stackElement se;
+    bdescr *nbd;      // Next Block Descriptor
+
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+#endif
+
+    ASSERT(get_itbl(c)->type != TSO);
+    ASSERT(get_itbl(c)->type != PAP);
+    ASSERT(get_itbl(c)->type != AP_UPD);
+
+    //
+    // fill in se
+    //
+
+    se.c = c;
+    se.c_child_r = c_child_r;
+
+    // fill in se.info
+    switch (get_itbl(c)->type) {
+       // no child, no SRT
+    case CONSTR_0_1:
+    case CONSTR_0_2:
+    case CAF_BLACKHOLE:
+    case BLACKHOLE:
+    case SE_BLACKHOLE:
+    case SE_CAF_BLACKHOLE:
+    case ARR_WORDS:
+       *first_child = NULL;
+       return;
+
+       // one child (fixed), no SRT
+    case MUT_VAR:
+    case MUT_CONS:
+       *first_child = ((StgMutVar *)c)->var;
+       return;
+    case BLACKHOLE_BQ:
+       // blocking_queue must be TSO and the head of a linked list of TSOs.
+       // Should it be a child? Seems to be yes.
+       *first_child = (StgClosure *)((StgBlockingQueue *)c)->blocking_queue;
+       return;
+    case THUNK_SELECTOR:
+       *first_child = ((StgSelector *)c)->selectee;
+       return;
+    case IND_PERM:
+    case IND_OLDGEN_PERM:
+    case IND_OLDGEN:
+       *first_child = ((StgIndOldGen *)c)->indirectee;
+       return;
+    case CONSTR_1_0:
+    case CONSTR_1_1:
+       *first_child = c->payload[0];
+       return;
+
+       // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
+       // of the next child. We do not write separate initialization code.
+       // Also, we do not have to initialize info.type.
+
+       // two children (fixed), no SRT
+       // need to push a stackElement, but nothing to store in se.info
+    case CONSTR_2_0:
+       *first_child = c->payload[0];         // return the first pointer
+       // se.info.type = posTypeStep;
+       // se.info.next.step = 2;            // 2 = second
+       break;
+
+       // three children (fixed), no SRT
+       // need to push a stackElement
+    case MVAR:
+       // head must be TSO and the head of a linked list of TSOs.
+       // Should it be a child? Seems to be yes.
+       *first_child = (StgClosure *)((StgMVar *)c)->head;
+       // se.info.type = posTypeStep;
+       se.info.next.step = 2;            // 2 = second
+       break;
+
+       // three children (fixed), no SRT
+    case WEAK:
+       *first_child = ((StgWeak *)c)->key;
+       // se.info.type = posTypeStep;
+       se.info.next.step = 2;
+       break;
+
+       // layout.payload.ptrs, no SRT
+    case CONSTR:
+    case FOREIGN:
+    case STABLE_NAME:
+    case BCO:
+    case CONSTR_STATIC:
+       init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
+                 (StgPtr)c->payload);
+       *first_child = find_ptrs(&se.info);
+       if (*first_child == NULL)
+           return;   // no child
+       break;
+
+       // StgMutArrPtr.ptrs, no SRT
+    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_FROZEN:
+       init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
+                 (StgPtr)(((StgMutArrPtrs *)c)->payload));
+       *first_child = find_ptrs(&se.info);
+       if (*first_child == NULL)
+           return;
+       break;
+
+    // layout.payload.ptrs, SRT
+    case FUN:           // *c is a heap object.
+    case FUN_2_0:
+    case THUNK:
+    case THUNK_2_0:
+       init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
+       *first_child = find_ptrs(&se.info);
+       if (*first_child == NULL)
+           // no child from ptrs, so check SRT
+           goto srt_only;
+       break;
+
+       // 1 fixed child, SRT
+    case FUN_1_0:
+    case FUN_1_1:
+    case THUNK_1_0:
+    case THUNK_1_1:
+       *first_child = c->payload[0];
+       ASSERT(*first_child != NULL);
+       init_srt(&se.info, get_itbl(c));
+       break;
+
+    // SRT only
+    case THUNK_STATIC:
+    case FUN_STATIC:      // *c is a heap object.
+       ASSERT(get_itbl(c)->srt_len != 0);
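+       // fall through to the SRT-only cases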
+    case FUN_0_1:
+    case FUN_0_2:
+    case THUNK_0_1:
+    case THUNK_0_2:
+    srt_only:
+        init_srt(&se.info, get_itbl(c));
+       *first_child = find_srt(&se.info);
+       if (*first_child == NULL)
+           return;     // no child
+       break;
+
+       // cannot appear
+    case PAP:
+    case AP_UPD:
+    case TSO:
+    case IND_STATIC:
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+    case CONSTR_NOCAF_STATIC:
+       // stack objects
+    case UPDATE_FRAME:
+    case CATCH_FRAME:
+    case STOP_FRAME:
+    case SEQ_FRAME:
+    case RET_DYN:
+    case RET_BCO:
+    case RET_SMALL:
+    case RET_VEC_SMALL:
+    case RET_BIG:
+    case RET_VEC_BIG:
+       // invalid objects
+    case IND:
+    case BLOCKED_FETCH:
+    case FETCH_ME:
+    case FETCH_ME_BQ:
+    case RBH:
+    case REMOTE_REF:
+    case EVACUATED:
+    case INVALID_OBJECT:
+    default:
+       barf("Invalid object *c in push()");
+       return;
+    }
+
+    if (stackTop - 1 < stackBottom) {
+#ifdef DEBUG_RETAINER
+       // fprintf(stderr, "push() to the next stack.\n");
+#endif
+       // currentStack->free is updated when the active stack is switched
+       // to the next stack.
+       currentStack->free = (StgPtr)stackTop;
+
+       if (currentStack->link == NULL) {
+           nbd = allocGroup(BLOCKS_IN_STACK);
+           nbd->link = NULL;
+           nbd->u.back = currentStack;
+           currentStack->link = nbd;
+       } else
+           nbd = currentStack->link;
+
+       newStackBlock(nbd);
+    }
+
+    // adjust stackTop (actual push)
+    stackTop--;
+    // If the size of stackElement were huge, it would be better to replace the
+    // following statement with either a memcpy() call or a switch statement
+    // on the type of the element. Currently, the size of stackElement is
+    // small enough (5 words) that this direct assignment is fine.
+    *stackTop = se;
+
+#ifdef DEBUG_RETAINER
+    stackSize++;
+    if (stackSize > maxStackSize) maxStackSize = stackSize;
+    // ASSERT(stackSize >= 0);
+    // fprintf(stderr, "stackSize = %d\n", stackSize);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ *  popOff() and popOffReal(): Pop a stackElement off the traverse stack.
+ *  Invariants:
+ *    stackTop cannot be equal to stackLimit unless the whole stack is
+ *    empty, in which case popOff() is not allowed.
+ *  Note:
+ *    You can think of popOffReal() as the part of popOff() which is
+ *    executed at the end of popOff() if necessary. Since popOff() is
+ *    likely to be executed quite often while popOffReal() is not, we
+ *    separate popOffReal() from popOff(), which is declared as an
+ *    inline function (for the sake of execution speed).  popOffReal()
+ *    is called only within popOff() and nowhere else.
+ * -------------------------------------------------------------------------- */
+static void
+popOffReal(void)
+{
+    bdescr *pbd;    // Previous Block Descriptor
+
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "pop() to the previous stack.\n");
+#endif
+
+    ASSERT(stackTop + 1 == stackLimit);
+    ASSERT(stackBottom == (stackElement *)currentStack->start);
+
+    if (firstStack == currentStack) {
+       // The stack is completely empty.
+       stackTop++;
+       ASSERT(stackTop == stackLimit);
+#ifdef DEBUG_RETAINER
+       stackSize--;
+       if (stackSize > maxStackSize) maxStackSize = stackSize;
+       /*
+         ASSERT(stackSize >= 0);
+         fprintf(stderr, "stackSize = %d\n", stackSize);
+       */
+#endif
+       return;
+    }
+
+    // currentStack->free is updated when the active stack is switched back
+    // to the previous stack.
+    currentStack->free = (StgPtr)stackLimit;
+
+    // find the previous block descriptor
+    pbd = currentStack->u.back;
+    ASSERT(pbd != NULL);
+
+    returnToOldStack(pbd);
+
+#ifdef DEBUG_RETAINER
+    stackSize--;
+    if (stackSize > maxStackSize) maxStackSize = stackSize;
+    /*
+      ASSERT(stackSize >= 0);
+      fprintf(stderr, "stackSize = %d\n", stackSize);
+    */
+#endif
+}
+
+static inline void
+popOff(void) {
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+#endif
+
+    ASSERT(stackTop != stackLimit);
+    ASSERT(!isEmptyRetainerStack());
+
+    // <= (instead of <) is wrong!
+    if (stackTop + 1 < stackLimit) {
+       stackTop++;
+#ifdef DEBUG_RETAINER
+       stackSize--;
+       if (stackSize > maxStackSize) maxStackSize = stackSize;
+       /*
+         ASSERT(stackSize >= 0);
+         fprintf(stderr, "stackSize = %d\n", stackSize);
+       */
+#endif
+       return;
+    }
+
+    popOffReal();
+}
+
+/* -----------------------------------------------------------------------------
+ *  Finds the next object to be considered for retainer profiling and stores
+ *  its pointer in *c.
+ *  Tests whether the topmost stack element indicates that more objects are
+ *  left, and if so, retrieves the first object and stores its pointer in *c.
+ *  Also sets *cp and *r appropriately; both are stored in the stack element.
+ *  The topmost stack element is then overwritten so that it now denotes the
+ *  next object.
+ *  If the topmost stack element indicates that no more objects are left, pops
+ *  off stack elements until either an object can be retrieved or
+ *  the current stack chunk becomes empty, indicated by rtsTrue returned by
+ *  isOnBoundary(), in which case *c is set to NULL.
+ *  Note:
+ *    It is okay to call this function even when the current stack chunk
+ *    is empty.
+ * -------------------------------------------------------------------------- */
+static inline void
+pop( StgClosure **c, StgClosure **cp, StgClosure **r )
+{
+    stackElement *se;
+
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary);
+#endif
+
+    do {
+       if (isOnBoundary()) {     // if the current stack chunk is depleted
+           *c = NULL;
+           return;
+       }
+
+       se = stackTop;
+
+       switch (get_itbl(se->c)->type) {
+           // two children (fixed), no SRT
+           // nothing in se.info
+       case CONSTR_2_0:
+           *c = se->c->payload[1];
+           *cp = se->c;
+           *r = se->c_child_r;
+           popOff();
+           return;
+
+           // three children (fixed), no SRT
+           // need to push a stackElement
+       case MVAR:
+           if (se->info.next.step == 2) {
+               *c = (StgClosure *)((StgMVar *)se->c)->tail;
+               se->info.next.step++;             // move to the next step
+               // no popOff
+           } else {
+               *c = ((StgMVar *)se->c)->value;
+               popOff();
+           }
+           *cp = se->c;
+           *r = se->c_child_r;
+           return;
+
+           // three children (fixed), no SRT
+       case WEAK:
+           if (se->info.next.step == 2) {
+               *c = ((StgWeak *)se->c)->value;
+               se->info.next.step++;
+               // no popOff
+           } else {
+               *c = ((StgWeak *)se->c)->finalizer;
+               popOff();
+           }
+           *cp = se->c;
+           *r = se->c_child_r;
+           return;
+
+       case CONSTR:
+       case FOREIGN:
+       case STABLE_NAME:
+       case BCO:
+       case CONSTR_STATIC:
+           // StgMutArrPtr.ptrs, no SRT
+       case MUT_ARR_PTRS:
+       case MUT_ARR_PTRS_FROZEN:
+           *c = find_ptrs(&se->info);
+           if (*c == NULL) {
+               popOff();
+               break;
+           }
+           *cp = se->c;
+           *r = se->c_child_r;
+           return;
+
+           // layout.payload.ptrs, SRT
+       case FUN:         // always a heap object
+       case FUN_2_0:
+       case THUNK:
+       case THUNK_2_0:
+           if (se->info.type == posTypePtrs) {
+               *c = find_ptrs(&se->info);
+               if (*c != NULL) {
+                   *cp = se->c;
+                   *r = se->c_child_r;
+                   return;
+               }
+               init_srt(&se->info, get_itbl(se->c));
+           }
+           // fall through
+
+           // SRT
+       case THUNK_STATIC:
+       case FUN_STATIC:
+       case FUN_0_1:
+       case FUN_0_2:
+       case THUNK_0_1:
+       case THUNK_0_2:
+       case FUN_1_0:
+       case FUN_1_1:
+       case THUNK_1_0:
+       case THUNK_1_1:
+           *c = find_srt(&se->info);
+           if (*c != NULL) {
+               *cp = se->c;
+               *r = se->c_child_r;
+               return;
+           }
+           popOff();
+           break;
+
+           // no child (fixed), no SRT
+       case CONSTR_0_1:
+       case CONSTR_0_2:
+       case CAF_BLACKHOLE:
+       case BLACKHOLE:
+       case SE_BLACKHOLE:
+       case SE_CAF_BLACKHOLE:
+       case ARR_WORDS:
+           // one child (fixed), no SRT
+       case MUT_VAR:
+       case MUT_CONS:
+       case BLACKHOLE_BQ:
+       case THUNK_SELECTOR:
+       case IND_PERM:
+       case IND_OLDGEN_PERM:
+       case IND_OLDGEN:
+       case CONSTR_1_1:
+           // cannot appear
+       case PAP:
+       case AP_UPD:
+       case TSO:
+       case IND_STATIC:
+       case CONSTR_INTLIKE:
+       case CONSTR_CHARLIKE:
+       case CONSTR_NOCAF_STATIC:
+           // stack objects
+       case RET_DYN:
+       case UPDATE_FRAME:
+       case CATCH_FRAME:
+       case STOP_FRAME:
+       case SEQ_FRAME:
+       case RET_BCO:
+       case RET_SMALL:
+       case RET_VEC_SMALL:
+       case RET_BIG:
+       case RET_VEC_BIG:
+           // invalid objects
+       case IND:
+       case BLOCKED_FETCH:
+       case FETCH_ME:
+       case FETCH_ME_BQ:
+       case RBH:
+       case REMOTE_REF:
+       case EVACUATED:
+       case INVALID_OBJECT:
+       default:
+           barf("Invalid object *c in pop()");
+           return;
+       }
+    } while (rtsTrue);
+}
+
+/* -----------------------------------------------------------------------------
+ * RETAINER PROFILING ENGINE
+ * -------------------------------------------------------------------------- */
+
+void
+initRetainerProfiling( void )
+{
+    initializeAllRetainerSet();
+    retainerGeneration = 0;
+}
+
+/* -----------------------------------------------------------------------------
+ *  This function must be called before f-closing prof_file.
+ * -------------------------------------------------------------------------- */
+void
+endRetainerProfiling( void )
+{
+#ifdef SECOND_APPROACH
+    outputAllRetainerSet(prof_file);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ *  Returns the actual pointer to the retainer set of the closure *c.
+ *  It may adjust RSET(c) subject to flip.
+ *  Side effects:
+ *    RSET(c) is initialized to NULL if its current value does not
+ *    conform to flip.
+ *  Note:
+ *    Even though this function has side effects, they CAN be ignored because
+ *    subsequent calls to retainerSetOf() always result in the same return value
+ *    and retainerSetOf() is the only way to retrieve retainerSet of a given
+ *    closure.
+ *    We have to perform an XOR (^) operation each time a closure is examined.
+ *    The reason is that we do not know when a closure is visited last.
+ * -------------------------------------------------------------------------- */
+static inline void
+maybeInitRetainerSet( StgClosure *c )
+{
+    if (!isRetainerSetFieldValid(c)) {
+       setRetainerSetToNull(c);
+    }
+}
+
+static inline RetainerSet *
+retainerSetOf( StgClosure *c )
+{
+    ASSERT( isRetainerSetFieldValid(c) );
+    // StgWord has the same size as pointers, so the following type
+    // casting is okay.
+    return (RetainerSet *)((StgWord)RSET(c) ^ flip);
+}
+
+/* -----------------------------------------------------------------------------
+ *  Returns the cost of the closure *c, i.e., the amount of heap memory
+ *  allocated to *c. Static objects cost 0.
+ *  The cost includes even the words allocated for profiling purposes.
+ *  Cf. costPure().
+ * -------------------------------------------------------------------------- */
+static inline nat
+cost( StgClosure *c )
+{
+    StgInfoTable *info;
+
+    info = get_itbl(c);
+    switch (info->type) {
+    case TSO:
+       return tso_sizeW((StgTSO *)c);
+
+    case THUNK:
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_2_0:
+    case THUNK_1_1:
+    case THUNK_0_2:
+       return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
+
+       // static objects
+    case CONSTR_STATIC:
+    case FUN_STATIC:
+    case THUNK_STATIC:
+       return 0;
+
+    case MVAR:
+       return sizeofW(StgMVar);
+
+    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_FROZEN:
+       return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
+
+    case AP_UPD:
+    case PAP:
+       return pap_sizeW((StgPAP *)c);
+
+    case ARR_WORDS:
+       return arr_words_sizeW((StgArrWords *)c);
+
+    case CONSTR:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_2_0:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+    case FUN:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_2_0:
+    case FUN_1_1:
+    case FUN_0_2:
+    case WEAK:
+    case MUT_VAR:
+    case MUT_CONS:
+    case CAF_BLACKHOLE:
+    case BLACKHOLE:
+    case SE_BLACKHOLE:
+    case SE_CAF_BLACKHOLE:
+    case BLACKHOLE_BQ:
+    case IND_PERM:
+    case IND_OLDGEN:
+    case IND_OLDGEN_PERM:
+    case FOREIGN:
+    case BCO:
+    case STABLE_NAME:
+       return sizeW_fromITBL(info);
+
+    case THUNK_SELECTOR:
+       return sizeofW(StgHeader) + MIN_UPD_SIZE;
+
+       /*
+         Error case
+       */
+       // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
+    case IND_STATIC:
+       // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
+       // cannot be *c, *cp, *r in the retainer profiling loop.
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+    case CONSTR_NOCAF_STATIC:
+       // Stack objects are invalid because they are never treated as
+       // legal objects during retainer profiling.
+    case UPDATE_FRAME:
+    case CATCH_FRAME:
+    case STOP_FRAME:
+    case SEQ_FRAME:
+    case RET_DYN:
+    case RET_BCO:
+    case RET_SMALL:
+    case RET_VEC_SMALL:
+    case RET_BIG:
+    case RET_VEC_BIG:
+       // other cases
+    case IND:
+    case BLOCKED_FETCH:
+    case FETCH_ME:
+    case FETCH_ME_BQ:
+    case RBH:
+    case REMOTE_REF:
+    case EVACUATED:
+    case INVALID_OBJECT:
+    default:
+       barf("Invalid object in cost(): %d", get_itbl(c)->type);
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ *  Returns the pure cost of the closure *c, i.e., the size of memory
+ *  allocated for this object without profiling.
+ *  Note & Todo:
+ *    costPure() subtracts the overhead incurred by profiling for all types
+ *    of objects except TSO. Even though the overhead in the TSO object
+ *    itself is taken into account, the additional cost due to larger
+ *    stack objects (with unnecessary retainer profiling fields) is not
+ *    considered. Still, costPure() should result in an accurate estimate
+ *    of heap use because stacks in TSO objects are allocated in large blocks.
+ *    If we get rid of the (currently unused) retainer profiling field in
+ *    all stack objects, the result will be accurate.
+ * ------------------------------------------------------------------------- */
+static inline nat
+costPure( StgClosure *c )
+{
+    nat cst;
+
+    if (!closureSatisfiesConstraints(c)) {
+       return 0;
+    }
+
+    cst = cost(c);
+
+    ASSERT(cst == 0 || cst - sizeofW(StgProfHeader) > 0);
+
+    if (cst > 0) {
+       return cst - sizeofW(StgProfHeader);
+    } else {
+       return 0;
+    }
+}
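+
+#if 0
+/* Sketch only (not compiled): the relation costPure() maintains.  Under
+   PROFILING every heap closure carries one extra StgProfHeader word, which
+   cost() counts and costPure() strips off again.  checkCostPure() is a
+   hypothetical helper used purely for illustration. */
+static void
+checkCostPure( StgClosure *c )
+{
+    if (closureSatisfiesConstraints(c) && cost(c) > 0)
+       ASSERT(costPure(c) == cost(c) - sizeofW(StgProfHeader));
+}
+#endif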
+
+/* -----------------------------------------------------------------------------
+ * Returns rtsTrue if *c is a retainer.
+ * -------------------------------------------------------------------------- */
+static inline rtsBool
+isRetainer( StgClosure *c )
+{
+    if (get_itbl(c)->prof.closure_desc != NULL &&
+       !strcmp(get_itbl(c)->prof.closure_desc, "PCS")) {
+       return rtsTrue;
+    }
+
+    switch (get_itbl(c)->type) {
+       //
+       //  True case
+       //
+       // TSOs MUST be retainers: they constitute the set of roots.
+    case TSO:
+
+       // mutable objects
+    case MVAR:
+    case MUT_VAR:
+    case MUT_CONS:
+    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_FROZEN:
+
+       // thunks are retainers.
+    case THUNK:
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_2_0:
+    case THUNK_1_1:
+    case THUNK_0_2:
+    case THUNK_SELECTOR:
+    case AP_UPD:
+
+       // Static thunks, or CAFS, are obviously retainers.
+    case THUNK_STATIC:
+
+       // WEAK objects are roots; there is separate code in which traversing
+       // begins from WEAK objects.
+    case WEAK:
+       return rtsTrue;
+
+       //
+       // False case
+       //
+
+       // constructors
+    case CONSTR:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_2_0:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+       // functions
+    case FUN:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_2_0:
+    case FUN_1_1:
+    case FUN_0_2:
+       // partial applications
+    case PAP:
+       // blackholes
+    case CAF_BLACKHOLE:
+    case BLACKHOLE:
+    case SE_BLACKHOLE:
+    case SE_CAF_BLACKHOLE:
+    case BLACKHOLE_BQ:
+       // indirection
+    case IND_PERM:
+    case IND_OLDGEN_PERM:
+    case IND_OLDGEN:
+       // static objects
+    case CONSTR_STATIC:
+    case FUN_STATIC:
+       // misc
+    case FOREIGN:
+    case STABLE_NAME:
+    case BCO:
+    case ARR_WORDS:
+       return rtsFalse;
+
+       //
+       // Error case
+       //
+       // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop.
+    case IND_STATIC:
+       // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC
+       // cannot be *c, *cp, *r in the retainer profiling loop.
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+    case CONSTR_NOCAF_STATIC:
+       // Stack objects are invalid because they are never treated as
+       // legal objects during retainer profiling.
+    case UPDATE_FRAME:
+    case CATCH_FRAME:
+    case STOP_FRAME:
+    case SEQ_FRAME:
+    case RET_DYN:
+    case RET_BCO:
+    case RET_SMALL:
+    case RET_VEC_SMALL:
+    case RET_BIG:
+    case RET_VEC_BIG:
+       // other cases
+    case IND:
+    case BLOCKED_FETCH:
+    case FETCH_ME:
+    case FETCH_ME_BQ:
+    case RBH:
+    case REMOTE_REF:
+    case EVACUATED:
+    case INVALID_OBJECT:
+    default:
+       barf("Invalid object in isRetainer(): %d", get_itbl(c)->type);
+       return rtsFalse;
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ *  Returns the retainer function value for the closure *c, i.e., R(*c).
+ *  This function does NOT return the retainer(s) of *c.
+ *  Invariants:
+ *    *c must be a retainer.
+ *  Note:
+ *    Depending on the definition of this function, the maintenance of retainer
+ *    sets can be made easier. If most retainer sets are likely to be created
+ *    again across garbage collections, refreshAllRetainerSet() in
+ *    RetainerSet.c can simply set the cost field of each retainer set.
+ *    If this is not the case, we can free all the retainer sets and
+ *    re-initialize the hash table.
+ *    See refreshAllRetainerSet() in RetainerSet.c.
+ * -------------------------------------------------------------------------- */
+static inline retainer
+getRetainerFrom( StgClosure *c )
+{
+    ASSERT(isRetainer(c));
+
+#if defined(RETAINER_SCHEME_INFO)
+    // Retainer scheme 1: retainer = info table
+    return get_itbl(c);
+#elif defined(RETAINER_SCHEME_CCS)
+    // Retainer scheme 2: retainer = cost centre stack
+    return c->header.prof.ccs;
+#elif defined(RETAINER_SCHEME_CC)
+    // Retainer scheme 3: retainer = cost centre
+    return c->header.prof.ccs->cc;
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ *  Associates the retainer set *s with the closure *c, that is, *s becomes
+ *  the retainer set of *c.
+ *  Invariants:
+ *    c != NULL
+ *    s != NULL
+ * -------------------------------------------------------------------------- */
+static inline void
+associate( StgClosure *c, RetainerSet *rsOfc, RetainerSet *s )
+{
+    nat cost_c;
+
+    cost_c = costPure(c);             // not cost(c)
+    if (rsOfc != NULL) {
+       ASSERT(rsOfc->cost >= cost_c);
+       rsOfc->cost -= cost_c;
+    }
+    // StgWord has the same size as pointers, so the following type
+    // casting is okay.
+    RSET(c) = (RetainerSet *)((StgWord)s | flip);
+    s->cost += cost_c;
+}
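+
+#if 0
+/* Usage sketch (not compiled): this is how retainClosure() below calls
+   associate() when *c is revisited and its retainer set has to grow.  The
+   costPure(c) of the closure migrates from the old set (retainerSetOfc) to
+   the new one, so the per-set cost fields always partition the live heap. */
+    associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
+#endif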
+
+/* -----------------------------------------------------------------------------
+ *  Process all the objects in the stack chunk from stackStart to stackEnd
+ *  with *c and *c_child_r being their parent and their most recent retainer,
+ *  respectively. Treat stackOptionalFun as another child of *c if it is
+ *  not NULL.
+ *  Invariants:
+ *    *c is one of the following: TSO, PAP, and AP_UPD.
+ *    If *c is AP_UPD or PAP, stackOptionalFun is not NULL. Otherwise,
+ *    it is NULL.
+ *    If *c is TSO, c == c_child_r.
+ *    stackStart < stackEnd.
+ *    RSET(c) and RSET(c_child_r) are valid, i.e., their
+ *    interpretation conforms to the current value of flip (even when they
+ *    are interpreted to be NULL).
+ *    If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete,
+ *    or ThreadKilled, which means that its stack is ready to process.
+ *  Note:
+ *    This code was almost plagiarized from GC.c! For each pointer,
+ *    retainClosure() is invoked instead of evacuate().
+ * -------------------------------------------------------------------------- */
+static void
+retainStack( StgClosure *c, StgClosure *c_child_r,
+            StgClosure *stackOptionalFun, StgPtr stackStart,
+            StgPtr stackEnd )
+{
+    stackElement *oldStackBoundary;
+    StgPtr p, q;
+    StgInfoTable *info;
+    StgWord32 bitmap;
+
+#ifdef DEBUG_RETAINER
+    cStackSize++;
+    if (cStackSize > maxCStackSize) maxCStackSize = cStackSize;
+#endif
+
+    /*
+      Each invocation of retainStack() creates a new virtual
+      stack. Since all such stacks share a single common stack, we
+      record the current currentStackBoundary, which will be restored
+      at the exit.
+    */
+    oldStackBoundary = currentStackBoundary;
+    currentStackBoundary = stackTop;
+
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary);
+#endif
+
+    if (stackOptionalFun != NULL) {
+       ASSERT(get_itbl(c)->type == AP_UPD || get_itbl(c)->type == PAP);
+       retainClosure(stackOptionalFun, c, c_child_r);
+    } else {
+       ASSERT(get_itbl(c)->type == TSO);
+       ASSERT(((StgTSO *)c)->what_next != ThreadRelocated &&
+              ((StgTSO *)c)->what_next != ThreadComplete &&
+              ((StgTSO *)c)->what_next != ThreadKilled);
+    }
+
+    p = stackStart;
+    while (p < stackEnd) {
+       q = *(StgPtr *)p;
+
+       //
+       // Note & Todo:
+       //   The correctness of retainer profiling depends on the
+       //   correctness of the two macros IS_ARG_TAG() and
+       //   LOOKS_LIKE_GHC_INFO(). Since LOOKS_LIKE_GHC_INFO() is a
+       //   rather precarious macro, the current implementation may not
+       //   be quite safe. scavenge_stack() in GC.c exploits the same
+       //   macro to identify shallow pointers; it is not clear whether
+       //   it takes further measures to discern real shallow pointers.
+       //
+       //   This can be a serious problem if a stack chunk contains a
+       //   word which looks like a pointer but is actually, say, a
+       //   word constituting a floating-point number.
+       //
+
+       // skip tagged words
+       if (IS_ARG_TAG((StgWord)q)) {
+           p += 1 + ARG_SIZE(q);
+           continue;
+       }
+
+       // check if *p is a shallow closure pointer
+       if (!LOOKS_LIKE_GHC_INFO(q)) {
+           retainClosure((StgClosure *)q, c, c_child_r);
+           p++;
+           continue;
+       }
+
+       // regular stack objects
+       info = get_itbl((StgClosure *)p);
+       switch(info->type) {
+       case RET_DYN:
+           bitmap = ((StgRetDyn *)p)->liveness;
+           p = ((StgRetDyn *)p)->payload;
+           goto small_bitmap;
+
+           // FUN and FUN_STATIC keep only their info pointer.
+       case FUN:
+       case FUN_STATIC:
+           p++;
+           goto follow_srt;
+
+       case UPDATE_FRAME:
+           retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r);
+           p += sizeofW(StgUpdateFrame);
+           continue;
+
+       case STOP_FRAME:
+       case CATCH_FRAME:
+       case SEQ_FRAME:
+       case RET_BCO:
+       case RET_SMALL:
+       case RET_VEC_SMALL:
+           bitmap = info->layout.bitmap;
+           p++;
+       small_bitmap:
+           while (bitmap != 0) {
+               if ((bitmap & 1) == 0)
+                   retainClosure((StgClosure *)*p, c, c_child_r);
+               p++;
+               bitmap = bitmap >> 1;
+           }
+       follow_srt:
+           {
+               StgClosure **srt, **srt_end;
+
+               srt = (StgClosure **)(info->srt);
+               srt_end = srt + info->srt_len;
+               for (; srt < srt_end; srt++) {
+                   // See scavenge_srt() in GC.c for details.
+#ifdef ENABLE_WIN32_DLL_SUPPORT
+                   if ((unsigned long)(*srt) & 0x1)
+                       retainClosure(*(StgClosure **)(((unsigned long)*srt & ~0x1)), c, c_child_r);
+                   else
+                       retainClosure(*srt, c, c_child_r);
+#else
+                   retainClosure(*srt, c, c_child_r);
+#endif
+               }
+           }
+           continue;
+
+       case RET_BIG:
+       case RET_VEC_BIG:
+       {
+           StgPtr q;
+           StgLargeBitmap *large_bitmap;
+           nat i;
+
+           large_bitmap = info->layout.large_bitmap;
+           p++;
+
+           for (i = 0; i < large_bitmap->size; i++) {
+               bitmap = large_bitmap->bitmap[i];
+               // q marks the end of the stack region covered by this
+               // bitmap word: one bit per stack word, i.e.,
+               // sizeof(StgWord) * 8 stack words per bitmap word.
+               q = p + sizeof(StgWord) * 8;
+               while (bitmap != 0) {
+                   if ((bitmap & 1) == 0)
+                       retainClosure((StgClosure *)*p, c, c_child_r);
+                   p++;
+                   bitmap = bitmap >> 1;
+               }
+               if (i + 1 < large_bitmap->size) {
+                   while (p < q) {
+                       retainClosure((StgClosure *)*p, c, c_child_r);
+                       p++;
+                   }
+               }
+           }
+       }
+       goto follow_srt;
+       default:
+           barf("Invalid object found in retainStack(): %d",
+                (int)(info->type));
+       }
+    }
+
+    // restore currentStackBoundary
+    currentStackBoundary = oldStackBoundary;
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary);
+#endif
+
+#ifdef DEBUG_RETAINER
+    cStackSize--;
+#endif
+}
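+
+#if 0
+/* Worked example (sketch, not compiled): how the small-bitmap loop above
+   decides which stack slots are pointers.  Clear bits denote pointers,
+   matching the stack-walking code in GC.c that this function was adapted
+   from.  exampleSmallBitmap() is purely illustrative. */
+static void
+exampleSmallBitmap( StgPtr p, StgClosure *c, StgClosure *c_child_r )
+{
+    StgWord32 bitmap = 0x5;    // binary 0101: offsets 1 and 3 hold pointers
+
+    while (bitmap != 0) {
+       if ((bitmap & 1) == 0)
+           retainClosure((StgClosure *)*p, c, c_child_r);
+       p++;
+       bitmap = bitmap >> 1;
+    }
+}
+#endif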
+
+/* -----------------------------------------------------------------------------
+ *  Compute the retainer set of *c0 and all its descendants by traversal.
+ *  *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0.
+ *  Invariants:
+ *    c0 = cp0 = r0 holds only for root objects.
+ *    RSET(cp0) and RSET(r0) are valid, i.e., their
+ *    interpretation conforms to the current value of flip (even when they
+ *    are interpreted to be NULL).
+ *    However, RSET(c0) may be corrupt, i.e., it may not conform to
+ *    the current value of flip. If it does not, during the execution
+ *    of this function, RSET(c0) must be initialized as well as all
+ *    its descendants.
+ *  Note:
+ *    stackTop must be the same at the beginning and the exit of this function.
+ *    *c0 can be TSO (as well as PAP and AP_UPD).
+ * -------------------------------------------------------------------------- */
+static void
+retainClosure( StgClosure *c0, StgClosure *cp0, StgClosure *r0 )
+{
+    // c = Current closure
+    // cp = Current closure's Parent
+    // r = current closure's most recent Retainer
+    // c_child_r = current closure's children's most recent retainer
+    // first_child = first child of c
+    StgClosure *c, *cp, *r, *c_child_r, *first_child;
+    RetainerSet *s, *retainerSetOfc;
+    retainer R_r;
+    StgWord typeOfc;
+
+#ifdef DEBUG_RETAINER
+    // StgPtr oldStackTop;
+#endif
+
+#ifdef DEBUG_RETAINER
+    // oldStackTop = stackTop;
+    // fprintf(stderr, "retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
+#endif
+
+    // (c, cp, r) = (c0, cp0, r0)
+    c = c0;
+    cp = cp0;
+    r = r0;
+    goto inner_loop;
+
+loop:
+    //fprintf(stderr, "loop");
+    // pop to (c, cp, r);
+    pop(&c, &cp, &r);
+
+    if (c == NULL) {
+#ifdef DEBUG_RETAINER
+       // fprintf(stderr, "retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
+#endif
+       return;
+    }
+
+    //fprintf(stderr, "inner_loop");
+
+inner_loop:
+    // c  = current closure under consideration,
+    // cp = current closure's parent,
+    // r  = current closure's most recent retainer
+    //
+    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
+    //   RSET(cp) and RSET(r) are valid.
+    //   RSET(c) is valid only if c has been visited before.
+    //
+    // Loop invariants (on the relation between c, cp, and r)
+    //   if cp is not a retainer, r belongs to RSET(cp).
+    //   if cp is a retainer, r == cp.
+
+    typeOfc = get_itbl(c)->type;
+
+#ifdef DEBUG_RETAINER
+    switch (typeOfc) {
+    case IND_STATIC:
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+    case CONSTR_NOCAF_STATIC:
+    case CONSTR_STATIC:
+    case THUNK_STATIC:
+    case FUN_STATIC:
+       break;
+    default:
+       if (retainerSetOf(c) == NULL) {   // first visit?
+           costArray[typeOfc] += cost(c);
+           sumOfNewCost += cost(c);
+       }
+       break;
+    }
+#endif
+
+    // special cases
+    switch (typeOfc) {
+    case TSO:
+       if (((StgTSO *)c)->what_next == ThreadComplete ||
+           ((StgTSO *)c)->what_next == ThreadKilled) {
+#ifdef DEBUG_RETAINER
+           fprintf(stderr, "ThreadComplete or ThreadKilled encountered in retainClosure()\n");
+#endif
+           goto loop;
+       }
+       if (((StgTSO *)c)->what_next == ThreadRelocated) {
+#ifdef DEBUG_RETAINER
+           fprintf(stderr, "ThreadRelocated encountered in retainClosure()\n");
+#endif
+           c = (StgClosure *)((StgTSO *)c)->link;
+           goto inner_loop;
+       }
+       break;
+
+    case IND_STATIC:
+       // We just skip IND_STATIC, so its retainer set is never computed.
+       c = ((StgIndStatic *)c)->indirectee;
+       goto inner_loop;
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+       // static objects with no pointers out, so goto loop.
+    case CONSTR_NOCAF_STATIC:
+       // Not computing the retainer set for *c is not merely an
+       // optimization; it is mandatory, because CONSTR_NOCAF_STATIC
+       // closures are not reachable from scavenged_static_objects, the
+       // list from which we are assumed to traverse all static objects
+       // after major garbage collections.
+       goto loop;
+    case THUNK_STATIC:
+    case FUN_STATIC:
+       if (get_itbl(c)->srt_len == 0) {
+           // No need to compute the retainer set; no dynamic objects
+           // are reachable from *c.
+           //
+           // Static objects: if we traverse all the live closures,
+           // including static closures, during each heap census then
+           // we will observe that some static closures appear and
+           // disappear.  eg. a closure may contain a pointer to a
+           // static function 'f' which is not otherwise reachable
+           // (it doesn't indirectly point to any CAFs, so it doesn't
+           // appear in any SRTs), so we would find 'f' during
+           // traversal.  However on the next sweep there may be no
+           // closures pointing to 'f'.
+           //
+           // We must therefore ignore static closures whose SRT is
+           // empty, because these are exactly the closures that may
+           // "appear".  A closure with a non-empty SRT, and which is
+           // still required, will always be reachable.
+           //
+           // But what about CONSTR_STATIC?  Surely these may be able
+           // to appear, and they don't have SRTs, so we can't
+           // check.  So for now, we're calling
+           // resetStaticObjectForRetainerProfiling() from the
+           // garbage collector to reset the retainer sets in all the
+           // reachable static objects.
+           goto loop;
+       }
+    default:
+       break;
+    }
+
+    // The above objects are ignored in computing the average number of times
+    // an object is visited.
+    timesAnyObjectVisited++;
+
+    // If this is the first visit to c, initialize its retainer set.
+    maybeInitRetainerSet(c);
+    retainerSetOfc = retainerSetOf(c);
+
+    // Now compute s:
+    //    isRetainer(cp) == rtsTrue => s == NULL
+    //    isRetainer(cp) == rtsFalse => s == cp.retainer
+    if (isRetainer(cp))
+       s = NULL;
+    else
+       s = retainerSetOf(cp);
+
+    // (c, cp, r, s) is available.
+    R_r = getRetainerFrom(r);
+
+    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
+    if (retainerSetOfc == NULL) {
+       // This is the first visit to *c.
+       numObjectVisited++;
+
+       if (s == NULL)
+           associate(c, NULL, singleton(R_r));
+       else
+           // s is actually the retainer set of *c!
+           associate(c, NULL, s);
+
+       // compute c_child_r
+       c_child_r = isRetainer(c) ? c : r;
+    } else {
+       // This is not the first visit to *c.
+       if (isMember(R_r, retainerSetOfc))
+           goto loop;          // no need to process child
+
+       if (s == NULL)
+           associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
+       else {
+           // s is not NULL and cp is not a retainer. This means that
+           // each time *cp is visited, so is *c. Thus, if s has
+           // exactly one more element than the current retainer set
+           // of *c, then s is also the new retainer set for *c.
+           if (s->num == retainerSetOfc->num + 1) {
+               associate(c, retainerSetOfc, s);
+           }
+           // Otherwise, just add R_r to the current retainer set of *c.
+           else {
+               associate(c, retainerSetOfc, addElement(R_r, retainerSetOfc));
+           }
+       }
+
+       if (isRetainer(c))
+           goto loop;          // no need to process child
+
+       // compute c_child_r
+       c_child_r = r;
+    }
+
+    // now, RSET() of all of *c, *cp, and *r is valid.
+    // (c, c_child_r) are available.
+
+    // process child
+
+    if (typeOfc == TSO) {
+       retainStack(c, c_child_r,
+                   NULL,
+                   ((StgTSO *)c)->sp,
+                   ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
+       // no more children
+       goto loop;
+    } else if (typeOfc == PAP) {
+       retainStack(c, c_child_r,
+                   ((StgPAP *)c)->fun,
+                   (StgPtr)((StgPAP *)c)->payload,
+                   (StgPtr)((StgPAP *)c)->payload + ((StgPAP *)c)->n_args);
+       // no more children
+       goto loop;
+    } else if (typeOfc == AP_UPD) {
+       retainStack(c, c_child_r,
+                   ((StgAP_UPD *)c)->fun,
+                   (StgPtr)((StgAP_UPD *)c)->payload,
+                   (StgPtr)((StgAP_UPD *)c)->payload +
+                            ((StgAP_UPD *)c)->n_args);
+       // no more children
+       goto loop;
+    }
+
+    push(c, c_child_r, &first_child);
+
+    // If first_child is null, c has no child.
+    // If first_child is not null, the top stack element points to the next
+    // object. push() may or may not push a stackElement on the stack.
+    if (first_child == NULL)
+       goto loop;
+
+    // (c, cp, r) = (first_child, c, c_child_r)
+    r = c_child_r;
+    cp = c;
+    c = first_child;
+    goto inner_loop;
+}
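+
+#if 0
+/* Sketch only (not compiled): the traversal that retainClosure() implements
+   with the explicit traverse stack has roughly the shape of this
+   hypothetical recursive routine (getChild() is made up for illustration).
+   The explicit stack is used instead so that deep heaps cannot overflow
+   the C stack. */
+static void
+retainClosureRec( StgClosure *c, StgClosure *cp, StgClosure *r )
+{
+    StgClosure *child, *c_child_r;
+    nat i;
+
+    // update RSET(c) from getRetainerFrom(r), as in inner_loop above
+    c_child_r = isRetainer(c) ? c : r;
+    for (i = 0; (child = getChild(c, i)) != NULL; i++)
+       retainClosureRec(child, c, c_child_r);
+}
+#endif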
+
+/* -----------------------------------------------------------------------------
+ *  Compute the retainer set for every object reachable from *tl.
+ * -------------------------------------------------------------------------- */
+static void
+retainRoot( StgClosure **tl )
+{
+    // We no longer assume that only TSOs and WEAKs are roots; any closure can
+    // be a root.
+
+    ASSERT(isEmptyRetainerStack());
+    currentStackBoundary = stackTop;
+
+    retainClosure(*tl, *tl, *tl);
+
+    // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
+    // *tl might be a TSO which is ThreadComplete, in which
+    // case we ignore it for the purposes of retainer profiling.
+}
+
+/* -----------------------------------------------------------------------------
+ *  Compute the retainer set for each of the objects in the heap.
+ * -------------------------------------------------------------------------- */
+static void
+computeRetainerSet( void )
+{
+    StgWeak *weak;
+    RetainerSet *rtl;
+    nat g;
+    StgMutClosure *ml;
+#ifdef DEBUG_RETAINER
+    RetainerSet tmpRetainerSet;
+#endif
+
+    GetRoots(retainRoot);      // for scheduler roots
+
+    // This function is called after a major GC, when key, value, and finalizer
+    // all are guaranteed to be valid, or reachable.
+    //
+    // The following code assumes that WEAK objects are considered to be roots
+    // for retainer profiling.
+    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
+       // retainRoot((StgClosure *)weak);
+       retainRoot((StgClosure **)&weak);
+
+    // The following code resets the rs field of each unvisited mutable
+    // object (computing sumOfNewCostExtra and updating costArray[] when
+    // debugging retainer profiler).
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+       ASSERT(g != 0 ||
+              (generations[g].mut_list == END_MUT_LIST &&
+               generations[g].mut_once_list == END_MUT_LIST));
+
+       // Todo:
+       // I think traversing through mut_list is unnecessary.
+       // Think about removing this part.
+       for (ml = generations[g].mut_list; ml != END_MUT_LIST;
+            ml = ml->mut_link) {
+
+           maybeInitRetainerSet((StgClosure *)ml);
+           rtl = retainerSetOf((StgClosure *)ml);
+
+#ifdef DEBUG_RETAINER
+           if (rtl == NULL) {
+               // first visit to *ml
+               // This is a violation of the interface rule!
+               RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
+
+               switch (get_itbl((StgClosure *)ml)->type) {
+               case IND_STATIC:
+                   // no cost involved
+                   break;
+               case CONSTR_INTLIKE:
+               case CONSTR_CHARLIKE:
+               case CONSTR_NOCAF_STATIC:
+               case CONSTR_STATIC:
+               case THUNK_STATIC:
+               case FUN_STATIC:
+                   barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
+                   break;
+               default:
+                   // dynamic objects
+                   costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
+                   sumOfNewCostExtra += cost((StgClosure *)ml);
+                   break;
+               }
+           }
+#endif
+       }
+
+       // Traversing through mut_once_list is, in contrast, necessary
+       // because we can find MUT_VAR objects which have not been
+       // visited during retainer profiling.
+       for (ml = generations[g].mut_once_list; ml != END_MUT_LIST;
+            ml = ml->mut_link) {
+
+           maybeInitRetainerSet((StgClosure *)ml);
+           rtl = retainerSetOf((StgClosure *)ml);
+#ifdef DEBUG_RETAINER
+           if (rtl == NULL) {
+               // first visit to *ml
+               // This is a violation of the interface rule!
+               RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);
+
+               switch (get_itbl((StgClosure *)ml)->type) {
+               case IND_STATIC:
+                   // no cost involved
+                   break;
+               case CONSTR_INTLIKE:
+               case CONSTR_CHARLIKE:
+               case CONSTR_NOCAF_STATIC:
+               case CONSTR_STATIC:
+               case THUNK_STATIC:
+               case FUN_STATIC:
+                   barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
+                   break;
+               default:
+                   // dynamic objects
+                   costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
+                   sumOfNewCostExtra += cost((StgClosure *)ml);
+                   break;
+               }
+           }
+#endif
+       }
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ *  Traverse all static objects for which we compute retainer sets,
+ *  and reset their rs fields to NULL, which is accomplished by
+ *  invoking maybeInitRetainerSet(). This function must be called
+ *  before zeroing all objects reachable from scavenged_static_objects
+ *  in the case of major garbage collections. See GarbageCollect() in
+ *  GC.c.
+ *  Note:
+ *    Must the mut_once_list of the oldest generation also be traversed?
+ *    At first sight it seems so, because if the evacuation of an object
+ *    pointed to by a static indirection object fails, it is put back on
+ *    the mut_once_list of the oldest generation.
+ *    However, this is not necessary, because static indirection objects
+ *    are just traversed through to reach dynamic objects. In other words,
+ *    they are not taken into consideration in computing retainer sets.
+ * -------------------------------------------------------------------------- */
+void
+resetStaticObjectForRetainerProfiling( void )
+{
+#ifdef DEBUG_RETAINER
+    nat count;
+#endif
+    StgClosure *p;
+
+#ifdef DEBUG_RETAINER
+    count = 0;
+#endif
+    p = scavenged_static_objects;
+    while (p != END_OF_STATIC_LIST) {
+#ifdef DEBUG_RETAINER
+       count++;
+#endif
+       switch (get_itbl(p)->type) {
+       case IND_STATIC:
+           // Since we do not compute the retainer set of any
+           // IND_STATIC object, we don't have to reset its retainer
+           // field.
+           p = IND_STATIC_LINK(p);
+           break;
+       case THUNK_STATIC:
+           maybeInitRetainerSet(p);
+           p = THUNK_STATIC_LINK(p);
+           break;
+       case FUN_STATIC:
+           maybeInitRetainerSet(p);
+           p = FUN_STATIC_LINK(p);
+           break;
+       case CONSTR_STATIC:
+           maybeInitRetainerSet(p);
+           p = STATIC_LINK(get_itbl(p), p);
+           break;
+       default:
+           barf("resetStaticObjectForRetainerProfiling: %p (%s)",
+                p, get_itbl(p)->type);
+           break;
+       }
+    }
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "count in scavenged_static_objects = %d\n", count);
+#endif
+}
+
+/* -----------------------------------------------------------------------------
+ * Perform retainer profiling.
+ * N is the oldest generation being profiled, where the generations are
+ * numbered starting at 0.
+ * Invariants:
+ * Note:
+ *   This function should be called only immediately after major garbage
+ *   collection.
+ * ------------------------------------------------------------------------- */
+void
+retainerProfile(void)
+{
+  nat allCost, numSet;
+#ifdef DEBUG_RETAINER
+  nat i;
+  nat totalHeapSize;        // total raw heap size (computed by linear scanning)
+#endif
+
+#ifdef DEBUG_RETAINER
+  fprintf(stderr, " < retainerProfile() invoked : %d>\n", retainerGeneration);
+#endif
+
+  stat_startRP();
+
+  // We haven't flipped the bit yet.
+#ifdef DEBUG_RETAINER
+  fprintf(stderr, "Before traversing:\n");
+  sumOfCostLinear = 0;
+  for (i = 0;i < N_CLOSURE_TYPES; i++)
+    costArrayLinear[i] = 0;
+  totalHeapSize = checkHeapSanityForRetainerProfiling();
+
+  fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
+  /*
+  fprintf(stderr, "costArrayLinear[] = ");
+  for (i = 0;i < N_CLOSURE_TYPES; i++)
+    fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
+  fprintf(stderr, "\n");
+  */
+
+  ASSERT(sumOfCostLinear == totalHeapSize);
+
+/*
+#define pcostArrayLinear(index) \
+  if (costArrayLinear[index] > 0) \
+    fprintf(stderr, "costArrayLinear[" #index "] = %u\n", costArrayLinear[index])
+  pcostArrayLinear(THUNK_STATIC);
+  pcostArrayLinear(FUN_STATIC);
+  pcostArrayLinear(CONSTR_STATIC);
+  pcostArrayLinear(CONSTR_NOCAF_STATIC);
+  pcostArrayLinear(CONSTR_INTLIKE);
+  pcostArrayLinear(CONSTR_CHARLIKE);
+*/
+#endif
+
+  // Now we flip the flip bit.
+  flip = flip ^ 1;
+
+#ifdef DEBUG_RETAINER
+  stackSize = 0;
+  maxStackSize = 0;
+  cStackSize = 0;
+  maxCStackSize = 0;
+#endif
+  numObjectVisited = 0;
+  timesAnyObjectVisited = 0;
+
+#ifdef DEBUG_RETAINER
+  fprintf(stderr, "During traversing:\n");
+  sumOfNewCost = 0;
+  sumOfNewCostExtra = 0;
+  for (i = 0;i < N_CLOSURE_TYPES; i++)
+    costArray[i] = 0;
+#endif
+
+  /*
+    We initialize the traverse stack each time the retainer profiling is
+    performed (because the traverse stack size varies on each retainer profiling
+    and this operation is not costly anyhow). However, we just refresh the
+    retainer sets.
+   */
+  initializeTraverseStack();
+#ifdef DEBUG_RETAINER
+  initializeAllRetainerSet();
+#else
+  refreshAllRetainerSet();
+#endif
+  computeRetainerSet();
+
+  outputRetainerSet(hp_file, &allCost, &numSet);
+
+#ifdef DEBUG_RETAINER
+  fprintf(stderr, "After traversing:\n");
+  sumOfCostLinear = 0;
+  for (i = 0;i < N_CLOSURE_TYPES; i++)
+    costArrayLinear[i] = 0;
+  totalHeapSize = checkHeapSanityForRetainerProfiling();
+
+  fprintf(stderr, "\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize);
+  ASSERT(sumOfCostLinear == totalHeapSize);
+
+  // now, compare the two results
+  /*
+    Note:
+      costArray[] must be exactly the same as costArrayLinear[].
+      Known exceptions:
+        1) Dead weak pointers, whose type is CONSTR. These objects are not
+           reachable from any roots.
+  */
+  fprintf(stderr, "Comparison:\n");
+  fprintf(stderr, "\tcostArrayLinear[] (must be empty) = ");
+  for (i = 0;i < N_CLOSURE_TYPES; i++)
+    if (costArray[i] != costArrayLinear[i])
+      // nothing should be printed except MUT_VAR after major GCs
+      fprintf(stderr, "[%u:%u] ", i, costArrayLinear[i]);
+  fprintf(stderr, "\n");
+
+  fprintf(stderr, "\tsumOfNewCost = %u\n", sumOfNewCost);
+  fprintf(stderr, "\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra);
+  fprintf(stderr, "\tcostArray[] (must be empty) = ");
+  for (i = 0;i < N_CLOSURE_TYPES; i++)
+    if (costArray[i] != costArrayLinear[i])
+      // nothing should be printed except MUT_VAR after major GCs
+      fprintf(stderr, "[%u:%u] ", i, costArray[i]);
+  fprintf(stderr, "\n");
+
+  // only for major garbage collection
+  ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear);
+#endif
+
+  // post-processing
+  closeTraverseStack();
+#ifdef DEBUG_RETAINER
+  closeAllRetainerSet();
+#else
+  // Note that there is no post-processing for the retainer sets.
+#endif
+  retainerGeneration++;
+
+  stat_endRP(
+    retainerGeneration - 1,   // retainerGeneration has just been incremented!
+#ifdef DEBUG_RETAINER
+    maxCStackSize, maxStackSize,
+#endif
+    (double)timesAnyObjectVisited / numObjectVisited,
+    allCost, numSet);
+}
+
+/* -----------------------------------------------------------------------------
+ * DEBUGGING CODE
+ * -------------------------------------------------------------------------- */
+
+#ifdef DEBUG_RETAINER
+
+#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \
+        ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1))) && \
+        ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa))
+
+static nat
+sanityCheckHeapClosure( StgClosure *c )
+{
+    StgInfoTable *info;
+
+    ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info));
+    ASSERT(!closure_STATIC(c));
+    ASSERT(LOOKS_LIKE_PTR(c));
+
+    if ((((StgWord)RSET(c) & 1) ^ flip) != 0) {
+       if (get_itbl(c)->type == CONSTR &&
+           !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") &&
+           !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) {
+           fprintf(stderr, "\tUnvisited dead weak pointer object found: c = %p\n", c);
+           costArray[get_itbl(c)->type] += cost(c);
+           sumOfNewCost += cost(c);
+       } else
+           fprintf(stderr,
+                   "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n",
+                   flip, c, get_itbl(c)->type,
+                   get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc,
+                   RSET(c));
+    } else {
+       // fprintf(stderr, "sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c));
+    }
+
+    info = get_itbl(c);
+    switch (info->type) {
+    case TSO:
+       return tso_sizeW((StgTSO *)c);
+
+    case THUNK:
+    case THUNK_1_0:
+    case THUNK_0_1:
+    case THUNK_2_0:
+    case THUNK_1_1:
+    case THUNK_0_2:
+       return stg_max(sizeW_fromITBL(info), sizeofW(StgHeader) + MIN_UPD_SIZE);
+
+    case MVAR:
+       return sizeofW(StgMVar);
+
+    case MUT_ARR_PTRS:
+    case MUT_ARR_PTRS_FROZEN:
+       return mut_arr_ptrs_sizeW((StgMutArrPtrs *)c);
+
+    case AP_UPD:
+    case PAP:
+       return pap_sizeW((StgPAP *)c);
+
+    case ARR_WORDS:
+       return arr_words_sizeW((StgArrWords *)c);
+
+    case CONSTR:
+    case CONSTR_1_0:
+    case CONSTR_0_1:
+    case CONSTR_2_0:
+    case CONSTR_1_1:
+    case CONSTR_0_2:
+    case FUN:
+    case FUN_1_0:
+    case FUN_0_1:
+    case FUN_2_0:
+    case FUN_1_1:
+    case FUN_0_2:
+    case WEAK:
+    case MUT_VAR:
+    case MUT_CONS:
+    case CAF_BLACKHOLE:
+    case BLACKHOLE:
+    case SE_BLACKHOLE:
+    case SE_CAF_BLACKHOLE:
+    case BLACKHOLE_BQ:
+    case IND_PERM:
+    case IND_OLDGEN:
+    case IND_OLDGEN_PERM:
+    case FOREIGN:
+    case BCO:
+    case STABLE_NAME:
+       return sizeW_fromITBL(info);
+
+    case THUNK_SELECTOR:
+       return sizeofW(StgHeader) + MIN_UPD_SIZE;
+
+       /*
+         Error case
+       */
+    case IND_STATIC:
+    case CONSTR_STATIC:
+    case FUN_STATIC:
+    case THUNK_STATIC:
+    case CONSTR_INTLIKE:
+    case CONSTR_CHARLIKE:
+    case CONSTR_NOCAF_STATIC:
+    case UPDATE_FRAME:
+    case CATCH_FRAME:
+    case STOP_FRAME:
+    case SEQ_FRAME:
+    case RET_DYN:
+    case RET_BCO:
+    case RET_SMALL:
+    case RET_VEC_SMALL:
+    case RET_BIG:
+    case RET_VEC_BIG:
+    case IND:
+    case BLOCKED_FETCH:
+    case FETCH_ME:
+    case FETCH_ME_BQ:
+    case RBH:
+    case REMOTE_REF:
+    case EVACUATED:
+    case INVALID_OBJECT:
+    default:
+       barf("Invalid object in sanityCheckHeapClosure(): %d",
+            get_itbl(c)->type);
+       return 0;
+    }
+}
+
+static nat
+heapCheck( bdescr *bd )
+{
+    StgPtr p;
+    static nat costSum, size;
+
+    costSum = 0;
+    while (bd != NULL) {
+       p = bd->start;
+       while (p < bd->free) {
+           size = sanityCheckHeapClosure((StgClosure *)p);
+           sumOfCostLinear += size;
+           costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
+           p += size;
+           // no need for slop check; I think slops are not used currently.
+       }
+       ASSERT(p == bd->free);
+       costSum += bd->free - bd->start;
+       bd = bd->link;
+    }
+
+    return costSum;
+}
+
+static nat
+smallObjectPoolCheck(void)
+{
+    bdescr *bd;
+    StgPtr p;
+    static nat costSum, size;
+
+    bd = small_alloc_list;
+    costSum = 0;
+
+    // first block
+    if (bd == NULL)
+       return costSum;
+
+    p = bd->start;
+    while (p < alloc_Hp) {
+       size = sanityCheckHeapClosure((StgClosure *)p);
+       sumOfCostLinear += size;
+       costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
+       p += size;
+    }
+    ASSERT(p == alloc_Hp);
+    costSum += alloc_Hp - bd->start;
+
+    bd = bd->link;
+    while (bd != NULL) {
+       p = bd->start;
+       while (p < bd->free) {
+           size = sanityCheckHeapClosure((StgClosure *)p);
+           sumOfCostLinear += size;
+           costArrayLinear[get_itbl((StgClosure *)p)->type] += size;
+           p += size;
+       }
+       ASSERT(p == bd->free);
+       costSum += bd->free - bd->start;
+       bd = bd->link;
+    }
+
+    return costSum;
+}
+
+static nat
+chainCheck(bdescr *bd)
+{
+    nat costSum, size;
+
+    costSum = 0;
+    while (bd != NULL) {
+       // bd->free - bd->start is not an accurate measurement of the
+       // object size.  Actually it is always zero, so we compute its
+       // size explicitly.
+       size = sanityCheckHeapClosure((StgClosure *)bd->start);
+       sumOfCostLinear += size;
+       costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size;
+       costSum += size;
+       bd = bd->link;
+    }
+
+    return costSum;
+}
+
+static nat
+checkHeapSanityForRetainerProfiling( void )
+{
+    nat costSum, g, s;
+
+    costSum = 0;
+    fprintf(stderr, "START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+    if (RtsFlags.GcFlags.generations == 1) {
+       costSum += heapCheck(g0s0->to_blocks);
+       fprintf(stderr, "heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+       costSum += chainCheck(g0s0->large_objects);
+       fprintf(stderr, "chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+    } else {
+       for (g = 0; g < RtsFlags.GcFlags.generations; g++)
+       for (s = 0; s < generations[g].n_steps; s++) {
+           /*
+             After all live objects have been scavenged, the garbage
+             collector may create some objects in
+             scheduleFinalizers(). These objects are created through
+             allocate(), so the small object pool or the large object
+             pool of g0s0 may not be empty.
+           */
+           if (g == 0 && s == 0) {
+               costSum += smallObjectPoolCheck();
+               fprintf(stderr, "smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+               costSum += chainCheck(generations[g].steps[s].large_objects);
+               fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+           } else {
+               costSum += heapCheck(generations[g].steps[s].blocks);
+               fprintf(stderr, "heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+               costSum += chainCheck(generations[g].steps[s].large_objects);
+               fprintf(stderr, "chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum);
+           }
+       }
+    }
+
+    return costSum;
+}
+
+void
+findPointer(StgPtr p)
+{
+    StgPtr q, r, e;
+    bdescr *bd;
+    nat g, s;
+
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+       for (s = 0; s < generations[g].n_steps; s++) {
+           // if (g == 0 && s == 0) continue;
+           bd = generations[g].steps[s].blocks;
+           for (; bd; bd = bd->link) {
+               for (q = bd->start; q < bd->free; q++) {
+                   if (*q == (StgWord)p) {
+                       r = q;
+                       while (!LOOKS_LIKE_GHC_INFO(*r)) r--;
+                       fprintf(stderr, "Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r);
+                       // return;
+                   }
+               }
+           }
+           bd = generations[g].steps[s].large_objects;
+           for (; bd; bd = bd->link) {
+               e = bd->start + cost((StgClosure *)bd->start);
+               for (q = bd->start; q < e; q++) {
+                   if (*q == (StgWord)p) {
+                       r = q;
+                       while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--;
+                       fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, r);
+                       // return;
+                   }
+               }
+           }
+       }
+    }
+}
+
+static void
+belongToHeap(StgPtr p)
+{
+    bdescr *bd;
+    nat g, s;
+
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+       for (s = 0; s < generations[g].n_steps; s++) {
+           // if (g == 0 && s == 0) continue;
+           bd = generations[g].steps[s].blocks;
+           for (; bd; bd = bd->link) {
+               if (bd->start <= p && p < bd->free) {
+                   fprintf(stderr, "Belongs to gen[%d], step[%d]", g, s);
+                   return;
+               }
+           }
+           bd = generations[g].steps[s].large_objects;
+           for (; bd; bd = bd->link) {
+               if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) {
+                   fprintf(stderr, "Found in gen[%d], large_objects: %p\n", g, bd->start);
+                   return;
+               }
+           }
+       }
+    }
+}
+#endif // DEBUG_RETAINER
+
+#endif /* PROFILING */
diff --git a/ghc/rts/RetainerProfile.h b/ghc/rts/RetainerProfile.h
new file mode 100644 (file)
index 0000000..7a2f0fb
--- /dev/null
@@ -0,0 +1,29 @@
+/* -----------------------------------------------------------------------------
+ * $Id: RetainerProfile.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer profiling interface.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifndef RETAINERPROFILE_H
+#define RETAINERPROFILE_H
+
+#ifdef PROFILING
+
+extern void  initRetainerProfiling ( void );
+extern void  endRetainerProfiling  ( void );
+extern void  printRetainer         ( FILE *, retainer );
+extern void  retainerProfile       ( void );
+extern void  resetStaticObjectForRetainerProfiling ( void );
+
+// firstStack is exported because memInventory() in Schedule.c uses it.
+#ifdef DEBUG
+extern bdescr *firstStack;
+#endif
+
+#endif /* PROFILING */
+
+#endif /* RETAINERPROFILE_H */
diff --git a/ghc/rts/RetainerSet.c b/ghc/rts/RetainerSet.c
new file mode 100644 (file)
index 0000000..709555a
--- /dev/null
@@ -0,0 +1,587 @@
+/* -----------------------------------------------------------------------------
+ * $Id: RetainerSet.c,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer set implementation for retainer profiling (see RetainerProfile.c)
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+#include "Rts.h"
+#include "Stats.h"
+#include "RtsUtils.h"
+#include "RetainerSet.h"
+#include "Arena.h"
+#include "Profiling.h"
+
+#include <string.h>
+
+#define HASH_TABLE_SIZE 255
+#define hash(hk)  ((hk) % HASH_TABLE_SIZE)
+static RetainerSet *hashTable[HASH_TABLE_SIZE];
+
+static Arena *arena;           // arena in which we store retainer sets
+
+static int nextId;              // id of next retainer set
+
+/* -----------------------------------------------------------------------------
+ * rs_MANY is a distinguished retainer set, such that
+ *
+ *        isMember(e, rs_MANY)   = True
+ *
+ *       addElement(e, rs)      = rs_MANY,   if rs->num >= maxRetainerSetSize
+ *       addElement(e, rs_MANY) = rs_MANY
+ *
+ * The point of rs_MANY is to keep the total number of retainer sets
+ * from growing too large.
+ * -------------------------------------------------------------------------- */
+RetainerSet rs_MANY = {
+    num : 0,
+    cost : 0,
+    hashKey : 0,
+    link : NULL,
+    id : 1,
+    element : {}
+};
+
+nat maxRetainerSetSize = 16;
+
+/* -----------------------------------------------------------------------------
+ * calculate the size of a RetainerSet structure
+ * -------------------------------------------------------------------------- */
+static inline size_t
+sizeofRetainerSet( int elems )
+{
+    return (sizeof(RetainerSet) + elems * sizeof(retainer));
+}
+
+/* -----------------------------------------------------------------------------
+ * Allocates the arena in which retainer sets are stored and initializes
+ * hashTable[].
+ * -------------------------------------------------------------------------- */
+void
+initializeAllRetainerSet(void)
+{
+    int i;
+
+    arena = newArena();
+
+    for (i = 0; i < HASH_TABLE_SIZE; i++)
+       hashTable[i] = NULL;
+    nextId = 2;   // Initial value must be positive; id 1 is reserved for rs_MANY.
+}
+
+/* -----------------------------------------------------------------------------
+ * Refreshes all retainer sets for reuse: under the first approach the arena
+ * and hashTable[] are re-initialized; under the second only the costs are reset.
+ * -------------------------------------------------------------------------- */
+void
+refreshAllRetainerSet(void)
+{
+    int i;
+
+    // Choose one of the following two approaches.
+
+#ifdef FIRST_APPROACH
+    // first approach: completely refresh
+    arenaFree(arena);
+    arena = newArena();
+
+    for (i = 0; i < HASH_TABLE_SIZE; i++)
+       hashTable[i] = NULL;
+    nextId = 2;
+#endif // FIRST_APPROACH
+
+#ifdef SECOND_APPROACH
+    // second approach: leave all the retainer sets for reuse
+    RetainerSet *rs;
+    for (i = 0;i < HASH_TABLE_SIZE; i++) {
+       rs = hashTable[i];
+       while (rs != NULL) {
+           rs->cost = 0;
+           rs = rs->link;
+       }
+    }
+    rs_MANY.cost = 0;
+#endif // SECOND_APPROACH
+}
+
+/* -----------------------------------------------------------------------------
+ * Frees the arena in which all retainer sets are stored.
+ * -------------------------------------------------------------------------- */
+void
+closeAllRetainerSet(void)
+{
+    arenaFree(arena);
+}
+
+/* -----------------------------------------------------------------------------
+ *  Finds, or creates if needed, a singleton retainer set.
+ * -------------------------------------------------------------------------- */
+RetainerSet *
+singleton(retainer r)
+{
+    RetainerSet *rs;
+    StgWord hk;
+
+    hk = hashKeySingleton(r);
+    for (rs = hashTable[hash(hk)]; rs != NULL; rs = rs->link)
+       if (rs->num == 1 &&  rs->element[0] == r) return rs;    // found it
+
+    // create it
+    rs = arenaAlloc( arena, sizeofRetainerSet(1) );
+    rs->num = 1;
+    rs->cost = 0;
+    rs->hashKey = hk;
+    rs->link = hashTable[hash(hk)];
+    rs->id = nextId++;
+    rs->element[0] = r;
+
+    // The new retainer set is placed at the head of the linked list.
+    hashTable[hash(hk)] = rs;
+
+    return rs;
+}
+
+/* -----------------------------------------------------------------------------
+ *   Finds or creates a retainer set *rs augmented with r.
+ *   Invariants:
+ *     r is not a member of rs, i.e., isMember(r, rs) returns rtsFalse.
+ *     rs is not NULL.
+ *   Note:
+ *     We could check if rs is NULL, in which case this function call
+ *     reverts to singleton(). We do not choose this strategy because
+ *     in most cases addElement() is invoked with non-NULL rs.
+ * -------------------------------------------------------------------------- */
+RetainerSet *
+addElement(retainer r, RetainerSet *rs)
+{
+    nat i;
+    nat nl;             // Number of retainers in *rs Less than r
+    RetainerSet *nrs;   // New Retainer Set
+    StgWord hk;         // Hash Key
+
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "addElement(%p, %p) = ", r, rs);
+#endif
+
+    ASSERT(rs != NULL);
+    ASSERT(rs->num <= maxRetainerSetSize);
+
+    if (rs == &rs_MANY || rs->num == maxRetainerSetSize) {
+       return &rs_MANY;
+    }
+
+    ASSERT(!isMember(r, rs));
+
+    for (nl = 0; nl < rs->num; nl++)
+       if (r < rs->element[nl]) break;
+    // Now nl is the index for r into the new set.
+    // Also it denotes the number of retainers less than r in *rs.
+    // Thus, compare the first nl retainers, then r itself, and finally the
+    // remaining (rs->num - nl) retainers.
+
+    hk = hashKeyAddElement(r, rs);
+    for (nrs = hashTable[hash(hk)]; nrs != NULL; nrs = nrs->link) {
+       // test *rs and *nrs for equality
+
+       // check their size
+       if (rs->num + 1 != nrs->num) continue;
+
+       // compare the first nl retainers and find the first non-matching one.
+       for (i = 0; i < nl; i++)
+           if (rs->element[i] != nrs->element[i]) break;
+       if (i < nl) continue;
+
+       // compare r itself
+       if (r != nrs->element[i]) continue;       // i == nl
+
+       // compare the remaining retainers
+       for (; i < rs->num; i++)
+           if (rs->element[i] != nrs->element[i + 1]) break;
+       if (i < rs->num) continue;
+
+#ifdef DEBUG_RETAINER
+       // fprintf(stderr, "%p\n", nrs);
+#endif
+       // The set we are seeking already exists!
+       return nrs;
+    }
+
+    // create a new retainer set
+    nrs = arenaAlloc( arena, sizeofRetainerSet(rs->num + 1) );
+    nrs->num = rs->num + 1;
+    nrs->cost = 0;
+    nrs->hashKey = hk;
+    nrs->link = hashTable[hash(hk)];
+    nrs->id = nextId++;
+    for (i = 0; i < nl; i++) {              // copy the first nl retainers
+       nrs->element[i] = rs->element[i];
+    }
+    nrs->element[i] = r;                    // copy r
+    for (; i < rs->num; i++) {              // copy the remaining retainers
+       nrs->element[i + 1] = rs->element[i];
+    }
+
+    hashTable[hash(hk)] = nrs;
+
+#ifdef DEBUG_RETAINER
+    // fprintf(stderr, "%p\n", nrs);
+#endif
+    return nrs;
+}
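+
+#if 0
+/* Usage sketch (not compiled): retainer sets are always grown one retainer
+   at a time through the hash table, so structurally equal sets are shared,
+   and sets larger than maxRetainerSetSize collapse into rs_MANY.  r1 and r2
+   stand for arbitrary retainers; exampleGrowSet() is purely illustrative. */
+static void
+exampleGrowSet( retainer r1, retainer r2 )
+{
+    RetainerSet *rs;
+
+    rs = singleton(r1);                 // {r1}
+    if (!isMember(r2, rs))
+       rs = addElement(r2, rs);        // {r1,r2}, shared if it already exists
+}
+#endif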
+
+/* -----------------------------------------------------------------------------
+ *  Call f() for each retainer set.
+ * -------------------------------------------------------------------------- */
+void
+traverseAllRetainerSet(void (*f)(RetainerSet *))
+{
+    int i;
+    RetainerSet *rs;
+
+    (*f)(&rs_MANY);
+    for (i = 0; i < HASH_TABLE_SIZE; i++)
+       for (rs = hashTable[i]; rs != NULL; rs = rs->link)
+           (*f)(rs);
+}
+
+
+/* -----------------------------------------------------------------------------
+ *  printRetainer() prints the full information on a given retainer,
+ *  not a retainer set.
+ * -------------------------------------------------------------------------- */
+#if defined(RETAINER_SCHEME_INFO)
+// Retainer scheme 1: retainer = info table
+void
+printRetainer(FILE *f, retainer itbl)
+{
+    fprintf(f, "%s[%s]", itbl->prof.closure_desc, itbl->prof.closure_type);
+}
+#elif defined(RETAINER_SCHEME_CCS)
+// Retainer scheme 2: retainer = cost centre stack
+void
+printRetainer(FILE *f, retainer ccs)
+{
+    fprintCCS(f, ccs);
+}
+#elif defined(RETAINER_SCHEME_CC)
+// Retainer scheme 3: retainer = cost centre
+void
+printRetainer(FILE *f, retainer cc)
+{
+    fprintf(f,"%s.%s", cc->module, cc->label);
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ *  printRetainerSetShort() should always display the same output for
+ *  a given retainer set regardless of the time of invocation.
+ * -------------------------------------------------------------------------- */
+#ifdef SECOND_APPROACH
+#if defined(RETAINER_SCHEME_INFO)
+// Retainer scheme 1: retainer = info table
+void
+printRetainerSetShort(FILE *f, RetainerSet *rs)
+{
+#define MAX_RETAINER_SET_SPACE  24
+    char tmp[MAX_RETAINER_SET_SPACE + 1];
+    int size;
+    nat j;
+
+    ASSERT(rs->id < 0);
+
+    tmp[MAX_RETAINER_SET_SPACE] = '\0';
+
+    // No blank characters are allowed.
+    sprintf(tmp + 0, "(%d)", -(rs->id));
+    size = strlen(tmp);
+    ASSERT(size < MAX_RETAINER_SET_SPACE);
+
+    for (j = 0; j < rs->num; j++) {
+       if (j < rs->num - 1) {
+           strncpy(tmp + size, rs->element[j]->prof.closure_desc, MAX_RETAINER_SET_SPACE - size);
+           size = strlen(tmp);
+           if (size == MAX_RETAINER_SET_SPACE)
+               break;
+           strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size);
+           size = strlen(tmp);
+           if (size == MAX_RETAINER_SET_SPACE)
+               break;
+       }
+       else {
+           strncpy(tmp + size, rs->element[j]->prof.closure_desc, MAX_RETAINER_SET_SPACE - size);
+           // size = strlen(tmp);
+       }
+    }
+    fprintf(f, "%s", tmp);
+}
+#elif defined(RETAINER_SCHEME_CCS)
+// Retainer scheme 2: retainer = cost centre stack
+void
+printRetainerSetShort(FILE *f, RetainerSet *rs)
+{
+#define MAX_RETAINER_SET_SPACE  24
+    char tmp[MAX_RETAINER_SET_SPACE + 1];
+    int size;
+    nat j;
+
+    ASSERT(rs->id < 0);
+
+    tmp[MAX_RETAINER_SET_SPACE] = '\0';
+
+    // No blank characters are allowed.
+    sprintf(tmp + 0, "(%d)", -(rs->id));
+    size = strlen(tmp);
+    ASSERT(size < MAX_RETAINER_SET_SPACE);
+
+    for (j = 0; j < rs->num; j++) {
+       if (j < rs->num - 1) {
+           strncpy(tmp + size, rs->element[j]->cc->label, MAX_RETAINER_SET_SPACE - size);
+           size = strlen(tmp);
+           if (size == MAX_RETAINER_SET_SPACE)
+               break;
+           strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size);
+           size = strlen(tmp);
+           if (size == MAX_RETAINER_SET_SPACE)
+               break;
+       }
+       else {
+           strncpy(tmp + size, rs->element[j]->cc->label, MAX_RETAINER_SET_SPACE - size);
+           // size = strlen(tmp);
+       }
+    }
+    fprintf(f, tmp);
+}
+#elif defined(RETAINER_SCHEME_CC)
+// Retainer scheme 3: retainer = cost centre
+void
+printRetainerSetShort(FILE *f, RetainerSet *rs)
+{
+#define MAX_RETAINER_SET_SPACE  24
+    char tmp[MAX_RETAINER_SET_SPACE + 1];
+    int size;
+    nat j;
+
+    ASSERT(rs->id < 0);
+
+    tmp[MAX_RETAINER_SET_SPACE] = '\0';
+
+    // No blank characters are allowed.
+    sprintf(tmp + 0, "(%d)", -(rs->id));
+    size = strlen(tmp);
+    ASSERT(size < MAX_RETAINER_SET_SPACE);
+
+    for (j = 0; j < rs->num; j++) {
+       if (j < rs->num - 1) {
+           strncpy(tmp + size, rs->element[j]->label,
+                   MAX_RETAINER_SET_SPACE - size);
+           size = strlen(tmp);
+           if (size == MAX_RETAINER_SET_SPACE)
+               break;
+           strncpy(tmp + size, ",", MAX_RETAINER_SET_SPACE - size);
+           size = strlen(tmp);
+           if (size == MAX_RETAINER_SET_SPACE)
+               break;
+       }
+       else {
+           strncpy(tmp + size, rs->element[j]->label,
+                   MAX_RETAINER_SET_SPACE - size);
+           // size = strlen(tmp);
+       }
+    }
+    fprintf(f, tmp);
+/*
+  #define MAX_RETAINER_SET_SPACE  24
+  #define DOT_NUMBER              3
+  // 1. 32 > MAX_RETAINER_SET_SPACE + 1 (1 for '\0')
+  // 2. (MAX_RETAINER_SET_SPACE - DOT_NUMBER ) characters should be enough for
+  //    printing one natural number (plus '(' and ')').
+  char tmp[32];
+  int size, ts;
+  nat j;
+
+  ASSERT(rs->id < 0);
+
+  // No blank characters are allowed.
+  sprintf(tmp + 0, "(%d)", -(rs->id));
+  size = strlen(tmp);
+  ASSERT(size < MAX_RETAINER_SET_SPACE - DOT_NUMBER);
+
+  for (j = 0; j < rs->num; j++) {
+    ts = strlen(rs->element[j]->label);
+    if (j < rs->num - 1) {
+      if (size + ts + 1 > MAX_RETAINER_SET_SPACE - DOT_NUMBER) {
+        sprintf(tmp + size, "...");
+        break;
+      }
+      sprintf(tmp + size, "%s,", rs->element[j]->label);
+      size += ts + 1;
+    }
+    else {
+      if (size + ts > MAX_RETAINER_SET_SPACE - DOT_NUMBER) {
+        sprintf(tmp + size, "...");
+        break;
+      }
+      sprintf(tmp + size, "%s", rs->element[j]->label);
+      size += ts;
+    }
+  }
+  fprintf(f, tmp);
+*/
+}
+#endif /* RETAINER_SCHEME_CC */
+#endif /* SECOND_APPROACH */
+
+/* -----------------------------------------------------------------------------
+ * Print the statistics.  This function is called after each
+ * retainer profiling.  *allCost is set to the sum of all costs retained
+ * by any retainer set.  *numSet is set to the number of all
+ * retainer sets (including those with 0 cost).
+ * -------------------------------------------------------------------------- */
+void
+outputRetainerSet( FILE *hp_file, nat *allCost, nat *numSet )
+{
+    nat i;
+#ifdef FIRST_APPROACH
+    nat j;
+#endif
+    RetainerSet *rs;
+    double duration;
+
+    *allCost = 0;
+    *numSet = 0;
+    duration = mut_user_time_during_RP();
+
+    fprintf(hp_file, "MARK %f\n", duration);
+    fprintf(hp_file, "BEGIN_SAMPLE %f\n", duration);
+
+    if (rs_MANY.cost > 0) {
+       fprintf(hp_file, "MANY\t%u\n", rs_MANY.cost * sizeof(StgWord));
+    }
+
+    for (i = 0; i < HASH_TABLE_SIZE; i++) {
+       for (rs = hashTable[i]; rs != NULL; rs = rs->link) {
+           (*numSet)++;
+           /*
+             Note: if rs->cost is 0, then at least one object was
+             retained by this retainer set *rs only temporarily.  Since
+             the new retainer set of such an object (replacing *rs) is
+             strictly larger than *rs, if the cost of every object were
+             a positive quantity the following invariant would hold: if
+             rs->cost == 0, there exists a retainer set rs' such that
+             rs'->cost > 0 and rs'->num > rs->num.  However, static
+             objects cost zero, so this does not hold in general.  If
+             we set the cost of each static object to a positive
+             quantity, it should hold, which is actually the case.
+           */
+           if (rs->cost == 0)
+               continue;
+
+           *allCost += rs->cost;
+
+#ifdef SECOND_APPROACH
+           if (rs->id > 0)     // first time this set has had a positive cost?
+               rs->id = -(rs->id);     // mark as having a positive cost
+           // Now, this retainer set has a permanent negative id.
+
+           // report in the unit of bytes: * sizeof(StgWord)
+           printRetainerSetShort(hp_file, rs);
+           fprintf(hp_file, "\t%u\n", rs->cost * sizeof(StgWord));
+#endif
+
+#ifdef FIRST_APPROACH
+           fprintf(hp_file, "{");
+           for (j = 0; j < rs->num - 1; j++) {
+               printRetainer(hp_file, rs->element[j]);
+               fprintf(hp_file, ",");
+           }
+           printRetainer(hp_file, rs->element[j]);
+           fprintf(hp_file, "}\t%u\n", rs->cost * sizeof(StgWord));
+#endif
+       }
+    }
+    fprintf(hp_file, "END_SAMPLE %f\n", duration);
+}
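+
+/*
+  Illustrative sample (not part of the original source) of the block that
+  outputRetainerSet() writes to the .hp file under SECOND_APPROACH, with
+  made-up retainer labels, costs and times; a tab separates each set from
+  its cost in bytes:
+
+      MARK 1.000000
+      BEGIN_SAMPLE 1.000000
+      (1)MAIN      8000
+      (2)MAIN,foo  2400
+      END_SAMPLE 1.000000
+
+  The label text depends on the retainer scheme selected at build time
+  (see printRetainerSetShort() above).
+ */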
+
+/*
+  This function is called at the exit of the program.
+ */
+#ifdef SECOND_APPROACH
+void
+outputAllRetainerSet(FILE *prof_file)
+{
+    nat i, j;
+    nat numSet;
+    RetainerSet *rs, **rsArray, *tmp;
+
+    // find out the number of retainer sets which have had a non-zero cost at
+    // least once during retainer profiling
+    numSet = 0;
+    for (i = 0; i < HASH_TABLE_SIZE; i++)
+       for (rs = hashTable[i]; rs != NULL; rs = rs->link) {
+           if (rs->id < 0)
+               numSet++;
+       }
+
+    if (numSet == 0)      // retainer profiling was not done at all.
+       return;
+
+    // allocate memory
+    rsArray = stgMallocBytes(numSet * sizeof(RetainerSet *),
+                            "outputAllRetainerSet()");
+
+    // prepare for sorting
+    j = 0;
+    for (i = 0; i < HASH_TABLE_SIZE; i++)
+       for (rs = hashTable[i]; rs != NULL; rs = rs->link) {
+           if (rs->id < 0) {
+               rsArray[j] = rs;
+               j++;
+           }
+       }
+
+    ASSERT(j == numSet);
+
+    // sort rsArray[] according to the id of each retainer set
+    for (i = numSet - 1; i > 0; i--) {
+       for (j = 0; j <= i - 1; j++) {
+           // if (-(rsArray[j]->id) < -(rsArray[j + 1]->id))
+           if (rsArray[j]->id < rsArray[j + 1]->id) {
+               tmp = rsArray[j];
+               rsArray[j] = rsArray[j + 1];
+               rsArray[j + 1] = tmp;
+           }
+       }
+    }
+
+    fprintf(prof_file, "\nRetainer sets created during profiling:\n");
+    for (i = 0;i < numSet; i++) {
+       fprintf(prof_file, "SET %u = {", -(rsArray[i]->id));
+       for (j = 0; j < rsArray[i]->num - 1; j++) {
+           printRetainer(prof_file, rsArray[i]->element[j]);
+           fprintf(prof_file, ", ");
+       }
+       printRetainer(prof_file, rsArray[i]->element[j]);
+       fprintf(prof_file, "}\n");
+    }
+
+    free(rsArray);
+}
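+
+/*
+  Illustrative example (not part of the original source) of the section this
+  appends to <program>.prof, assuming the cost-centre retainer scheme and
+  made-up cost-centre names:
+
+      Retainer sets created during profiling:
+      SET 1 = {MAIN.MAIN}
+      SET 2 = {MAIN.MAIN, Main.foo}
+ */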
+#endif // SECOND_APPROACH
+
+#endif /* PROFILING */
diff --git a/ghc/rts/RetainerSet.h b/ghc/rts/RetainerSet.h
new file mode 100644 (file)
index 0000000..feed43e
--- /dev/null
@@ -0,0 +1,139 @@
+/* -----------------------------------------------------------------------------
+ * $Id: RetainerSet.h,v 1.1 2001/11/22 14:25:12 simonmar Exp $
+ *
+ * (c) The GHC Team, 2001
+ * Author: Sungwoo Park
+ *
+ * Retainer set interface for retainer profiling.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#ifdef PROFILING
+
+/*
+  Note: 
+    There are two ways of maintaining all retainer sets.  The first is simply to
+    free all the retainer sets and re-initialize the hash table at each
+    retainer profiling.  The second is to reset the cost field of each
+    retainer set.  The second is preferable to the first when most retainer sets
+    are likely to be observed again during the next retainer profiling.  Note
+    that in the first approach we do not actually free the memory allocated for
+    retainer sets; we just invalidate all retainer sets.
+ */
+#ifdef DEBUG_RETAINER
+// In this case, FIRST_APPROACH must be turned on because the memory pool
+// for retainer sets is freed each time.
+#define FIRST_APPROACH
+#else
+// #define FIRST_APPROACH
+#define SECOND_APPROACH
+#endif
+
+// Creates the first pool and initializes a hash table. Frees all pools if any.
+void initializeAllRetainerSet(void);
+
+// Refreshes all pools for reuse and initializes a hash table.
+void refreshAllRetainerSet(void);
+
+// Frees all pools.
+void closeAllRetainerSet(void);
+
+// Finds, or creates if needed, a singleton retainer set.
+RetainerSet *singleton(retainer r);
+
+extern RetainerSet rs_MANY;
+
+// Checks if a given retainer is a member of the retainer set.
+// 
+// Note & (maybe) Todo:
+//   This function needs to be declared as an inline function, so it is declared
+//   as an inline static function here.
+//   This makes the interface really bad, but isMember() returns a value, so
+//   it is not easy to write it as a macro either (due to my lack of C 
+//   programming experience). Sungwoo
+//
+// rtsBool isMember(retainer, retainerSet *);
+/*
+  Returns rtsTrue if r is a member of *rs.
+  Invariants:
+    rs is not NULL.
+  Note:
+    The efficiency of this function depends on the typical size of
+    retainer sets.  If they are small, a linear scan is better; if they
+    are large in most cases, a binary search is better.
+    The current implementation mixes the two search strategies.
+ */
+
+#define BINARY_SEARCH_THRESHOLD   8
+static inline rtsBool
+isMember(retainer r, RetainerSet *rs)
+{
+  int i, left, right;       // must be int, not nat (because -1 can appear)
+  retainer ri;
+
+  if (rs == &rs_MANY) { return rtsTrue; }
+
+  if (rs->num < BINARY_SEARCH_THRESHOLD) {
+    for (i = 0; i < (int)rs->num; i++) {
+      ri = rs->element[i];
+      if (r == ri) return rtsTrue;
+      else if (r < ri) return rtsFalse;
+    }
+  } else {
+    left = 0;
+    right = rs->num - 1;
+    while (left <= right) {
+      i = (left + right) / 2;
+      ri = rs->element[i];
+      if (r == ri) return rtsTrue;
+      else if (r < ri) right = i - 1;
+      else left = i + 1;
+    }
+  }
+  return rtsFalse;
+}
+
+// Finds or creates a retainer set augmented with a new retainer.
+RetainerSet *addElement(retainer, RetainerSet *);
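+
+// Illustrative usage (not from the original source): the retainer-profiling
+// traversal would typically extend a closure's retainer set only when the
+// new retainer is not already present, e.g.
+//
+//     if (!isMember(r, rs)) {
+//         rs = addElement(r, rs);
+//     }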
+
+// Call f() for each retainer set.
+void traverseAllRetainerSet(void (*f)(RetainerSet *));
+
+#ifdef SECOND_APPROACH
+// Prints a single retainer set.
+void printRetainerSetShort(FILE *, RetainerSet *);
+#endif
+
+// Prints the statistics on all the retainer sets, and stores the sum of
+// all costs and the number of all retainer sets.
+void outputRetainerSet(FILE *, nat *, nat *);
+
+#ifdef SECOND_APPROACH
+// Print all retainer sets at the exit of the program.
+void outputAllRetainerSet(FILE *);
+#endif
+
+// Hashing functions
+/*
+  Invariants:
+    Once either initializeAllRetainerSet() or refreshAllRetainerSet()
+    is called, there exists only one copy of any retainer set created
+    through singleton() and addElement().  The pool (the storage for
+    retainer sets) is consumed linearly.  All the retainer sets of the
+    same hash function value are linked together from an element in
+    hashTable[].  See the invariants of allocateInPool() for the
+    maximum size of retainer sets.  The hashing function is defined by
+    hashKeySingleton() and hashKeyAddElement(). The hash key of a set
+    must be the same regardless of the order in which its elements are
+    inserted, i.e., the hashing function must be additive.
+*/
+#define hashKeySingleton(r)       ((StgWord)(r))
+#define hashKeyAddElement(r, s)   (hashKeySingleton((r)) + (s)->hashKey)
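+// For example (assuming singleton() keys its set with hashKeySingleton(), as
+// the names suggest), building {r1, r2} as addElement(r2, singleton(r1)) or as
+// addElement(r1, singleton(r2)) yields the same hash key either way, namely
+// (StgWord)r1 + (StgWord)r2, so both orders land in the same bucket.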
+
+// Prints the full information on a given retainer.
+// Note: This function is not part of the retainerSet interface, but this is
+//       the best place to declare it.
+void printRetainer(FILE *, retainer);
+
+#endif /* PROFILING */
+
index d070c13..e23346b 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: RtsFlags.c,v 1.51 2001/10/01 11:36:28 simonmar Exp $
+ * $Id: RtsFlags.c,v 1.52 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The AQUA Project, Glasgow University, 1994-1997
  * (c) The GHC Team, 1998-1999
@@ -250,12 +250,13 @@ void initRtsFlagsDefaults(void)
 
 #ifdef PROFILING
     RtsFlags.ProfFlags.doHeapProfile      = rtsFalse;
-    RtsFlags.ProfFlags.profileFrequency   = 20;
+    RtsFlags.ProfFlags.profileInterval    = 20;
     RtsFlags.ProfFlags.showCCSOnException = rtsFalse;
     RtsFlags.ProfFlags.modSelector        = NULL;
     RtsFlags.ProfFlags.descrSelector      = NULL;
     RtsFlags.ProfFlags.typeSelector       = NULL;
     RtsFlags.ProfFlags.ccSelector         = NULL;
+
 #elif defined(DEBUG)
     RtsFlags.ProfFlags.doHeapProfile      = rtsFalse;
 #endif
@@ -417,6 +418,8 @@ usage_text[] = {
 "  -h<break-down> Heap residency profile (text) (output file <program>.prof)",
 "     break-down: C = cost centre stack (default), M = module",
 "                 D = closure description, Y = type description",
+"  -hR            Retainer profile (output files <program>.hp)",
+"  -hL            Lag/Drag/Void/Use profile (output files <program>.hp)",
 "  A subset of closures may be selected thusly:",
 "    -hc{cc, cc ...} specific cost centre(s) (NOT STACKS!)",
 "    -hm{mod,mod...} all cost centres from the specified modules(s)",
@@ -838,18 +841,53 @@ error = rtsTrue;
                PROFILING_BUILD_ONLY(
                switch (rts_argv[arg][2]) {
                  case '\0':
-                 case CCchar:
-                   RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CCS;
-                   break;
-                 case MODchar:
-                   RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_MOD;
-                   break;
-                 case DESCRchar:
-                   RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_DESCR;
-                   break;
-                 case TYPEchar:
-                   RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_TYPE;
-                   break;
+                 case 'C':
+                     if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+                         RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_CCS;
+                         break;
+                     } else {
+                         goto many_hps;
+                     }
+                 case 'M':
+                     if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+                         RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_MOD;
+                         break;
+                     } else {
+                         goto many_hps;
+                     }
+                 case 'D':
+                     if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+                         RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_DESCR;
+                         break;
+                     } else {
+                         goto many_hps;
+                     }
+                 case 'Y':
+                     if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+                         RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_TYPE;
+                         break;
+                     } else {
+                         goto many_hps;
+                     }
+                 case 'R':
+                     if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+                         RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_RETAINER;
+                         break;
+                     } else {
+                         goto many_hps;
+                     }
+                 case 'L':
+                     if (RtsFlags.ProfFlags.doHeapProfile == 0) {
+                         RtsFlags.ProfFlags.doHeapProfile = HEAP_BY_LDV;
+                         break;
+                     } else {
+                         goto many_hps;
+                     }
+               many_hps:
+                     prog_belch("multiple heap profile options");
+                     error = rtsTrue;
+                     break;
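+                     // For example, "+RTS -hC -hR -RTS" is rejected here,
+                     // rather than silently letting the last -h option win
+                     // (the previous behaviour).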
+                     
                   case 'c': /* cost centre label select */
                   case 'm': /* cost centre module select */
                   case 'd': /* closure descr select */
@@ -904,11 +942,9 @@ error = rtsTrue;
                    if (cst != 0 && cst < CS_MIN_MILLISECS)
                        cst = CS_MIN_MILLISECS;
 
-                   RtsFlags.ProfFlags.profileFrequency = cst;
+                   RtsFlags.ProfFlags.profileInterval = cst;
                }
-               
                break;
-
 #endif
 
              /* =========== CONCURRENT ========================= */
index 87c804f..69de672 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: RtsStartup.c,v 1.55 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: RtsStartup.c,v 1.56 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
@@ -33,6 +33,7 @@
 #if defined(PROFILING) || defined(DEBUG)
 # include "Profiling.h"
 # include "ProfHeap.h"
+# include "RetainerProfile.h"
 #endif
 
 #if defined(GRAN)
@@ -84,7 +85,7 @@ getProgArgv(int *argc, char **argv[])
 void
 startupHaskell(int argc, char *argv[], void (*init_root)(void))
 {
-    /* To avoid repeated initialisations of the RTS */
+   /* To avoid repeated initialisations of the RTS */
    if (rts_has_started_up)
      return;
    else
@@ -218,13 +219,14 @@ startupHaskell(int argc, char *argv[], void (*init_root)(void))
 
 /* The init functions use an explicit stack... 
  */
-#define INIT_STACK_SIZE  (BLOCK_SIZE * 4)
+#define INIT_STACK_BLOCKS  4
 F_ *init_stack = NULL;
 nat init_sp = 0;
 
 static void
 initModules ( void (*init_root)(void) )
 {
+    bdescr *bd;
 #ifdef SMP
     Capability cap;
 #else
@@ -232,7 +234,8 @@ initModules ( void (*init_root)(void) )
 #endif
 
     init_sp = 0;
-    init_stack = (F_ *)allocate(INIT_STACK_SIZE / sizeof(W_));
+    bd = allocGroup(INIT_STACK_BLOCKS);
+    init_stack = (F_ *)bd->start;
     init_stack[init_sp++] = (F_)stg_init_ret;
     init_stack[init_sp++] = (F_)__stginit_Prelude;
     if (init_root != NULL) {
@@ -241,6 +244,8 @@ initModules ( void (*init_root)(void) )
     
     cap.r.rSp = (P_)(init_stack + init_sp);
     StgRun((StgFunPtr)stg_init, &cap.r);
+
+    freeGroup(bd);
 }
 
 /* -----------------------------------------------------------------------------
@@ -272,6 +277,26 @@ shutdownHaskell(void)
   /* start timing the shutdown */
   stat_startExit();
 
+#ifdef PROFILING
+  // @LDV profiling
+  // 
+  // Note: 
+  //   We do not need to perform a major garbage collection because all the
+  //   closures created since the last census will not affect the profiling
+  //   statistics anyhow.
+  // 
+  // Note: 
+  //   We ignore any object created afterwards. 
+  //   finalizeWeakPointersNow() may corrupt the heap (because it executes 
+  //   rts_evalIO(), which adds an initial evaluation stack again).
+  //   Thus, we call LdvCensusKillAll() here, and prohibit LDV profiling
+  //   afterwards. 
+  //   Actually, it is pointless to call LdvCensusKillAll() any later because
+  //   no object created later will be taken into account for profiling.
+  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) 
+    LdvCensusKillAll();
+#endif
+
 #if !defined(GRAN)
   /* Finalize any remaining weak pointers */
   finalizeWeakPointersNow();
@@ -316,12 +341,19 @@ shutdownHaskell(void)
     }
 #endif
 
+#if defined(PROFILING) 
+  report_ccs_profiling();
+#endif
+
 #if defined(PROFILING) || defined(DEBUG)
   endProfiling();
 #endif
 
-#if defined(PROFILING) 
-  report_ccs_profiling();
+#ifdef PROFILING
+  // Originally, this was in report_ccs_profiling().  Now, retainer
+  // profiling might tack some extra stuff on to the end of this file
+  // during endProfiling().
+  fclose(prof_file);
 #endif
 
 #if defined(TICKY_TICKY)
index ec0ac22..256aab9 100644 (file)
@@ -1,5 +1,5 @@
 /* ---------------------------------------------------------------------------
- * $Id: Schedule.c,v 1.106 2001/11/08 16:17:35 simonmar Exp $
+ * $Id: Schedule.c,v 1.107 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
 #include "Stats.h"
 #include "Itimer.h"
 #include "Prelude.h"
+#ifdef PROFILING
+#include "Proftimer.h"
+#include "ProfHeap.h"
+#include "RetainerProfile.h"
+#include "LdvProfile.h"
+#endif
 #if defined(GRAN) || defined(PAR)
 # include "GranSimRts.h"
 # include "GranSim.h"
@@ -181,7 +187,6 @@ StgTSO *all_threads;
  */
 static StgTSO *suspended_ccalling_threads;
 
-static void GetRoots(evac_fn);
 static StgTSO *threadStackOverflow(StgTSO *tso);
 
 /* KH: The following two flags are shared memory locations.  There is no need
@@ -923,10 +928,14 @@ schedule( void )
      * the user specified "context switch as often as possible", with
      * +RTS -C0
      */
-    if (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
-       && (run_queue_hd != END_TSO_QUEUE
-           || blocked_queue_hd != END_TSO_QUEUE
-           || sleeping_queue != END_TSO_QUEUE))
+    if (
+#ifdef PROFILING
+       RtsFlags.ProfFlags.profileInterval == 0 ||
+#endif
+       (RtsFlags.ConcFlags.ctxtSwitchTicks == 0
+        && (run_queue_hd != END_TSO_QUEUE
+            || blocked_queue_hd != END_TSO_QUEUE
+            || sleeping_queue != END_TSO_QUEUE)))
        context_switch = 1;
     else
        context_switch = 0;
@@ -936,6 +945,10 @@ schedule( void )
     IF_DEBUG(scheduler, sched_belch("-->> Running TSO %ld (%p) %s ...", 
                              t->id, t, whatNext_strs[t->what_next]));
 
+#ifdef PROFILING
+    startHeapProfTimer();
+#endif
+
     /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
     /* Run the current thread 
      */
@@ -961,6 +974,7 @@ schedule( void )
     
     /* Costs for the scheduler are assigned to CCS_SYSTEM */
 #ifdef PROFILING
+    stopHeapProfTimer();
     CCCS = CCS_SYSTEM;
 #endif
     
@@ -1262,6 +1276,39 @@ schedule( void )
     n_free_capabilities++;
 #endif
 
+#ifdef PROFILING
+    if (RtsFlags.ProfFlags.profileInterval==0 || performHeapProfile) {
+        if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { 
+           //
+           // Note: currently retainer profiling is performed after
+           // a major garbage collection.
+           //
+           GarbageCollect(GetRoots, rtsTrue);
+           retainerProfile();
+       } else if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+           //
+           // We have LdvCensus() preceded by a major garbage
+           // collection because we don't want *genuinely* dead
+           // closures to be involved in LDV profiling. Another good
+           // reason is to produce consistent profiling results
+           // regardless of the interval at which GCs are performed.
+           // In other words, we want LDV profiling results to be
+           // completely independent of the GC interval.
+           //
+           GarbageCollect(GetRoots, rtsTrue);
+           LdvCensus();
+       } else {
+           //
+           // Normal creator-based heap profile
+           //
+           GarbageCollect(GetRoots, rtsTrue);
+           heapCensus();
+       }
+       performHeapProfile = rtsFalse;
+       ready_to_gc = rtsFalse; // we already GC'd
+    }
+#endif
+
 #ifdef SMP
     if (ready_to_gc && n_free_capabilities == RtsFlags.ParFlags.nNodes) 
 #else
@@ -2170,7 +2217,7 @@ take_off_run_queue(StgTSO *tso) {
        KH @ 25/10/99
 */
 
-static void
+void
 GetRoots(evac_fn evac)
 {
   StgMainThread *m;
index f8976eb..b81087d 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Schedule.h,v 1.24 2001/11/13 13:38:02 simonmar Exp $
+ * $Id: Schedule.h,v 1.25 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team 1998-1999
  *
@@ -93,6 +93,15 @@ void awaitEvent(rtsBool wait);  /* In Select.c */
  */
 rtsBool wakeUpSleepingThreads(nat);  /* In Select.c */
 
+/* GetRoots(evac_fn f)
+ *
+ * Call f() for each root known to the scheduler.
+ *
+ * Called from STG :  NO
+ * Locks assumed   :  ????
+ */
+void GetRoots(evac_fn);
+
 // ToDo: check whether all fcts below are used in the SMP version, too
 //@cindex awaken_blocked_queue
 #if defined(GRAN)
index 1ebd5a7..a82f395 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Stats.c,v 1.35 2001/11/20 21:39:12 sof Exp $
+ * $Id: Stats.c,v 1.36 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -19,6 +19,7 @@
 #include "Schedule.h"
 #include "Stats.h"
 #include "ParTicky.h"                       // ToDo: move into Rts.h
+#include "Profiling.h"
 
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
@@ -93,8 +94,14 @@ static TICK_TYPE ExitElapsedTime  = 0;
 static ullong GC_tot_alloc        = 0;
 static ullong GC_tot_copied       = 0;
 
-static TICK_TYPE GC_start_time,  GC_tot_time = 0;  /* User GC Time */
-static TICK_TYPE GCe_start_time, GCe_tot_time = 0; /* Elapsed GC time */
+static TICK_TYPE GC_start_time,  GC_tot_time = 0;    // User GC Time
+static TICK_TYPE GCe_start_time, GCe_tot_time = 0;   // Elapsed GC time
+
+static TICK_TYPE RP_start_time, RP_tot_time = 0;     // retainer prof user time
+static TICK_TYPE RPe_start_time, RPe_tot_time = 0;   // retainer prof elap time
+
+static TICK_TYPE LDV_start_time, LDV_tot_time = 0;   // LDV prof user time
+static TICK_TYPE LDVe_start_time, LDVe_tot_time = 0; // LDV prof elap time
 
 lnat MaxResidency = 0;     /* in words; for stats only */
 lnat AvgResidency = 0;
@@ -210,16 +217,33 @@ getTimes(void)
  *           stat_startGC() for details)
  */
 double
-mut_user_time_during_GC(void)
+mut_user_time_during_GC( void )
 {
-  return TICK_TO_DBL(GC_start_time - GC_tot_time);
+  return TICK_TO_DBL(GC_start_time - GC_tot_time - RP_tot_time - LDV_tot_time);
 }
 
 double
-mut_user_time(void)
+mut_user_time( void )
 {
     getTimes();
-    return TICK_TO_DBL(CurrentUserTime - GC_tot_time);
+    return TICK_TO_DBL(CurrentUserTime - GC_tot_time - RP_tot_time - LDV_tot_time);
+}
+
+/*
+  mut_user_time_during_RP() is similar to mut_user_time_during_GC();
+  it returns the MUT time during retainer profiling.
+  The same holds for mut_user_time_during_LDV().
+ */
+double
+mut_user_time_during_RP( void )
+{
+  return TICK_TO_DBL(RP_start_time - GC_tot_time - RP_tot_time - LDV_tot_time);
+}
+
+double
+mut_user_time_during_LDV( void )
+{
+  return TICK_TO_DBL(LDV_start_time - GC_tot_time - RP_tot_time - LDV_tot_time);
 }
 
 static nat
@@ -332,7 +356,7 @@ stat_startExit(void)
 #ifdef SMP
     MutUserTime = CurrentUserTime;
 #else
-    MutUserTime = CurrentUserTime - GC_tot_time - InitUserTime;
+    MutUserTime = CurrentUserTime - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime;
     if (MutUserTime < 0) { MutUserTime = 0; }
 #endif
 }
@@ -344,7 +368,7 @@ stat_endExit(void)
 #ifdef SMP
     ExitUserTime = CurrentUserTime - MutUserTime;
 #else
-    ExitUserTime = CurrentUserTime - MutUserTime - GC_tot_time - InitUserTime;
+    ExitUserTime = CurrentUserTime - MutUserTime - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime;
 #endif
     ExitElapsedTime = CurrentElapsedTime - MutElapsedStamp;
     if (ExitUserTime < 0) {
@@ -471,6 +495,64 @@ stat_endGC(lnat alloc, lnat collect, lnat live, lnat copied, lnat gen)
 }
 
 /* -----------------------------------------------------------------------------
+   Called at the beginning of each retainer profiling
+   -------------------------------------------------------------------------- */
+void stat_startRP(void)
+{
+  getTimes();
+  RP_start_time = CurrentUserTime;
+  RPe_start_time = CurrentElapsedTime;
+}
+
+/* -----------------------------------------------------------------------------
+   Called at the end of each retainer profiling
+   -------------------------------------------------------------------------- */
+void stat_endRP(
+  nat retainerGeneration,
+#ifdef DEBUG_RETAINER
+  nat maxCStackSize,
+  int maxStackSize,
+#endif
+  double averageNumVisit,
+  nat allCost,
+  nat numSet)
+{
+  getTimes();
+  RP_tot_time += CurrentUserTime - RP_start_time;
+  RPe_tot_time += CurrentElapsedTime - RPe_start_time;
+
+  fprintf(prof_file, "Retainer Profiling: %d, at %f seconds\n", 
+    retainerGeneration, mut_user_time_during_RP());
+#ifdef DEBUG_RETAINER
+  fprintf(prof_file, "\tMax C stack size = %u\n", maxCStackSize);
+  fprintf(prof_file, "\tMax auxiliary stack size = %u\n", maxStackSize);
+#endif
+  fprintf(prof_file, "\tAverage number of visits per object = %f\n", averageNumVisit);
+  fprintf(prof_file, "\tCurrent total costs in bytes = %u\n", allCost * sizeof(StgWord));
+  fprintf(prof_file, "\tNumber of retainer sets = %u\n\n", numSet);
+}
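+
+/*
+  Illustrative example (not part of the original source) of the entry this
+  writes to <program>.prof, with made-up numbers (tabs shown as spaces):
+
+      Retainer Profiling: 3, at 1.230000 seconds
+          Average number of visits per object = 1.500000
+          Current total costs in bytes = 40960
+          Number of retainer sets = 25
+ */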
+
+/* -----------------------------------------------------------------------------
+   Called at the beginning of each LDV profiling
+   -------------------------------------------------------------------------- */
+void stat_startLDV(void)
+{
+  getTimes();
+  LDV_start_time = CurrentUserTime;
+  LDVe_start_time = CurrentElapsedTime;
+}
+
+/* -----------------------------------------------------------------------------
+   Called at the end of each LDV profiling
+   -------------------------------------------------------------------------- */
+void stat_endLDV(void) 
+{
+  getTimes();
+  LDV_tot_time += CurrentUserTime - LDV_start_time;
+  LDVe_tot_time += CurrentElapsedTime - LDVe_start_time;
+}
+
+/* -----------------------------------------------------------------------------
    stat_workerStop
 
    Called under SMP when a worker thread finishes.  We drop the timing
@@ -598,6 +680,14 @@ stat_exit(int alloc)
                    TICK_TO_DBL(MutUserTime), TICK_TO_DBL(MutElapsedTime));
            fprintf(sf, "  GC    time  %6.2fs  (%6.2fs elapsed)\n",
                    TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time));
+#ifdef PROFILING
+      if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER)
+             fprintf(sf, "  RP    time  %6.2fs  (%6.2fs elapsed)\n",
+                     TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time));
+      if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV)
+             fprintf(sf, "  LDV   time  %6.2fs  (%6.2fs elapsed)\n",
+                     TICK_TO_DBL(LDV_tot_time), TICK_TO_DBL(LDVe_tot_time));
+#endif 
            fprintf(sf, "  EXIT  time  %6.2fs  (%6.2fs elapsed)\n",
                    TICK_TO_DBL(ExitUserTime), TICK_TO_DBL(ExitElapsedTime));
            fprintf(sf, "  Total time  %6.2fs  (%6.2fs elapsed)\n\n",
@@ -606,20 +696,20 @@ stat_exit(int alloc)
                    TICK_TO_DBL(GC_tot_time)*100/TICK_TO_DBL(time),
                    TICK_TO_DBL(GCe_tot_time)*100/TICK_TO_DBL(etime));
 
-           if (time - GC_tot_time == 0)
+           if (time - GC_tot_time - RP_tot_time - LDV_tot_time == 0)
                ullong_format_string(0, temp, rtsTrue/*commas*/);
            else
                ullong_format_string(
                    (ullong)((GC_tot_alloc*sizeof(W_))/
-                            TICK_TO_DBL(time - GC_tot_time)),
+                            TICK_TO_DBL(time - GC_tot_time - RP_tot_time - LDV_tot_time)),
                    temp, rtsTrue/*commas*/);
            
            fprintf(sf, "  Alloc rate    %s bytes per MUT second\n\n", temp);
        
            fprintf(sf, "  Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
-                   TICK_TO_DBL(time - GC_tot_time - InitUserTime) * 100 
+                   TICK_TO_DBL(time - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime) * 100 
                    / TICK_TO_DBL(time), 
-                   TICK_TO_DBL(time - GC_tot_time - InitUserTime) * 100 
+                   TICK_TO_DBL(time - GC_tot_time - RP_tot_time - LDV_tot_time - InitUserTime) * 100 
                    / TICK_TO_DBL(etime));
        }
 
index b5c9826..535bef3 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Stats.h,v 1.11 2001/07/23 17:23:20 simonmar Exp $
+ * $Id: Stats.h,v 1.12 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -14,6 +14,16 @@ extern void      stat_startGC(void);
 extern void      stat_endGC(lnat alloc, lnat collect, lnat live, 
                            lnat copied, lnat gen);
 
+extern void      stat_startRP(void);
+extern void      stat_endRP(nat, 
+#ifdef DEBUG_RETAINER
+                            nat, int, 
+#endif
+                            double, nat, nat);
+
+extern void      stat_startLDV(void);
+extern void      stat_endLDV(void);
+
 extern void      stat_startExit(void);
 extern void      stat_endExit(void);
 
@@ -23,6 +33,11 @@ extern void      stat_workerStop(void);
 extern void      initStats(void);
 
 extern double    mut_user_time_during_GC(void);
+#ifdef PROFILING
+// @retainer profiling
+extern double    mut_user_time_during_RP(void);
+extern double    mut_user_time_during_LDV(void);
+#endif
 extern double    mut_user_time(void);
 
 extern void      statDescribeGens( void );
index de36bea..63da5b1 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: StgMiscClosures.hc,v 1.69 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: StgMiscClosures.hc,v 1.70 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
@@ -288,19 +288,18 @@ STGFUN(stg_BCO_entry) {
    Entry code for an indirection.
    -------------------------------------------------------------------------- */
 
-INFO_TABLE(stg_IND_info,stg_IND_entry,1,0,IND,,EF_,0,0);
+INFO_TABLE(stg_IND_info,stg_IND_entry,1,0,IND,,EF_,"IND","IND");
 STGFUN(stg_IND_entry)
 {
     FB_
     TICK_ENT_IND(Node);        /* tick */
-
     R1.p = (P_) ((StgInd*)R1.p)->indirectee;
     TICK_ENT_VIA_NODE();
     JMP_(ENTRY_CODE(*R1.p));
     FE_
 }
 
-INFO_TABLE(stg_IND_STATIC_info,stg_IND_STATIC_entry,1,0,IND_STATIC,,EF_,0,0);
+INFO_TABLE(stg_IND_STATIC_info,stg_IND_STATIC_entry,1,0,IND_STATIC,,EF_,"IND_STATIC","IND_STATIC");
 STGFUN(stg_IND_STATIC_entry)
 {
     FB_
@@ -323,6 +322,8 @@ STGFUN(stg_IND_PERM_entry)
     TICK_ENT_PERM_IND(R1.p); /* tick */
 #endif
 
+    LDV_ENTER((StgInd *)R1.p);
+
     /* Enter PAP cost centre -- lexical scoping only */
     ENTER_CCS_PAP_CL(R1.cl);
 
@@ -353,19 +354,18 @@ STGFUN(stg_IND_PERM_entry)
     FE_
 }  
 
-INFO_TABLE(stg_IND_OLDGEN_info,stg_IND_OLDGEN_entry,1,1,IND_OLDGEN,,EF_,0,0);
+INFO_TABLE(stg_IND_OLDGEN_info,stg_IND_OLDGEN_entry,1,1,IND_OLDGEN,,EF_,"IND_OLDGEN","IND_OLDGEN");
 STGFUN(stg_IND_OLDGEN_entry)
 {
     FB_
     TICK_ENT_IND(Node);        /* tick */
-  
     R1.p = (P_) ((StgInd*)R1.p)->indirectee;
     TICK_ENT_VIA_NODE();
     JMP_(ENTRY_CODE(*R1.p));
     FE_
 }
 
-INFO_TABLE(stg_IND_OLDGEN_PERM_info,stg_IND_OLDGEN_PERM_entry,1,1,IND_OLDGEN_PERM,,EF_,0,0);
+INFO_TABLE(stg_IND_OLDGEN_PERM_info,stg_IND_OLDGEN_PERM_entry,1,1,IND_OLDGEN_PERM,,EF_,"IND_OLDGEN_PERM","IND_OLDGEN_PERM");
 STGFUN(stg_IND_OLDGEN_PERM_entry)
 {
     FB_
@@ -375,7 +375,9 @@ STGFUN(stg_IND_OLDGEN_PERM_entry)
     /* TICKY_TICKY && !PROFILING means PERM_IND *replaces* an IND, rather than being extra  */
     TICK_ENT_PERM_IND(R1.p); /* tick */
 #endif
-  
+
+    LDV_ENTER((StgInd *)R1.p);
+
     /* Enter PAP cost centre -- lexical scoping only */
     ENTER_CCS_PAP_CL(R1.cl);
 
@@ -433,7 +435,10 @@ STGFUN(stg_BLACKHOLE_entry)
 #endif
     TICK_ENT_BH();
 
-    // Put ourselves on the blocking queue for this black hole
+    // Actually this is not necessary because R1.p is about to be destroyed.
+    LDV_ENTER((StgClosure *)R1.p);
+
+    /* Put ourselves on the blocking queue for this black hole */
 #if defined(GRAN) || defined(PAR)
     // in fact, only difference is the type of the end-of-queue marker!
     CurrentTSO->link = END_BQ_QUEUE;
@@ -446,8 +451,19 @@ STGFUN(stg_BLACKHOLE_entry)
     CurrentTSO->why_blocked = BlockedOnBlackHole;
     CurrentTSO->block_info.closure = R1.cl;
 
-    // Change the CAF_BLACKHOLE into a BLACKHOLE_BQ_STATIC
+    /* Change the BLACKHOLE into a BLACKHOLE_BQ */
+#ifdef PROFILING
+
+    // The size remains the same, so we call LDV_recordDead() - no need to fill slop.
+    LDV_recordDead((StgClosure *)R1.p, BLACKHOLE_sizeW());
+#endif
+    // 
+    // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+    // 
     ((StgBlockingQueue *)R1.p)->header.info = &stg_BLACKHOLE_BQ_info;
+#ifdef PROFILING
+    LDV_recordCreate((StgClosure *)R1.p);
+#endif
 
     // closure is mutable since something has just been added to its BQ
     recordMutable((StgMutClosure *)R1.cl);
@@ -483,6 +499,7 @@ STGFUN(stg_BLACKHOLE_BQ_entry)
 #endif
 
     TICK_ENT_BH();
+    LDV_ENTER((StgClosure *)R1.p);
 
     /* Put ourselves on the blocking queue for this black hole */
     CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue;
@@ -515,7 +532,7 @@ STGFUN(stg_BLACKHOLE_BQ_entry)
 
 #if defined(PAR) || defined(GRAN)
 
-INFO_TABLE(stg_RBH_info, stg_RBH_entry,1,1,RBH,,EF_,0,0);
+INFO_TABLE(stg_RBH_info, stg_RBH_entry,1,1,RBH,,EF_,"RBH","RBH");
 STGFUN(stg_RBH_entry)
 {
   FB_
@@ -539,13 +556,13 @@ STGFUN(stg_RBH_entry)
   FE_
 }
 
-INFO_TABLE(stg_RBH_Save_0_info, stg_RBH_Save_0_entry,0,2,CONSTR,,EF_,0,0);
+INFO_TABLE(stg_RBH_Save_0_info, stg_RBH_Save_0_entry,0,2,CONSTR,,EF_,"RBH_Save_0","RBH_Save_0");
 NON_ENTERABLE_ENTRY_CODE(RBH_Save_0);
 
-INFO_TABLE(stg_RBH_Save_1_info, stg_RBH_Save_1_entry,1,1,CONSTR,,EF_,0,0);
+INFO_TABLE(stg_RBH_Save_1_info, stg_RBH_Save_1_entry,1,1,CONSTR,,EF_,"RBH_Save_1","RBH_Save_1");
 NON_ENTERABLE_ENTRY_CODE(RBH_Save_1);
 
-INFO_TABLE(stg_RBH_Save_2_info, stg_RBH_Save_2_entry,2,0,CONSTR,,EF_,0,0);
+INFO_TABLE(stg_RBH_Save_2_info, stg_RBH_Save_2_entry,2,0,CONSTR,,EF_,"RBH_Save_2","RBH_Save_2");
 NON_ENTERABLE_ENTRY_CODE(RBH_Save_2);
 #endif /* defined(PAR) || defined(GRAN) */
 
@@ -574,6 +591,7 @@ STGFUN(stg_CAF_BLACKHOLE_entry)
 #endif
 
     TICK_ENT_BH();
+    LDV_ENTER((StgClosure *)R1.p);
 
     // Put ourselves on the blocking queue for this black hole
 #if defined(GRAN) || defined(PAR)
@@ -602,7 +620,7 @@ STGFUN(stg_CAF_BLACKHOLE_entry)
 }
 
 #ifdef TICKY_TICKY
-INFO_TABLE(stg_SE_BLACKHOLE_info, stg_SE_BLACKHOLE_entry,0,2,SE_BLACKHOLE,,EF_,0,0);
+INFO_TABLE(stg_SE_BLACKHOLE_info, stg_SE_BLACKHOLE_entry,0,2,SE_BLACKHOLE,,EF_,"SE_BLACKHOLE","SE_BLACKHOLE");
 STGFUN(stg_SE_BLACKHOLE_entry)
 {
   FB_
@@ -611,7 +629,7 @@ STGFUN(stg_SE_BLACKHOLE_entry)
   FE_
 }
 
-INFO_TABLE(SE_CAF_BLACKHOLE_info, SE_CAF_BLACKHOLE_entry,0,2,SE_CAF_BLACKHOLE,,EF_,0,0);
+INFO_TABLE(SE_CAF_BLACKHOLE_info, SE_CAF_BLACKHOLE_entry,0,2,SE_CAF_BLACKHOLE,,EF_,"CAF_BLACKHOLE","CAF_BLACKHOLE");
 STGFUN(stg_SE_CAF_BLACKHOLE_entry)
 {
   FB_
@@ -622,7 +640,7 @@ STGFUN(stg_SE_CAF_BLACKHOLE_entry)
 #endif
 
 #ifdef SMP
-INFO_TABLE(stg_WHITEHOLE_info, stg_WHITEHOLE_entry,0,2,CONSTR_NOCAF_STATIC,,EF_,0,0);
+INFO_TABLE(stg_WHITEHOLE_info, stg_WHITEHOLE_entry,0,2,CONSTR_NOCAF_STATIC,,EF_,"WHITEHOLE","WHITEHOLE");
 STGFUN(stg_WHITEHOLE_entry)
 {
   FB_
@@ -645,7 +663,7 @@ NON_ENTERABLE_ENTRY_CODE(TSO);
    one is a real bug.
    -------------------------------------------------------------------------- */
 
-INFO_TABLE(stg_EVACUATED_info,stg_EVACUATED_entry,1,0,EVACUATED,,EF_,0,0);
+INFO_TABLE(stg_EVACUATED_info,stg_EVACUATED_entry,1,0,EVACUATED,,EF_,"EVACUATED","EVACUATED");
 NON_ENTERABLE_ENTRY_CODE(EVACUATED);
 
 /* -----------------------------------------------------------------------------
@@ -659,7 +677,19 @@ NON_ENTERABLE_ENTRY_CODE(EVACUATED);
 INFO_TABLE(stg_WEAK_info,stg_WEAK_entry,0,4,WEAK,,EF_,"WEAK","WEAK");
 NON_ENTERABLE_ENTRY_CODE(WEAK);
 
-INFO_TABLE_CONSTR(stg_DEAD_WEAK_info,stg_DEAD_WEAK_entry,0,1,0,CONSTR,,EF_,"DEAD_WEAK","DEAD_WEAK");
+// XXX! The garbage collector replaces a WEAK with a DEAD_WEAK
+// in-place, which causes problems if the heap is scanned linearly
+// after GC (certain kinds of profiling do this).  So when profiling,
+// we set the size of a DEAD_WEAK to 4 non-pointers, rather than its
+// usual 1.
+
+#ifdef PROFILING
+#define DEAD_WEAK_PAYLOAD_WORDS 4
+#else
+#define DEAD_WEAK_PAYLOAD_WORDS 1
+#endif
+
+INFO_TABLE_CONSTR(stg_DEAD_WEAK_info,stg_DEAD_WEAK_entry,0,DEAD_WEAK_PAYLOAD_WORDS,0,CONSTR,,EF_,"DEAD_WEAK","DEAD_WEAK");
 NON_ENTERABLE_ENTRY_CODE(DEAD_WEAK);
 
 /* -----------------------------------------------------------------------------
@@ -669,7 +699,7 @@ NON_ENTERABLE_ENTRY_CODE(DEAD_WEAK);
    finalizer in a weak pointer object.
    -------------------------------------------------------------------------- */
 
-INFO_TABLE_CONSTR(stg_NO_FINALIZER_info,stg_NO_FINALIZER_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0);
+INFO_TABLE_CONSTR(stg_NO_FINALIZER_info,stg_NO_FINALIZER_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"NO_FINALIZER","NO_FINALIZER");
 NON_ENTERABLE_ENTRY_CODE(NO_FINALIZER);
 
 SET_STATIC_HDR(stg_NO_FINALIZER_closure,stg_NO_FINALIZER_info,0/*CC*/,,EI_)
@@ -709,7 +739,7 @@ NON_ENTERABLE_ENTRY_CODE(EMPTY_MVAR);
    end of a linked TSO queue.
    -------------------------------------------------------------------------- */
 
-INFO_TABLE_CONSTR(stg_END_TSO_QUEUE_info,stg_END_TSO_QUEUE_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0);
+INFO_TABLE_CONSTR(stg_END_TSO_QUEUE_info,stg_END_TSO_QUEUE_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"END_TSO_QUEUE","END_TSO_QUEUE");
 NON_ENTERABLE_ENTRY_CODE(END_TSO_QUEUE);
 
 SET_STATIC_HDR(stg_END_TSO_QUEUE_closure,stg_END_TSO_QUEUE_info,0/*CC*/,,EI_)
@@ -723,26 +753,26 @@ SET_STATIC_HDR(stg_END_TSO_QUEUE_closure,stg_END_TSO_QUEUE_info,0/*CC*/,,EI_)
    an END_MUT_LIST closure.
    -------------------------------------------------------------------------- */
 
-INFO_TABLE_CONSTR(stg_END_MUT_LIST_info,stg_END_MUT_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0);
+INFO_TABLE_CONSTR(stg_END_MUT_LIST_info,stg_END_MUT_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"END_MUT_LIST","END_MUT_LIST");
 NON_ENTERABLE_ENTRY_CODE(END_MUT_LIST);
 
 SET_STATIC_HDR(stg_END_MUT_LIST_closure,stg_END_MUT_LIST_info,0/*CC*/,,EI_)
 , /*payload*/{} };
 
-INFO_TABLE(stg_MUT_CONS_info, stg_MUT_CONS_entry, 1, 1, MUT_CONS, , EF_, 0, 0);
+INFO_TABLE(stg_MUT_CONS_info, stg_MUT_CONS_entry, 1, 1, MUT_CONS, , EF_, "MUT_CONS", "MUT_CONS");
 NON_ENTERABLE_ENTRY_CODE(MUT_CONS);
 
 /* -----------------------------------------------------------------------------
    Exception lists
    -------------------------------------------------------------------------- */
 
-INFO_TABLE_CONSTR(stg_END_EXCEPTION_LIST_info,stg_END_EXCEPTION_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,0,0);
+INFO_TABLE_CONSTR(stg_END_EXCEPTION_LIST_info,stg_END_EXCEPTION_LIST_entry,0,0,0,CONSTR_NOCAF_STATIC,,EF_,"END_EXCEPTION_LIST","END_EXCEPTION_LIST");
 NON_ENTERABLE_ENTRY_CODE(END_EXCEPTION_LIST);
 
 SET_STATIC_HDR(stg_END_EXCEPTION_LIST_closure,stg_END_EXCEPTION_LIST_info,0/*CC*/,,EI_)
 , /*payload*/{} };
 
-INFO_TABLE(stg_EXCEPTION_CONS_info, stg_EXCEPTION_CONS_entry, 1, 1, CONSTR, , EF_, 0, 0);
+INFO_TABLE(stg_EXCEPTION_CONS_info, stg_EXCEPTION_CONS_entry, 1, 1, CONSTR, , EF_, "EXCEPTION_CONS", "EXCEPTION_CONS");
 NON_ENTERABLE_ENTRY_CODE(EXCEPTION_CONS);
 
 /* -----------------------------------------------------------------------------
@@ -804,7 +834,7 @@ STGFUN(stg_error_entry)                                                     \
    just enter the top stack word to start the thread.  (see deleteThread)
  * -------------------------------------------------------------------------- */
 
-INFO_TABLE(stg_dummy_ret_info, stg_dummy_ret_entry, 0, 0, CONSTR_NOCAF_STATIC, , EF_, 0, 0);
+INFO_TABLE(stg_dummy_ret_info, stg_dummy_ret_entry, 0, 0, CONSTR_NOCAF_STATIC, , EF_, "DUMMY_RET", "DUMMY_RET");
 STGFUN(stg_dummy_ret_entry)
 {
   W_ ret_addr;
@@ -855,7 +885,7 @@ STGFUN(stg_forceIO_ret_entry)
 }
 #endif
 
-INFO_TABLE(stg_forceIO_info,stg_forceIO_entry,1,0,FUN_STATIC,,EF_,0,0);
+INFO_TABLE(stg_forceIO_info,stg_forceIO_entry,1,0,FUN_STATIC,,EF_,"FORCE_IO","FORCE_IO");
 FN_(stg_forceIO_entry)
 {
   FB_
index 92dc701..5920e5e 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: StgStartup.hc,v 1.16 2001/09/04 18:29:21 ken Exp $
+ * $Id: StgStartup.hc,v 1.17 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -45,8 +45,8 @@
 
 EXTFUN(stg_stop_thread_entry);
 
-#ifdef PROFILING
-#define STOP_THREAD_BITMAP 1
+#if defined(PROFILING)
+#define STOP_THREAD_BITMAP 3
 #else
 #define STOP_THREAD_BITMAP 0
 #endif
index 9373dab..c3d77ac 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: StgStdThunks.hc,v 1.17 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: StgStdThunks.hc,v 1.18 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-2000
  *
@@ -32,7 +32,7 @@
 #define SAVE_CCCS(fs)          CCS_HDR(Sp-fs)=CCCS
 #define GET_SAVED_CCCS  RESTORE_CCCS(CCS_HDR(Sp))
 #define ENTER_CCS(p)    ENTER_CCS_TCL(p)
-#define RET_BITMAP 1
+#define RET_BITMAP 3
 #else
 #define SAVE_CCCS(fs)   /* empty */
 #define GET_SAVED_CCCS  /* empty */
@@ -58,6 +58,7 @@
     FB_                                                                        \
       STK_CHK_NP(UPD_FRAME_SIZE,1,);                                   \
       UPD_BH_UPDATABLE(&stg_sel_##offset##_upd_info);                  \
+      LDV_ENTER(R1.cl);                                                        \
       PUSH_UPD_FRAME(R1.p,0);                                          \
       ENTER_CCS(R1.p);                                                 \
       SAVE_CCCS(UPD_FRAME_SIZE);                                       \
@@ -103,6 +104,7 @@ SELECTOR_CODE_UPD(15);
     FB_                                                                        \
       STK_CHK_NP(NOUPD_FRAME_SIZE,1,)                                  \
       UPD_BH_SINGLE_ENTRY(&stg_sel_##offset##_noupd_info);             \
+      LDV_ENTER(R1.cl);                                                        \
       ENTER_CCS(R1.p);                                                 \
       SAVE_CCCS(NOUPD_FRAME_SIZE);                                     \
       Sp[-NOUPD_FRAME_SIZE]=(W_)&stg_sel_ret_##offset##_noupd_info;    \
@@ -163,6 +165,7 @@ FN_(stg_ap_1_upd_entry) {
   FB_
   STK_CHK_NP(sizeofW(StgUpdateFrame),1,);
   UPD_BH_UPDATABLE(&stg_ap_1_upd_info);
+  LDV_ENTER(R1.cl);
   ENTER_CCS(R1.p);
   PUSH_UPD_FRAME(R1.p,0);
   R1.p=(P_)(R1.cl->payload[0]);
@@ -176,6 +179,7 @@ FN_(stg_ap_2_upd_entry) {
   FB_
   STK_CHK_NP(sizeofW(StgUpdateFrame)+1,1,);
   UPD_BH_UPDATABLE(&stg_ap_2_upd_info);
+  LDV_ENTER(R1.cl);
   ENTER_CCS(R1.p);
   PUSH_UPD_FRAME(R1.p,0);
   Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[1]);
@@ -190,6 +194,7 @@ FN_(stg_ap_3_upd_entry) {
   FB_
   STK_CHK_NP(sizeofW(StgUpdateFrame)+2,1,);
   UPD_BH_UPDATABLE(&stg_ap_3_upd_info);
+  LDV_ENTER(R1.cl);
   ENTER_CCS(R1.p);
   PUSH_UPD_FRAME(R1.p,0);
   Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[2]);
@@ -205,6 +210,7 @@ FN_(stg_ap_4_upd_entry) {
   FB_
   STK_CHK_NP(sizeofW(StgUpdateFrame)+3,1,);
   UPD_BH_UPDATABLE(&stg_ap_4_upd_info);
+  LDV_ENTER(R1.cl);
   ENTER_CCS(R1.p);
   PUSH_UPD_FRAME(R1.p,0);
   Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[3]);
@@ -221,6 +227,7 @@ FN_(stg_ap_5_upd_entry) {
   FB_
   STK_CHK_NP(sizeofW(StgUpdateFrame)+4,1,);
   UPD_BH_UPDATABLE(&stg_ap_5_upd_info);
+  LDV_ENTER(R1.cl);
   ENTER_CCS(R1.p);
   PUSH_UPD_FRAME(R1.p,0);
   Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[4]);
@@ -238,6 +245,7 @@ FN_(stg_ap_6_upd_entry) {
   FB_
   STK_CHK_NP(sizeofW(StgUpdateFrame)+5,1,);
   UPD_BH_UPDATABLE(&stg_ap_6_upd_info);
+  LDV_ENTER(R1.cl);
   ENTER_CCS(R1.p);
   PUSH_UPD_FRAME(R1.p,0);
   Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[5]);
@@ -256,6 +264,7 @@ FN_(stg_ap_7_upd_entry) {
   FB_
   STK_CHK_NP(sizeofW(StgUpdateFrame)+6,1,);
   UPD_BH_UPDATABLE(&stg_ap_7_upd_info);
+  LDV_ENTER(R1.cl);
   ENTER_CCS(R1.p);
   PUSH_UPD_FRAME(R1.p,0);
   Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[6]);
@@ -275,6 +284,7 @@ FN_(stg_ap_8_upd_entry) {
   FB_
   STK_CHK_NP(sizeofW(StgUpdateFrame)+7,1,);
   UPD_BH_UPDATABLE(&stg_ap_8_upd_info);
+  LDV_ENTER(R1.cl);
   ENTER_CCS(R1.p);
   PUSH_UPD_FRAME(R1.p,0);
   Sp[-UF_SIZE-1]=(W_)(R1.cl->payload[7]);
index 9080bf6..ee8cfd8 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Storage.c,v 1.53 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: Storage.c,v 1.54 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -23,6 +23,8 @@
 #include "Schedule.h"
 #include "StoragePriv.h"
 
+#include "RetainerProfile.h"   // for counting memory blocks (memInventory)
+
 StgClosure    *caf_list         = NULL;
 
 bdescr *small_alloc_list;      /* allocate()d small objects */
@@ -63,23 +65,6 @@ initStorage( void )
   step *stp;
   generation *gen;
 
-  /* If we're doing heap profiling, we want a two-space heap with a
-   * fixed-size allocation area so that we get roughly even-spaced
-   * samples.
-   */
-
-  /* As an experiment, try a 2 generation collector
-   */
-
-#if defined(PROFILING) || defined(DEBUG)
-  if (RtsFlags.ProfFlags.doHeapProfile) {
-    RtsFlags.GcFlags.generations = 1;
-    RtsFlags.GcFlags.steps = 1;
-    RtsFlags.GcFlags.oldGenFactor = 0;
-    RtsFlags.GcFlags.heapSizeSuggestion = 0;
-  }
-#endif
-
   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
       RtsFlags.GcFlags.heapSizeSuggestion > 
       RtsFlags.GcFlags.maxHeapSize) {
@@ -350,6 +335,20 @@ resetNurseries( void )
   }
 #else
   for (bd = g0s0->blocks; bd; bd = bd->link) {
+#ifdef PROFILING
+    // @LDV profiling
+    // Reset every word in the nursery to zero when doing LDV profiling.
+    // This relieves the mutator of the burden of zeroing every new closure
+    // that it allocates in the nursery.
+    // 
+    // Todo: make it more efficient, e.g. with memset()
+    //
+    StgPtr p;
+    if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV) {
+      for (p = bd->start; p < bd->start + BLOCK_SIZE_W; p++)
+        *p = 0;
+    }
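+    // An equivalent and likely faster alternative (untested sketch):
+    //     memset(bd->start, 0, BLOCK_SIZE_W * sizeof(W_));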
+#endif
     bd->free = bd->start;
     ASSERT(bd->gen_no == 0);
     ASSERT(bd->step == g0s0);
@@ -370,6 +369,12 @@ allocNursery (bdescr *tail, nat blocks)
   // cons them on to the front of the list, not forgetting to update
   // the back pointer on the tail of the list to point to the new block.
   for (i=0; i < blocks; i++) {
+    // @LDV profiling
+    /*
+      processNursery() in LdvProfile.c assumes that every block group in
+      the nursery contains only a single block. So, if a block group is
+      given multiple blocks, change processNursery() accordingly.
+     */
     bd = allocBlock();
     bd->link = tail;
     // double-link the nursery: we might need to insert blocks
@@ -786,7 +791,14 @@ memInventory(void)
   for (bd = large_alloc_list; bd; bd = bd->link) {
     total_blocks += bd->blocks;
   }
-  
+
+#ifdef PROFILING
+  if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) {
+    for (bd = firstStack; bd != NULL; bd = bd->link) 
+      total_blocks += bd->blocks;
+  }
+#endif
+
   // count the blocks allocated by the arena allocator
   total_blocks += arenaBlocks();
 
index 1156dd4..f746d1f 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Storage.h,v 1.36 2001/08/08 10:50:37 simonmar Exp $
+ * $Id: Storage.h,v 1.37 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -13,6 +13,9 @@
 #include "Block.h"
 #include "BlockAlloc.h"
 #include "StoragePriv.h"
+#ifdef PROFILING
+#include "LdvProfile.h"
+#endif
 
 /* -----------------------------------------------------------------------------
    Initialisation / De-initialisation
@@ -152,7 +155,10 @@ recordOldToNewPtrs(StgMutClosure *p)
   }
 }
 
-#ifndef DEBUG
+// @LDV profiling
+// We zero out the slop when PROFILING is on.
+// #ifndef DEBUG
+#if !defined(DEBUG) && !defined(PROFILING)
 #define updateWithIndirection(info, p1, p2)                            \
   {                                                                    \
     bdescr *bd;                                                                \
@@ -174,6 +180,41 @@ recordOldToNewPtrs(StgMutClosure *p)
       TICK_UPD_OLD_IND();                                              \
     }                                                                  \
   }
+#elif defined(PROFILING)
+// @LDV profiling
+// We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation in 
+// which p1 resides.
+//
+// Note: 
+//   Strictly speaking, we do *NOT* need to call LDV_recordCreate() for either
+//   IND or IND_OLDGEN closures because they are inherently used.  However,
+//   omitting it would break the invariant that every closure keeps its creation
+//   time in its profiling field, so we call LDV_recordCreate() anyway.
+
+#define updateWithIndirection(info, p1, p2)                            \
+  {                                                                    \
+    bdescr *bd;                                                                \
+                                                                       \
+    LDV_recordDead_FILL_SLOP_DYNAMIC((p1));                             \
+    bd = Bdescr((P_)p1);                                               \
+    if (bd->gen_no == 0) {                                             \
+      ((StgInd *)p1)->indirectee = p2;                                 \
+      SET_INFO(p1,&stg_IND_info);                                      \
+      LDV_recordCreate((p1));                                           \
+      TICK_UPD_NEW_IND();                                              \
+    } else {                                                           \
+      ((StgIndOldGen *)p1)->indirectee = p2;                           \
+      if (info != &stg_BLACKHOLE_BQ_info) {                            \
+        ACQUIRE_LOCK(&sm_mutex);                                       \
+        ((StgIndOldGen *)p1)->mut_link = generations[bd->gen_no].mut_once_list;        \
+        generations[bd->gen_no].mut_once_list = (StgMutClosure *)p1;    \
+        RELEASE_LOCK(&sm_mutex);                                       \
+      }                                                                        \
+      SET_INFO(p1,&stg_IND_OLDGEN_info);                               \
+      LDV_recordCreate((p1));                                           \
+    }                                                                  \
+  }
+
 #else
 
 /* In the DEBUG case, we also zero out the slop of the old closure,
@@ -242,10 +283,17 @@ updateWithPermIndirection(const StgInfoTable *info, StgClosure *p1, StgClosure *
   bdescr *bd;
 
   ASSERT( p1 != p2 && !closure_IND(p1) );
+
+  // @LDV profiling
+  // Destroy the old closure.
+  LDV_recordDead_FILL_SLOP_DYNAMIC(p1);
   bd = Bdescr((P_)p1);
   if (bd->gen_no == 0) {
     ((StgInd *)p1)->indirectee = p2;
     SET_INFO(p1,&stg_IND_PERM_info);
+    // @LDV profiling
+    // We have just created a new closure.
+    LDV_recordCreate(p1);
     TICK_UPD_NEW_PERM_IND(p1);
   } else {
     ((StgIndOldGen *)p1)->indirectee = p2;
@@ -256,6 +304,9 @@ updateWithPermIndirection(const StgInfoTable *info, StgClosure *p1, StgClosure *
       RELEASE_LOCK(&sm_mutex);
     }
     SET_INFO(p1,&stg_IND_OLDGEN_PERM_info);
+    // @LDV profiling
+    // We have just created a new closure.
+    LDV_recordCreate(p1);
     TICK_UPD_OLD_PERM_IND();
   }
 }
index 989ce2f..81f49de 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Updates.hc,v 1.35 2001/11/08 12:46:31 simonmar Exp $
+ * $Id: Updates.hc,v 1.36 2001/11/22 14:25:12 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -117,8 +117,8 @@ UPD_FRAME_ENTRY_TEMPLATE(stg_upd_frame_7_entry,RET_VEC(Sp[0],7));
   return size!
   */
 
-#ifdef PROFILING
-#define UPD_FRAME_BITMAP 3
+#if defined(PROFILING)
+#define UPD_FRAME_BITMAP 7
 #else
 #define UPD_FRAME_BITMAP 1
 #endif
@@ -207,6 +207,7 @@ STGFUN(stg_PAP_entry)
   Sp -= Words;
 
   TICK_ENT_PAP(pap);
+  LDV_ENTER(pap);
 
   /* Enter PAP cost centre -- lexical scoping only */
   ENTER_CCS_PAP_CL(pap);
@@ -286,6 +287,14 @@ EXTFUN(__stg_update_PAP)
         * such as removing the update frame.
         */
        if ((Hp += PapSize) > HpLim) {
+#ifdef PROFILING
+          // @LDV profiling
+          // We would not be filling the slop of the object here (there is
+          // none), but rather the trailing words of the current block.  Even
+          // that is unnecessary, because we fill the entire nursery with
+          // zeroes after each garbage collection.
+          // FILL_SLOP(HpLim, PapSize - (Hp - HpLim));
+#endif
          Sp -= 1;
          Sp[0] = (W_)Fun;          
          JMP_(stg_gc_entertop);
@@ -351,7 +360,7 @@ EXTFUN(__stg_update_PAP)
       
       Updatee = Su->updatee; 
 
-#if defined(PROFILING)
+#if defined(PROFILING) 
       if (Words != 0) {
         UPD_IND(Updatee,PapClosure);
        TICK_UPD_PAP_IN_NEW(Words+1);
@@ -436,6 +445,7 @@ STGFUN(stg_AP_UPD_entry)
   Sp -= sizeofW(StgUpdateFrame) + Words;
 
   TICK_ENT_AP_UPD(ap);
+  LDV_ENTER(ap);
 
   /* Enter PAP cost centre -- lexical scoping only */
   ENTER_CCS_PAP_CL(ap);   /* ToDo: ENTER_CC_AP_UPD_CL */
index 1c03e69..c80cf8c 100644 (file)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------------------
- * $Id: Weak.c,v 1.18 2001/08/14 13:40:09 sewardj Exp $
+ * $Id: Weak.c,v 1.19 2001/11/22 14:25:13 simonmar Exp $
  *
  * (c) The GHC Team, 1998-1999
  *
@@ -36,6 +36,15 @@ finalizeWeakPointersNow(void)
   while ((w = weak_ptr_list)) {
     weak_ptr_list = w->link;
     if (w->header.info != &stg_DEAD_WEAK_info) {
+        // @LDV profiling
+        // Even though the info type of w changes, we DO NOT perform any
+        // LDV profiling because at this moment, LDV profiling must already
+        // have been terminated. See the comments in shutdownHaskell().
+        // At any rate, there is no need to call LDV_recordDead() because
+        // weak pointers are inherently used.
+#ifdef PROFILING
+        ASSERT(ldvTime == 0);   // LDV profiling is turned off.
+#endif
        w->header.info = &stg_DEAD_WEAK_info;
        IF_DEBUG(weak,fprintf(stderr,"Finalising weak pointer at %p -> %p\n", w, w->key));
        if (w->finalizer != &stg_NO_FINALIZER_closure) {
@@ -85,7 +94,17 @@ scheduleFinalizers(StgWeak *list)
            arr->payload[n] = w->finalizer;
            n++;
        }
-       w->header.info = &stg_DEAD_WEAK_info;
+
+#ifdef PROFILING
+        // A weak pointer is inherently used, so we do not need to call
+        // LDV_recordDead().
+       //
+        // Furthermore, when PROFILING is turned on, dead weak
+        // pointers are exactly as large as weak pointers, so there is
+        // no need to fill the slop, either.  See stg_DEAD_WEAK_info
+        // in StgMiscClosures.hc.
+#endif
+       SET_HDR(w, &stg_DEAD_WEAK_info, w->header.prof.ccs);
     }
 
     t = createIOThread(RtsFlags.GcFlags.initialStkSize,