#include "ParTicky.h" /* ToDo: move into Rts.h */
#include "Profiling.h"
#include "GetTime.h"
+#include "GC.h"
+
+#if USE_PAPI
+#include "Papi.h"
+#endif
/* huh? */
#define BIG_STRING_LEN 512
static ullong GC_tot_alloc = 0;
static ullong GC_tot_copied = 0;
-static ullong GC_tot_scavd_copied = 0;
+
+static ullong GC_par_max_copied = 0;
+static ullong GC_par_avg_copied = 0;
static Ticks GC_start_time = 0, GC_tot_time = 0; /* User GC Time */
static Ticks GCe_start_time = 0, GCe_tot_time = 0; /* Elapsed GC time */
static lnat MaxResidency = 0; // in words; for stats only
static lnat AvgResidency = 0;
static lnat ResidencySamples = 0; // for stats only
+static lnat MaxSlop = 0;
static lnat GC_start_faults = 0, GC_end_faults = 0;
-static Ticks *GC_coll_times;
-
-static void statsPrintf( char *s, ... )
- GNUC3_ATTRIBUTE(format (printf, 1, 2));
+static Ticks *GC_coll_times = NULL;
+static Ticks *GC_coll_etimes = NULL;
static void statsFlush( void );
static void statsClose( void );
}
#endif /* PROFILING */
+// initStats0() has no dependencies, it can be called right at the beginning
+void
+initStats0(void)
+{
+ ElapsedTimeStart = 0;
+
+ InitUserTime = 0;
+ InitElapsedTime = 0;
+ InitElapsedStamp = 0;
+
+ MutUserTime = 0;
+ MutElapsedTime = 0;
+ MutElapsedStamp = 0;
+
+ ExitUserTime = 0;
+ ExitElapsedTime = 0;
+
+ GC_tot_alloc = 0;
+ GC_tot_copied = 0;
+ GC_par_max_copied = 0;
+ GC_par_avg_copied = 0;
+ GC_start_time = 0;
+ GC_tot_time = 0;
+ GCe_start_time = 0;
+ GCe_tot_time = 0;
+
+#ifdef PROFILING
+ RP_start_time = 0;
+ RP_tot_time = 0;
+ RPe_start_time = 0;
+ RPe_tot_time = 0;
+
+ HC_start_time = 0;
+ HC_tot_time = 0;
+ HCe_start_time = 0;
+ HCe_tot_time = 0;
+#endif
+
+ MaxResidency = 0;
+ AvgResidency = 0;
+ ResidencySamples = 0;
+ MaxSlop = 0;
+
+ GC_start_faults = 0;
+ GC_end_faults = 0;
+}
+
+// initStats1() can be called after setupRtsFlags()
void
-initStats(void)
+initStats1 (void)
{
nat i;
(Ticks *)stgMallocBytes(
sizeof(Ticks)*RtsFlags.GcFlags.generations,
"initStats");
+ GC_coll_etimes =
+ (Ticks *)stgMallocBytes(
+ sizeof(Ticks)*RtsFlags.GcFlags.generations,
+ "initStats");
for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
GC_coll_times[i] = 0;
+ GC_coll_etimes[i] = 0;
}
-}
+}
/* -----------------------------------------------------------------------------
Initialisation time...
} else {
InitElapsedTime = elapsed - ElapsedTimeStart;
}
+#if USE_PAPI
+ /* We start counting events for the mutator
+ * when garbage collection starts
+ * we switch to the GC event set. */
+ papi_start_mutator_count();
+
+ /* This flag is needed to avoid counting the last GC */
+ papi_is_reporting = 1;
+
+#endif
}
/* -----------------------------------------------------------------------------
MutUserTime = user - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime;
if (MutUserTime < 0) { MutUserTime = 0; }
+
+#if USE_PAPI
+ /* We stop counting mutator events
+ * GC events are not being counted at this point */
+ papi_stop_mutator_count();
+
+ /* This flag is needed, because GC is run once more after this function */
+ papi_is_reporting = 0;
+
+#endif
}
void
GC_start_faults = getPageFaults();
}
}
+
+#if USE_PAPI
+ if(papi_is_reporting) {
+ /* Switch to counting GC events */
+ papi_stop_mutator_count();
+ papi_start_gc_count();
+ }
+#endif
+
}
/* -----------------------------------------------------------------------------
-------------------------------------------------------------------------- */
void
-stat_endGC (lnat alloc, lnat live, lnat copied,
- lnat scavd_copied, lnat gen)
+stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen,
+ lnat max_copied, lnat avg_copied, lnat slop)
{
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
Ticks time, etime, gc_time, gc_etime;
nat faults = getPageFaults();
statsPrintf("%9ld %9ld %9ld",
- alloc*sizeof(W_), (copied+scavd_copied)*sizeof(W_),
+ alloc*sizeof(W_), copied*sizeof(W_),
live*sizeof(W_));
statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2ld)\n",
TICK_TO_DBL(gc_time),
}
GC_coll_times[gen] += gc_time;
+ GC_coll_etimes[gen] += gc_etime;
GC_tot_copied += (ullong) copied;
- GC_tot_scavd_copied += (ullong) scavd_copied;
GC_tot_alloc += (ullong) alloc;
+ GC_par_max_copied += (ullong) max_copied;
+ GC_par_avg_copied += (ullong) avg_copied;
GC_tot_time += gc_time;
GCe_tot_time += gc_etime;
ResidencySamples++;
AvgResidency += live;
}
+
+ if (slop > MaxSlop) MaxSlop = slop;
}
if (rub_bell) {
debugBelch("\b\b\b \b\b\b");
rub_bell = 0;
}
+
+#if USE_PAPI
+ if(papi_is_reporting) {
+ /* Switch to counting mutator events */
+ if (gen == 0) {
+ papi_stop_gc0_count();
+ } else {
+ papi_stop_gc1_count();
+ }
+ papi_start_mutator_count();
+ }
+#endif
}
/* -----------------------------------------------------------------------------
were left unused when the heap-check failed.
-------------------------------------------------------------------------- */
+#ifdef DEBUG
+#define TICK_VAR(arity) \
+ extern StgInt SLOW_CALLS_##arity; \
+ extern StgInt RIGHT_ARITY_##arity; \
+ extern StgInt TAGGED_PTR_##arity;
+
+#define TICK_VAR_INI(arity) \
+ StgInt SLOW_CALLS_##arity = 1; \
+ StgInt RIGHT_ARITY_##arity = 1; \
+ StgInt TAGGED_PTR_##arity = 0;
+
+extern StgInt TOTAL_CALLS;
+
+TICK_VAR(1)
+TICK_VAR(2)
+
+TICK_VAR_INI(1)
+TICK_VAR_INI(2)
+
+StgInt TOTAL_CALLS=1;
+#endif
+
+/* Report the value of a counter */
+#define REPORT(counter) \
+ { \
+ ullong_format_string(counter,temp,rtsTrue/*commas*/); \
+ statsPrintf(" (" #counter ") : %s\n",temp); \
+ }
+
+/* Report the value of a counter as a percentage of another counter */
+#define REPORT_PCT(counter,countertot) \
+ statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
+ counter*100.0/countertot)
+
+#define TICK_PRINT(arity) \
+ REPORT(SLOW_CALLS_##arity); \
+ REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
+ REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
+ REPORT(RIGHT_ARITY_##arity); \
+ REPORT(TAGGED_PTR_##arity)
+
+#define TICK_PRINT_TOT(arity) \
+ statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
+ SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
+
+extern lnat hw_alloc_blocks;
+
void
stat_exit(int alloc)
{
if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
ullong_format_string(GC_tot_alloc*sizeof(W_),
temp, rtsTrue/*commas*/);
- statsPrintf("%11s bytes allocated in the heap\n", temp);
+ statsPrintf("%16s bytes allocated in the heap\n", temp);
ullong_format_string(GC_tot_copied*sizeof(W_),
temp, rtsTrue/*commas*/);
- statsPrintf("%11s bytes copied during GC (scavenged)\n", temp);
+ statsPrintf("%16s bytes copied during GC\n", temp);
- ullong_format_string(GC_tot_scavd_copied*sizeof(W_),
- temp, rtsTrue/*commas*/);
- statsPrintf("%11s bytes copied during GC (not scavenged)\n", temp);
-
if ( ResidencySamples > 0 ) {
ullong_format_string(MaxResidency*sizeof(W_),
temp, rtsTrue/*commas*/);
- statsPrintf("%11s bytes maximum residency (%ld sample(s))\n",
+ statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
temp, ResidencySamples);
}
- statsPrintf("\n");
+
+ ullong_format_string(MaxSlop*sizeof(W_), temp, rtsTrue/*commas*/);
+ statsPrintf("%16s bytes maximum slop\n", temp);
+
+ statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n",
+ mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
+ (mblocks_allocated * MBLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
/* Print garbage collections in each gen */
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- statsPrintf("%11d collections in generation %d (%6.2fs)\n",
- generations[g].collections, g,
- TICK_TO_DBL(GC_coll_times[g]));
+ statsPrintf(" Generation %d: %5d collections, %5d parallel, %5.2fs, %5.2fs elapsed\n",
+ g, generations[g].collections,
+ generations[g].par_collections,
+ TICK_TO_DBL(GC_coll_times[g]),
+ TICK_TO_DBL(GC_coll_etimes[g]));
}
- statsPrintf("\n%11ld Mb total memory in use\n\n",
- mblocks_allocated * MBLOCK_SIZE / (1024 * 1024));
+#if defined(THREADED_RTS)
+ if (RtsFlags.ParFlags.gcThreads > 1) {
+ statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
+ (double)GC_par_avg_copied / (double)GC_par_max_copied,
+ (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
+ RtsFlags.ParFlags.gcThreads
+ );
+ }
+#endif
+
+ statsPrintf("\n");
#if defined(THREADED_RTS)
{
TICK_TO_DBL(time - GC_tot_time -
PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100
/ TICK_TO_DBL(etime));
+
+ /*
+ TICK_PRINT(1);
+ TICK_PRINT(2);
+ REPORT(TOTAL_CALLS);
+ TICK_PRINT_TOT(1);
+ TICK_PRINT_TOT(2);
+ */
+
+#if USE_PAPI
+ papi_stats_report();
+#endif
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+ {
+ nat g, s;
+
+ statsPrintf("recordMutableGen_sync: %"FMT_Word64"\n", recordMutableGen_sync.spin);
+ statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
+ statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ statsPrintf("gen[%d].steps[%d].sync_todo: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_todo.spin);
+ statsPrintf("gen[%d].steps[%d].sync_large_objects: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_large_objects.spin);
+ }
+ }
+ }
+#endif
}
if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
statsFlush();
statsClose();
}
+
if (GC_coll_times)
stgFree(GC_coll_times);
GC_coll_times = NULL;
+ if (GC_coll_etimes)
+ stgFree(GC_coll_etimes);
+ GC_coll_etimes = NULL;
}
/* -----------------------------------------------------------------------------
Produce some detailed info on the state of the generational GC.
-------------------------------------------------------------------------- */
-#ifdef DEBUG
void
statDescribeGens(void)
{
nat g, s, mut, lge;
- lnat live;
+ lnat live, slop;
+ lnat tot_live, tot_slop;
bdescr *bd;
step *step;
debugBelch(
-" Gen Steps Max Mutable Step Blocks Live Large\n"
-" Blocks Closures Objects\n");
+"-----------------------------------------------------------------\n"
+" Gen Max Mut-list Step Blocks Large Live Slop\n"
+" Blocks Bytes Objects \n"
+"-----------------------------------------------------------------\n");
- mut = 0;
+ tot_live = 0;
+ tot_slop = 0;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ mut = 0;
for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
- mut += bd->free - bd->start;
+ mut += (bd->free - bd->start) * sizeof(W_);
}
- debugBelch("%8d %8d %8d %9d", g, generations[g].n_steps,
- generations[g].max_blocks, mut);
+ debugBelch("%5d %7d %9d", g, generations[g].max_blocks, mut);
for (s = 0; s < generations[g].n_steps; s++) {
step = &generations[g].steps[s];
- live = 0;
for (bd = step->large_objects, lge = 0; bd; bd = bd->link) {
lge++;
}
- live = step->n_large_blocks * BLOCK_SIZE;
- bd = step->blocks;
- // This live figure will be slightly less that the "live" figure
- // given by +RTS -Sstderr, because we take don't count the
- // slop at the end of each block.
- for (; bd; bd = bd->link) {
- live += (bd->free - bd->start) * sizeof(W_);
- }
+ live = step->n_words + countOccupied(step->large_objects);
if (s != 0) {
- debugBelch("%36s","");
+ debugBelch("%23s","");
}
- debugBelch("%6d %8d %8ld %8d\n", s, step->n_blocks,
- live, lge);
+ slop = (step->n_blocks + step->n_large_blocks) * BLOCK_SIZE_W - live;
+ debugBelch("%6d %8d %8d %8ld %8ld\n", s, step->n_blocks, lge,
+ live*sizeof(W_), slop*sizeof(W_));
+ tot_live += live;
+ tot_slop += slop;
}
}
+ debugBelch("-----------------------------------------------------------------\n");
+ debugBelch("%48s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
+ debugBelch("-----------------------------------------------------------------\n");
debugBelch("\n");
}
-#endif
/* -----------------------------------------------------------------------------
Stats available via a programmatic interface, so eg. GHCi can time
Dumping stuff in the stats file, or via the debug message interface
-------------------------------------------------------------------------- */
-static void
+void
statsPrintf( char *s, ... )
{
FILE *sf = RtsFlags.GcFlags.statsFile;