#include "ParTicky.h" /* ToDo: move into Rts.h */
#include "Profiling.h"
#include "GetTime.h"
+#include "GC.h"
+#include "GCUtils.h"
+#include "Evac.h"
#if USE_PAPI
#include "Papi.h"
static ullong GC_tot_alloc = 0;
static ullong GC_tot_copied = 0;
+static ullong GC_par_max_copied = 0; /* running sum of the max_copied argument given to stat_endGC() */
+static ullong GC_par_avg_copied = 0; /* running sum of the avg_copied argument given to stat_endGC(); avg/max is reported as the parallel GC work balance */
+
static Ticks GC_start_time = 0, GC_tot_time = 0; /* User GC Time */
static Ticks GCe_start_time = 0, GCe_tot_time = 0; /* Elapsed GC time */
static lnat GC_start_faults = 0, GC_end_faults = 0;
-static Ticks *GC_coll_times;
-static Ticks *GC_coll_etimes;
+static Ticks *GC_coll_times = NULL; /* indexed by generation number; NULL whenever no array is held, and reset to NULL after stgFree() */
+static Ticks *GC_coll_etimes = NULL; /* elapsed-time counterpart of GC_coll_times, same indexing and lifetime handling */
static void statsFlush( void );
static void statsClose( void );
-------------------------------------------------------------------------- */
void
-stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen)
+stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen,
+ lnat max_copied, lnat avg_copied)
{
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
Ticks time, etime, gc_time, gc_etime;
GC_tot_copied += (ullong) copied;
GC_tot_alloc += (ullong) alloc;
+ GC_par_max_copied += (ullong) max_copied;
+ GC_par_avg_copied += (ullong) avg_copied;
GC_tot_time += gc_time;
GCe_tot_time += gc_etime;
if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
ullong_format_string(GC_tot_alloc*sizeof(W_),
temp, rtsTrue/*commas*/);
- statsPrintf("%11s bytes allocated in the heap\n", temp);
+ statsPrintf("%16s bytes allocated in the heap\n", temp);
ullong_format_string(GC_tot_copied*sizeof(W_),
temp, rtsTrue/*commas*/);
- statsPrintf("%11s bytes copied during GC\n", temp);
+ statsPrintf("%16s bytes copied during GC\n", temp);
if ( ResidencySamples > 0 ) {
ullong_format_string(MaxResidency*sizeof(W_),
temp, rtsTrue/*commas*/);
- statsPrintf("%11s bytes maximum residency (%ld sample(s))\n",
+ statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
temp, ResidencySamples);
}
- statsPrintf("\n");
+ statsPrintf("%16ld MB total memory in use\n\n",
+ mblocks_allocated * MBLOCK_SIZE / (1024 * 1024));
/* Print garbage collections in each gen */
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- statsPrintf("%11d collections in generation %d, %6.2fs, %6.2fs elapsed\n",
- generations[g].collections, g,
+ statsPrintf(" Generation %d: %5d collections, %5d parallel, %5.2fs, %5.2fs elapsed\n",
+ g, generations[g].collections,
+ generations[g].par_collections,
TICK_TO_DBL(GC_coll_times[g]),
TICK_TO_DBL(GC_coll_etimes[g]));
}
- statsPrintf("\n%11ld Mb total memory in use\n\n",
- mblocks_allocated * MBLOCK_SIZE / (1024 * 1024));
+#if defined(THREADED_RTS)
+ if (RtsFlags.ParFlags.gcThreads > 1) {
+ statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
+ (double)GC_par_avg_copied / (double)GC_par_max_copied,
+ (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
+ RtsFlags.ParFlags.gcThreads
+ );
+ }
+#endif
+
+ statsPrintf("\n");
#if defined(THREADED_RTS)
{
TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time));
}
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+ {
+ nat g, s;
+
+ statsPrintf("recordMutableGen_sync: %"FMT_Word64"\n", recordMutableGen_sync.spin);
+ statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
+ statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ statsPrintf("gen[%d].steps[%d].sync_todo: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_todo.spin);
+ statsPrintf("gen[%d].steps[%d].sync_large_objects: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_large_objects.spin);
+ }
+ }
+ }
+#endif
+
statsFlush();
statsClose();
}
+
if (GC_coll_times)
stgFree(GC_coll_times);
GC_coll_times = NULL;
+ if (GC_coll_etimes)
+ stgFree(GC_coll_etimes);
+ GC_coll_etimes = NULL;
}
/* -----------------------------------------------------------------------------
Produce some detailed info on the state of the generational GC.
-------------------------------------------------------------------------- */
-#ifdef DEBUG
void
statDescribeGens(void)
{
nat g, s, mut, lge;
- lnat live;
+ lnat live, slop; /* per-step live data and unused block space; both are multiplied by sizeof(W_) when printed */
+ lnat tot_live, tot_slop; /* sums of live and slop over every step of every generation */
bdescr *bd;
step *step;
debugBelch(
-" Gen Steps Max Mut-list Step Blocks Live Large\n"
-" Blocks Bytes Objects\n");
+"-----------------------------------------------------------------\n"
+" Gen Max Mut-list Step Blocks Large Live Slop\n"
+" Blocks Bytes Objects \n"
+"-----------------------------------------------------------------\n");
- mut = 0;
+ tot_live = 0;
+ tot_slop = 0;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ mut = 0; /* reset for each generation so the Mut-list column is per-generation, not cumulative */
for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
mut += (bd->free - bd->start) * sizeof(W_);
}
- debugBelch("%8d %8d %8d %9d", g, generations[g].n_steps,
- generations[g].max_blocks, mut);
+ debugBelch("%5d %7d %9d", g, generations[g].max_blocks, mut); /* 23 characters wide; the "%23s" pad below must match */
for (s = 0; s < generations[g].n_steps; s++) {
step = &generations[g].steps[s];
- live = 0;
for (bd = step->large_objects, lge = 0; bd; bd = bd->link) {
lge++;
}
- live = step->n_large_blocks * BLOCK_SIZE;
- bd = step->blocks;
- // This live figure will be slightly less that the "live" figure
- // given by +RTS -Sstderr, because we take don't count the
- // slop at the end of each block.
- for (; bd; bd = bd->link) {
- live += (bd->free - bd->start) * sizeof(W_);
- }
+ live = step->n_words + countOccupied(step->large_objects); /* NOTE(review): countOccupied() is defined elsewhere; presumably the words in use on the large-object list - confirm */
if (s != 0) {
- debugBelch("%36s","");
+ debugBelch("%23s",""); /* rows after a generation's first step: blank out the generation columns */
}
- debugBelch("%6d %8d %8ld %8d\n", s, step->n_blocks,
- live, lge);
+ slop = (step->n_blocks + step->n_large_blocks) * BLOCK_SIZE_W - live; /* space in the step's ordinary and large blocks that live does not account for */
+ debugBelch("%6d %8d %8d %8ld %8ld\n", s, step->n_blocks, lge,
+ live*sizeof(W_), slop*sizeof(W_));
+ tot_live += live;
+ tot_slop += slop;
}
}
+ debugBelch("-----------------------------------------------------------------\n");
+ debugBelch("%48s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); /* 48-character pad lines the totals up under the Live and Slop columns */
+ debugBelch("-----------------------------------------------------------------\n");
debugBelch("\n");
}
-#endif
/* -----------------------------------------------------------------------------
Stats available via a programmatic interface, so eg. GHCi can time