From a75a8790410ce3ffb439bfd0b7c3999e7df72eb1 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Wed, 16 Apr 2008 22:12:24 +0000 Subject: [PATCH] improvements to +RTS -s output - count and report number of parallel collections - calculate bytes scanned in addition to bytes copied per thread - calculate "work balance factor" - tidy up the formatting a bit --- includes/Storage.h | 1 + rts/Stats.c | 35 ++++++++++++++++++++++++++--------- rts/Stats.h | 3 ++- rts/sm/GC.c | 23 +++++++++++++++-------- rts/sm/GC.h | 3 ++- rts/sm/GCUtils.c | 1 + rts/sm/Scav.c | 11 ++++++++--- rts/sm/Scav.c-inc | 6 +++--- rts/sm/Storage.c | 1 + utils/runstdtest/runstdtest.prl | 3 ++- 10 files changed, 61 insertions(+), 26 deletions(-) diff --git a/includes/Storage.h b/includes/Storage.h index d7a8421..5b8acfa 100644 --- a/includes/Storage.h +++ b/includes/Storage.h @@ -113,6 +113,7 @@ typedef struct generation_ { // stats information unsigned int collections; + unsigned int par_collections; unsigned int failed_promotions; // temporary use during GC: diff --git a/rts/Stats.c b/rts/Stats.c index cd61116..461dabd 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -45,6 +45,9 @@ static Ticks ExitElapsedTime = 0; static ullong GC_tot_alloc = 0; static ullong GC_tot_copied = 0; +static ullong GC_par_max_copied = 0; +static ullong GC_par_avg_copied = 0; + static Ticks GC_start_time = 0, GC_tot_time = 0; /* User GC Time */ static Ticks GCe_start_time = 0, GCe_tot_time = 0; /* Elapsed GC time */ @@ -294,7 +297,8 @@ stat_startGC(void) -------------------------------------------------------------------------- */ void -stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen) +stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen, + lnat max_copied, lnat avg_copied) { if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { Ticks time, etime, gc_time, gc_etime; @@ -327,6 +331,8 @@ stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen) GC_tot_copied += (ullong) copied; GC_tot_alloc += (ullong) alloc; + GC_par_max_copied += (ullong) max_copied; + GC_par_avg_copied += (ullong) avg_copied; GC_tot_time += gc_time; GCe_tot_time += gc_etime; @@ -527,30 +533,41 @@ stat_exit(int alloc) if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) { ullong_format_string(GC_tot_alloc*sizeof(W_), temp, rtsTrue/*commas*/); - statsPrintf("%11s bytes allocated in the heap\n", temp); + statsPrintf("%16s bytes allocated in the heap\n", temp); ullong_format_string(GC_tot_copied*sizeof(W_), temp, rtsTrue/*commas*/); - statsPrintf("%11s bytes copied during GC\n", temp); + statsPrintf("%16s bytes copied during GC\n", temp); if ( ResidencySamples > 0 ) { ullong_format_string(MaxResidency*sizeof(W_), temp, rtsTrue/*commas*/); - statsPrintf("%11s bytes maximum residency (%ld sample(s))\n", + statsPrintf("%16s bytes maximum residency (%ld sample(s))\n", temp, ResidencySamples); } - statsPrintf("\n"); + statsPrintf("%16ld MB total memory in use\n\n", + mblocks_allocated * MBLOCK_SIZE / (1024 * 1024)); /* Print garbage collections in each gen */ for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - statsPrintf("%11d collections in generation %d, %6.2fs, %6.2fs elapsed\n", - generations[g].collections, g, + statsPrintf(" Generation %d: %5d collections, %5d parallel, %5.2fs, %5.2fs elapsed\n", + g, generations[g].collections, + generations[g].par_collections, TICK_TO_DBL(GC_coll_times[g]), TICK_TO_DBL(GC_coll_etimes[g])); } - statsPrintf("\n%11ld MB total memory in use\n\n", - mblocks_allocated * MBLOCK_SIZE / (1024 * 1024)); +#if defined(THREADED_RTS) + if (RtsFlags.ParFlags.gcThreads > 1) { + statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n", + (double)GC_par_avg_copied / (double)GC_par_max_copied, + (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied, + RtsFlags.ParFlags.gcThreads + ); + } +#endif + + statsPrintf("\n"); #if defined(THREADED_RTS) { diff --git a/rts/Stats.h b/rts/Stats.h index 2d7e655..12311ee 100644 --- a/rts/Stats.h +++ b/rts/Stats.h @@ -16,7 +16,8 @@ void stat_endInit(void); void stat_startGC(void); void stat_endGC (lnat alloc, lnat live, - lnat copied, lnat gen); + lnat copied, lnat gen, + lnat max_copied, lnat avg_copied); #ifdef PROFILING void stat_startRP(void); diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 1ac27f2..cb2f040 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -181,7 +181,7 @@ GarbageCollect ( rtsBool force_major_gc ) { bdescr *bd; step *stp; - lnat live, allocated; + lnat live, allocated, max_copied, avg_copied; lnat oldgen_saved_blocks = 0; gc_thread *saved_gct; nat g, s, t, n; @@ -471,24 +471,35 @@ GarbageCollect ( rtsBool force_major_gc ) /* run through all the generations/steps and tidy up */ copied = 0; + max_copied = 0; + avg_copied = 0; { nat i; for (i=0; i < n_gc_threads; i++) { if (n_gc_threads > 1) { trace(TRACE_gc,"thread %d:", i); trace(TRACE_gc," copied %ld", gc_threads[i]->copied * sizeof(W_)); + trace(TRACE_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_)); trace(TRACE_gc," any_work %ld", gc_threads[i]->any_work); trace(TRACE_gc," no_work %ld", gc_threads[i]->no_work); trace(TRACE_gc," scav_find_work %ld", gc_threads[i]->scav_find_work); } copied += gc_threads[i]->copied; + max_copied = stg_max(gc_threads[i]->copied, max_copied); + } + if (n_gc_threads == 1) { + max_copied = 0; + avg_copied = 0; + } else { + avg_copied = copied; } } for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - if (g <= N) { + if (g == N) { generations[g].collections++; // for stats + if (n_gc_threads > 1) generations[g].par_collections++; } // Count the mutable list as bytes "copied" for the purposes of @@ -698,7 +709,7 @@ GarbageCollect ( rtsBool force_major_gc ) #endif // ok, GC over: tell the stats department what happened. - stat_endGC(allocated, live, copied, N); + stat_endGC(allocated, live, copied, N, max_copied, avg_copied); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { @@ -1399,10 +1410,6 @@ init_uncollected_gen (nat g, nat threads) // from the current end point. ws->scan_bd = ws->todo_bd; ws->scan_bd->u.scan = ws->scan_bd->free; - - // subtract the contents of this block from the stats, - // because we'll count the whole block later. - copied -= ws->scan_bd->free - ws->scan_bd->start; } else { @@ -1440,10 +1447,10 @@ init_gc_thread (gc_thread *t) t->eager_promotion = rtsTrue; t->thunk_selector_depth = 0; t->copied = 0; + t->scanned = 0; t->any_work = 0; t->no_work = 0; t->scav_find_work = 0; - } /* ----------------------------------------------------------------------------- diff --git a/rts/sm/GC.h b/rts/sm/GC.h index 5a9cb98..0e0d90a 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -126,7 +126,7 @@ typedef struct gc_thread_ { StgClosure* static_objects; // live static objects StgClosure* scavenged_static_objects; // static objects scavenged so far - lnat gc_count; // number of gc's this thread has done + lnat gc_count; // number of GCs this thread has done // -------------------- // evacuate flags @@ -157,6 +157,7 @@ typedef struct gc_thread_ { // stats lnat copied; + lnat scanned; lnat any_work; lnat no_work; lnat scav_find_work; diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c index 0e20c46..118d5d7 100644 --- a/rts/sm/GCUtils.c +++ b/rts/sm/GCUtils.c @@ -115,6 +115,7 @@ todo_block_full (nat size, step_workspace *ws) ASSERT(bd->link == NULL); ASSERT(bd->step == ws->step); + gct->copied += ws->todo_free - bd->free; bd->free = ws->todo_free; // If the global list is not empty, or there's not much work in diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index b7bd7f3..0eb4b11 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -1378,6 +1378,9 @@ scavenge_large (step_workspace *ws) recordMutableGen_GC((StgClosure *)p, ws->step->gen); } } + + // stats + gct->scanned += closure_sizeW((StgClosure*)p); } } @@ -1428,12 +1431,14 @@ loop: continue; } ws = &gct->steps[s]; - + if (ws->todo_bd != NULL) { - ws->todo_bd->free = ws->todo_free; + bd = ws->todo_bd; + gct->copied += ws->todo_free - bd->free; + bd->free = ws->todo_free; } - + // If we have a todo block and no scan block, start // scanning the todo block. if (ws->scan_bd == NULL && ws->todo_bd != NULL) diff --git a/rts/sm/Scav.c-inc b/rts/sm/Scav.c-inc index 6f85203..64677c0 100644 --- a/rts/sm/Scav.c-inc +++ b/rts/sm/Scav.c-inc @@ -444,15 +444,15 @@ scavenge_block (bdescr *bd) } if (p > bd->free) { + gct->copied += ws->todo_free - bd->free; bd->free = p; } debugTrace(DEBUG_gc, " scavenged %ld bytes", (unsigned long)((bd->free - bd->u.scan) * sizeof(W_))); - // update stats: this is a block that has been copied & scavenged - gct->copied += bd->free - bd->u.scan; - + // update stats: this is a block that has been scavenged + gct->scanned += bd->free - bd->u.scan; bd->u.scan = bd->free; } diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 6f4a415..856362d 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -166,6 +166,7 @@ initStorage( void ) gen->no = g; gen->mut_list = allocBlock(); gen->collections = 0; + gen->par_collections = 0; gen->failed_promotions = 0; gen->max_blocks = 0; } diff --git a/utils/runstdtest/runstdtest.prl b/utils/runstdtest/runstdtest.prl index 6059760..0cc9c6e 100644 --- a/utils/runstdtest/runstdtest.prl +++ b/utils/runstdtest/runstdtest.prl @@ -388,6 +388,7 @@ sub process_stats_file { local($count) = 0; $GCWork = 0; + $GCs = 0; while () { if (! /Gen:\s+0/ && /^\s*\d+\s+\d+\s+(\d+)\s+\d+\.\d+/ ) { $max_live = $1 if $max_live < $1; @@ -407,7 +408,7 @@ sub process_stats_file { # $MaxResidency = $1; $ResidencySamples = $2; # } - $GCs = $1 if /^\s*([0-9,]+) collections? in generation 0/; + $GCs += $1 if /^\s*Generation\s*\d+:\s*([0-9,]+) collections/; if ( /^\s+([0-9]+)\s+M[Bb] total memory/ ) { $TotMem = $1; -- 1.7.10.4