X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FStats.c;h=2e1561313515557b6c930658e760f2b5819b6746;hp=aabe259014a756b70096a5a95e7111928e89cbee;hb=27de38efce6d73d2a0209f803cfa98c82773e773;hpb=a7d236fbcf664fde8c81ba840efc1c471051814d diff --git a/rts/Stats.c b/rts/Stats.c index aabe2590..2e15613 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -16,6 +16,11 @@ #include "ParTicky.h" /* ToDo: move into Rts.h */ #include "Profiling.h" #include "GetTime.h" +#include "GC.h" + +#if USE_PAPI +#include "Papi.h" +#endif /* huh? */ #define BIG_STRING_LEN 512 @@ -37,7 +42,9 @@ static Ticks ExitElapsedTime = 0; static ullong GC_tot_alloc = 0; static ullong GC_tot_copied = 0; -static ullong GC_tot_scavd_copied = 0; + +static ullong GC_par_max_copied = 0; +static ullong GC_par_avg_copied = 0; static Ticks GC_start_time = 0, GC_tot_time = 0; /* User GC Time */ static Ticks GCe_start_time = 0, GCe_tot_time = 0; /* Elapsed GC time */ @@ -59,13 +66,12 @@ static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time static lnat MaxResidency = 0; // in words; for stats only static lnat AvgResidency = 0; static lnat ResidencySamples = 0; // for stats only +static lnat MaxSlop = 0; static lnat GC_start_faults = 0, GC_end_faults = 0; -static Ticks *GC_coll_times; - -static void statsPrintf( char *s, ... ) - GNUC3_ATTRIBUTE(format (printf, 1, 2)); +static Ticks *GC_coll_times = NULL; +static Ticks *GC_coll_etimes = NULL; static void statsFlush( void ); static void statsClose( void ); @@ -125,8 +131,56 @@ mut_user_time_during_heap_census( void ) } #endif /* PROFILING */ +// initStats0() has no dependencies, it can be called right at the beginning +void +initStats0(void) +{ + ElapsedTimeStart = 0; + + InitUserTime = 0; + InitElapsedTime = 0; + InitElapsedStamp = 0; + + MutUserTime = 0; + MutElapsedTime = 0; + MutElapsedStamp = 0; + + ExitUserTime = 0; + ExitElapsedTime = 0; + + GC_tot_alloc = 0; + GC_tot_copied = 0; + GC_par_max_copied = 0; + GC_par_avg_copied = 0; + GC_start_time = 0; + GC_tot_time = 0; + GCe_start_time = 0; + GCe_tot_time = 0; + +#ifdef PROFILING + RP_start_time = 0; + RP_tot_time = 0; + RPe_start_time = 0; + RPe_tot_time = 0; + + HC_start_time = 0; + HC_tot_time = 0; + HCe_start_time = 0; + HCe_tot_time = 0; +#endif + + MaxResidency = 0; + AvgResidency = 0; + ResidencySamples = 0; + MaxSlop = 0; + + GC_start_faults = 0; + GC_end_faults = 0; +} + +// initStats1() can be called after setupRtsFlags() void -initStats(void) +initStats1 (void) { nat i; @@ -138,10 +192,15 @@ initStats(void) (Ticks *)stgMallocBytes( sizeof(Ticks)*RtsFlags.GcFlags.generations, "initStats"); + GC_coll_etimes = + (Ticks *)stgMallocBytes( + sizeof(Ticks)*RtsFlags.GcFlags.generations, + "initStats"); for (i = 0; i < RtsFlags.GcFlags.generations; i++) { GC_coll_times[i] = 0; + GC_coll_etimes[i] = 0; } -} +} /* ----------------------------------------------------------------------------- Initialisation time... @@ -170,6 +229,16 @@ stat_endInit(void) } else { InitElapsedTime = elapsed - ElapsedTimeStart; } +#if USE_PAPI + /* We start counting events for the mutator + * when garbage collection starts + * we switch to the GC event set. */ + papi_start_mutator_count(); + + /* This flag is needed to avoid counting the last GC */ + papi_is_reporting = 1; + +#endif } /* ----------------------------------------------------------------------------- @@ -192,6 +261,16 @@ stat_startExit(void) MutUserTime = user - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime; if (MutUserTime < 0) { MutUserTime = 0; } + +#if USE_PAPI + /* We stop counting mutator events + * GC events are not being counted at this point */ + papi_stop_mutator_count(); + + /* This flag is needed, because GC is run once more after this function */ + papi_is_reporting = 0; + +#endif } void @@ -249,6 +328,15 @@ stat_startGC(void) GC_start_faults = getPageFaults(); } } + +#if USE_PAPI + if(papi_is_reporting) { + /* Switch to counting GC events */ + papi_stop_mutator_count(); + papi_start_gc_count(); + } +#endif + } /* ----------------------------------------------------------------------------- @@ -256,8 +344,8 @@ stat_startGC(void) -------------------------------------------------------------------------- */ void -stat_endGC (lnat alloc, lnat live, lnat copied, - lnat scavd_copied, lnat gen) +stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen, + lnat max_copied, lnat avg_copied, lnat slop) { if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { Ticks time, etime, gc_time, gc_etime; @@ -270,7 +358,7 @@ stat_endGC (lnat alloc, lnat live, lnat copied, nat faults = getPageFaults(); statsPrintf("%9ld %9ld %9ld", - alloc*sizeof(W_), (copied+scavd_copied)*sizeof(W_), + alloc*sizeof(W_), copied*sizeof(W_), live*sizeof(W_)); statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2ld)\n", TICK_TO_DBL(gc_time), @@ -286,10 +374,12 @@ stat_endGC (lnat alloc, lnat live, lnat copied, } GC_coll_times[gen] += gc_time; + GC_coll_etimes[gen] += gc_etime; GC_tot_copied += (ullong) copied; - GC_tot_scavd_copied += (ullong) scavd_copied; GC_tot_alloc += (ullong) alloc; + GC_par_max_copied += (ullong) max_copied; + GC_par_avg_copied += (ullong) avg_copied; GC_tot_time += gc_time; GCe_tot_time += gc_etime; @@ -310,12 +400,26 @@ stat_endGC (lnat alloc, lnat live, lnat copied, ResidencySamples++; AvgResidency += live; } + + if (slop > MaxSlop) MaxSlop = slop; } if (rub_bell) { debugBelch("\b\b\b \b\b\b"); rub_bell = 0; } + +#if USE_PAPI + if(papi_is_reporting) { + /* Switch to counting mutator events */ + if (gen == 0) { + papi_stop_gc0_count(); + } else { + papi_stop_gc1_count(); + } + papi_start_mutator_count(); + } +#endif } /* ----------------------------------------------------------------------------- @@ -401,6 +505,53 @@ stat_endHeapCensus(void) were left unused when the heap-check failed. -------------------------------------------------------------------------- */ +#ifdef DEBUG +#define TICK_VAR(arity) \ + extern StgInt SLOW_CALLS_##arity; \ + extern StgInt RIGHT_ARITY_##arity; \ + extern StgInt TAGGED_PTR_##arity; + +#define TICK_VAR_INI(arity) \ + StgInt SLOW_CALLS_##arity = 1; \ + StgInt RIGHT_ARITY_##arity = 1; \ + StgInt TAGGED_PTR_##arity = 0; + +extern StgInt TOTAL_CALLS; + +TICK_VAR(1) +TICK_VAR(2) + +TICK_VAR_INI(1) +TICK_VAR_INI(2) + +StgInt TOTAL_CALLS=1; +#endif + +/* Report the value of a counter */ +#define REPORT(counter) \ + { \ + ullong_format_string(counter,temp,rtsTrue/*commas*/); \ + statsPrintf(" (" #counter ") : %s\n",temp); \ + } + +/* Report the value of a counter as a percentage of another counter */ +#define REPORT_PCT(counter,countertot) \ + statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \ + counter*100.0/countertot) + +#define TICK_PRINT(arity) \ + REPORT(SLOW_CALLS_##arity); \ + REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \ + REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \ + REPORT(RIGHT_ARITY_##arity); \ + REPORT(TAGGED_PTR_##arity) + +#define TICK_PRINT_TOT(arity) \ + statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \ + SLOW_CALLS_##arity * 100.0/TOTAL_CALLS) + +extern lnat hw_alloc_blocks; + void stat_exit(int alloc) { @@ -432,33 +583,46 @@ stat_exit(int alloc) if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) { ullong_format_string(GC_tot_alloc*sizeof(W_), temp, rtsTrue/*commas*/); - statsPrintf("%11s bytes allocated in the heap\n", temp); + statsPrintf("%16s bytes allocated in the heap\n", temp); ullong_format_string(GC_tot_copied*sizeof(W_), temp, rtsTrue/*commas*/); - statsPrintf("%11s bytes copied during GC (scavenged)\n", temp); + statsPrintf("%16s bytes copied during GC\n", temp); - ullong_format_string(GC_tot_scavd_copied*sizeof(W_), - temp, rtsTrue/*commas*/); - statsPrintf("%11s bytes copied during GC (not scavenged)\n", temp); - if ( ResidencySamples > 0 ) { ullong_format_string(MaxResidency*sizeof(W_), temp, rtsTrue/*commas*/); - statsPrintf("%11s bytes maximum residency (%ld sample(s))\n", + statsPrintf("%16s bytes maximum residency (%ld sample(s))\n", temp, ResidencySamples); } - statsPrintf("\n"); + + ullong_format_string(MaxSlop*sizeof(W_), temp, rtsTrue/*commas*/); + statsPrintf("%16s bytes maximum slop\n", temp); + + statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n", + mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)), + (mblocks_allocated * MBLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_))); /* Print garbage collections in each gen */ for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - statsPrintf("%11d collections in generation %d (%6.2fs)\n", - generations[g].collections, g, - TICK_TO_DBL(GC_coll_times[g])); + statsPrintf(" Generation %d: %5d collections, %5d parallel, %5.2fs, %5.2fs elapsed\n", + g, generations[g].collections, + generations[g].par_collections, + TICK_TO_DBL(GC_coll_times[g]), + TICK_TO_DBL(GC_coll_etimes[g])); } - statsPrintf("\n%11ld Mb total memory in use\n\n", - mblocks_allocated * MBLOCK_SIZE / (1024 * 1024)); +#if defined(THREADED_RTS) + if (RtsFlags.ParFlags.gcThreads > 1) { + statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n", + (double)GC_par_avg_copied / (double)GC_par_max_copied, + (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied, + RtsFlags.ParFlags.gcThreads + ); + } +#endif + + statsPrintf("\n"); #if defined(THREADED_RTS) { @@ -517,6 +681,33 @@ stat_exit(int alloc) TICK_TO_DBL(time - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100 / TICK_TO_DBL(etime)); + + /* + TICK_PRINT(1); + TICK_PRINT(2); + REPORT(TOTAL_CALLS); + TICK_PRINT_TOT(1); + TICK_PRINT_TOT(2); + */ + +#if USE_PAPI + papi_stats_report(); +#endif +#if defined(THREADED_RTS) && defined(PROF_SPIN) + { + nat g, s; + + statsPrintf("recordMutableGen_sync: %"FMT_Word64"\n", recordMutableGen_sync.spin); + statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin); + statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin); + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + statsPrintf("gen[%d].steps[%d].sync_todo: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_todo.spin); + statsPrintf("gen[%d].steps[%d].sync_large_objects: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_large_objects.spin); + } + } + } +#endif } if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) { @@ -537,9 +728,13 @@ stat_exit(int alloc) statsFlush(); statsClose(); } + if (GC_coll_times) stgFree(GC_coll_times); GC_coll_times = NULL; + if (GC_coll_etimes) + stgFree(GC_coll_etimes); + GC_coll_etimes = NULL; } /* ----------------------------------------------------------------------------- @@ -547,52 +742,52 @@ stat_exit(int alloc) Produce some detailed info on the state of the generational GC. -------------------------------------------------------------------------- */ -#ifdef DEBUG void statDescribeGens(void) { nat g, s, mut, lge; - lnat live; + lnat live, slop; + lnat tot_live, tot_slop; bdescr *bd; step *step; debugBelch( -" Gen Steps Max Mut-list Step Blocks Live Large\n" -" Blocks Bytes Objects\n"); +"-----------------------------------------------------------------\n" +" Gen Max Mut-list Step Blocks Large Live Slop\n" +" Blocks Bytes Objects \n" +"-----------------------------------------------------------------\n"); - mut = 0; + tot_live = 0; + tot_slop = 0; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + mut = 0; for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { mut += (bd->free - bd->start) * sizeof(W_); } - debugBelch("%8d %8d %8d %9d", g, generations[g].n_steps, - generations[g].max_blocks, mut); + debugBelch("%5d %7d %9d", g, generations[g].max_blocks, mut); for (s = 0; s < generations[g].n_steps; s++) { step = &generations[g].steps[s]; - live = 0; for (bd = step->large_objects, lge = 0; bd; bd = bd->link) { lge++; } - live = step->n_large_blocks * BLOCK_SIZE; - bd = step->blocks; - // This live figure will be slightly less that the "live" figure - // given by +RTS -Sstderr, because we take don't count the - // slop at the end of each block. - for (; bd; bd = bd->link) { - live += (bd->free - bd->start) * sizeof(W_); - } + live = step->n_words + countOccupied(step->large_objects); if (s != 0) { - debugBelch("%36s",""); + debugBelch("%23s",""); } - debugBelch("%6d %8d %8ld %8d\n", s, step->n_blocks, - live, lge); + slop = (step->n_blocks + step->n_large_blocks) * BLOCK_SIZE_W - live; + debugBelch("%6d %8d %8d %8ld %8ld\n", s, step->n_blocks, lge, + live*sizeof(W_), slop*sizeof(W_)); + tot_live += live; + tot_slop += slop; } } + debugBelch("-----------------------------------------------------------------\n"); + debugBelch("%48s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); + debugBelch("-----------------------------------------------------------------\n"); debugBelch("\n"); } -#endif /* ----------------------------------------------------------------------------- Stats available via a programmatic interface, so eg. GHCi can time @@ -606,7 +801,7 @@ extern HsInt64 getAllocations( void ) Dumping stuff in the stats file, or via the debug message interface -------------------------------------------------------------------------- */ -static void +void statsPrintf( char *s, ... ) { FILE *sf = RtsFlags.GcFlags.statsFile;