X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2FStats.c;h=3036ed726548683ed40f8b3972b391ff7a5666d8;hp=8c24b299b6fa00fe612cc24d4cd96744e847d4be;hb=1b381af863d64aaa0a4dd9c816170c58e6131a9e;hpb=ea661992b7397eddee145b80a449c40ab565fd12 diff --git a/rts/Stats.c b/rts/Stats.c index 8c24b29..3036ed7 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -6,19 +6,18 @@ * * ---------------------------------------------------------------------------*/ +#include "PosixSource.h" #include "Rts.h" -#include "RtsFlags.h" + #include "RtsUtils.h" -#include "MBlock.h" -#include "Storage.h" #include "Schedule.h" #include "Stats.h" -#include "ParTicky.h" /* ToDo: move into Rts.h */ #include "Profiling.h" #include "GetTime.h" -#include "GC.h" -#include "GCUtils.h" -#include "Evac.h" +#include "sm/Storage.h" +#include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin +#include "sm/GCThread.h" +#include "sm/BlockAlloc.h" #if USE_PAPI #include "Papi.h" @@ -29,28 +28,23 @@ #define TICK_TO_DBL(t) ((double)(t) / TICKS_PER_SECOND) -static Ticks ElapsedTimeStart = 0; - -static Ticks InitUserTime = 0; -static Ticks InitElapsedTime = 0; -static Ticks InitElapsedStamp = 0; - -static Ticks MutUserTime = 0; -static Ticks MutElapsedTime = 0; -static Ticks MutElapsedStamp = 0; +static Ticks + start_init_cpu, start_init_elapsed, + end_init_cpu, end_init_elapsed, + start_exit_cpu, start_exit_elapsed, + end_exit_cpu, end_exit_elapsed; -static Ticks ExitUserTime = 0; -static Ticks ExitElapsedTime = 0; +static Ticks GC_tot_cpu = 0; -static ullong GC_tot_alloc = 0; -static ullong GC_tot_copied = 0; +static StgWord64 GC_tot_alloc = 0; +static StgWord64 GC_tot_copied = 0; -static Ticks GC_start_time = 0, GC_tot_time = 0; /* User GC Time */ -static Ticks GCe_start_time = 0, GCe_tot_time = 0; /* Elapsed GC time */ +static StgWord64 GC_par_max_copied = 0; +static StgWord64 GC_par_avg_copied = 0; #ifdef PROFILING -static Ticks RP_start_time = 0, RP_tot_time = 0; /* retainer prof user time */ -static Ticks RPe_start_time = 0, RPe_tot_time = 0; /* retainer prof elap time */ +static Ticks RP_start_time = 0, RP_tot_time = 0; // retainer prof user time +static Ticks RPe_start_time = 0, RPe_tot_time = 0; // retainer prof elap time static Ticks HC_start_time, HC_tot_time = 0; // heap census prof user time static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time @@ -62,75 +56,108 @@ static Ticks HCe_start_time, HCe_tot_time = 0; // heap census prof elap time #define PROF_VAL(x) 0 #endif -static lnat MaxResidency = 0; // in words; for stats only -static lnat AvgResidency = 0; -static lnat ResidencySamples = 0; // for stats only +static lnat max_residency = 0; // in words; for stats only +static lnat avg_residency = 0; +static lnat residency_samples = 0; // for stats only +static lnat max_slop = 0; -static lnat GC_start_faults = 0, GC_end_faults = 0; +static lnat GC_end_faults = 0; -static Ticks *GC_coll_times; -static Ticks *GC_coll_etimes; +static Ticks *GC_coll_cpu = NULL; +static Ticks *GC_coll_elapsed = NULL; +static Ticks *GC_coll_max_pause = NULL; static void statsFlush( void ); static void statsClose( void ); -Ticks stat_getElapsedGCTime(void) -{ - return GCe_tot_time; -} +/* ----------------------------------------------------------------------------- + Current elapsed time + ------------------------------------------------------------------------- */ Ticks stat_getElapsedTime(void) { - return getProcessElapsedTime() - ElapsedTimeStart; + return getProcessElapsedTime() - start_init_elapsed; } -/* mut_user_time_during_GC() and mut_user_time() - * - * The former function can be used to get the current mutator time - * *during* a GC, i.e. between stat_startGC and stat_endGC. This is - * used in the heap profiler for accurately time stamping the heap - * sample. - * - * ATTENTION: mut_user_time_during_GC() relies on GC_start_time being - * defined in stat_startGC() - to minimise system calls, - * GC_start_time is, however, only defined when really needed (check - * stat_startGC() for details) - */ -double -mut_user_time_during_GC( void ) -{ - return TICK_TO_DBL(GC_start_time - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time)); -} +/* --------------------------------------------------------------------------- + Measure the current MUT time, for profiling + ------------------------------------------------------------------------ */ double mut_user_time( void ) { - Ticks user; - user = getProcessCPUTime(); - return TICK_TO_DBL(user - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time)); + Ticks cpu; + cpu = getProcessCPUTime(); + return TICK_TO_DBL(cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time)); } #ifdef PROFILING /* - mut_user_time_during_RP() is similar to mut_user_time_during_GC(); - it returns the MUT time during retainer profiling. + mut_user_time_during_RP() returns the MUT time during retainer profiling. The same is for mut_user_time_during_HC(); */ double mut_user_time_during_RP( void ) { - return TICK_TO_DBL(RP_start_time - GC_tot_time - RP_tot_time - HC_tot_time); + return TICK_TO_DBL(RP_start_time - GC_tot_cpu - RP_tot_time - HC_tot_time); } double mut_user_time_during_heap_census( void ) { - return TICK_TO_DBL(HC_start_time - GC_tot_time - RP_tot_time - HC_tot_time); + return TICK_TO_DBL(HC_start_time - GC_tot_cpu - RP_tot_time - HC_tot_time); } #endif /* PROFILING */ +/* --------------------------------------------------------------------------- + initStats0() has no dependencies, it can be called right at the beginning + ------------------------------------------------------------------------ */ + void -initStats(void) +initStats0(void) +{ + start_init_cpu = 0; + start_init_elapsed = 0; + end_init_cpu = 0; + end_init_elapsed = 0; + + start_exit_cpu = 0; + start_exit_elapsed = 0; + end_exit_cpu = 0; + end_exit_elapsed = 0; + + GC_tot_alloc = 0; + GC_tot_copied = 0; + GC_par_max_copied = 0; + GC_par_avg_copied = 0; + GC_tot_cpu = 0; + +#ifdef PROFILING + RP_start_time = 0; + RP_tot_time = 0; + RPe_start_time = 0; + RPe_tot_time = 0; + + HC_start_time = 0; + HC_tot_time = 0; + HCe_start_time = 0; + HCe_tot_time = 0; +#endif + + max_residency = 0; + avg_residency = 0; + residency_samples = 0; + max_slop = 0; + + GC_end_faults = 0; +} + +/* --------------------------------------------------------------------------- + initStats1() can be called after setupRtsFlags() + ------------------------------------------------------------------------ */ + +void +initStats1 (void) { nat i; @@ -138,19 +165,24 @@ initStats(void) statsPrintf(" Alloc Copied Live GC GC TOT TOT Page Flts\n"); statsPrintf(" bytes bytes bytes user elap user elap\n"); } - GC_coll_times = + GC_coll_cpu = + (Ticks *)stgMallocBytes( + sizeof(Ticks)*RtsFlags.GcFlags.generations, + "initStats"); + GC_coll_elapsed = (Ticks *)stgMallocBytes( sizeof(Ticks)*RtsFlags.GcFlags.generations, "initStats"); - GC_coll_etimes = + GC_coll_max_pause = (Ticks *)stgMallocBytes( sizeof(Ticks)*RtsFlags.GcFlags.generations, "initStats"); for (i = 0; i < RtsFlags.GcFlags.generations; i++) { - GC_coll_times[i] = 0; - GC_coll_etimes[i] = 0; + GC_coll_cpu[i] = 0; + GC_coll_elapsed[i] = 0; + GC_coll_max_pause[i] = 0; } -} +} /* ----------------------------------------------------------------------------- Initialisation time... @@ -159,26 +191,14 @@ initStats(void) void stat_startInit(void) { - Ticks elapsed; - - elapsed = getProcessElapsedTime(); - ElapsedTimeStart = elapsed; + getProcessTimes(&start_init_cpu, &start_init_elapsed); } void stat_endInit(void) { - Ticks user, elapsed; + getProcessTimes(&end_init_cpu, &end_init_elapsed); - getProcessTimes(&user, &elapsed); - - InitUserTime = user; - InitElapsedStamp = elapsed; - if (ElapsedTimeStart > elapsed) { - InitElapsedTime = 0; - } else { - InitElapsedTime = elapsed - ElapsedTimeStart; - } #if USE_PAPI /* We start counting events for the mutator * when garbage collection starts @@ -200,17 +220,7 @@ stat_endInit(void) void stat_startExit(void) { - Ticks user, elapsed; - - getProcessTimes(&user, &elapsed); - - MutElapsedStamp = elapsed; - MutElapsedTime = elapsed - GCe_tot_time - - PROF_VAL(RPe_tot_time + HCe_tot_time) - InitElapsedStamp; - if (MutElapsedTime < 0) { MutElapsedTime = 0; } /* sometimes -0.00 */ - - MutUserTime = user - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime; - if (MutUserTime < 0) { MutUserTime = 0; } + getProcessTimes(&start_exit_cpu, &start_exit_elapsed); #if USE_PAPI /* We stop counting mutator events @@ -219,25 +229,13 @@ stat_startExit(void) /* This flag is needed, because GC is run once more after this function */ papi_is_reporting = 0; - #endif } void stat_endExit(void) { - Ticks user, elapsed; - - getProcessTimes(&user, &elapsed); - - ExitUserTime = user - MutUserTime - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime; - ExitElapsedTime = elapsed - MutElapsedStamp; - if (ExitUserTime < 0) { - ExitUserTime = 0; - } - if (ExitElapsedTime < 0) { - ExitElapsedTime = 0; - } + getProcessTimes(&end_exit_cpu, &end_exit_elapsed); } /* ----------------------------------------------------------------------------- @@ -246,13 +244,8 @@ stat_endExit(void) static nat rub_bell = 0; -/* initialise global variables needed during GC - * - * * GC_start_time is read in mut_user_time_during_GC(), which in turn is - * needed if either PROFILING or DEBUGing is enabled - */ void -stat_startGC(void) +stat_startGC (gc_thread *gct) { nat bell = RtsFlags.GcFlags.ringBell; @@ -265,20 +258,6 @@ stat_startGC(void) } } -#if defined(PROFILING) || defined(DEBUG) - GC_start_time = getProcessCPUTime(); // needed in mut_user_time_during_GC() -#endif - - if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { -#if !defined(PROFILING) && !defined(DEBUG) - GC_start_time = getProcessCPUTime(); -#endif - GCe_start_time = getProcessElapsedTime(); - if (RtsFlags.GcFlags.giveStats) { - GC_start_faults = getPageFaults(); - } - } - #if USE_PAPI if(papi_is_reporting) { /* Switch to counting GC events */ @@ -287,6 +266,40 @@ stat_startGC(void) } #endif + getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed); + gct->gc_start_thread_cpu = getThreadCPUTime(); + + if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) + { + gct->gc_start_faults = getPageFaults(); + } +} + +void +stat_gcWorkerThreadStart (gc_thread *gct) +{ + if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) + { + getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed); + gct->gc_start_thread_cpu = getThreadCPUTime(); + } +} + +void +stat_gcWorkerThreadDone (gc_thread *gct) +{ + Ticks thread_cpu, elapsed, gc_cpu, gc_elapsed; + + if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) + { + elapsed = getProcessElapsedTime(); + thread_cpu = getThreadCPUTime(); + + gc_cpu = thread_cpu - gct->gc_start_thread_cpu; + gc_elapsed = elapsed - gct->gc_start_elapsed; + + taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed); + } } /* ----------------------------------------------------------------------------- @@ -294,59 +307,65 @@ stat_startGC(void) -------------------------------------------------------------------------- */ void -stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen) +stat_endGC (gc_thread *gct, + lnat alloc, lnat live, lnat copied, nat gen, + lnat max_copied, lnat avg_copied, lnat slop) { - if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { - Ticks time, etime, gc_time, gc_etime; - - getProcessTimes(&time, &etime); - gc_time = time - GC_start_time; - gc_etime = etime - GCe_start_time; + if (RtsFlags.GcFlags.giveStats != NO_GC_STATS || + RtsFlags.ProfFlags.doHeapProfile) + // heap profiling needs GC_tot_time + { + Ticks cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed; - if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) { + getProcessTimes(&cpu, &elapsed); + gc_elapsed = elapsed - gct->gc_start_elapsed; + + thread_gc_cpu = getThreadCPUTime() - gct->gc_start_thread_cpu; + + gc_cpu = cpu - gct->gc_start_cpu; + + taskDoneGC(gct->cap->running_task, thread_gc_cpu, gc_elapsed); + + if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) { nat faults = getPageFaults(); statsPrintf("%9ld %9ld %9ld", alloc*sizeof(W_), copied*sizeof(W_), live*sizeof(W_)); - statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2ld)\n", - TICK_TO_DBL(gc_time), - TICK_TO_DBL(gc_etime), - TICK_TO_DBL(time), - TICK_TO_DBL(etime - ElapsedTimeStart), - faults - GC_start_faults, - GC_start_faults - GC_end_faults, - gen); - - GC_end_faults = faults; + statsPrintf(" %5.2f %5.2f %7.2f %7.2f %4ld %4ld (Gen: %2d)\n", + TICK_TO_DBL(gc_cpu), + TICK_TO_DBL(gc_elapsed), + TICK_TO_DBL(cpu), + TICK_TO_DBL(elapsed - start_init_elapsed), + faults - gct->gc_start_faults, + gct->gc_start_faults - GC_end_faults, + gen); + + GC_end_faults = faults; statsFlush(); } - GC_coll_times[gen] += gc_time; - GC_coll_etimes[gen] += gc_etime; + GC_coll_cpu[gen] += gc_cpu; + GC_coll_elapsed[gen] += gc_elapsed; + if (GC_coll_max_pause[gen] < gc_elapsed) { + GC_coll_max_pause[gen] = gc_elapsed; + } - GC_tot_copied += (ullong) copied; - GC_tot_alloc += (ullong) alloc; - GC_tot_time += gc_time; - GCe_tot_time += gc_etime; - -#if defined(THREADED_RTS) - { - Task *task; - if ((task = myTask()) != NULL) { - task->gc_time += gc_time; - task->gc_etime += gc_etime; - } - } -#endif + GC_tot_copied += (StgWord64) copied; + GC_tot_alloc += (StgWord64) alloc; + GC_par_max_copied += (StgWord64) max_copied; + GC_par_avg_copied += (StgWord64) avg_copied; + GC_tot_cpu += gc_cpu; if (gen == RtsFlags.GcFlags.generations-1) { /* major GC? */ - if (live > MaxResidency) { - MaxResidency = live; + if (live > max_residency) { + max_residency = live; } - ResidencySamples++; - AvgResidency += live; + residency_samples++; + avg_residency += live; } + + if (slop > max_slop) max_slop = slop; } if (rub_bell) { @@ -451,21 +470,11 @@ stat_endHeapCensus(void) -------------------------------------------------------------------------- */ #ifdef DEBUG -#define TICK_VAR(arity) \ - extern StgInt SLOW_CALLS_##arity; \ - extern StgInt RIGHT_ARITY_##arity; \ - extern StgInt TAGGED_PTR_##arity; - #define TICK_VAR_INI(arity) \ StgInt SLOW_CALLS_##arity = 1; \ StgInt RIGHT_ARITY_##arity = 1; \ StgInt TAGGED_PTR_##arity = 0; -extern StgInt TOTAL_CALLS; - -TICK_VAR(1) -TICK_VAR(2) - TICK_VAR_INI(1) TICK_VAR_INI(2) @@ -475,7 +484,7 @@ StgInt TOTAL_CALLS=1; /* Report the value of a counter */ #define REPORT(counter) \ { \ - ullong_format_string(counter,temp,rtsTrue/*commas*/); \ + showStgWord64(counter,temp,rtsTrue/*commas*/); \ statsPrintf(" (" #counter ") : %s\n",temp); \ } @@ -495,19 +504,28 @@ StgInt TOTAL_CALLS=1; statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \ SLOW_CALLS_##arity * 100.0/TOTAL_CALLS) - void stat_exit(int alloc) { + generation *gen; + Ticks gc_cpu = 0; + Ticks gc_elapsed = 0; + Ticks init_cpu = 0; + Ticks init_elapsed = 0; + Ticks mut_cpu = 0; + Ticks mut_elapsed = 0; + Ticks exit_cpu = 0; + Ticks exit_elapsed = 0; + if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) { char temp[BIG_STRING_LEN]; - Ticks time; - Ticks etime; - nat g, total_collections = 0; + Ticks tot_cpu; + Ticks tot_elapsed; + nat i, g, total_collections = 0; - getProcessTimes( &time, &etime ); - etime -= ElapsedTimeStart; + getProcessTimes( &tot_cpu, &tot_elapsed ); + tot_elapsed -= start_init_elapsed; GC_tot_alloc += alloc; @@ -515,100 +533,162 @@ stat_exit(int alloc) for (g = 0; g < RtsFlags.GcFlags.generations; g++) total_collections += generations[g].collections; - /* avoid divide by zero if time is measured as 0.00 seconds -- SDM */ - if (time == 0.0) time = 1; - if (etime == 0.0) etime = 1; + /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */ + if (tot_cpu == 0.0) tot_cpu = 1; + if (tot_elapsed == 0.0) tot_elapsed = 1; if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) { statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", ""); statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0); } + for (i = 0; i < RtsFlags.GcFlags.generations; i++) { + gc_cpu += GC_coll_cpu[i]; + gc_elapsed += GC_coll_elapsed[i]; + } + + init_cpu = end_init_cpu - start_init_cpu; + init_elapsed = end_init_elapsed - start_init_elapsed; + + exit_cpu = end_exit_cpu - start_exit_cpu; + exit_elapsed = end_exit_elapsed - start_exit_elapsed; + + mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed; + + mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu + - PROF_VAL(RP_tot_time + HC_tot_time); + if (mut_cpu < 0) { mut_cpu = 0; } + if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) { - ullong_format_string(GC_tot_alloc*sizeof(W_), + showStgWord64(GC_tot_alloc*sizeof(W_), temp, rtsTrue/*commas*/); - statsPrintf("%11s bytes allocated in the heap\n", temp); + statsPrintf("%16s bytes allocated in the heap\n", temp); - ullong_format_string(GC_tot_copied*sizeof(W_), + showStgWord64(GC_tot_copied*sizeof(W_), temp, rtsTrue/*commas*/); - statsPrintf("%11s bytes copied during GC\n", temp); + statsPrintf("%16s bytes copied during GC\n", temp); - if ( ResidencySamples > 0 ) { - ullong_format_string(MaxResidency*sizeof(W_), + if ( residency_samples > 0 ) { + showStgWord64(max_residency*sizeof(W_), temp, rtsTrue/*commas*/); - statsPrintf("%11s bytes maximum residency (%ld sample(s))\n", - temp, ResidencySamples); + statsPrintf("%16s bytes maximum residency (%ld sample(s))\n", + temp, residency_samples); } - statsPrintf("\n"); + + showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/); + statsPrintf("%16s bytes maximum slop\n", temp); + + statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n", + peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)), + (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_))); /* Print garbage collections in each gen */ - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - statsPrintf("%11d collections in generation %d, %6.2fs, %6.2fs elapsed\n", - generations[g].collections, g, - TICK_TO_DBL(GC_coll_times[g]), - TICK_TO_DBL(GC_coll_etimes[g])); - } + statsPrintf(" Tot time (elapsed) Avg pause Max pause\n"); + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + gen = &generations[g]; + statsPrintf(" Gen %2d %5d colls, %5d par %5.2fs %5.2fs %3.4fs %3.4fs\n", + gen->no, + gen->collections, + gen->par_collections, + TICK_TO_DBL(GC_coll_cpu[g]), + TICK_TO_DBL(GC_coll_elapsed[g]), + gen->collections == 0 ? 0 : TICK_TO_DBL(GC_coll_elapsed[g] / gen->collections), + TICK_TO_DBL(GC_coll_max_pause[g])); + } - statsPrintf("\n%11ld MB total memory in use\n\n", - mblocks_allocated * MBLOCK_SIZE / (1024 * 1024)); +#if defined(THREADED_RTS) + if (RtsFlags.ParFlags.parGcEnabled) { + statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n", + (double)GC_par_avg_copied / (double)GC_par_max_copied, + (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied, + RtsFlags.ParFlags.nNodes + ); + } +#endif + statsPrintf("\n"); #if defined(THREADED_RTS) { nat i; Task *task; + statsPrintf(" MUT time (elapsed) GC time (elapsed)\n"); for (i = 0, task = all_tasks; task != NULL; i++, task = task->all_link) { - statsPrintf(" Task %2d %-8s : MUT time: %6.2fs (%6.2fs elapsed)\n" - " GC time: %6.2fs (%6.2fs elapsed)\n\n", + statsPrintf(" Task %2d %-8s : %6.2fs (%6.2fs) %6.2fs (%6.2fs)\n", i, - (task->tso == NULL) ? "(worker)" : "(bound)", + (task->worker) ? "(worker)" : "(bound)", TICK_TO_DBL(task->mut_time), TICK_TO_DBL(task->mut_etime), TICK_TO_DBL(task->gc_time), TICK_TO_DBL(task->gc_etime)); } } + + statsPrintf("\n"); + + { + nat i; + lnat sparks_created = 0; + lnat sparks_dud = 0; + lnat sparks_converted = 0; + lnat sparks_gcd = 0; + lnat sparks_fizzled = 0; + for (i = 0; i < n_capabilities; i++) { + sparks_created += capabilities[i].sparks_created; + sparks_dud += capabilities[i].sparks_dud; + sparks_converted += capabilities[i].sparks_converted; + sparks_gcd += capabilities[i].sparks_gcd; + sparks_fizzled += capabilities[i].sparks_fizzled; + } + + statsPrintf(" SPARKS: %ld (%ld converted, %ld dud, %ld GC'd, %ld fizzled)\n\n", + sparks_created + sparks_dud, sparks_converted, sparks_dud, sparks_gcd, sparks_fizzled); + } #endif - statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(InitUserTime), TICK_TO_DBL(InitElapsedTime)); - statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(MutUserTime), TICK_TO_DBL(MutElapsedTime)); - statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time)); + statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n", + TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed)); + + statsPrintf(" MUT time %6.2fs (%6.2fs elapsed)\n", + TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed)); + statsPrintf(" GC time %6.2fs (%6.2fs elapsed)\n", + TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed)); + #ifdef PROFILING - statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n", + statsPrintf(" RP time %6.2fs (%6.2fs elapsed)\n", TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time)); - statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n", + statsPrintf(" PROF time %6.2fs (%6.2fs elapsed)\n", TICK_TO_DBL(HC_tot_time), TICK_TO_DBL(HCe_tot_time)); #endif - statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n", - TICK_TO_DBL(ExitUserTime), TICK_TO_DBL(ExitElapsedTime)); - statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n", - TICK_TO_DBL(time), TICK_TO_DBL(etime)); - statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n", - TICK_TO_DBL(GC_tot_time)*100/TICK_TO_DBL(time), - TICK_TO_DBL(GCe_tot_time)*100/TICK_TO_DBL(etime)); - - if (time - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) == 0) - ullong_format_string(0, temp, rtsTrue/*commas*/); + statsPrintf(" EXIT time %6.2fs (%6.2fs elapsed)\n", + TICK_TO_DBL(exit_cpu), TICK_TO_DBL(exit_elapsed)); + statsPrintf(" Total time %6.2fs (%6.2fs elapsed)\n\n", + TICK_TO_DBL(tot_cpu), TICK_TO_DBL(tot_elapsed)); +#ifndef THREADED_RTS + statsPrintf(" %%GC time %5.1f%% (%.1f%% elapsed)\n\n", + TICK_TO_DBL(gc_cpu)*100/TICK_TO_DBL(tot_cpu), + TICK_TO_DBL(gc_elapsed)*100/TICK_TO_DBL(tot_elapsed)); +#endif + + if (tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) == 0) + showStgWord64(0, temp, rtsTrue/*commas*/); else - ullong_format_string( - (ullong)((GC_tot_alloc*sizeof(W_))/ - TICK_TO_DBL(time - GC_tot_time - + showStgWord64( + (StgWord64)((GC_tot_alloc*sizeof(W_))/ + TICK_TO_DBL(tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time))), temp, rtsTrue/*commas*/); statsPrintf(" Alloc rate %s bytes per MUT second\n\n", temp); statsPrintf(" Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n", - TICK_TO_DBL(time - GC_tot_time - - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100 - / TICK_TO_DBL(time), - TICK_TO_DBL(time - GC_tot_time - - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100 - / TICK_TO_DBL(etime)); + TICK_TO_DBL(tot_cpu - GC_tot_cpu - + PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100 + / TICK_TO_DBL(tot_cpu), + TICK_TO_DBL(tot_cpu - GC_tot_cpu - + PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100 + / TICK_TO_DBL(tot_elapsed)); /* TICK_PRINT(1); @@ -621,46 +701,70 @@ stat_exit(int alloc) #if USE_PAPI papi_stats_report(); #endif +#if defined(THREADED_RTS) && defined(PROF_SPIN) + { + nat g; + + statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin); + statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin); + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin); + } + } +#endif } if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) { + char *fmt1, *fmt2; + if (RtsFlags.MiscFlags.machineReadable) { + fmt1 = " [(\"bytes allocated\", \"%llu\")\n"; + fmt2 = " ,(\"num_GCs\", \"%d\")\n" + " ,(\"average_bytes_used\", \"%ld\")\n" + " ,(\"max_bytes_used\", \"%ld\")\n" + " ,(\"num_byte_usage_samples\", \"%ld\")\n" + " ,(\"peak_megabytes_allocated\", \"%lu\")\n" + " ,(\"init_cpu_seconds\", \"%.2f\")\n" + " ,(\"init_wall_seconds\", \"%.2f\")\n" + " ,(\"mutator_cpu_seconds\", \"%.2f\")\n" + " ,(\"mutator_wall_seconds\", \"%.2f\")\n" + " ,(\"GC_cpu_seconds\", \"%.2f\")\n" + " ,(\"GC_wall_seconds\", \"%.2f\")\n" + " ]\n"; + } + else { + fmt1 = "<>\n", + statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_)); + statsPrintf(fmt2, total_collections, - ResidencySamples == 0 ? 0 : - AvgResidency*sizeof(W_)/ResidencySamples, - MaxResidency*sizeof(W_), - ResidencySamples, - (unsigned long)(mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)), - TICK_TO_DBL(InitUserTime), TICK_TO_DBL(InitElapsedTime), - TICK_TO_DBL(MutUserTime), TICK_TO_DBL(MutElapsedTime), - TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time)); + residency_samples == 0 ? 0 : + avg_residency*sizeof(W_)/residency_samples, + max_residency*sizeof(W_), + residency_samples, + (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)), + TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed), + TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed), + TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed)); } statsFlush(); statsClose(); } - if (GC_coll_times) - stgFree(GC_coll_times); - GC_coll_times = NULL; -#if defined(THREADED_RTS) && defined(PROF_SPIN) - { - nat g, s; - - statsPrintf("recordMutableGen_sync: %"FMT_Word64"\n", recordMutableGen_sync.spin); - statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin); - statsPrintf("static_objects_sync: %"FMT_Word64"\n", static_objects_sync.spin); - statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin); - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - statsPrintf("gen[%d].steps[%d].sync_todo: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_todo.spin); - statsPrintf("gen[%d].steps[%d].sync_large_objects: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_large_objects.spin); - } - } + if (GC_coll_cpu) { + stgFree(GC_coll_cpu); + GC_coll_cpu = NULL; + } + if (GC_coll_elapsed) { + stgFree(GC_coll_elapsed); + GC_coll_elapsed = NULL; + } + if (GC_coll_max_pause) { + stgFree(GC_coll_max_pause); + GC_coll_max_pause = NULL; } -#endif } /* ----------------------------------------------------------------------------- @@ -668,48 +772,65 @@ stat_exit(int alloc) Produce some detailed info on the state of the generational GC. -------------------------------------------------------------------------- */ -#ifdef DEBUG void statDescribeGens(void) { - nat g, s, mut, lge; - lnat live; + nat g, mut, lge, i; + lnat gen_slop; + lnat tot_live, tot_slop; + lnat gen_live, gen_blocks; bdescr *bd; - step *step; - + generation *gen; + debugBelch( -" Gen Steps Max Mut-list Step Blocks Live Large\n" -" Blocks Bytes Objects\n"); +"----------------------------------------------------------\n" +" Gen Max Mut-list Blocks Large Live Slop\n" +" Blocks Bytes Objects \n" +"----------------------------------------------------------\n"); + + tot_live = 0; + tot_slop = 0; - mut = 0; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { - mut += (bd->free - bd->start) * sizeof(W_); + gen = &generations[g]; + + for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) { + lge++; } - debugBelch("%8d %8d %8d %9d", g, generations[g].n_steps, - generations[g].max_blocks, mut); + gen_live = genLiveWords(gen); + gen_blocks = genLiveBlocks(gen); - for (s = 0; s < generations[g].n_steps; s++) { - step = &generations[g].steps[s]; - live = 0; - for (bd = step->large_objects, lge = 0; bd; bd = bd->link) { - lge++; - } - // This live figure will be slightly less that the "live" figure - // given by +RTS -Sstderr, because we take don't count the - // slop at the end of each block. - live += countOccupied(step->blocks) + countOccupied(step->large_objects); - if (s != 0) { - debugBelch("%36s",""); + mut = 0; + for (i = 0; i < n_capabilities; i++) { + mut += countOccupied(capabilities[i].mut_lists[g]); + + // Add the pinned object block. + bd = capabilities[i].pinned_object_block; + if (bd != NULL) { + gen_live += bd->free - bd->start; + gen_blocks += bd->blocks; + } + + gen_live += gcThreadLiveWords(i,g); + gen_live += gcThreadLiveWords(i,g); + gen_blocks += gcThreadLiveBlocks(i,g); } - debugBelch("%6d %8d %8ld %8d\n", s, step->n_blocks, - live, lge); - } + + debugBelch("%5d %7d %9d", g, gen->max_blocks, mut); + + gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live; + + debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge, + gen_live*sizeof(W_), gen_slop*sizeof(W_)); + tot_live += gen_live; + tot_slop += gen_slop; } + debugBelch("----------------------------------------------------------\n"); + debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); + debugBelch("----------------------------------------------------------\n"); debugBelch("\n"); } -#endif /* ----------------------------------------------------------------------------- Stats available via a programmatic interface, so eg. GHCi can time @@ -717,7 +838,7 @@ statDescribeGens(void) -------------------------------------------------------------------------- */ extern HsInt64 getAllocations( void ) -{ return (HsInt64)total_allocated * sizeof(W_); } +{ return (HsInt64)GC_tot_alloc * sizeof(W_); } /* ----------------------------------------------------------------------------- Dumping stuff in the stats file, or via the debug message interface