*
* ---------------------------------------------------------------------------*/
+#include "PosixSource.h"
#include "Rts.h"
-#include "RtsFlags.h"
+
#include "RtsUtils.h"
-#include "MBlock.h"
-#include "Storage.h"
#include "Schedule.h"
#include "Stats.h"
-#include "ParTicky.h" /* ToDo: move into Rts.h */
#include "Profiling.h"
#include "GetTime.h"
-#include "GC.h"
-#include "GCUtils.h"
-#include "Evac.h"
+#include "sm/Storage.h"
+#include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
#if USE_PAPI
#include "Papi.h"
static ullong GC_tot_alloc = 0;
static ullong GC_tot_copied = 0;
+static ullong GC_par_max_copied = 0;
+static ullong GC_par_avg_copied = 0;
+
static Ticks GC_start_time = 0, GC_tot_time = 0; /* User GC Time */
static Ticks GCe_start_time = 0, GCe_tot_time = 0; /* Elapsed GC time */
static lnat MaxResidency = 0; // in words; for stats only
static lnat AvgResidency = 0;
static lnat ResidencySamples = 0; // for stats only
+static lnat MaxSlop = 0;
static lnat GC_start_faults = 0, GC_end_faults = 0;
}
#endif /* PROFILING */
+// initStats0() has no dependencies, it can be called right at the beginning: it only assigns zero to the counters below.
+void
+initStats0(void)
+{
+ ElapsedTimeStart = 0;
+ // Init* time variables
+ InitUserTime = 0;
+ InitElapsedTime = 0;
+ InitElapsedStamp = 0;
+ // Mut* time variables
+ MutUserTime = 0;
+ MutElapsedTime = 0;
+ MutElapsedStamp = 0;
+ // Exit* time variables
+ ExitUserTime = 0;
+ ExitElapsedTime = 0;
+ // GC totals: allocation, copying (including the parallel max/avg sums) and user/elapsed GC time
+ GC_tot_alloc = 0;
+ GC_tot_copied = 0;
+ GC_par_max_copied = 0;
+ GC_par_avg_copied = 0;
+ GC_start_time = 0;
+ GC_tot_time = 0;
+ GCe_start_time = 0;
+ GCe_tot_time = 0;
+ // RP_* and HC_* timers are reset only when PROFILING is defined
+#ifdef PROFILING
+ RP_start_time = 0;
+ RP_tot_time = 0;
+ RPe_start_time = 0;
+ RPe_tot_time = 0;
+
+ HC_start_time = 0;
+ HC_tot_time = 0;
+ HCe_start_time = 0;
+ HCe_tot_time = 0;
+#endif
+ // residency and slop statistics
+ MaxResidency = 0;
+ AvgResidency = 0;
+ ResidencySamples = 0;
+ MaxSlop = 0;
+ // GC_*_faults counters
+ GC_start_faults = 0;
+ GC_end_faults = 0;
+}
+
+// initStats1() can be called after setupRtsFlags()
void
-initStats(void)
+initStats1 (void)
{
nat i;
GC_coll_times[i] = 0;
GC_coll_etimes[i] = 0;
}
-}
+}
/* -----------------------------------------------------------------------------
Initialisation time...
-------------------------------------------------------------------------- */
void
-stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen)
+stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen,
+ lnat max_copied, lnat avg_copied, lnat slop)
{
if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
Ticks time, etime, gc_time, gc_etime;
GC_tot_copied += (ullong) copied;
GC_tot_alloc += (ullong) alloc;
+ GC_par_max_copied += (ullong) max_copied;
+ GC_par_avg_copied += (ullong) avg_copied;
GC_tot_time += gc_time;
GCe_tot_time += gc_etime;
ResidencySamples++;
AvgResidency += live;
}
+
+ if (slop > MaxSlop) MaxSlop = slop;
}
if (rub_bell) {
-------------------------------------------------------------------------- */
#ifdef DEBUG
-#define TICK_VAR(arity) \
- extern StgInt SLOW_CALLS_##arity; \
- extern StgInt RIGHT_ARITY_##arity; \
- extern StgInt TAGGED_PTR_##arity;
-
#define TICK_VAR_INI(arity) \
StgInt SLOW_CALLS_##arity = 1; \
StgInt RIGHT_ARITY_##arity = 1; \
StgInt TAGGED_PTR_##arity = 0;
-extern StgInt TOTAL_CALLS;
-
-TICK_VAR(1)
-TICK_VAR(2)
-
TICK_VAR_INI(1)
TICK_VAR_INI(2)
statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
+extern lnat hw_alloc_blocks;
void
stat_exit(int alloc)
if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
ullong_format_string(GC_tot_alloc*sizeof(W_),
temp, rtsTrue/*commas*/);
- statsPrintf("%11s bytes allocated in the heap\n", temp);
+ statsPrintf("%16s bytes allocated in the heap\n", temp);
ullong_format_string(GC_tot_copied*sizeof(W_),
temp, rtsTrue/*commas*/);
- statsPrintf("%11s bytes copied during GC\n", temp);
+ statsPrintf("%16s bytes copied during GC\n", temp);
if ( ResidencySamples > 0 ) {
ullong_format_string(MaxResidency*sizeof(W_),
temp, rtsTrue/*commas*/);
- statsPrintf("%11s bytes maximum residency (%ld sample(s))\n",
+ statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
temp, ResidencySamples);
}
- statsPrintf("\n");
+
+ ullong_format_string(MaxSlop*sizeof(W_), temp, rtsTrue/*commas*/);
+ statsPrintf("%16s bytes maximum slop\n", temp);
+
+ statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n",
+ mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
+ (mblocks_allocated * MBLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
/* Print garbage collections in each gen */
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- statsPrintf("%11d collections in generation %d, %6.2fs, %6.2fs elapsed\n",
- generations[g].collections, g,
+ statsPrintf(" Generation %d: %5d collections, %5d parallel, %5.2fs, %5.2fs elapsed\n",
+ g, generations[g].collections,
+ generations[g].par_collections,
TICK_TO_DBL(GC_coll_times[g]),
TICK_TO_DBL(GC_coll_etimes[g]));
}
- statsPrintf("\n%11ld MB total memory in use\n\n",
- mblocks_allocated * MBLOCK_SIZE / (1024 * 1024));
+#if defined(THREADED_RTS)
+ if (RtsFlags.ParFlags.parGcEnabled) {
+ statsPrintf("\n Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
+ (double)GC_par_avg_copied / (double)GC_par_max_copied,
+ (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
+ RtsFlags.ParFlags.nNodes
+ );
+ }
+#endif
+
+ statsPrintf("\n");
#if defined(THREADED_RTS)
{
nat i;
Task *task;
+ statsPrintf(" MUT time (elapsed) GC time (elapsed)\n");
for (i = 0, task = all_tasks;
task != NULL;
i++, task = task->all_link) {
- statsPrintf(" Task %2d %-8s : MUT time: %6.2fs (%6.2fs elapsed)\n"
- " GC time: %6.2fs (%6.2fs elapsed)\n\n",
+ statsPrintf(" Task %2d %-8s : %6.2fs (%6.2fs) %6.2fs (%6.2fs)\n",
i,
- (task->tso == NULL) ? "(worker)" : "(bound)",
+ (task->worker) ? "(worker)" : "(bound)",
TICK_TO_DBL(task->mut_time),
TICK_TO_DBL(task->mut_etime),
TICK_TO_DBL(task->gc_time),
TICK_TO_DBL(task->gc_etime));
}
}
+
+ statsPrintf("\n");
+
+ {
+ nat i;
+ lnat sparks_created = 0;
+ lnat sparks_converted = 0;
+ lnat sparks_pruned = 0;
+ for (i = 0; i < n_capabilities; i++) {
+ sparks_created += capabilities[i].sparks_created;
+ sparks_converted += capabilities[i].sparks_converted;
+ sparks_pruned += capabilities[i].sparks_pruned;
+ }
+
+ statsPrintf(" SPARKS: %ld (%ld converted, %ld pruned)\n\n",
+ sparks_created, sparks_converted, sparks_pruned);
+ }
#endif
statsPrintf(" INIT time %6.2fs (%6.2fs elapsed)\n",
#if USE_PAPI
papi_stats_report();
#endif
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+ {
+ nat g;
+
+ statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
+ statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ statsPrintf("gen[%d].sync_large_objects: %"FMT_Word64"\n", g, generations[g].sync_large_objects.spin);
+ }
+ }
+#endif
}
if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
+ char *fmt1, *fmt2;
+ if (RtsFlags.MiscFlags.machineReadable) {
+ fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
+ fmt2 = " ,(\"num_GCs\", \"%d\")\n"
+ " ,(\"average_bytes_used\", \"%ld\")\n"
+ " ,(\"max_bytes_used\", \"%ld\")\n"
+ " ,(\"num_byte_usage_samples\", \"%ld\")\n"
+ " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
+ " ,(\"init_cpu_seconds\", \"%.2f\")\n"
+ " ,(\"init_wall_seconds\", \"%.2f\")\n"
+ " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
+ " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
+ " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
+ " ,(\"GC_wall_seconds\", \"%.2f\")\n"
+ " ]\n";
+ }
+ else {
+ fmt1 = "<<ghc: %llu bytes, ";
+ fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
+ }
/* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
- statsPrintf("<<ghc: %llu bytes, ", GC_tot_alloc*(ullong)sizeof(W_));
- statsPrintf("%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n",
+ statsPrintf(fmt1, GC_tot_alloc*(ullong)sizeof(W_));
+ statsPrintf(fmt2,
total_collections,
ResidencySamples == 0 ? 0 :
AvgResidency*sizeof(W_)/ResidencySamples,
TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time));
}
-#if defined(THREADED_RTS) && defined(PROF_SPIN)
- {
- nat g, s;
-
- statsPrintf("recordMutableGen_sync: %"FMT_Word64"\n", recordMutableGen_sync.spin);
- statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
- statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- for (s = 0; s < generations[g].n_steps; s++) {
- statsPrintf("gen[%d].steps[%d].sync_todo: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_todo.spin);
- statsPrintf("gen[%d].steps[%d].sync_large_objects: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_large_objects.spin);
- }
- }
- }
-#endif
-
statsFlush();
statsClose();
}
void
statDescribeGens(void)
{
- nat g, s, mut, lge;
- lnat live;
+ nat g, mut, lge; // generation index, mut-list bytes, large-object count
+ lnat live, slop; // per-generation figures; scaled by sizeof(W_) when printed
+ lnat tot_live, tot_slop; // running totals over all generations
bdescr *bd;
- step *step;
-
+ generation *gen;
+ // Prints, via debugBelch, one table row per generation followed by a totals row.
debugBelch(
-" Gen Steps Max Mut-list Step Blocks Live Large\n"
-" Blocks Bytes Objects\n");
+"----------------------------------------------------------\n"
+" Gen Max Mut-list Blocks Large Live Slop\n"
+" Blocks Bytes Objects \n"
+"----------------------------------------------------------\n");
- mut = 0;
+ tot_live = 0;
+ tot_slop = 0;
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ mut = 0; // restart the mut-list byte count for each generation
for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
mut += (bd->free - bd->start) * sizeof(W_);
}
- debugBelch("%8d %8d %8d %9d", g, generations[g].n_steps,
- generations[g].max_blocks, mut);
+ gen = &generations[g];
- for (s = 0; s < generations[g].n_steps; s++) {
- step = &generations[g].steps[s];
- live = 0;
- for (bd = step->large_objects, lge = 0; bd; bd = bd->link) {
- lge++;
- }
- // This live figure will be slightly less that the "live" figure
- // given by +RTS -Sstderr, because we take don't count the
- // slop at the end of each block.
- live += countOccupied(step->blocks) + countOccupied(step->large_objects);
- if (s != 0) {
- debugBelch("%36s","");
+ debugBelch("%5d %7d %9d", g, gen->max_blocks, mut);
+ // count the block descriptors linked on this generation's large-object list
+ for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
+ lge++;
}
- debugBelch("%6d %8d %8ld %8d\n", s, step->n_blocks,
- live, lge);
- }
+ live = gen->n_words + countOccupied(gen->large_objects); // NOTE(review): countOccupied() presumably returns words - confirm
+ slop = (gen->n_blocks + gen->n_large_blocks) * BLOCK_SIZE_W - live; // block capacity minus live
+ debugBelch("%8d %8d %8ld %8ld\n", gen->n_blocks, lge,
+ live*sizeof(W_), slop*sizeof(W_));
+ tot_live += live;
+ tot_slop += slop;
}
+ debugBelch("----------------------------------------------------------\n");
+ debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); // totals row; "%41s" with "" pads past the leading columns
+ debugBelch("----------------------------------------------------------\n");
debugBelch("\n");
}
-------------------------------------------------------------------------- */
extern HsInt64 getAllocations( void )
-{ return (HsInt64)total_allocated * sizeof(W_); }
+{ return (HsInt64)GC_tot_alloc * sizeof(W_); } // GC_tot_alloc scaled by sizeof(W_), the same product printed as "bytes allocated in the heap"
/* -----------------------------------------------------------------------------
Dumping stuff in the stats file, or via the debug message interface