RTS tidyup sweep, first phase
[ghc-hetmet.git] / rts / Stats.c
index e331462..6029745 100644 (file)
@@ -6,19 +6,16 @@
  *
  * ---------------------------------------------------------------------------*/
 
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+
 #include "RtsUtils.h"
-#include "MBlock.h"
-#include "Storage.h"
 #include "Schedule.h"
 #include "Stats.h"
-#include "ParTicky.h"                       /* ToDo: move into Rts.h */
 #include "Profiling.h"
 #include "GetTime.h"
-#include "GC.h"
-#include "GCUtils.h"
-#include "Evac.h"
+#include "sm/Storage.h"
+#include "sm/GC.h" // gc_alloc_block_sync, whitehole_spin
 
 #if USE_PAPI
 #include "Papi.h"
@@ -508,21 +505,11 @@ stat_endHeapCensus(void)
    -------------------------------------------------------------------------- */
 
 #ifdef DEBUG
-#define TICK_VAR(arity) \
-  extern StgInt SLOW_CALLS_##arity; \
-  extern StgInt RIGHT_ARITY_##arity; \
-  extern StgInt TAGGED_PTR_##arity;
-
 #define TICK_VAR_INI(arity) \
   StgInt SLOW_CALLS_##arity = 1; \
   StgInt RIGHT_ARITY_##arity = 1; \
   StgInt TAGGED_PTR_##arity = 0;
 
-extern StgInt TOTAL_CALLS;
-
-TICK_VAR(1)
-TICK_VAR(2)
-
 TICK_VAR_INI(1)
 TICK_VAR_INI(2)
 
@@ -552,6 +539,7 @@ StgInt TOTAL_CALLS=1;
   statsPrintf("  (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
              SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
 
+extern lnat hw_alloc_blocks;
 
 void
 stat_exit(int alloc)
@@ -600,8 +588,9 @@ stat_exit(int alloc)
            ullong_format_string(MaxSlop*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);
 
-           statsPrintf("%16ld MB total memory in use\n\n", 
-                   mblocks_allocated * MBLOCK_SIZE / (1024 * 1024));
+           statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n", 
+                        mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
+                        (mblocks_allocated * MBLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
 
            /* Print garbage collections in each gen */
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
@@ -613,11 +602,11 @@ stat_exit(int alloc)
            }
 
 #if defined(THREADED_RTS)
-            if (RtsFlags.ParFlags.gcThreads > 1) {
+            if (RtsFlags.ParFlags.parGcEnabled) {
                 statsPrintf("\n  Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n", 
                             (double)GC_par_avg_copied / (double)GC_par_max_copied,
                             (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
-                            RtsFlags.ParFlags.gcThreads
+                            RtsFlags.ParFlags.nNodes
                     );
             }
 #endif
@@ -628,11 +617,11 @@ stat_exit(int alloc)
            {
                nat i;
                Task *task;
+                statsPrintf("                        MUT time (elapsed)       GC time  (elapsed)\n");
                for (i = 0, task = all_tasks; 
                     task != NULL; 
                     i++, task = task->all_link) {
-                   statsPrintf("  Task %2d %-8s :  MUT time: %6.2fs  (%6.2fs elapsed)\n"
-                           "                      GC  time: %6.2fs  (%6.2fs elapsed)\n\n", 
+                   statsPrintf("  Task %2d %-8s :  %6.2fs    (%6.2fs)     %6.2fs    (%6.2fs)\n",
                                i,
                                (task->tso == NULL) ? "(worker)" : "(bound)",
                                TICK_TO_DBL(task->mut_time),
@@ -641,6 +630,23 @@ stat_exit(int alloc)
                                TICK_TO_DBL(task->gc_etime));
                }
            }
+
+           statsPrintf("\n");
+
+            {
+                nat i;
+                lnat sparks_created   = 0;
+                lnat sparks_converted = 0;
+                lnat sparks_pruned    = 0;
+                for (i = 0; i < n_capabilities; i++) {
+                    sparks_created   += capabilities[i].sparks_created;
+                    sparks_converted += capabilities[i].sparks_converted;
+                    sparks_pruned    += capabilities[i].sparks_pruned;
+                }
+
+                statsPrintf("  SPARKS: %ld (%ld converted, %ld pruned)\n\n",
+                            sparks_created, sparks_converted, sparks_pruned);
+            }
 #endif
 
            statsPrintf("  INIT  time  %6.2fs  (%6.2fs elapsed)\n",
@@ -693,12 +699,45 @@ stat_exit(int alloc)
 #if USE_PAPI
             papi_stats_report();
 #endif
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+            {
+                nat g, s;
+                
+                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
+                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
+                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+                    for (s = 0; s < generations[g].n_steps; s++) {
+                        statsPrintf("gen[%d].steps[%d].sync_large_objects: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_large_objects.spin);
+                    }
+                }
+            }
+#endif
        }
 
        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
+      char *fmt1, *fmt2;
+      if (RtsFlags.MiscFlags.machineReadable) {
+          fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
+          fmt2 = " ,(\"num_GCs\", \"%d\")\n"
+                 " ,(\"average_bytes_used\", \"%ld\")\n"
+                 " ,(\"max_bytes_used\", \"%ld\")\n"
+                 " ,(\"num_byte_usage_samples\", \"%ld\")\n"
+                 " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
+                 " ,(\"init_cpu_seconds\", \"%.2f\")\n"
+                 " ,(\"init_wall_seconds\", \"%.2f\")\n"
+                 " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
+                 " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
+                 " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
+                 " ,(\"GC_wall_seconds\", \"%.2f\")\n"
+                 " ]\n";
+      }
+      else {
+          fmt1 = "<<ghc: %llu bytes, ";
+          fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
+      }
          /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
-         statsPrintf("<<ghc: %llu bytes, ", GC_tot_alloc*(ullong)sizeof(W_));
-         statsPrintf("%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n",
+         statsPrintf(fmt1, GC_tot_alloc*(ullong)sizeof(W_));
+         statsPrintf(fmt2,
                    total_collections,
                    ResidencySamples == 0 ? 0 : 
                        AvgResidency*sizeof(W_)/ResidencySamples, 
@@ -710,22 +749,6 @@ stat_exit(int alloc)
                    TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time));
        }
 
-#if defined(THREADED_RTS) && defined(PROF_SPIN)
-        {
-            nat g, s;
-            
-            statsPrintf("recordMutableGen_sync: %"FMT_Word64"\n", recordMutableGen_sync.spin);
-            statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
-            statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
-            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-                for (s = 0; s < generations[g].n_steps; s++) {
-                    statsPrintf("gen[%d].steps[%d].sync_todo: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_todo.spin);
-                    statsPrintf("gen[%d].steps[%d].sync_large_objects: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_large_objects.spin);
-                }
-            }
-        }
-#endif
-
        statsFlush();
        statsClose();
     }