Add 'sync-all grep'

[ghc-hetmet.git] / rts / Stats.c
diff --git a/rts/Stats.c b/rts/Stats.c

index f1e6e33..4b9f6d8 100644 (file)
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -258,7 +258,8 @@ stat_startExit(void)
         PROF_VAL(RPe_tot_time + HCe_tot_time) - InitElapsedStamp;
      if (MutElapsedTime < 0) { MutElapsedTime = 0; }    /* sometimes -0.00 */
  
-    MutUserTime = user - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime;
+    MutUserTime = user - GC_tot_time - 
+        PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime;
      if (MutUserTime < 0) { MutUserTime = 0; }
  
  #if USE_PAPI
@@ -314,15 +315,11 @@ stat_startGC(void)
         }
      }
  
-#if defined(PROFILING) || defined(DEBUG)
-    GC_start_time = getProcessCPUTime();  // needed in mut_user_time_during_GC()
-#endif
-
-    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
-#if !defined(PROFILING) && !defined(DEBUG)
-        GC_start_time = getProcessCPUTime();
-#endif
-       GCe_start_time = getProcessElapsedTime();
+    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS
+        || RtsFlags.ProfFlags.doHeapProfile)
+        // heap profiling needs GC_tot_time
+    {
+        getProcessTimes(&GC_start_time, &GCe_start_time);
         if (RtsFlags.GcFlags.giveStats) {
             GC_start_faults = getPageFaults();
         }
@@ -346,7 +343,10 @@ void
  stat_endGC (lnat alloc, lnat live, lnat copied, lnat gen,
              lnat max_copied, lnat avg_copied, lnat slop)
  {
-    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {
+    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS ||
+        RtsFlags.ProfFlags.doHeapProfile)
+        // heap profiling needs GC_tot_time
+    {
         Ticks time, etime, gc_time, gc_etime;
         
         getProcessTimes(&time, &etime);
@@ -589,8 +589,8 @@ stat_exit(int alloc)
             statsPrintf("%16s bytes maximum slop\n", temp);
  
             statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n", 
-                        mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
-                        (mblocks_allocated * MBLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
+                        peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
+                        (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));
  
             /* Print garbage collections in each gen */
             for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
@@ -636,16 +636,20 @@ stat_exit(int alloc)
              {
                  nat i;
                  lnat sparks_created   = 0;
+                lnat sparks_dud       = 0;
                  lnat sparks_converted = 0;
-                lnat sparks_pruned    = 0;
+                lnat sparks_gcd       = 0;
+                lnat sparks_fizzled   = 0;
                  for (i = 0; i < n_capabilities; i++) {
                      sparks_created   += capabilities[i].sparks_created;
+                    sparks_dud       += capabilities[i].sparks_dud;
                      sparks_converted += capabilities[i].sparks_converted;
-                    sparks_pruned    += capabilities[i].sparks_pruned;
+                    sparks_gcd       += capabilities[i].sparks_gcd;
+                    sparks_fizzled   += capabilities[i].sparks_fizzled;
                  }
  
-                statsPrintf("  SPARKS: %ld (%ld converted, %ld pruned)\n\n",
-                            sparks_created, sparks_converted, sparks_pruned);
+                statsPrintf("  SPARKS: %ld (%ld converted, %ld dud, %ld GC'd, %ld fizzled)\n\n",
+                            sparks_created + sparks_dud, sparks_converted, sparks_dud, sparks_gcd, sparks_fizzled);
              }
  #endif
  
@@ -706,7 +710,7 @@ stat_exit(int alloc)
                  statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                  statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-                    statsPrintf("gen[%d].sync_large_objects: %"FMT_Word64"\n", g, generations[g].sync_large_objects.spin);
+                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                  }
              }
  #endif
@@ -741,7 +745,7 @@ stat_exit(int alloc)
                         AvgResidency*sizeof(W_)/ResidencySamples, 
                     MaxResidency*sizeof(W_), 
                     ResidencySamples,
-                   (unsigned long)(mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
+                   (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                     TICK_TO_DBL(InitUserTime), TICK_TO_DBL(InitElapsedTime),
                     TICK_TO_DBL(MutUserTime), TICK_TO_DBL(MutElapsedTime),
                     TICK_TO_DBL(GC_tot_time), TICK_TO_DBL(GCe_tot_time));
@@ -767,9 +771,10 @@ stat_exit(int alloc)
  void
  statDescribeGens(void)
  {
-  nat g, mut, lge;
-  lnat live, slop;
+  nat g, mut, lge, i;
+  lnat gen_slop;
    lnat tot_live, tot_slop;
+  lnat gen_live, gen_blocks;
    bdescr *bd;
    generation *gen;
    
@@ -781,25 +786,32 @@ statDescribeGens(void)
  
    tot_live = 0;
    tot_slop = 0;
-  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-      mut = 0;
-      for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
-         mut += (bd->free - bd->start) * sizeof(W_);
-      }
  
+  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gen = &generations[g];
  
-      debugBelch("%5d %7d %9d", g, gen->max_blocks, mut);
-
        for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
            lge++;
        }
-      live = gen->n_words + countOccupied(gen->large_objects);
-      slop = (gen->n_blocks + gen->n_large_blocks) * BLOCK_SIZE_W - live;
-      debugBelch("%8d %8d %8ld %8ld\n", gen->n_blocks, lge,
-                 live*sizeof(W_), slop*sizeof(W_));
-      tot_live += live;
-      tot_slop += slop;
+
+      gen_live   = genLiveWords(gen);
+      gen_blocks = genLiveBlocks(gen);
+
+      mut = 0;
+      for (i = 0; i < n_capabilities; i++) {
+          mut += countOccupied(capabilities[i].mut_lists[g]);
+          gen_live   += gcThreadLiveWords(i,g);
+          gen_blocks += gcThreadLiveBlocks(i,g);
+      }
+
+      debugBelch("%5d %7d %9d", g, gen->max_blocks, mut);
+
+      gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;
+
+      debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
+                 gen_live*sizeof(W_), gen_slop*sizeof(W_));
+      tot_live += gen_live;
+      tot_slop += gen_slop;
    }
    debugBelch("----------------------------------------------------------\n");
    debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));