Add profiling of spinlocks
author Simon Marlow <simonmarhaskell@gmail.com>
Wed, 16 Apr 2008 21:33:58 +0000 (21:33 +0000)
committer Simon Marlow <simonmarhaskell@gmail.com>
Wed, 16 Apr 2008 21:33:58 +0000 (21:33 +0000)
includes/RtsConfig.h
rts/Stats.c
rts/sm/Evac.c
rts/sm/Evac.c-inc
rts/sm/Evac.h
rts/sm/GC.c
rts/sm/Storage.c

index c40924a..828b9e7 100644 (file)
@@ -77,4 +77,8 @@
 #define RTS_USER_SIGNALS 1
 #endif
 
+/* Profile spin locks */
+
+#define PROF_SPIN
+
 #endif /* RTSCONFIG_H */
index fcca405..8c24b29 100644 (file)
@@ -16,6 +16,9 @@
 #include "ParTicky.h"                       /* ToDo: move into Rts.h */
 #include "Profiling.h"
 #include "GetTime.h"
+#include "GC.h"
+#include "GCUtils.h"
+#include "Evac.h"
 
 #if USE_PAPI
 #include "Papi.h"
@@ -641,6 +644,23 @@ stat_exit(int alloc)
     if (GC_coll_times)
       stgFree(GC_coll_times);
     GC_coll_times = NULL;
+
+#if defined(THREADED_RTS) && defined(PROF_SPIN)
+    {
+       nat g, s;
+
+       statsPrintf("recordMutableGen_sync: %"FMT_Word64"\n", recordMutableGen_sync.spin);
+       statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
+       statsPrintf("static_objects_sync: %"FMT_Word64"\n", static_objects_sync.spin);
+       statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
+       for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+           for (s = 0; s < generations[g].n_steps; s++) {
+               statsPrintf("gen[%d].steps[%d].sync_todo: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_todo.spin);
+               statsPrintf("gen[%d].steps[%d].sync_large_objects: %"FMT_Word64"\n", g, s, generations[g].steps[s].sync_large_objects.spin);
+           }
+       }
+    }
+#endif
 }
 
 /* -----------------------------------------------------------------------------
index a5919ef..0a47c3b 100644 (file)
 #include "Prelude.h"
 #include "LdvProfile.h"
 
+#if defined(PROF_SPIN) && defined(THREADED_RTS)
+StgWord64 whitehole_spin = 0;
+#endif
+
 /* Used to avoid long recursion due to selector thunks
  */
 #define MAX_THUNK_SELECTOR_DEPTH 16
@@ -93,8 +97,11 @@ STATIC_INLINE void
 evacuate_large(StgPtr p)
 {
   bdescr *bd = Bdescr(p);
-  step *stp;
+  step *stp, *new_stp;
   step_workspace *ws;
+    
+  stp = bd->step;
+  ACQUIRE_SPIN_LOCK(&stp->sync_large_objects);
 
   // object must be at the beginning of the block (or be a ByteArray)
   ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS ||
@@ -105,16 +112,14 @@ evacuate_large(StgPtr p)
     /* Don't forget to set the gct->failed_to_evac flag if we didn't get
      * the desired destination (see comments in evacuate()).
      */
-    if (bd->step < gct->evac_step) {
-      gct->failed_to_evac = rtsTrue;
-      TICK_GC_FAILED_PROMOTION();
+    if (stp < gct->evac_step) {
+       gct->failed_to_evac = rtsTrue;
+       TICK_GC_FAILED_PROMOTION();
     }
+    RELEASE_SPIN_LOCK(&stp->sync_large_objects);
     return;
   }
 
-  stp = bd->step;
-
-  ACQUIRE_SPIN_LOCK(&stp->sync_large_objects);
   // remove from large_object list 
   if (bd->u.back) {
     bd->u.back->link = bd->link;
@@ -124,25 +129,26 @@ evacuate_large(StgPtr p)
   if (bd->link) {
     bd->link->u.back = bd->u.back;
   }
-  RELEASE_SPIN_LOCK(&stp->sync_large_objects);
   
   /* link it on to the evacuated large object list of the destination step
    */
-  stp = bd->step->to;
-  if (stp < gct->evac_step) {
+  new_stp = stp->to;
+  if (new_stp < gct->evac_step) {
       if (gct->eager_promotion) {
-         stp = gct->evac_step;
+         new_stp = gct->evac_step;
       } else {
          gct->failed_to_evac = rtsTrue;
       }
   }
 
-  ws = &gct->steps[stp->gen_no][stp->no];
-  bd->step = stp;
-  bd->gen_no = stp->gen_no;
+  ws = &gct->steps[new_stp->gen_no][new_stp->no];
+  bd->flags |= BF_EVACUATED;
+  bd->step = new_stp;
+  bd->gen_no = new_stp->gen_no;
   bd->link = ws->todo_large_objects;
   ws->todo_large_objects = bd;
-  bd->flags |= BF_EVACUATED;
+
+  RELEASE_SPIN_LOCK(&stp->sync_large_objects);
 }
 
 /* -----------------------------------------------------------------------------
index 0f2cc6d..752fe92 100644 (file)
@@ -30,10 +30,15 @@ copy_tag(StgClosure **p, StgClosure *src, nat size, step *stp, StgWord tag)
     StgWord info;
 
 #if !defined(MINOR_GC) && defined(THREADED_RTS)
-    do {
+spin:
        info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
        // so..  what is it?
-    } while (info == (W_)&stg_WHITEHOLE_info);
+    if (info == (W_)&stg_WHITEHOLE_info) {
+#ifdef PROF_SPIN
+           whitehole_spin++;
+#endif
+           goto spin;
+    }
     if (info == (W_)&stg_EVACUATED_info || info == (W_)&stg_IND_info) {
         // NB. a closure might be updated with an IND by
         // unchain_selector_thunks(), hence the test above.
@@ -88,12 +93,18 @@ copyPart(StgClosure **p, StgClosure *src, nat size_to_reserve, nat size_to_copy,
     StgWord info;
     
 #if !defined(MINOR_GC) && defined(THREADED_RTS)
-    do {
+spin:
        info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info);
-    } while (info == (W_)&stg_WHITEHOLE_info);
+       if (info == (W_)&stg_WHITEHOLE_info) {
+#ifdef PROF_SPIN
+           whitehole_spin++;
+#endif
+           goto spin;
+       }
     if (info == (W_)&stg_EVACUATED_info) {
        src->header.info = (const StgInfoTable *)info;
-       return evacuate(p); // does the failed_to_evac stuff
+       evacuate(p); // does the failed_to_evac stuff
+       return ;
     }
 #else
     info = (W_)src->header.info;
index 33ee4c5..1bce74a 100644 (file)
@@ -31,3 +31,7 @@ REGPARM1 void evacuate  (StgClosure **p);
 REGPARM1 void evacuate0 (StgClosure **p);
 
 extern lnat thunk_selector_depth;
+
+#if defined(PROF_SPIN) && defined(THREADED_RTS)
+extern StgWord64 whitehole_spin; /* declaration only: the single definition (with initializer) is in Evac.c; without 'extern' every includer of this header would emit its own tentative definition */
+#endif
index 0d2ba85..09e2b2c 100644 (file)
@@ -274,12 +274,6 @@ GarbageCollect ( rtsBool force_major_gc )
   static_objects = END_OF_STATIC_LIST;
   scavenged_static_objects = END_OF_STATIC_LIST;
 
-#ifdef THREADED_RTS
-  initSpinLock(&static_objects_sync);
-  initSpinLock(&recordMutableGen_sync);
-  initSpinLock(&gc_alloc_block_sync);
-#endif
-
   // Initialise all the generations/steps that we're collecting.
   for (g = 0; g <= N; g++) {
       init_collected_gen(g,n_gc_threads);
index 58cd766..fef882a 100644 (file)
@@ -31,6 +31,7 @@
 #include "Trace.h"
 #include "GC.h"
 #include "GCUtils.h"
+#include "Evac.h"
 
 #include <stdlib.h>
 #include <string.h>
@@ -256,6 +257,9 @@ initStorage( void )
 
 #ifdef THREADED_RTS
   initSpinLock(&gc_alloc_block_sync);
+  initSpinLock(&static_objects_sync);
+  initSpinLock(&recordMutableGen_sync);
+  whitehole_spin = 0;
 #endif
 
   IF_DEBUG(gc, statDescribeGens());