Allow work units smaller than a block to improve load balancing

[ghc-hetmet.git] / rts / sm / Scav.c
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c

index 1640edd..b7bd7f3 100644 (file)
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -134,8 +134,17 @@ scavenge_fun_srt(const StgInfoTable *info)
  static void
  scavengeTSO (StgTSO *tso)
  {
-    rtsBool saved_eager = gct->eager_promotion;
+    rtsBool saved_eager;
+
+    if (tso->what_next == ThreadRelocated) {
+        // the only way this can happen is if the old TSO was on the
+        // mutable list.  We might have other links to this defunct
+        // TSO, so we must update its link field.
+        evacuate((StgClosure**)&tso->_link);
+        return;
+    }
  
+    saved_eager = gct->eager_promotion;
      gct->eager_promotion = rtsFalse;
  
      if (   tso->why_blocked == BlockedOnMVar
@@ -1058,6 +1067,8 @@ scavenge_static(void)
    StgClosure* p;
    const StgInfoTable *info;
  
+  debugTrace(DEBUG_gc, "scavenging static objects");
+
    /* Always evacuate straight to the oldest generation for static
     * objects */
    gct->evac_step = &oldest_gen->steps[0];
@@ -1067,15 +1078,12 @@ scavenge_static(void)
  
    while (1) {
        
-    ACQUIRE_SPIN_LOCK(&static_objects_sync);
-    
      /* get the next static object from the list.  Remember, there might
       * be more stuff on this list after each evacuation...
       * (static_objects is a global)
       */
-    p = static_objects;
+    p = gct->static_objects;
      if (p == END_OF_STATIC_LIST) {
-         RELEASE_SPIN_LOCK(&static_objects_sync);
           break;
      }
      
@@ -1090,11 +1098,9 @@ scavenge_static(void)
      /* Take this object *off* the static_objects list,
       * and put it on the scavenged_static_objects list.
       */
-    static_objects = *STATIC_LINK(info,p);
-    *STATIC_LINK(info,p) = scavenged_static_objects;
-    scavenged_static_objects = p;
-    
-    RELEASE_SPIN_LOCK(&static_objects_sync);
+    gct->static_objects = *STATIC_LINK(info,p);
+    *STATIC_LINK(info,p) = gct->scavenged_static_objects;
+    gct->scavenged_static_objects = p;
      
      switch (info -> type) {
        
@@ -1349,7 +1355,7 @@ scavenge_large (step_workspace *ws)
      bdescr *bd;
      StgPtr p;
  
-    gct->evac_step = ws->stp;
+    gct->evac_step = ws->step;
  
      bd = ws->todo_large_objects;
      
@@ -1361,15 +1367,15 @@ scavenge_large (step_workspace *ws)
         // the front when evacuating.
         ws->todo_large_objects = bd->link;
         
-       ACQUIRE_SPIN_LOCK(&ws->stp->sync_large_objects);
-       dbl_link_onto(bd, &ws->stp->scavenged_large_objects);
-       ws->stp->n_scavenged_large_blocks += bd->blocks;
-       RELEASE_SPIN_LOCK(&ws->stp->sync_large_objects);
+       ACQUIRE_SPIN_LOCK(&ws->step->sync_large_objects);
+       dbl_link_onto(bd, &ws->step->scavenged_large_objects);
+       ws->step->n_scavenged_large_blocks += bd->blocks;
+       RELEASE_SPIN_LOCK(&ws->step->sync_large_objects);
         
         p = bd->start;
         if (scavenge_one(p)) {
-           if (ws->stp->gen_no > 0) {
-               recordMutableGen_GC((StgClosure *)p, ws->stp->gen);
+           if (ws->step->gen_no > 0) {
+               recordMutableGen_GC((StgClosure *)p, ws->step->gen);
             }
         }
      }
@@ -1379,128 +1385,117 @@ scavenge_large (step_workspace *ws)
     Scavenge a block
     ------------------------------------------------------------------------- */
  
-#define MINOR_GC
+#define PARALLEL_GC
  #include "Scav.c-inc"
-#undef MINOR_GC
+#undef PARALLEL_GC
  #include "Scav.c-inc"
  
  /* ----------------------------------------------------------------------------
-   Find the oldest full block to scavenge, and scavenge it.
+   Look for work to do.
+
+   We look for the oldest step that has either a todo block that can
+   be scanned, or a block of work on the global queue that we can
+   scan.
+
+   It is important to take work from the *oldest* generation that we
+   has work available, because that minimizes the likelihood of
+   evacuating objects into a young generation when they should have
+   been eagerly promoted.  This really does make a difference (the
+   cacheprof benchmark is one that is affected).
+
+   We also want to scan the todo block if possible before grabbing
+   work from the global queue, the reason being that we don't want to
+   steal work from the global queue and starve other threads if there
+   is other work we can usefully be doing.
     ------------------------------------------------------------------------- */
  
  static rtsBool
-scavenge_find_global_work (void)
+scavenge_find_work (void)
  {
-    bdescr *bd;
-    int g, s;
-    rtsBool flag;
+    int s;
      step_workspace *ws;
+    rtsBool did_something, did_anything;
+    bdescr *bd;
  
-    flag = rtsFalse;
-    for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
-       for (s = generations[g].n_steps-1; s >= 0; s--) {
-           if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { 
-               continue; 
-           }
-           ws = &gct->steps[g][s];
-
-           // If we have any large objects to scavenge, do them now.
-           if (ws->todo_large_objects) {
-               scavenge_large(ws);
-               flag = rtsTrue;
-           }
-
-           if ((bd = grab_todo_block(ws)) != NULL) {
-               // no need to assign this to ws->scan_bd, we're going
-               // to scavenge the whole thing and then push it on
-               // our scavd list.  This saves pushing out the
-               // scan_bd block, which might be partial.
-               if (N == 0) {
-                   scavenge_block0(bd, bd->start);
-               } else {
-                   scavenge_block(bd, bd->start);
-               }
-               push_scan_block(bd, ws);
-               return rtsTrue;
-           }
-
-           if (flag) return rtsTrue;
-       }
-    }
-    return rtsFalse;
-}
-
-/* ----------------------------------------------------------------------------
-   Look for local work to do.
-
-   We can have outstanding scavenging to do if, for any of the workspaces,
-
-     - the scan block is the same as the todo block, and new objects
-       have been evacuated to the todo block.
-
-     - the scan block *was* the same as the todo block, but the todo
-       block filled up and a new one has been allocated.
-   ------------------------------------------------------------------------- */
+    gct->scav_find_work++;
  
-static rtsBool
-scavenge_find_local_work (void)
-{
-    int g, s;
-    step_workspace *ws;
-    rtsBool flag;
+    did_anything = rtsFalse;
  
-    flag = rtsFalse;
-    for (g = RtsFlags.GcFlags.generations; --g >= 0; ) {
-       for (s = generations[g].n_steps; --s >= 0; ) {
-           if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { 
-               continue; 
-           }
-           ws = &gct->steps[g][s];
+loop:
+    did_something = rtsFalse;
+    for (s = total_steps-1; s >= 0; s--) {
+        if (s == 0 && RtsFlags.GcFlags.generations > 1) { 
+            continue; 
+        }
+        ws = &gct->steps[s];
  
-            if (ws->todo_bd != NULL)
-            {
-                ws->todo_bd->free = ws->todo_free;
+        if (ws->todo_bd != NULL)
+        {
+            ws->todo_bd->free = ws->todo_free;
+        }
+        
+        // If we have a todo block and no scan block, start
+        // scanning the todo block.
+        if (ws->scan_bd == NULL && ws->todo_bd != NULL)
+        {
+            ws->scan_bd = ws->todo_bd;
+        }
+        
+        // If we have a scan block with some work to do,
+        // scavenge everything up to the free pointer.
+        if (ws->scan_bd != NULL && ws->scan_bd->u.scan < ws->scan_bd->free)
+        {
+            if (n_gc_threads == 1) {
+                scavenge_block1(ws->scan_bd);
+            } else {
+                scavenge_block(ws->scan_bd);
              }
+            did_something = rtsTrue;
+        }
+        
+        if (ws->scan_bd != NULL && ws->scan_bd != ws->todo_bd)
+        {
+            ASSERT(ws->scan_bd->u.scan == ws->scan_bd->free);
+            // we're not going to evac any more objects into
+            // this block, so push it now.
+            push_scanned_block(ws->scan_bd, ws);
+            ws->scan_bd = NULL;
+            // we might be able to scan the todo block now.
+            did_something = rtsTrue; 
+        }
  
-           // If we have a todo block and no scan block, start
-           // scanning the todo block.
-           if (ws->scan_bd == NULL && ws->todo_bd != NULL)
-           {
-               ws->scan_bd = ws->todo_bd;
-               ws->scan = ws->scan_bd->start;
-           }
+        if (did_something) break;
  
-           // If we have a scan block with some work to do,
-           // scavenge everything up to the free pointer.
-           if (ws->scan != NULL && ws->scan < ws->scan_bd->free)
-           {
-               if (N == 0) {
-                   scavenge_block0(ws->scan_bd, ws->scan);
-               } else {
-                   scavenge_block(ws->scan_bd, ws->scan);
-               }
-               ws->scan = ws->scan_bd->free;
-               flag = rtsTrue;
-           }
+        // If we have any large objects to scavenge, do them now.
+        if (ws->todo_large_objects) {
+            scavenge_large(ws);
+            did_something = rtsTrue;
+            break;
+        }
  
-           if (ws->scan_bd != NULL && ws->scan == ws->scan_bd->free
-               && ws->scan_bd != ws->todo_bd)
-           {
-               // we're not going to evac any more objects into
-               // this block, so push it now.
-               push_scan_block(ws->scan_bd, ws);
-               ws->scan_bd = NULL;
-               ws->scan = NULL;
-                // we might be able to scan the todo block now.  But
-                // don't do it right away: there might be full blocks
-               // waiting to be scanned as a result of scavenge_block above.
-               flag = rtsTrue; 
-           }
+        if ((bd = grab_todo_block(ws)) != NULL) {
+            // no need to assign this to ws->scan_bd, we're going
+            // to scavenge the whole thing and then push it on
+            // our scavd list.  This saves pushing out the
+            // scan_bd block, which might be partial.
+            if (n_gc_threads == 1) {
+                scavenge_block1(bd);
+            } else {
+                scavenge_block(bd);
+            }
+            push_scanned_block(bd, ws);
+            did_something = rtsTrue;
+            break;
+        }
+    }
  
-           if (flag) return rtsTrue;
-       }
+    if (did_something) {
+        did_anything = rtsTrue;
+        goto loop;
      }
-    return rtsFalse;
+    // only return when there is no more work to do
+
+    return did_anything;
  }
  
  /* ----------------------------------------------------------------------------
@@ -1516,8 +1511,8 @@ loop:
      work_to_do = rtsFalse;
  
      // scavenge static objects 
-    if (major_gc && static_objects != END_OF_STATIC_LIST) {
-       IF_DEBUG(sanity, checkStaticObjects(static_objects));
+    if (major_gc && gct->static_objects != END_OF_STATIC_LIST) {
+       IF_DEBUG(sanity, checkStaticObjects(gct->static_objects));
         scavenge_static();
      }
      
@@ -1533,8 +1528,7 @@ loop:
      // local work.  Only if all the global work has been exhausted
      // do we start scavenging the fragments of blocks in the local
      // workspaces.
-    if (scavenge_find_global_work()) goto loop;
-    if (scavenge_find_local_work())  goto loop;
+    if (scavenge_find_work()) goto loop;
      
      if (work_to_do) goto loop;
  }
@@ -1542,16 +1536,13 @@ loop:
  rtsBool
  any_work (void)
  {
-    int g, s;
+    int s;
      step_workspace *ws;
  
+    gct->any_work++;
+
      write_barrier();
  
-    // scavenge static objects 
-    if (major_gc && static_objects != END_OF_STATIC_LIST) {
-       return rtsTrue;
-    }
-    
      // scavenge objects in compacted generation
      if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
         (mark_stack_bdescr != NULL && !mark_stack_empty())) {
@@ -1561,16 +1552,16 @@ any_work (void)
      // Check for global work in any step.  We don't need to check for
      // local work, because we have already exited scavenge_loop(),
      // which means there is no local work for this thread.
-    for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
-       for (s = generations[g].n_steps-1; s >= 0; s--) {
-           if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { 
-               continue; 
-           }
-           ws = &gct->steps[g][s];
-           if (ws->todo_large_objects) return rtsTrue;
-           if (ws->stp->todos) return rtsTrue;
-       }
+    for (s = total_steps-1; s >= 0; s--) {
+        if (s == 0 && RtsFlags.GcFlags.generations > 1) { 
+            continue; 
+        }
+        ws = &gct->steps[s];
+        if (ws->todo_large_objects) return rtsTrue;
+        if (ws->step->todos) return rtsTrue;
      }
  
+    gct->no_work++;
+
      return rtsFalse;
  }