free empty blocks at the end of GC
[ghc-hetmet.git] / rts / sm / GC.c
index 4d0c4ef..97b32b6 100644 (file)
@@ -181,7 +181,7 @@ GarbageCollect ( rtsBool force_major_gc )
 {
   bdescr *bd;
   step *stp;
-  lnat live, allocated;
+  lnat live, allocated, max_copied, avg_copied;
   lnat oldgen_saved_blocks = 0;
   gc_thread *saved_gct;
   nat g, s, t, n;
@@ -195,8 +195,6 @@ GarbageCollect ( rtsBool force_major_gc )
 
   ACQUIRE_SM_LOCK;
 
-  debugTrace(DEBUG_gc, "starting GC");
-
 #if defined(RTS_USER_SIGNALS)
   if (RtsFlags.MiscFlags.install_signal_handlers) {
     // block signals
@@ -248,11 +246,11 @@ GarbageCollect ( rtsBool force_major_gc )
   } else {
       n_gc_threads = RtsFlags.ParFlags.gcThreads;
   }
-  trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %dKB to collect, using %d thread(s)",
-        N, n * (BLOCK_SIZE / 1024), n_gc_threads);
 #else
   n_gc_threads = 1;
 #endif
+  trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %dKB to collect, using %d thread(s)",
+        N, n * (BLOCK_SIZE / 1024), n_gc_threads);
 
 #ifdef RTS_GTK_FRONTPANEL
   if (RtsFlags.GcFlags.frontpanel) {
@@ -412,7 +410,7 @@ GarbageCollect ( rtsBool force_major_gc )
   {
       gc_thread *thr;
       step_workspace *ws;
-      bdescr *prev;
+      bdescr *prev, *next;
 
       for (t = 0; t < n_gc_threads; t++) {
          thr = gc_threads[t];
@@ -420,24 +418,52 @@ GarbageCollect ( rtsBool force_major_gc )
           // not step 0
           for (s = 1; s < total_steps; s++) {
               ws = &thr->steps[s];
-              // Not true?
-              // ASSERT( ws->scan_bd == ws->todo_bd );
-              ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
 
               // Push the final block
-              if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
-              
+              if (ws->todo_bd) { 
+                  push_scanned_block(ws->todo_bd, ws);
+              }
+
+              ASSERT(gct->scan_bd == NULL);
               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
               
-              prev = ws->scavd_list;
+              prev = NULL;
               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
                   bd->flags &= ~BF_EVACUATED;   // now from-space 
+                  ws->step->n_words += bd->free - bd->start;
                   prev = bd;
               }
-              prev->link = ws->stp->blocks;
-              ws->stp->blocks = ws->scavd_list;
-              ws->stp->n_blocks += ws->n_scavd_blocks;
-              ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks);
+              if (prev != NULL) {
+                  prev->link = ws->step->blocks;
+                  ws->step->blocks = ws->scavd_list;
+              } 
+              ws->step->n_blocks += ws->n_scavd_blocks;
+
+              prev = NULL;
+              for (bd = ws->part_list; bd != NULL; bd = next) {
+                  next = bd->link;
+                  if (bd->free == bd->start) {
+                      if (prev == NULL) {
+                          ws->part_list = next;
+                      } else {
+                          prev->link = next;
+                      }
+                      freeGroup(bd);
+                      ws->n_part_blocks--;
+                  } else {
+                      bd->flags &= ~BF_EVACUATED;       // now from-space 
+                      ws->step->n_words += bd->free - bd->start;
+                      prev = bd;
+                  }
+              }
+              if (prev != NULL) {
+                  prev->link = ws->step->blocks;
+                  ws->step->blocks = ws->part_list;
+              }
+              ws->step->n_blocks += ws->n_part_blocks;
+
+              ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
+              ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
          }
       }
   }
@@ -458,24 +484,35 @@ GarbageCollect ( rtsBool force_major_gc )
   /* run through all the generations/steps and tidy up 
    */
   copied = 0;
+  max_copied = 0;
+  avg_copied = 0;
   { 
       nat i;
       for (i=0; i < n_gc_threads; i++) {
           if (n_gc_threads > 1) {
               trace(TRACE_gc,"thread %d:", i);
               trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
+              trace(TRACE_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
               trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
               trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
               trace(TRACE_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
           }
           copied += gc_threads[i]->copied;
+          max_copied = stg_max(gc_threads[i]->copied, max_copied);
+      }
+      if (n_gc_threads == 1) {
+          max_copied = 0;
+          avg_copied = 0;
+      } else {
+          avg_copied = copied;
       }
   }
 
   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 
-    if (g <= N) {
+    if (g == N) {
       generations[g].collections++; // for stats 
+      if (n_gc_threads > 1) generations[g].par_collections++;
     }
 
     // Count the mutable list as bytes "copied" for the purposes of
@@ -511,6 +548,7 @@ GarbageCollect ( rtsBool force_major_gc )
                // onto the front of the now-compacted existing blocks.
                for (bd = stp->blocks; bd != NULL; bd = bd->link) {
                    bd->flags &= ~BF_EVACUATED;  // now from-space 
+                    stp->n_words += bd->free - bd->start;
                }
                // tack the new blocks on the end of the existing blocks
                if (stp->old_blocks != NULL) {
@@ -530,6 +568,7 @@ GarbageCollect ( rtsBool force_major_gc )
                // add the new blocks to the block tally
                stp->n_blocks += stp->n_old_blocks;
                ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
+                ASSERT(countOccupied(stp->blocks) == stp->n_words);
            }
            else // not copacted
            {
@@ -579,10 +618,8 @@ GarbageCollect ( rtsBool force_major_gc )
   // update the max size of older generations after a major GC
   resize_generations();
   
-  // Guess the amount of live data for stats.
-  live = calcLiveBlocks() * BLOCK_SIZE_W;
-  debugTrace(DEBUG_gc, "Slop: %ldKB", 
-             (live - calcLiveWords()) / (1024/sizeof(W_)));
+  // Calculate the amount of live data for stats.
+  live = calcLiveWords();
 
   // Free the small objects allocated via allocate(), since this will
   // all have been copied into G0S1 now.  
@@ -592,6 +629,7 @@ GarbageCollect ( rtsBool force_major_gc )
           g0s0->blocks = NULL;
       }
       g0s0->n_blocks = 0;
+      g0s0->n_words = 0;
   }
   alloc_blocks = 0;
   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
@@ -684,7 +722,7 @@ GarbageCollect ( rtsBool force_major_gc )
 #endif
 
   // ok, GC over: tell the stats department what happened. 
-  stat_endGC(allocated, live, copied, N);
+  stat_endGC(allocated, live, copied, N, max_copied, avg_copied);
 
 #if defined(RTS_USER_SIGNALS)
   if (RtsFlags.MiscFlags.install_signal_handlers) {
@@ -929,7 +967,7 @@ initialise_N (rtsBool force_major_gc)
     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
         blocks = 0;
         for (s = 0; s < generations[g].n_steps; s++) {
-            blocks += generations[g].steps[s].n_blocks;
+            blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
             blocks += generations[g].steps[s].n_large_blocks;
         }
         if (blocks >= generations[g].max_blocks) {
@@ -964,7 +1002,8 @@ alloc_gc_thread (int n)
     t->id = 0;
     initCondition(&t->wake_cond);
     initMutex(&t->wake_mutex);
-    t->wakeup = rtsFalse;
+    t->wakeup = rtsTrue;  // starts true, so we can wait for the
+                          // thread to start up, see wakeup_gc_threads
     t->exit   = rtsFalse;
 #endif
 
@@ -981,16 +1020,16 @@ alloc_gc_thread (int n)
     for (s = 0; s < total_steps; s++)
     {
         ws = &t->steps[s];
-        ws->stp = &all_steps[s];
-        ASSERT(s == ws->stp->abs_no);
+        ws->step = &all_steps[s];
+        ASSERT(s == ws->step->abs_no);
         ws->gct = t;
         
-        ws->scan_bd = NULL;
-        ws->scan = NULL;
-
         ws->todo_bd = NULL;
         ws->buffer_todo_bd = NULL;
         
+        ws->part_list = NULL;
+        ws->n_part_blocks = 0;
+
         ws->scavd_list = NULL;
         ws->n_scavd_blocks = 0;
     }
@@ -1173,7 +1212,16 @@ wakeup_gc_threads (nat n_threads USED_IF_THREADS)
     nat i;
     for (i=1; i < n_threads; i++) {
        inc_running();
-       ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
+        debugTrace(DEBUG_gc, "waking up gc thread %d", i);
+        do {
+            ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
+            if (gc_threads[i]->wakeup) {
+                RELEASE_LOCK(&gc_threads[i]->wake_mutex);
+                continue;
+            } else {
+                break;
+            }
+        } while (1);
        gc_threads[i]->wakeup = rtsTrue;
        signalCondition(&gc_threads[i]->wake_cond);
        RELEASE_LOCK(&gc_threads[i]->wake_mutex);
@@ -1240,6 +1288,7 @@ init_collected_gen (nat g, nat n_threads)
        stp->n_old_blocks = stp->n_blocks;
        stp->blocks       = NULL;
        stp->n_blocks     = 0;
+       stp->n_words      = 0;
 
        // we don't have any to-be-scavenged blocks yet
        stp->todos = NULL;
@@ -1303,16 +1352,16 @@ init_collected_gen (nat g, nat n_threads)
 
            ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
 
-           ws->scan_bd = NULL;
-           ws->scan = NULL;
-
            ws->todo_large_objects = NULL;
 
+            ws->part_list = NULL;
+            ws->n_part_blocks = 0;
+
            // allocate the first to-space block; extra blocks will be
            // chained on as necessary.
            ws->todo_bd = NULL;
            ws->buffer_todo_bd = NULL;
-           gc_alloc_todo_block(ws);
+           alloc_todo_block(ws,0);
 
            ws->scavd_list = NULL;
            ws->n_scavd_blocks = 0;
@@ -1343,11 +1392,14 @@ init_uncollected_gen (nat g, nat threads)
        for (s = 0; s < generations[g].n_steps; s++) {
            
            ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
-           stp = ws->stp;
+           stp = ws->step;
            
            ws->buffer_todo_bd = NULL;
            ws->todo_large_objects = NULL;
 
+            ws->part_list = NULL;
+            ws->n_part_blocks = 0;
+
            ws->scavd_list = NULL;
            ws->n_scavd_blocks = 0;
 
@@ -1360,23 +1412,15 @@ init_uncollected_gen (nat g, nat threads)
                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
                stp->blocks = stp->blocks->link;
                stp->n_blocks -= 1;
+                stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
                ws->todo_bd->link = NULL;
-
-               // this block is also the scan block; we must scan
-               // from the current end point.
-               ws->scan_bd = ws->todo_bd;
-               ws->scan = ws->scan_bd->free;
-
-               // subtract the contents of this block from the stats,
-               // because we'll count the whole block later.
-               copied -= ws->scan_bd->free - ws->scan_bd->start;
+               // we must scan from the current end point.
+               ws->todo_bd->u.scan = ws->todo_bd->free;
            } 
            else
            {
-               ws->scan_bd = NULL;
-               ws->scan = NULL;
                ws->todo_bd = NULL;
-               gc_alloc_todo_block(ws);
+               alloc_todo_block(ws,0);
            }
        }
     }
@@ -1403,15 +1447,16 @@ init_gc_thread (gc_thread *t)
 {
     t->static_objects = END_OF_STATIC_LIST;
     t->scavenged_static_objects = END_OF_STATIC_LIST;
+    t->scan_bd = NULL;
     t->evac_step = 0;
     t->failed_to_evac = rtsFalse;
     t->eager_promotion = rtsTrue;
     t->thunk_selector_depth = 0;
     t->copied = 0;
+    t->scanned = 0;
     t->any_work = 0;
     t->no_work = 0;
     t->scav_find_work = 0;
-
 }
 
 /* -----------------------------------------------------------------------------
@@ -1531,7 +1576,7 @@ resize_generations (void)
        nat gens = RtsFlags.GcFlags.generations;
        
        // live in the oldest generations
-       live = oldest_gen->steps[0].n_blocks +
+       live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+
            oldest_gen->steps[0].n_large_blocks;
        
        // default max size for all generations except zero