Recursively call cmmMachOpFold on divides that we turned into shifts

[ghc-hetmet.git] / rts / sm / GC.c
diff --git a/rts/sm/GC.c b/rts/sm/GC.c

index 38ae1a7..4ba05bf 100644 (file)
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -137,8 +137,8 @@ DECLARE_GCT
  static void mark_root               (void *user, StgClosure **root);
  static void zero_static_object_list (StgClosure* first_static);
  static nat  initialise_N            (rtsBool force_major_gc);
-static void init_collected_gen      (nat g, nat threads);
-static void init_uncollected_gen    (nat g, nat threads);
+static void prepare_collected_gen   (generation *gen);
+static void prepare_uncollected_gen (generation *gen);
  static void init_gc_thread          (gc_thread *t);
  static void resize_generations      (void);
  static void resize_nursery          (void);
@@ -148,6 +148,7 @@ static StgWord inc_running          (void);
  static StgWord dec_running          (void);
  static void wakeup_gc_threads       (nat n_threads, nat me);
  static void shutdown_gc_threads     (nat n_threads, nat me);
+static void collect_gct_blocks      (void);
  
  #if 0 && defined(DEBUG)
  static void gcCAFs                  (void);
@@ -174,7 +175,7 @@ GarbageCollect (rtsBool force_major_gc,
  {
    bdescr *bd;
    generation *gen;
-  lnat live, allocated, max_copied, avg_copied, slop;
+  lnat live_blocks, live_words, allocated, max_copied, avg_copied;
    gc_thread *saved_gct;
    nat g, t, n;
  
@@ -274,7 +275,7 @@ GarbageCollect (rtsBool force_major_gc,
  #endif
  
    // check sanity *before* GC
-  IF_DEBUG(sanity, checkSanity(rtsTrue));
+  IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
  
    // Initialise all our gc_thread structures
    for (t = 0; t < n_gc_threads; t++) {
@@ -283,12 +284,11 @@ GarbageCollect (rtsBool force_major_gc,
  
    // Initialise all the generations/steps that we're collecting.
    for (g = 0; g <= N; g++) {
-      init_collected_gen(g,n_gc_threads);
+      prepare_collected_gen(&generations[g]);
    }
-  
    // Initialise all the generations/steps that we're *not* collecting.
    for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
-      init_uncollected_gen(g,n_gc_threads);
+      prepare_uncollected_gen(&generations[g]);
    }
  
    /* Allocate a mark stack if we're doing a major collection.
@@ -344,11 +344,11 @@ SET_GCT(gc_threads[0]);
    }
  
    // follow roots from the CAF list (used by GHCi)
-  gct->evac_gen = 0;
+  gct->evac_gen_no = 0;
    markCAFs(mark_root, gct);
  
    // follow all the roots that the application knows about.
-  gct->evac_gen = 0;
+  gct->evac_gen_no = 0;
    markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
                         rtsTrue/*prune sparks*/);
  
@@ -420,76 +420,6 @@ SET_GCT(gc_threads[0]);
        }
    }
  
-  // For each workspace, in each thread, move the copied blocks to the step
-  {
-      gc_thread *thr;
-      gen_workspace *ws;
-      bdescr *prev, *next;
-
-      for (t = 0; t < n_gc_threads; t++) {
-         thr = gc_threads[t];
-
-          for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-              ws = &thr->gens[g];
-
-              // Push the final block
-              if (ws->todo_bd) { 
-                  push_scanned_block(ws->todo_bd, ws);
-              }
-
-              ASSERT(gct->scan_bd == NULL);
-              ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
-              
-              prev = NULL;
-              for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
-                  ws->gen->n_words += bd->free - bd->start;
-                  prev = bd;
-              }
-              if (prev != NULL) {
-                  prev->link = ws->gen->blocks;
-                  ws->gen->blocks = ws->scavd_list;
-              } 
-              ws->gen->n_blocks += ws->n_scavd_blocks;
-          }
-      }
-
-      // Add all the partial blocks *after* we've added all the full
-      // blocks.  This is so that we can grab the partial blocks back
-      // again and try to fill them up in the next GC.
-      for (t = 0; t < n_gc_threads; t++) {
-         thr = gc_threads[t];
-
-          for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-              ws = &thr->gens[g];
-
-              prev = NULL;
-              for (bd = ws->part_list; bd != NULL; bd = next) {
-                  next = bd->link;
-                  if (bd->free == bd->start) {
-                      if (prev == NULL) {
-                          ws->part_list = next;
-                      } else {
-                          prev->link = next;
-                      }
-                      freeGroup(bd);
-                      ws->n_part_blocks--;
-                  } else {
-                      ws->gen->n_words += bd->free - bd->start;
-                      prev = bd;
-                  }
-              }
-              if (prev != NULL) {
-                  prev->link = ws->gen->blocks;
-                  ws->gen->blocks = ws->part_list;
-              }
-              ws->gen->n_blocks += ws->n_part_blocks;
-
-              ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks);
-              ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words);
-         }
-      }
-  }
-
    // Finally: compact or sweep the oldest generation.
    if (major_gc && oldest_gen->mark) {
        if (oldest_gen->compact) 
@@ -498,8 +428,6 @@ SET_GCT(gc_threads[0]);
            sweep(oldest_gen);
    }
  
-  /* run through all the generations/steps and tidy up 
-   */
    copied = 0;
    max_copied = 0;
    avg_copied = 0;
@@ -525,6 +453,16 @@ SET_GCT(gc_threads[0]);
        }
    }
  
+  // Run through all the generations/steps and tidy up.
+  // We're going to:
+  //   - count the amount of "live" data (live_words, live_blocks)
+  //   - count the amount of "copied" data in this GC (copied)
+  //   - free from-space
+  //   - make to-space the new from-space (set BF_EVACUATED on all blocks)
+  //
+  live_words = 0;
+  live_blocks = 0;
+
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
  
      if (g == N) {
@@ -622,7 +560,6 @@ SET_GCT(gc_threads[0]);
          gen->large_objects  = gen->scavenged_large_objects;
          gen->n_large_blocks = gen->n_scavenged_large_blocks;
          gen->n_new_large_words = 0;
-        ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
      }
      else // for generations > N
      {
@@ -637,16 +574,31 @@ SET_GCT(gc_threads[0]);
          
         // add the new blocks we promoted during this GC 
         gen->n_large_blocks += gen->n_scavenged_large_blocks;
-        ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+    }
+
+    ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+
+    gen->scavenged_large_objects = NULL;
+    gen->n_scavenged_large_blocks = 0;
+
+    // Count "live" data
+    live_words  += genLiveWords(gen);
+    live_blocks += genLiveBlocks(gen);
+
+    // add in the partial blocks in the gen_workspaces, but ignore gen 0
+    // if this is a local GC (we can't count another capability's part_list)
+    {
+        nat i;
+        for (i = 0; i < n_capabilities; i++) {
+            live_words  += gcThreadLiveWords(i, gen->no);
+            live_blocks += gcThreadLiveBlocks(i, gen->no);
+        }
      }
    } // for all generations
  
    // update the max size of older generations after a major GC
    resize_generations();
    
-  // Calculate the amount of live data for stats.
-  live = calcLiveWords();
-
    // Start a new pinned_object_block
    for (n = 0; n < n_capabilities; n++) {
        capabilities[n].pinned_object_block = NULL;
@@ -698,11 +650,6 @@ SET_GCT(gc_threads[0]);
        }
    }
  
-  // send exceptions to any threads which were about to die
-  RELEASE_SM_LOCK;
-  resurrectThreads(resurrected_threads);
-  ACQUIRE_SM_LOCK;
-
    // Update the stable pointer hash table.
    updateStablePtrTable(major_gc);
  
@@ -717,6 +664,17 @@ SET_GCT(gc_threads[0]);
    scheduleFinalizers(cap, old_weak_ptr_list);
    ACQUIRE_SM_LOCK;
  
+  // check sanity after GC
+  // before resurrectThreads(), because that might overwrite some
+  // closures, which will cause problems with THREADED where we don't
+  // fill slop.
+  IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
+
+  // send exceptions to any threads which were about to die
+  RELEASE_SM_LOCK;
+  resurrectThreads(resurrected_threads);
+  ACQUIRE_SM_LOCK;
+
    if (major_gc) {
        nat need, got;
        need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
@@ -730,10 +688,7 @@ SET_GCT(gc_threads[0]);
        }
    }
  
-  // check sanity after GC
-  IF_DEBUG(sanity, checkSanity(rtsTrue));
-
-  // extra GC trace info 
+  // extra GC trace info
    IF_DEBUG(gc, statDescribeGens());
  
  #ifdef DEBUG
@@ -758,8 +713,8 @@ SET_GCT(gc_threads[0]);
  #endif
  
    // ok, GC over: tell the stats department what happened. 
-  slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
-  stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
+  stat_endGC(allocated, live_words, copied, N, max_copied, avg_copied,
+             live_blocks * BLOCK_SIZE_W - live_words /* slop */);
  
    // Guess which generation we'll collect *next* time
    initialise_N(force_major_gc);
@@ -858,11 +813,26 @@ new_gc_thread (nat n, gc_thread *t)
          ASSERT(g == ws->gen->no);
          ws->my_gct = t;
          
-        ws->todo_bd = NULL;
+        // We want to call
+        //   alloc_todo_block(ws,0);
+        // but can't, because it uses gct which isn't set up at this point.
+        // Hence, allocate a block for todo_bd manually:
+        {
+            bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
+            initBdescr(bd, ws->gen, ws->gen->to);
+            bd->flags = BF_EVACUATED;
+            bd->u.scan = bd->free = bd->start;
+
+            ws->todo_bd = bd;
+            ws->todo_free = bd->free;
+            ws->todo_lim = bd->start + BLOCK_SIZE_W;
+        }
+
          ws->todo_q = newWSDeque(128);
          ws->todo_overflow = NULL;
          ws->n_todo_overflow = 0;
-        
+        ws->todo_large_objects = NULL;
+
          ws->part_list = NULL;
          ws->n_part_blocks = 0;
  
@@ -1012,6 +982,8 @@ loop:
      scavenge_loop();
  #endif
  
+    collect_gct_blocks();
+
      // scavenge_loop() only exits when there's no work to do
      r = dec_running();
      
@@ -1062,7 +1034,7 @@ gcWorkerThread (Capability *cap)
  #endif
      
      // Every thread evacuates some roots.
-    gct->evac_gen = 0;
+    gct->evac_gen_no = 0;
      markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
                           rtsTrue/*prune sparks*/);
      scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
@@ -1197,16 +1169,16 @@ releaseGCThreads (Capability *cap USED_IF_THREADS)
     ------------------------------------------------------------------------- */
  
  static void
-init_collected_gen (nat g, nat n_threads)
+prepare_collected_gen (generation *gen)
  {
-    nat t, i;
+    nat i, g, n;
      gen_workspace *ws;
-    generation *gen;
-    bdescr *bd;
+    bdescr *bd, *next;
  
      // Throw away the current mutable list.  Invariant: the mutable
      // list always has at least one block; this means we can avoid a
      // check for NULL in recordMutable().
+    g = gen->no;
      if (g != 0) {
          for (i = 0; i < n_capabilities; i++) {
             freeChain(capabilities[i].mut_lists[g]);
@@ -1231,9 +1203,35 @@ init_collected_gen (nat g, nat n_threads)
      gen->live_estimate = 0;
  
      // initialise the large object queues.
-    gen->scavenged_large_objects = NULL;
-    gen->n_scavenged_large_blocks = 0;
-    
+    ASSERT(gen->scavenged_large_objects == NULL);
+    ASSERT(gen->n_scavenged_large_blocks == 0);
+
+    // grab all the partial blocks stashed in the gc_thread workspaces and
+    // move them to the old_blocks list of this gen.
+    for (n = 0; n < n_capabilities; n++) {
+        ws = &gc_threads[n]->gens[gen->no];
+
+        for (bd = ws->part_list; bd != NULL; bd = next) {
+            next = bd->link;
+            bd->link = gen->old_blocks;
+            gen->old_blocks = bd;
+            gen->n_old_blocks += bd->blocks;
+        }
+        ws->part_list = NULL;
+        ws->n_part_blocks = 0;
+
+        ASSERT(ws->scavd_list == NULL);
+        ASSERT(ws->n_scavd_blocks == 0);
+
+        if (ws->todo_free != ws->todo_bd->start) {
+            ws->todo_bd->free = ws->todo_free;
+            ws->todo_bd->link = gen->old_blocks;
+            gen->old_blocks = ws->todo_bd;
+            gen->n_old_blocks += ws->todo_bd->blocks;
+            alloc_todo_block(ws,0); // always has one block.
+        }
+    }
+
      // mark the small objects as from-space
      for (bd = gen->old_blocks; bd; bd = bd->link) {
          bd->flags &= ~BF_EVACUATED;
@@ -1285,106 +1283,83 @@ init_collected_gen (nat g, nat n_threads)
              }
          }
      }
-
-    // For each GC thread, for each step, allocate a "todo" block to
-    // store evacuated objects to be scavenged, and a block to store
-    // evacuated objects that do not need to be scavenged.
-    for (t = 0; t < n_threads; t++) {
-        ws = &gc_threads[t]->gens[g];
-        
-        ws->todo_large_objects = NULL;
-        
-        ws->part_list = NULL;
-        ws->n_part_blocks = 0;
-        
-        // allocate the first to-space block; extra blocks will be
-        // chained on as necessary.
-        ws->todo_bd = NULL;
-        ASSERT(looksEmptyWSDeque(ws->todo_q));
-        alloc_todo_block(ws,0);
-        
-        ws->todo_overflow = NULL;
-        ws->n_todo_overflow = 0;
-        
-        ws->scavd_list = NULL;
-        ws->n_scavd_blocks = 0;
-    }
  }
  
  
  /* ----------------------------------------------------------------------------
+   Save the mutable lists in saved_mut_lists
+   ------------------------------------------------------------------------- */
+
+static void
+stash_mut_list (Capability *cap, nat gen_no)
+{
+    cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
+    cap->mut_lists[gen_no] = allocBlock_sync();
+}
+
+/* ----------------------------------------------------------------------------
     Initialise a generation that is *not* to be collected 
     ------------------------------------------------------------------------- */
  
  static void
-init_uncollected_gen (nat g, nat threads)
+prepare_uncollected_gen (generation *gen)
  {
-    nat t, n;
-    gen_workspace *ws;
-    generation *gen;
-    bdescr *bd;
+    nat i;
+
+
+    ASSERT(gen->no > 0);
  
      // save the current mutable lists for this generation, and
      // allocate a fresh block for each one.  We'll traverse these
      // mutable lists as roots early on in the GC.
-    for (n = 0; n < n_capabilities; n++) {
-        capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
-        capabilities[n].mut_lists[g] = allocBlock();
+    for (i = 0; i < n_capabilities; i++) {
+        stash_mut_list(&capabilities[i], gen->no);
      }
  
-    gen = &generations[g];
+    ASSERT(gen->scavenged_large_objects == NULL);
+    ASSERT(gen->n_scavenged_large_blocks == 0);
+}
  
-    gen->scavenged_large_objects = NULL;
-    gen->n_scavenged_large_blocks = 0;
+/* -----------------------------------------------------------------------------
+   Collect the completed blocks from a GC thread and attach them to
+   the generation.
+   -------------------------------------------------------------------------- */
  
-    for (t = 0; t < threads; t++) {
-        ws = &gc_threads[t]->gens[g];
-           
-        ASSERT(looksEmptyWSDeque(ws->todo_q));
-        ws->todo_large_objects = NULL;
-        
-        ws->part_list = NULL;
-        ws->n_part_blocks = 0;
+static void
+collect_gct_blocks (void)
+{
+    nat g;
+    gen_workspace *ws;
+    bdescr *bd, *prev;
+    
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+        ws = &gct->gens[g];
          
-        ws->scavd_list = NULL;
-        ws->n_scavd_blocks = 0;
+        // there may still be a block attached to ws->todo_bd;
+        // leave it there to use next time.
+
+        if (ws->scavd_list != NULL) {
+            ACQUIRE_SPIN_LOCK(&ws->gen->sync);
+
+            ASSERT(gct->scan_bd == NULL);
+            ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
          
-        // If the block at the head of the list in this generation
-        // is less than 3/4 full, then use it as a todo block.
-        if (gen->blocks && isPartiallyFull(gen->blocks))
-        {
-            ws->todo_bd = gen->blocks;
-            ws->todo_free = ws->todo_bd->free;
-            ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
-            gen->blocks = gen->blocks->link;
-            gen->n_blocks -= 1;
-            gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
-            ws->todo_bd->link = NULL;
-            // we must scan from the current end point.
-            ws->todo_bd->u.scan = ws->todo_bd->free;
-        } 
-        else
-        {
-            ws->todo_bd = NULL;
-            alloc_todo_block(ws,0);
-        }
-    }
+            prev = NULL;
+            for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
+                ws->gen->n_words += bd->free - bd->start;
+                prev = bd;
+            }
+            if (prev != NULL) {
+                prev->link = ws->gen->blocks;
+                ws->gen->blocks = ws->scavd_list;
+            } 
+            ws->gen->n_blocks += ws->n_scavd_blocks;
  
-    // deal out any more partial blocks to the threads' part_lists
-    t = 0;
-    while (gen->blocks && isPartiallyFull(gen->blocks))
-    {
-        bd = gen->blocks;
-        gen->blocks = bd->link;
-        ws = &gc_threads[t]->gens[g];
-        bd->link = ws->part_list;
-        ws->part_list = bd;
-        ws->n_part_blocks += 1;
-        bd->u.scan = bd->free;
-        gen->n_blocks -= 1;
-        gen->n_words -= bd->free - bd->start;
-        t++;
-        if (t == n_gc_threads) t = 0;
+            ws->scavd_list = NULL;
+            ws->n_scavd_blocks = 0;
+
+            RELEASE_SPIN_LOCK(&ws->gen->sync);
+        }
      }
  }
  
@@ -1399,7 +1374,7 @@ init_gc_thread (gc_thread *t)
      t->scavenged_static_objects = END_OF_STATIC_LIST;
      t->scan_bd = NULL;
      t->mut_lists = capabilities[t->thread_index].mut_lists;
-    t->evac_gen = 0;
+    t->evac_gen_no = 0;
      t->failed_to_evac = rtsFalse;
      t->eager_promotion = rtsTrue;
      t->thunk_selector_depth = 0;