Do a bit of by-hand CSE (common subexpression elimination)

[ghc-hetmet.git] / rts / sm / GC.c
diff --git a/rts/sm/GC.c b/rts/sm/GC.c

index ae6fc99..38ae1a7 100644 (file)
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -221,7 +221,7 @@ GarbageCollect (rtsBool force_major_gc,
    /* Approximate how much we allocated.  
     * Todo: only when generating stats? 
     */
-  allocated = calcAllocated();
+  allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
  
    /* Figure out which generation to collect
     */
@@ -327,27 +327,6 @@ SET_GCT(gc_threads[0]);
    inc_running();
    wakeup_gc_threads(n_gc_threads, gct->thread_index);
  
-  // Mutable lists from each generation > N
-  // we want to *scavenge* these roots, not evacuate them: they're not
-  // going to move in this GC.
-  // Also do them in reverse generation order, for the usual reason:
-  // namely to reduce the likelihood of spurious old->new pointers.
-  //
-  for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
-#if defined(THREADED_RTS)
-      if (n_gc_threads > 1) {
-          scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
-      } else {
-          scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]);
-      }
-#else
-      scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
-#endif
-      freeChain_sync(generations[g].saved_mut_list);
-      generations[g].saved_mut_list = NULL;
-
-  }
-
    // scavenge the capability-private mutable lists.  This isn't part
    // of markSomeCapabilities() because markSomeCapabilities() can only
    // call back into the GC via mark_root() (due to the gct register
@@ -395,13 +374,6 @@ SET_GCT(gc_threads[0]);
        // The other threads are now stopped.  We might recurse back to
        // here, but from now on this is the only thread.
        
-      // if any blackholes are alive, make the threads that wait on
-      // them alive too.
-      if (traverseBlackholeQueue()) {
-         inc_running(); 
-         continue;
-      }
-  
        // must be last...  invariant is that everything is fully
        // scavenged at this point.
        if (traverseWeakPtrList()) { // returns rtsTrue if evaced something 
@@ -418,6 +390,16 @@ SET_GCT(gc_threads[0]);
    // Now see which stable names are still alive.
    gcStablePtrTable();
  
+#ifdef THREADED_RTS
+  if (n_gc_threads == 1) {
+      for (n = 0; n < n_capabilities; n++) {
+          pruneSparkQueue(&capabilities[n]);
+      }
+  } else {
+      pruneSparkQueue(&capabilities[gct->thread_index]);
+  }
+#endif
+
  #ifdef PROFILING
    // We call processHeapClosureForDead() on every closure destroyed during
    // the current garbage collection, so we invoke LdvCensusForDead().
@@ -554,14 +536,8 @@ SET_GCT(gc_threads[0]);
      // stats.  Every mutable list is copied during every GC.
      if (g > 0) {
         nat mut_list_size = 0;
-       for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
-           mut_list_size += bd->free - bd->start;
-       }
          for (n = 0; n < n_capabilities; n++) {
-            for (bd = capabilities[n].mut_lists[g]; 
-                 bd != NULL; bd = bd->link) {
-                mut_list_size += bd->free - bd->start;
-            }
+            mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
          }
         copied +=  mut_list_size;
  
@@ -645,7 +621,7 @@ SET_GCT(gc_threads[0]);
          freeChain(gen->large_objects);
          gen->large_objects  = gen->scavenged_large_objects;
          gen->n_large_blocks = gen->n_scavenged_large_blocks;
-       gen->n_new_large_blocks = 0;
+        gen->n_new_large_words = 0;
          ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
      }
      else // for generations > N
@@ -671,10 +647,6 @@ SET_GCT(gc_threads[0]);
    // Calculate the amount of live data for stats.
    live = calcLiveWords();
  
-  // Free the small objects allocated via allocate(), since this will
-  // all have been copied into G0S1 now.  
-  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
-
    // Start a new pinned_object_block
    for (n = 0; n < n_capabilities; n++) {
        capabilities[n].pinned_object_block = NULL;
@@ -696,9 +668,14 @@ SET_GCT(gc_threads[0]);
        }
    }
  
+  // Reset the nursery: make the blocks empty
+  allocated += clearNurseries();
+
    resize_nursery();
  
- // mark the garbage collected CAFs as dead 
+  resetNurseries();
+
+ // mark the garbage collected CAFs as dead
  #if 0 && defined(DEBUG) // doesn't work at the moment 
    if (major_gc) { gcCAFs(); }
  #endif
@@ -721,15 +698,7 @@ SET_GCT(gc_threads[0]);
        }
    }
  
-  // Reset the nursery
-  resetNurseries();
-
-  // start any pending finalizers 
-  RELEASE_SM_LOCK;
-  scheduleFinalizers(cap, old_weak_ptr_list);
-  ACQUIRE_SM_LOCK;
-  
-  // send exceptions to any threads which were about to die 
+  // send exceptions to any threads which were about to die
    RELEASE_SM_LOCK;
    resurrectThreads(resurrected_threads);
    ACQUIRE_SM_LOCK;
@@ -737,6 +706,30 @@ SET_GCT(gc_threads[0]);
    // Update the stable pointer hash table.
    updateStablePtrTable(major_gc);
  
+  // unlock the StablePtr table.  Must be before scheduleFinalizers(),
+  // because a finalizer may call hs_free_fun_ptr() or
+  // hs_free_stable_ptr(), both of which access the StablePtr table.
+  stablePtrPostGC();
+
+  // Start any pending finalizers.  Must be after
+  // updateStablePtrTable() and stablePtrPostGC() (see #4221).
+  RELEASE_SM_LOCK;
+  scheduleFinalizers(cap, old_weak_ptr_list);
+  ACQUIRE_SM_LOCK;
+
+  if (major_gc) {
+      nat need, got;
+      need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
+      got = mblocks_allocated;
+      /* If the amount of data remains constant, next major GC we'll
+         require (F+1)*need. We leave (F+2)*need in order to reduce
+         repeated deallocation and reallocation. */
+      need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
+      if (got > need) {
+          returnMemoryToOS(got - need);
+      }
+  }
+
    // check sanity after GC
    IF_DEBUG(sanity, checkSanity(rtsTrue));
  
@@ -768,9 +761,6 @@ SET_GCT(gc_threads[0]);
    slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
    stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
  
-  // unlock the StablePtr table
-  stablePtrPostGC();
-
    // Guess which generation we'll collect *next* time
    initialise_N(force_major_gc);
  
@@ -1079,6 +1069,16 @@ gcWorkerThread (Capability *cap)
  
      scavenge_until_all_done();
      
+#ifdef THREADED_RTS
+    // Now that the whole heap is marked, we discard any sparks that
+    // were found to be unreachable.  The main GC thread is currently
+    // marking heap reachable via weak pointers, so it is
+    // non-deterministic whether a spark will be retained if it is
+    // only reachable via weak pointers.  To fix this problem would
+    // require another GC barrier, which is too high a price.
+    pruneSparkQueue(cap);
+#endif
+
  #ifdef USE_PAPI
      // count events in this thread towards the GC totals
      papi_thread_stop_gc1_count(gct->papi_events);
@@ -1102,8 +1102,8 @@ gcWorkerThread (Capability *cap)
  void
  waitForGcThreads (Capability *cap USED_IF_THREADS)
  {
-    nat n_threads = RtsFlags.ParFlags.nNodes;
-    nat me = cap->no;
+    const nat n_threads = RtsFlags.ParFlags.nNodes;
+    const nat me = cap->no;
      nat i, j;
      rtsBool retry = rtsTrue;
  
@@ -1177,8 +1177,8 @@ shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
  void
  releaseGCThreads (Capability *cap USED_IF_THREADS)
  {
-    nat n_threads = RtsFlags.ParFlags.nNodes;
-    nat me = cap->no;
+    const nat n_threads = RtsFlags.ParFlags.nNodes;
+    const nat me = cap->no;
      nat i;
      for (i=0; i < n_threads; i++) {
          if (i == me) continue;
@@ -1208,9 +1208,7 @@ init_collected_gen (nat g, nat n_threads)
      // list always has at least one block; this means we can avoid a
      // check for NULL in recordMutable().
      if (g != 0) {
-       freeChain(generations[g].mut_list);
-       generations[g].mut_list = allocBlock();
-       for (i = 0; i < n_capabilities; i++) {
+        for (i = 0; i < n_capabilities; i++) {
             freeChain(capabilities[i].mut_lists[g]);
             capabilities[i].mut_lists[g] = allocBlock();
         }
@@ -1280,6 +1278,10 @@ init_collected_gen (nat g, nat n_threads)
                  if (!(bd->flags & BF_FRAGMENTED)) {
                      bd->flags |= BF_MARKED;
                  }
+
+                // BF_SWEPT should be set only on blocks that are being
+                // collected in sweep(), so clear any stale flag here
+                bd->flags &= ~BF_SWEPT;
              }
          }
      }
@@ -1325,8 +1327,6 @@ init_uncollected_gen (nat g, nat threads)
      // save the current mutable lists for this generation, and
      // allocate a fresh block for each one.  We'll traverse these
      // mutable lists as roots early on in the GC.
-    generations[g].saved_mut_list = generations[g].mut_list;
-    generations[g].mut_list = allocBlock(); 
      for (n = 0; n < n_capabilities; n++) {
          capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
          capabilities[n].mut_lists[g] = allocBlock();
@@ -1465,8 +1465,8 @@ resize_generations (void)
  
      if (major_gc && RtsFlags.GcFlags.generations > 1) {
         nat live, size, min_alloc, words;
-       nat max  = RtsFlags.GcFlags.maxHeapSize;
-       nat gens = RtsFlags.GcFlags.generations;
+       const nat max  = RtsFlags.GcFlags.maxHeapSize;
+       const nat gens = RtsFlags.GcFlags.generations;
         
         // live in the oldest generations
          if (oldest_gen->live_estimate != 0) {
@@ -1491,11 +1491,10 @@ resize_generations (void)
  
         // Auto-enable compaction when the residency reaches a
         // certain percentage of the maximum heap size (default: 30%).
-       if (RtsFlags.GcFlags.generations > 1 &&
-           (RtsFlags.GcFlags.compact ||
-            (max > 0 &&
-             oldest_gen->n_blocks > 
-             (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
+       if (RtsFlags.GcFlags.compact ||
+            (max > 0 &&
+             oldest_gen->n_blocks > 
+             (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
             oldest_gen->mark = 1;
             oldest_gen->compact = 1;
  //       debugBelch("compaction: on\n", live);
@@ -1555,7 +1554,7 @@ resize_generations (void)
  static void
  resize_nursery (void)
  {
-    lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
+    const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
  
      if (RtsFlags.GcFlags.generations == 1)
      {   // Two-space collector:
@@ -1615,7 +1614,7 @@ resize_nursery (void)
         if (RtsFlags.GcFlags.heapSizeSuggestion)
         {
             long blocks;
-           nat needed = calcNeeded();  // approx blocks needed at next GC 
+           const nat needed = calcNeeded();    // approx blocks needed at next GC 
             
             /* Guess how much will be live in generation 0 step 0 next time.
              * A good approximation is obtained by finding the