GC refactoring and cleanup

author Simon Marlow <marlowsd@gmail.com>

Wed, 2 Feb 2011 15:49:55 +0000 (15:49 +0000)

committer Simon Marlow <marlowsd@gmail.com>

Wed, 2 Feb 2011 15:49:55 +0000 (15:49 +0000)
author Simon Marlow <marlowsd@gmail.com>
Wed, 2 Feb 2011 15:49:55 +0000 (15:49 +0000)
committer Simon Marlow <marlowsd@gmail.com>
Wed, 2 Feb 2011 15:49:55 +0000 (15:49 +0000)
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h

index 7cee670..bbed216 100644 (file)
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -87,7 +87,7 @@ typedef struct generation_ {
  #if defined(THREADED_RTS)
      char pad[128];                      // make sure the following is
                                          // on a separate cache line.
-    SpinLock     sync_large_objects;    // lock for large_objects
+    SpinLock     sync;                  // lock for large_objects
                                          //    and scavenged_large_objects
  #endif
  
@@ -101,9 +101,6 @@ typedef struct generation_ {
      unsigned int n_old_blocks;         // number of blocks in from-space
      unsigned int live_estimate;         // for sweeping: estimate of live data
      
-    bdescr *     part_blocks;           // partially-full scanned blocks
-    unsigned int n_part_blocks;         // count of above
-
      bdescr *     scavenged_large_objects;  // live large objs after GC (d-link)
      unsigned int n_scavenged_large_blocks; // size (not count) of above
  
diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c

index 550f703..775505f 100644 (file)
--- a/rts/RaiseAsync.c
+++ b/rts/RaiseAsync.c
@@ -592,7 +592,7 @@ removeFromMVarBlockedQueue (StgTSO *tso)
  
      if (mvar->head == q) {
          mvar->head = q->link;
-        q->header.info = &stg_IND_info;
+        OVERWRITE_INFO(q, &stg_IND_info);
          if (mvar->tail == q) {
              mvar->tail = (StgMVarTSOQueue*)END_TSO_QUEUE;
          }
@@ -602,10 +602,10 @@ removeFromMVarBlockedQueue (StgTSO *tso)
          // we lose the tail pointer when the GC shorts out the IND.
          // So we use MSG_NULL as a kind of non-dupable indirection;
          // these are ignored by takeMVar/putMVar.
-        q->header.info = &stg_MSG_NULL_info;
+        OVERWRITE_INFO(q, &stg_MSG_NULL_info);
      }
      else {
-        q->header.info = &stg_IND_info;
+        OVERWRITE_INFO(q, &stg_IND_info);
      }
  
      // revoke the MVar operation
diff --git a/rts/Stats.c b/rts/Stats.c

index cbd02cd..4b9f6d8 100644 (file)
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -710,7 +710,7 @@ stat_exit(int alloc)
                  statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                  statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-                    statsPrintf("gen[%d].sync_large_objects: %"FMT_Word64"\n", g, generations[g].sync_large_objects.spin);
+                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                  }
              }
  #endif
@@ -772,8 +772,9 @@ void
  statDescribeGens(void)
  {
    nat g, mut, lge, i;
-  lnat live, slop;
+  lnat gen_slop;
    lnat tot_live, tot_slop;
+  lnat gen_live, gen_blocks;
    bdescr *bd;
    generation *gen;
    
@@ -785,25 +786,32 @@ statDescribeGens(void)
  
    tot_live = 0;
    tot_slop = 0;
+
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+      gen = &generations[g];
+
+      for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
+          lge++;
+      }
+
+      gen_live   = genLiveWords(gen);
+      gen_blocks = genLiveBlocks(gen);
+
        mut = 0;
        for (i = 0; i < n_capabilities; i++) {
            mut += countOccupied(capabilities[i].mut_lists[g]);
+          gen_live   += gcThreadLiveWords(i,g);
+          gen_blocks += gcThreadLiveBlocks(i,g);
        }
  
-      gen = &generations[g];
-
        debugBelch("%5d %7d %9d", g, gen->max_blocks, mut);
  
-      for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) {
-          lge++;
-      }
-      live = gen->n_words + countOccupied(gen->large_objects);
-      slop = (gen->n_blocks + gen->n_large_blocks) * BLOCK_SIZE_W - live;
-      debugBelch("%8d %8d %8ld %8ld\n", gen->n_blocks, lge,
-                 live*sizeof(W_), slop*sizeof(W_));
-      tot_live += live;
-      tot_slop += slop;
+      gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live;
+
+      debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge,
+                 gen_live*sizeof(W_), gen_slop*sizeof(W_));
+      tot_live += gen_live;
+      tot_slop += gen_slop;
    }
    debugBelch("----------------------------------------------------------\n");
    debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_));
diff --git a/rts/Threads.c b/rts/Threads.c

index dcb916a..3e1c5cf 100644 (file)
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -628,7 +628,7 @@ threadStackOverflow (Capability *cap, StgTSO *tso)
      // will be discarded after the first overflow, being replaced by a
      // non-moving 32k chunk.
      if (old_stack->sp == old_stack->stack + old_stack->stack_size) {
-        frame->next_chunk = new_stack;
+        frame->next_chunk = (StgStack*)END_TSO_QUEUE; // dummy
      }
  
      tso->stackobj = new_stack;
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c

index 18ace21..d049f98 100644 (file)
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -248,7 +248,7 @@ evacuate_large(StgPtr p)
    bd = Bdescr(p);
    gen = bd->gen;
    gen_no = bd->gen_no;
-  ACQUIRE_SPIN_LOCK(&gen->sync_large_objects);
+  ACQUIRE_SPIN_LOCK(&gen->sync);
  
    // already evacuated? 
    if (bd->flags & BF_EVACUATED) { 
@@ -259,7 +259,7 @@ evacuate_large(StgPtr p)
         gct->failed_to_evac = rtsTrue;
         TICK_GC_FAILED_PROMOTION();
      }
-    RELEASE_SPIN_LOCK(&gen->sync_large_objects);
+    RELEASE_SPIN_LOCK(&gen->sync);
      return;
    }
  
@@ -297,16 +297,16 @@ evacuate_large(StgPtr p)
    // them straight on the scavenged_large_objects list.
    if (bd->flags & BF_PINNED) {
        ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS);
-      if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync_large_objects); }
+      if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); }
        dbl_link_onto(bd, &new_gen->scavenged_large_objects);
        new_gen->n_scavenged_large_blocks += bd->blocks;
-      if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync_large_objects); }
+      if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync); }
    } else {
        bd->link = ws->todo_large_objects;
        ws->todo_large_objects = bd;
    }
  
-  RELEASE_SPIN_LOCK(&gen->sync_large_objects);
+  RELEASE_SPIN_LOCK(&gen->sync);
  }
  
  /* ----------------------------------------------------------------------------
diff --git a/rts/sm/GC.c b/rts/sm/GC.c

index c990835..221f24a 100644 (file)
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -137,8 +137,8 @@ DECLARE_GCT
  static void mark_root               (void *user, StgClosure **root);
  static void zero_static_object_list (StgClosure* first_static);
  static nat  initialise_N            (rtsBool force_major_gc);
-static void init_collected_gen      (nat g, nat threads);
-static void init_uncollected_gen    (nat g, nat threads);
+static void prepare_collected_gen   (generation *gen);
+static void prepare_uncollected_gen (generation *gen);
  static void init_gc_thread          (gc_thread *t);
  static void resize_generations      (void);
  static void resize_nursery          (void);
@@ -148,6 +148,7 @@ static StgWord inc_running          (void);
  static StgWord dec_running          (void);
  static void wakeup_gc_threads       (nat n_threads, nat me);
  static void shutdown_gc_threads     (nat n_threads, nat me);
+static void collect_gct_blocks      (void);
  
  #if 0 && defined(DEBUG)
  static void gcCAFs                  (void);
@@ -174,7 +175,7 @@ GarbageCollect (rtsBool force_major_gc,
  {
    bdescr *bd;
    generation *gen;
-  lnat live, allocated, max_copied, avg_copied, slop;
+  lnat live_blocks, live_words, allocated, max_copied, avg_copied;
    gc_thread *saved_gct;
    nat g, t, n;
  
@@ -274,7 +275,7 @@ GarbageCollect (rtsBool force_major_gc,
  #endif
  
    // check sanity *before* GC
-  IF_DEBUG(sanity, checkSanity(rtsTrue));
+  IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
  
    // Initialise all our gc_thread structures
    for (t = 0; t < n_gc_threads; t++) {
@@ -283,12 +284,11 @@ GarbageCollect (rtsBool force_major_gc,
  
    // Initialise all the generations/steps that we're collecting.
    for (g = 0; g <= N; g++) {
-      init_collected_gen(g,n_gc_threads);
+      prepare_collected_gen(&generations[g]);
    }
-  
    // Initialise all the generations/steps that we're *not* collecting.
    for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
-      init_uncollected_gen(g,n_gc_threads);
+      prepare_uncollected_gen(&generations[g]);
    }
  
    /* Allocate a mark stack if we're doing a major collection.
@@ -420,76 +420,6 @@ SET_GCT(gc_threads[0]);
        }
    }
  
-  // For each workspace, in each thread, move the copied blocks to the step
-  {
-      gc_thread *thr;
-      gen_workspace *ws;
-      bdescr *prev, *next;
-
-      for (t = 0; t < n_gc_threads; t++) {
-         thr = gc_threads[t];
-
-          for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-              ws = &thr->gens[g];
-
-              // Push the final block
-              if (ws->todo_bd) { 
-                  push_scanned_block(ws->todo_bd, ws);
-              }
-
-              ASSERT(gct->scan_bd == NULL);
-              ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
-              
-              prev = NULL;
-              for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
-                  ws->gen->n_words += bd->free - bd->start;
-                  prev = bd;
-              }
-              if (prev != NULL) {
-                  prev->link = ws->gen->blocks;
-                  ws->gen->blocks = ws->scavd_list;
-              } 
-              ws->gen->n_blocks += ws->n_scavd_blocks;
-          }
-      }
-
-      // Add all the partial blocks *after* we've added all the full
-      // blocks.  This is so that we can grab the partial blocks back
-      // again and try to fill them up in the next GC.
-      for (t = 0; t < n_gc_threads; t++) {
-         thr = gc_threads[t];
-
-          for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-              ws = &thr->gens[g];
-
-              prev = NULL;
-              for (bd = ws->part_list; bd != NULL; bd = next) {
-                  next = bd->link;
-                  if (bd->free == bd->start) {
-                      if (prev == NULL) {
-                          ws->part_list = next;
-                      } else {
-                          prev->link = next;
-                      }
-                      freeGroup(bd);
-                      ws->n_part_blocks--;
-                  } else {
-                      ws->gen->n_words += bd->free - bd->start;
-                      prev = bd;
-                  }
-              }
-              if (prev != NULL) {
-                  prev->link = ws->gen->blocks;
-                  ws->gen->blocks = ws->part_list;
-              }
-              ws->gen->n_blocks += ws->n_part_blocks;
-
-              ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks);
-              ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words);
-         }
-      }
-  }
-
    // Finally: compact or sweep the oldest generation.
    if (major_gc && oldest_gen->mark) {
        if (oldest_gen->compact) 
@@ -498,8 +428,6 @@ SET_GCT(gc_threads[0]);
            sweep(oldest_gen);
    }
  
-  /* run through all the generations/steps and tidy up 
-   */
    copied = 0;
    max_copied = 0;
    avg_copied = 0;
@@ -525,6 +453,16 @@ SET_GCT(gc_threads[0]);
        }
    }
  
+  // Run through all the generations/steps and tidy up.
+  // We're going to:
+  //   - count the amount of "live" data (live_words, live_blocks)
+  //   - count the amount of "copied" data in this GC (copied)
+  //   - free from-space
+  //   - make to-space the new from-space (set BF_EVACUATED on all blocks)
+  //
+  live_words = 0;
+  live_blocks = 0;
+
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
  
      if (g == N) {
@@ -622,7 +560,6 @@ SET_GCT(gc_threads[0]);
          gen->large_objects  = gen->scavenged_large_objects;
          gen->n_large_blocks = gen->n_scavenged_large_blocks;
          gen->n_new_large_words = 0;
-        ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
      }
      else // for generations > N
      {
@@ -637,16 +574,31 @@ SET_GCT(gc_threads[0]);
          
         // add the new blocks we promoted during this GC 
         gen->n_large_blocks += gen->n_scavenged_large_blocks;
-        ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+    }
+
+    ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+
+    gen->scavenged_large_objects = NULL;
+    gen->n_scavenged_large_blocks = 0;
+
+    // Count "live" data
+    live_words  += genLiveWords(gen);
+    live_blocks += genLiveBlocks(gen);
+
+    // add in the partial blocks in the gen_workspaces, but ignore gen 0
+    // if this is a local GC (we can't count another capability's part_list)
+    {
+        nat i;
+        for (i = 0; i < n_capabilities; i++) {
+            live_words  += gcThreadLiveWords(i, gen->no);
+            live_blocks += gcThreadLiveBlocks(i, gen->no);
+        }
      }
    } // for all generations
  
    // update the max size of older generations after a major GC
    resize_generations();
    
-  // Calculate the amount of live data for stats.
-  live = calcLiveWords();
-
    // Start a new pinned_object_block
    for (n = 0; n < n_capabilities; n++) {
        capabilities[n].pinned_object_block = NULL;
@@ -698,11 +650,6 @@ SET_GCT(gc_threads[0]);
        }
    }
  
-  // send exceptions to any threads which were about to die
-  RELEASE_SM_LOCK;
-  resurrectThreads(resurrected_threads);
-  ACQUIRE_SM_LOCK;
-
    // Update the stable pointer hash table.
    updateStablePtrTable(major_gc);
  
@@ -717,6 +664,17 @@ SET_GCT(gc_threads[0]);
    scheduleFinalizers(cap, old_weak_ptr_list);
    ACQUIRE_SM_LOCK;
  
+  // check sanity after GC
+  // before resurrectThreads(), because that might overwrite some
+  // closures, which will cause problems with THREADED where we don't
+  // fill slop.
+  IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
+
+  // send exceptions to any threads which were about to die
+  RELEASE_SM_LOCK;
+  resurrectThreads(resurrected_threads);
+  ACQUIRE_SM_LOCK;
+
    if (major_gc) {
        nat need, got;
        need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
@@ -730,10 +688,7 @@ SET_GCT(gc_threads[0]);
        }
    }
  
-  // check sanity after GC
-  IF_DEBUG(sanity, checkSanity(rtsTrue));
-
-  // extra GC trace info 
+  // extra GC trace info
    IF_DEBUG(gc, statDescribeGens());
  
  #ifdef DEBUG
@@ -758,8 +713,8 @@ SET_GCT(gc_threads[0]);
  #endif
  
    // ok, GC over: tell the stats department what happened. 
-  slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
-  stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
+  stat_endGC(allocated, live_words, copied, N, max_copied, avg_copied,
+             live_blocks * BLOCK_SIZE_W - live_words /* slop */);
  
    // Guess which generation we'll collect *next* time
    initialise_N(force_major_gc);
@@ -858,7 +813,21 @@ new_gc_thread (nat n, gc_thread *t)
          ASSERT(g == ws->gen->no);
          ws->my_gct = t;
          
-        ws->todo_bd = NULL;
+        // We want to call
+        //   alloc_todo_block(ws,0);
+        // but can't, because it uses gct which isn't set up at this point.
+        // Hence, allocate a block for todo_bd manually:
+        {
+            bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
+            initBdescr(bd, ws->gen, ws->gen->to);
+            bd->flags = BF_EVACUATED;
+            bd->u.scan = bd->free = bd->start;
+
+            ws->todo_bd = bd;
+            ws->todo_free = bd->free;
+            ws->todo_lim = bd->start + BLOCK_SIZE_W;
+        }
+
          ws->todo_q = newWSDeque(128);
          ws->todo_overflow = NULL;
          ws->n_todo_overflow = 0;
@@ -1012,6 +981,8 @@ loop:
      scavenge_loop();
  #endif
  
+    collect_gct_blocks();
+
      // scavenge_loop() only exits when there's no work to do
      r = dec_running();
      
@@ -1197,16 +1168,16 @@ releaseGCThreads (Capability *cap USED_IF_THREADS)
     ------------------------------------------------------------------------- */
  
  static void
-init_collected_gen (nat g, nat n_threads)
+prepare_collected_gen (generation *gen)
  {
-    nat t, i;
+    nat i, g, n;
      gen_workspace *ws;
-    generation *gen;
-    bdescr *bd;
+    bdescr *bd, *next;
  
      // Throw away the current mutable list.  Invariant: the mutable
      // list always has at least one block; this means we can avoid a
      // check for NULL in recordMutable().
+    g = gen->no;
      if (g != 0) {
          for (i = 0; i < n_capabilities; i++) {
             freeChain(capabilities[i].mut_lists[g]);
@@ -1231,9 +1202,35 @@ init_collected_gen (nat g, nat n_threads)
      gen->live_estimate = 0;
  
      // initialise the large object queues.
-    gen->scavenged_large_objects = NULL;
-    gen->n_scavenged_large_blocks = 0;
-    
+    ASSERT(gen->scavenged_large_objects == NULL);
+    ASSERT(gen->n_scavenged_large_blocks == 0);
+
+    // grab all the partial blocks stashed in the gc_thread workspaces and
+    // move them to the old_blocks list of this gen.
+    for (n = 0; n < n_capabilities; n++) {
+        ws = &gc_threads[n]->gens[gen->no];
+
+        for (bd = ws->part_list; bd != NULL; bd = next) {
+            next = bd->link;
+            bd->link = gen->old_blocks;
+            gen->old_blocks = bd;
+            gen->n_old_blocks += bd->blocks;
+        }
+        ws->part_list = NULL;
+        ws->n_part_blocks = 0;
+
+        ASSERT(ws->scavd_list == NULL);
+        ASSERT(ws->n_scavd_blocks == 0);
+
+        if (ws->todo_free != ws->todo_bd->start) {
+            ws->todo_bd->free = ws->todo_free;
+            ws->todo_bd->link = gen->old_blocks;
+            gen->old_blocks = ws->todo_bd;
+            gen->n_old_blocks += ws->todo_bd->blocks;
+            alloc_todo_block(ws,0); // always has one block.
+        }
+    }
+
      // mark the small objects as from-space
      for (bd = gen->old_blocks; bd; bd = bd->link) {
          bd->flags &= ~BF_EVACUATED;
@@ -1285,106 +1282,83 @@ init_collected_gen (nat g, nat n_threads)
              }
          }
      }
-
-    // For each GC thread, for each step, allocate a "todo" block to
-    // store evacuated objects to be scavenged, and a block to store
-    // evacuated objects that do not need to be scavenged.
-    for (t = 0; t < n_threads; t++) {
-        ws = &gc_threads[t]->gens[g];
-        
-        ws->todo_large_objects = NULL;
-        
-        ws->part_list = NULL;
-        ws->n_part_blocks = 0;
-        
-        // allocate the first to-space block; extra blocks will be
-        // chained on as necessary.
-        ws->todo_bd = NULL;
-        ASSERT(looksEmptyWSDeque(ws->todo_q));
-        alloc_todo_block(ws,0);
-        
-        ws->todo_overflow = NULL;
-        ws->n_todo_overflow = 0;
-        
-        ws->scavd_list = NULL;
-        ws->n_scavd_blocks = 0;
-    }
  }
  
  
  /* ----------------------------------------------------------------------------
+   Save the mutable lists in saved_mut_lists
+   ------------------------------------------------------------------------- */
+
+static void
+stash_mut_list (Capability *cap, nat gen_no)
+{
+    cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
+    cap->mut_lists[gen_no] = allocBlock_sync();
+}
+
+/* ----------------------------------------------------------------------------
     Initialise a generation that is *not* to be collected 
     ------------------------------------------------------------------------- */
  
  static void
-init_uncollected_gen (nat g, nat threads)
+prepare_uncollected_gen (generation *gen)
  {
-    nat t, n;
-    gen_workspace *ws;
-    generation *gen;
-    bdescr *bd;
+    nat i;
+
+
+    ASSERT(gen->no > 0);
  
      // save the current mutable lists for this generation, and
      // allocate a fresh block for each one.  We'll traverse these
      // mutable lists as roots early on in the GC.
-    for (n = 0; n < n_capabilities; n++) {
-        capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
-        capabilities[n].mut_lists[g] = allocBlock();
+    for (i = 0; i < n_capabilities; i++) {
+        stash_mut_list(&capabilities[i], gen->no);
      }
  
-    gen = &generations[g];
+    ASSERT(gen->scavenged_large_objects == NULL);
+    ASSERT(gen->n_scavenged_large_blocks == 0);
+}
  
-    gen->scavenged_large_objects = NULL;
-    gen->n_scavenged_large_blocks = 0;
+/* -----------------------------------------------------------------------------
+   Collect the completed blocks from a GC thread and attach them to
+   the generation.
+   -------------------------------------------------------------------------- */
  
-    for (t = 0; t < threads; t++) {
-        ws = &gc_threads[t]->gens[g];
-           
-        ASSERT(looksEmptyWSDeque(ws->todo_q));
-        ws->todo_large_objects = NULL;
-        
-        ws->part_list = NULL;
-        ws->n_part_blocks = 0;
+static void
+collect_gct_blocks (void)
+{
+    nat g;
+    gen_workspace *ws;
+    bdescr *bd, *prev;
+    
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+        ws = &gct->gens[g];
          
-        ws->scavd_list = NULL;
-        ws->n_scavd_blocks = 0;
+        // there may still be a block attached to ws->todo_bd;
+        // leave it there to use next time.
+
+        if (ws->scavd_list != NULL) {
+            ACQUIRE_SPIN_LOCK(&ws->gen->sync);
+
+            ASSERT(gct->scan_bd == NULL);
+            ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
          
-        // If the block at the head of the list in this generation
-        // is less than 3/4 full, then use it as a todo block.
-        if (gen->blocks && isPartiallyFull(gen->blocks))
-        {
-            ws->todo_bd = gen->blocks;
-            ws->todo_free = ws->todo_bd->free;
-            ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
-            gen->blocks = gen->blocks->link;
-            gen->n_blocks -= 1;
-            gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
-            ws->todo_bd->link = NULL;
-            // we must scan from the current end point.
-            ws->todo_bd->u.scan = ws->todo_bd->free;
-        } 
-        else
-        {
-            ws->todo_bd = NULL;
-            alloc_todo_block(ws,0);
-        }
-    }
+            prev = NULL;
+            for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
+                ws->gen->n_words += bd->free - bd->start;
+                prev = bd;
+            }
+            if (prev != NULL) {
+                prev->link = ws->gen->blocks;
+                ws->gen->blocks = ws->scavd_list;
+            } 
+            ws->gen->n_blocks += ws->n_scavd_blocks;
  
-    // deal out any more partial blocks to the threads' part_lists
-    t = 0;
-    while (gen->blocks && isPartiallyFull(gen->blocks))
-    {
-        bd = gen->blocks;
-        gen->blocks = bd->link;
-        ws = &gc_threads[t]->gens[g];
-        bd->link = ws->part_list;
-        ws->part_list = bd;
-        ws->n_part_blocks += 1;
-        bd->u.scan = bd->free;
-        gen->n_blocks -= 1;
-        gen->n_words -= bd->free - bd->start;
-        t++;
-        if (t == n_gc_threads) t = 0;
+            ws->scavd_list = NULL;
+            ws->n_scavd_blocks = 0;
+
+            RELEASE_SPIN_LOCK(&ws->gen->sync);
+        }
      }
  }
  
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c

index 65a70fa..8ebb9a2 100644 (file)
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -21,6 +21,7 @@
  #include "RtsUtils.h"
  #include "sm/Storage.h"
  #include "sm/BlockAlloc.h"
+#include "GCThread.h"
  #include "Sanity.h"
  #include "Schedule.h"
  #include "Apply.h"
@@ -468,17 +469,10 @@ checkClosure( StgClosure* p )
     all the objects in the remainder of the chain.
     -------------------------------------------------------------------------- */
  
-void 
-checkHeap(bdescr *bd)
+void checkHeapChain (bdescr *bd)
  {
      StgPtr p;
  
-#if defined(THREADED_RTS)
-    // heap sanity checking doesn't work with SMP, because we can't
-    // zero the slop (see Updates.h).
-    return;
-#endif
-
      for (; bd != NULL; bd = bd->link) {
          if(!(bd->flags & BF_SWEPT)) {
              p = bd->start;
@@ -496,7 +490,7 @@ checkHeap(bdescr *bd)
      }
  }
  
-void 
+void
  checkHeapChunk(StgPtr start, StgPtr end)
  {
    StgPtr p;
@@ -587,6 +581,24 @@ checkGlobalTSOList (rtsBool checkTSOs)
                ASSERT(Bdescr((P_)tso)->gen_no == 0 || (tso->flags & TSO_MARKED));
                tso->flags &= ~TSO_MARKED;
            }
+
+          {
+              StgStack *stack;
+              StgUnderflowFrame *frame;
+
+              stack = tso->stackobj;
+              while (1) {
+                  if (stack->dirty & 1) {
+                      ASSERT(Bdescr((P_)stack)->gen_no == 0 || (stack->dirty & TSO_MARKED));
+                      stack->dirty &= ~TSO_MARKED;
+                  }
+                  frame = (StgUnderflowFrame*) (stack->stack + stack->stack_size
+                                                - sizeofW(StgUnderflowFrame));
+                  if (frame->info != &stg_stack_underflow_frame_info
+                      || frame->next_chunk == (StgStack*)END_TSO_QUEUE) break;
+                  stack = frame->next_chunk;
+              }
+          }
        }
    }
  }
@@ -595,7 +607,7 @@ checkGlobalTSOList (rtsBool checkTSOs)
     Check mutable list sanity.
     -------------------------------------------------------------------------- */
  
-void
+static void
  checkMutableList( bdescr *mut_bd, nat gen )
  {
      bdescr *bd;
@@ -605,25 +617,37 @@ checkMutableList( bdescr *mut_bd, nat gen )
      for (bd = mut_bd; bd != NULL; bd = bd->link) {
         for (q = bd->start; q < bd->free; q++) {
             p = (StgClosure *)*q;
-           ASSERT(!HEAP_ALLOCED(p) || Bdescr((P_)p)->gen_no == gen);
-            if (get_itbl(p)->type == TSO) {
+            ASSERT(!HEAP_ALLOCED(p) || Bdescr((P_)p)->gen_no == gen);
+            checkClosure(p);
+
+            switch (get_itbl(p)->type) {
+            case TSO:
                  ((StgTSO *)p)->flags |= TSO_MARKED;
+                break;
+            case STACK:
+                ((StgStack *)p)->dirty |= TSO_MARKED;
+                break;
              }
-       }
+        }
      }
  }
  
-void
-checkMutableLists (rtsBool checkTSOs)
+static void
+checkLocalMutableLists (nat cap_no)
  {
-    nat g, i;
+    nat g;
+    for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
+        checkMutableList(capabilities[cap_no].mut_lists[g], g);
+    }
+}
  
-    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-        for (i = 0; i < n_capabilities; i++) {
-            checkMutableList(capabilities[i].mut_lists[g], g);
-        }
+static void
+checkMutableLists (void)
+{
+    nat i;
+    for (i = 0; i < n_capabilities; i++) {
+        checkLocalMutableLists(i);
      }
-    checkGlobalTSOList(checkTSOs);
  }
  
  /*
@@ -677,7 +701,8 @@ checkNurserySanity (nursery *nursery)
  
      prev = NULL;
      for (bd = nursery->blocks; bd != NULL; bd = bd->link) {
-       ASSERT(bd->u.back == prev);
+        ASSERT(bd->gen == g0);
+        ASSERT(bd->u.back == prev);
         prev = bd;
         blocks += bd->blocks;
      }
@@ -685,41 +710,59 @@ checkNurserySanity (nursery *nursery)
      ASSERT(blocks == nursery->n_blocks);
  }
  
+static void checkGeneration (generation *gen, 
+                             rtsBool after_major_gc USED_IF_THREADS)
+{
+    nat n;
+    gen_workspace *ws;
+
+    ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
+    ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+
+#if defined(THREADED_RTS)
+    // heap sanity checking doesn't work with SMP, because we can't
+    // zero the slop (see Updates.h).  However, we can sanity-check
+    // the heap after a major gc, because there is no slop.
+    if (!after_major_gc) return;
+#endif
+
+    checkHeapChain(gen->blocks);
+
+    for (n = 0; n < n_capabilities; n++) {
+        ws = &gc_threads[n]->gens[gen->no];
+        checkHeapChain(ws->todo_bd);
+        checkHeapChain(ws->part_list);
+        checkHeapChain(ws->scavd_list);
+    }
+
+    checkLargeObjects(gen->large_objects);
+}
  
  /* Full heap sanity check. */
-void
-checkSanity( rtsBool check_heap )
+static void checkFullHeap (rtsBool after_major_gc)
  {
      nat g, n;
  
      for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-        ASSERT(countBlocks(generations[g].blocks)
-               == generations[g].n_blocks);
-        ASSERT(countBlocks(generations[g].large_objects)
-                   == generations[g].n_large_blocks);
-        if (check_heap) {
-            checkHeap(generations[g].blocks);
-        }
-        checkLargeObjects(generations[g].large_objects);
+        checkGeneration(&generations[g], after_major_gc);
      }
-    
      for (n = 0; n < n_capabilities; n++) {
          checkNurserySanity(&nurseries[n]);
      }
-    
+}
+
+void checkSanity (rtsBool after_gc, rtsBool major_gc)
+{
+    checkFullHeap(after_gc && major_gc);
+
      checkFreeListSanity();
  
-#if defined(THREADED_RTS)
      // always check the stacks in threaded mode, because checkHeap()
      // does nothing in this case.
-    checkMutableLists(rtsTrue);
-#else
-    if (check_heap) {
-        checkMutableLists(rtsFalse);
-    } else {
-        checkMutableLists(rtsTrue);
+    if (after_gc) {
+        checkMutableLists();
+        checkGlobalTSOList(rtsTrue);
      }
-#endif
  }
  
  // If memInventory() calculates that we have a memory leak, this
@@ -732,18 +775,21 @@ checkSanity( rtsBool check_heap )
  static void
  findMemoryLeak (void)
  {
-  nat g, i;
-  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-      for (i = 0; i < n_capabilities; i++) {
-         markBlocks(capabilities[i].mut_lists[g]);
-      }
-      markBlocks(generations[g].blocks);
-      markBlocks(generations[g].large_objects);
-  }
+    nat g, i;
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+        for (i = 0; i < n_capabilities; i++) {
+            markBlocks(capabilities[i].mut_lists[g]);
+            markBlocks(gc_threads[i]->gens[g].part_list);
+            markBlocks(gc_threads[i]->gens[g].scavd_list);
+            markBlocks(gc_threads[i]->gens[g].todo_bd);
+        }
+        markBlocks(generations[g].blocks);
+        markBlocks(generations[g].large_objects);
+    }
  
-  for (i = 0; i < n_capabilities; i++) {
-      markBlocks(nurseries[i].blocks);
-  }
+    for (i = 0; i < n_capabilities; i++) {
+        markBlocks(nurseries[i].blocks);
+    }
  
  #ifdef PROFILING
    // TODO:
@@ -823,6 +869,9 @@ memInventory (rtsBool show)
        gen_blocks[g] = 0;
        for (i = 0; i < n_capabilities; i++) {
           gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]);
+          gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].part_list);
+          gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].scavd_list);
+          gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].todo_bd);
        }          
        gen_blocks[g] += genBlocks(&generations[g]);
    }
diff --git a/rts/sm/Sanity.h b/rts/sm/Sanity.h

index 602be54..f302bc2 100644 (file)
--- a/rts/sm/Sanity.h
+++ b/rts/sm/Sanity.h
@@ -21,9 +21,9 @@
  # endif
  
  /* debugging routines */
-void checkSanity        ( rtsBool check_heap );
+void checkSanity        ( rtsBool after_gc, rtsBool major_gc );
  void checkNurserySanity ( nursery *nursery );
-void checkHeap          ( bdescr *bd );
+void checkHeapChain     ( bdescr *bd );
  void checkHeapChunk     ( StgPtr start, StgPtr end );
  void checkLargeObjects  ( bdescr *bd );
  void checkTSO           ( StgTSO* tso );
@@ -33,9 +33,6 @@ void checkStackChunk    ( StgPtr sp, StgPtr stack_end );
  StgOffset checkStackFrame ( StgPtr sp );
  StgOffset checkClosure  ( StgClosure* p );
  
-void checkMutableList   ( bdescr *bd, nat gen );
-void checkMutableLists  ( rtsBool checkTSOs );
-
  void checkRunQueue      (Capability *cap);
  
  void memInventory (rtsBool show);
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c

index 9ac152a..d77734f 100644 (file)
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -1755,10 +1755,10 @@ scavenge_large (gen_workspace *ws)
         // the front when evacuating.
         ws->todo_large_objects = bd->link;
         
-       ACQUIRE_SPIN_LOCK(&ws->gen->sync_large_objects);
+        ACQUIRE_SPIN_LOCK(&ws->gen->sync);
         dbl_link_onto(bd, &ws->gen->scavenged_large_objects);
         ws->gen->n_scavenged_large_blocks += bd->blocks;
-       RELEASE_SPIN_LOCK(&ws->gen->sync_large_objects);
+        RELEASE_SPIN_LOCK(&ws->gen->sync);
         
         p = bd->start;
         if (scavenge_one(p)) {
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c

index 1b8a720..6c11065 100644 (file)
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -15,6 +15,7 @@
  #include "Rts.h"
  
  #include "Storage.h"
+#include "GCThread.h"
  #include "RtsUtils.h"
  #include "Stats.h"
  #include "BlockAlloc.h"
@@ -84,7 +85,7 @@ initGeneration (generation *gen, int g)
      gen->compact = 0;
      gen->bitmap = NULL;
  #ifdef THREADED_RTS
-    initSpinLock(&gen->sync_large_objects);
+    initSpinLock(&gen->sync);
  #endif
      gen->threads = END_TSO_QUEUE;
      gen->old_threads = END_TSO_QUEUE;
@@ -766,7 +767,6 @@ lnat
  calcAllocated (rtsBool include_nurseries)
  {
    nat allocated = 0;
-  bdescr *bd;
    nat i;
  
    // When called from GC.c, we already have the allocation count for
@@ -775,9 +775,7 @@ calcAllocated (rtsBool include_nurseries)
    if (include_nurseries)
    {
        for (i = 0; i < n_capabilities; i++) {
-          for (bd = nurseries[i].blocks; bd; bd = bd->link) {
-              allocated += (lnat)(bd->free - bd->start);
-          }
+          allocated += countOccupied(nurseries[i].blocks);
        }
    }
  
@@ -787,25 +785,6 @@ calcAllocated (rtsBool include_nurseries)
    return allocated;
  }  
  
-/* Approximate the amount of live data in the heap.  To be called just
- * after garbage collection (see GarbageCollect()).
- */
-lnat calcLiveBlocks (void)
-{
-  nat g;
-  lnat live = 0;
-  generation *gen;
-
-  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-      /* approximate amount of live data (doesn't take into account slop
-       * at end of each block).
-       */
-      gen = &generations[g];
-      live += gen->n_large_blocks + gen->n_blocks;
-  }
-  return live;
-}
-
  lnat countOccupied (bdescr *bd)
  {
      lnat words;
@@ -818,6 +797,38 @@ lnat countOccupied (bdescr *bd)
      return words;
  }
  
+lnat genLiveWords (generation *gen)
+{
+    return gen->n_words + countOccupied(gen->large_objects);
+}
+
+lnat genLiveBlocks (generation *gen)
+{
+    return gen->n_blocks + gen->n_large_blocks;
+}
+
+lnat gcThreadLiveWords (nat i, nat g)
+{
+    lnat words;
+
+    words   = countOccupied(gc_threads[i]->gens[g].todo_bd);
+    words  += countOccupied(gc_threads[i]->gens[g].part_list);
+    words  += countOccupied(gc_threads[i]->gens[g].scavd_list);
+
+    return words;
+}
+
+lnat gcThreadLiveBlocks (nat i, nat g)
+{
+    lnat blocks;
+
+    blocks  = countBlocks(gc_threads[i]->gens[g].todo_bd);
+    blocks += gc_threads[i]->gens[g].n_part_blocks;
+    blocks += gc_threads[i]->gens[g].n_scavd_blocks;
+
+    return blocks;
+}
+
  // Return an accurate count of the live data in the heap, excluding
  // generation 0.
  lnat calcLiveWords (void)
@@ -828,8 +839,19 @@ lnat calcLiveWords (void)
      
      live = 0;
      for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-        gen = &generations[g];
-        live += gen->n_words + countOccupied(gen->large_objects);
+        live += genLiveWords(&generations[g]);
+    }
+    return live;
+}
+
+lnat calcLiveBlocks (void)
+{
+    nat g;
+    lnat live;
+
+    live = 0;
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+        live += genLiveBlocks(&generations[g]);
      }
      return live;
  }
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h

index 8927ed6..d463d1a 100644 (file)
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -87,11 +87,18 @@ lnat     countNurseryBlocks   ( void );
     -------------------------------------------------------------------------- */
  
  lnat    calcAllocated  (rtsBool count_nurseries);
-lnat    calcLiveBlocks (void);
-lnat    calcLiveWords  (void);
  lnat    countOccupied  (bdescr *bd);
  lnat    calcNeeded     (void);
  
+lnat    gcThreadLiveWords  (nat i, nat g);
+lnat    gcThreadLiveBlocks (nat i, nat g);
+
+lnat    genLiveWords  (generation *gen);
+lnat    genLiveBlocks (generation *gen);
+
+lnat    calcLiveBlocks (void);
+lnat    calcLiveWords  (void);
+
  /* ----------------------------------------------------------------------------
     Storage manager internal APIs and globals
     ------------------------------------------------------------------------- */
author	Simon Marlow <marlowsd@gmail.com>
	Wed, 2 Feb 2011 15:49:55 +0000 (15:49 +0000)
committer	Simon Marlow <marlowsd@gmail.com>
	Wed, 2 Feb 2011 15:49:55 +0000 (15:49 +0000)
includes/rts/storage/GC.h		patch \| blob \| history
rts/RaiseAsync.c		patch \| blob \| history
rts/Stats.c		patch \| blob \| history
rts/Threads.c		patch \| blob \| history
rts/sm/Evac.c		patch \| blob \| history
rts/sm/GC.c		patch \| blob \| history
rts/sm/Sanity.c		patch \| blob \| history
rts/sm/Sanity.h		patch \| blob \| history
rts/sm/Scav.c		patch \| blob \| history
rts/sm/Storage.c		patch \| blob \| history
rts/sm/Storage.h		patch \| blob \| history