rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "Compact.h"
  44 #include "Evac.h"
  45 #include "Scav.h"
  46 #include "GCUtils.h"
  47 #include "MarkStack.h"
  48 #include "MarkWeak.h"
  49 #include "Sparks.h"
  50 #include "Sweep.h"
  51
  52 #include <string.h> // for memset()
  53 #include <unistd.h>
  54
  55 /* -----------------------------------------------------------------------------
  56    Global variables
  57    -------------------------------------------------------------------------- */
  58
  59 /* STATIC OBJECT LIST.
  60  *
  61  * During GC:
  62  * We maintain a linked list of static objects that are still live.
  63  * The requirements for this list are:
  64  *
  65  *  - we need to scan the list while adding to it, in order to
  66  *    scavenge all the static objects (in the same way that
  67  *    breadth-first scavenging works for dynamic objects).
  68  *
  69  *  - we need to be able to tell whether an object is already on
  70  *    the list, to break loops.
  71  *
  72  * Each static object has a "static link field", which we use for
  73  * linking objects on to the list.  We use a stack-type list, consing
  74  * objects on the front as they are added (this means that the
  75  * scavenge phase is depth-first, not breadth-first, but that
  76  * shouldn't matter).
  77  *
  78  * A separate list is kept for objects that have been scavenged
  79  * already - this is so that we can zero all the marks afterwards.
  80  *
  81  * An object is on the list if its static link field is non-zero; this
  82  * means that we have to mark the end of the list with '1', not NULL.
  83  *
  84  * Extra notes for generational GC:
  85  *
  86  * Each generation has a static object list associated with it.  When
  87  * collecting generations up to N, we treat the static object lists
  88  * from generations > N as roots.
  89  *
  90  * We build up a static object list while collecting generations 0..N,
  91  * which is then appended to the static object list of generation N+1.
  92  */
  93
  94 /* N is the oldest generation being collected, where the generations
  95  * are numbered starting at 0.  A major GC (indicated by the major_gc
  96  * flag) is when we're collecting all generations.  We only attempt to
  97  * deal with static objects and GC CAFs when doing a major GC.
  98  */
  99 nat N;
 100 rtsBool major_gc;
 101
 102 /* Data used for allocation area sizing.
 103  */
 104 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
 105
 106 /* Mut-list stats */
 107 #ifdef DEBUG
 108 nat mutlist_MUTVARS,
 109     mutlist_MUTARRS,
 110     mutlist_MVARS,
 111     mutlist_OTHERS;
 112 #endif
 113
 114 /* Thread-local data for each GC thread
 115  */
 116 gc_thread **gc_threads = NULL;
 117
 118 #if !defined(THREADED_RTS)
 119 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
 120 #endif
 121
 122 // Number of threads running in *this* GC.  Affects how many
 123 // step->todos[] lists we have to look in to find work.
 124 nat n_gc_threads;
 125
 126 // For stats:
 127 long copied;        // *words* copied & scavenged during this GC
 128
 129 rtsBool work_stealing;
 130
 131 DECLARE_GCT
 132
 133 /* -----------------------------------------------------------------------------
 134    Static function declarations
 135    -------------------------------------------------------------------------- */
 136
 137 static void mark_root               (void *user, StgClosure **root);
 138 static void zero_static_object_list (StgClosure* first_static);
 139 static nat  initialise_N            (rtsBool force_major_gc);
 140 static void init_collected_gen      (nat g, nat threads);
 141 static void init_uncollected_gen    (nat g, nat threads);
 142 static void init_gc_thread          (gc_thread *t);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void scavenge_until_all_done (void);
 147 static StgWord inc_running          (void);
 148 static StgWord dec_running          (void);
 149 static void wakeup_gc_threads       (nat n_threads, nat me);
 150 static void shutdown_gc_threads     (nat n_threads, nat me);
 151
 152 #if 0 && defined(DEBUG)
 153 static void gcCAFs                  (void);
 154 #endif
 155
 156 /* -----------------------------------------------------------------------------
 157    The mark stack.
 158    -------------------------------------------------------------------------- */
 159
 160 bdescr *mark_stack_top_bd; // topmost block in the mark stack
 161 bdescr *mark_stack_bd;     // current block in the mark stack
 162 StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
 163
 164 /* -----------------------------------------------------------------------------
 165    GarbageCollect: the main entry point to the garbage collector.
 166
 167    Locks held: all capabilities are held throughout GarbageCollect().
 168    -------------------------------------------------------------------------- */
 169
 170 void
 171 GarbageCollect (rtsBool force_major_gc,
 172                 nat gc_type USED_IF_THREADS,
 173                 Capability *cap)
 174 {
 175   bdescr *bd;
 176   generation *gen;
 177   lnat live, allocated, max_copied, avg_copied, slop;
 178   gc_thread *saved_gct;
 179   nat g, t, n;
 180
 181   // necessary if we stole a callee-saves register for gct:
 182   saved_gct = gct;
 183
 184 #ifdef PROFILING
 185   CostCentreStack *prev_CCS;
 186 #endif
 187
 188   ACQUIRE_SM_LOCK;
 189
 190 #if defined(RTS_USER_SIGNALS)
 191   if (RtsFlags.MiscFlags.install_signal_handlers) {
 192     // block signals
 193     blockUserSignals();
 194   }
 195 #endif
 196
 197   ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
 198   // otherwise adjust the padding in gen_workspace.
 199
 200   // tell the stats department that we've started a GC
 201   stat_startGC();
 202
 203   // tell the STM to discard any cached closures it's hoping to re-use
 204   stmPreGCHook();
 205
 206   // lock the StablePtr table
 207   stablePtrPreGC();
 208
 209 #ifdef DEBUG
 210   mutlist_MUTVARS = 0;
 211   mutlist_MUTARRS = 0;
 212   mutlist_OTHERS = 0;
 213 #endif
 214
 215   // attribute any costs to CCS_GC
 216 #ifdef PROFILING
 217   prev_CCS = CCCS;
 218   CCCS = CCS_GC;
 219 #endif
 220
 221   /* Approximate how much we allocated.
 222    * Todo: only when generating stats?
 223    */
 224   allocated = calcAllocated();
 225
 226   /* Figure out which generation to collect
 227    */
 228   n = initialise_N(force_major_gc);
 229
 230 #if defined(THREADED_RTS)
 231   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
 232                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
 233       // It's not always a good idea to do load balancing in parallel
 234       // GC.  In particular, for a parallel program we don't want to
 235       // lose locality by moving cached data into another CPU's cache
 236       // (this effect can be quite significant).
 237       //
 238       // We could have a more complex way to deterimine whether to do
 239       // work stealing or not, e.g. it might be a good idea to do it
 240       // if the heap is big.  For now, we just turn it on or off with
 241       // a flag.
 242 #endif
 243
 244   /* Start threads, so they can be spinning up while we finish initialisation.
 245    */
 246   start_gc_threads();
 247
 248 #if defined(THREADED_RTS)
 249   /* How many threads will be participating in this GC?
 250    * We don't try to parallelise minor GCs (unless the user asks for
 251    * it with +RTS -gn0), or mark/compact/sweep GC.
 252    */
 253   if (gc_type == PENDING_GC_PAR) {
 254       n_gc_threads = RtsFlags.ParFlags.nNodes;
 255   } else {
 256       n_gc_threads = 1;
 257   }
 258 #else
 259   n_gc_threads = 1;
 260 #endif
 261
 262   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 263         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 264
 265 #ifdef RTS_GTK_FRONTPANEL
 266   if (RtsFlags.GcFlags.frontpanel) {
 267       updateFrontPanelBeforeGC(N);
 268   }
 269 #endif
 270
 271 #ifdef DEBUG
 272   // check for memory leaks if DEBUG is on
 273   memInventory(DEBUG_gc);
 274 #endif
 275
 276   // check sanity *before* GC
 277   IF_DEBUG(sanity, checkSanity(rtsTrue));
 278
 279   // Initialise all our gc_thread structures
 280   for (t = 0; t < n_gc_threads; t++) {
 281       init_gc_thread(gc_threads[t]);
 282   }
 283
 284   // Initialise all the generations/steps that we're collecting.
 285   for (g = 0; g <= N; g++) {
 286       init_collected_gen(g,n_gc_threads);
 287   }
 288
 289   // Initialise all the generations/steps that we're *not* collecting.
 290   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 291       init_uncollected_gen(g,n_gc_threads);
 292   }
 293
 294   /* Allocate a mark stack if we're doing a major collection.
 295    */
 296   if (major_gc && oldest_gen->mark) {
 297       mark_stack_bd     = allocBlock();
 298       mark_stack_top_bd = mark_stack_bd;
 299       mark_stack_bd->link = NULL;
 300       mark_stack_bd->u.back = NULL;
 301       mark_sp           = mark_stack_bd->start;
 302   } else {
 303       mark_stack_bd     = NULL;
 304       mark_stack_top_bd = NULL;
 305       mark_sp           = NULL;
 306   }
 307
 308   // this is the main thread
 309 #ifdef THREADED_RTS
 310   if (n_gc_threads == 1) {
 311       SET_GCT(gc_threads[0]);
 312   } else {
 313       SET_GCT(gc_threads[cap->no]);
 314   }
 315 #else
 316 SET_GCT(gc_threads[0]);
 317 #endif
 318
 319   /* -----------------------------------------------------------------------
 320    * follow all the roots that we know about:
 321    */
 322
 323   // the main thread is running: this prevents any other threads from
 324   // exiting prematurely, so we can start them now.
 325   // NB. do this after the mutable lists have been saved above, otherwise
 326   // the other GC threads will be writing into the old mutable lists.
 327   inc_running();
 328   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 329
 330   // Mutable lists from each generation > N
 331   // we want to *scavenge* these roots, not evacuate them: they're not
 332   // going to move in this GC.
 333   // Also do them in reverse generation order, for the usual reason:
 334   // namely to reduce the likelihood of spurious old->new pointers.
 335   //
 336   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 337 #if defined(THREADED_RTS)
 338       if (n_gc_threads > 1) {
 339           scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 340       } else {
 341           scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]);
 342       }
 343 #else
 344       scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 345 #endif
 346       freeChain_sync(generations[g].saved_mut_list);
 347       generations[g].saved_mut_list = NULL;
 348
 349   }
 350
 351   // scavenge the capability-private mutable lists.  This isn't part
 352   // of markSomeCapabilities() because markSomeCapabilities() can only
 353   // call back into the GC via mark_root() (due to the gct register
 354   // variable).
 355   if (n_gc_threads == 1) {
 356       for (n = 0; n < n_capabilities; n++) {
 357 #if defined(THREADED_RTS)
 358           scavenge_capability_mut_Lists1(&capabilities[n]);
 359 #else
 360           scavenge_capability_mut_lists(&capabilities[n]);
 361 #endif
 362       }
 363   } else {
 364       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 365   }
 366
 367   // follow roots from the CAF list (used by GHCi)
 368   gct->evac_gen = 0;
 369   markCAFs(mark_root, gct);
 370
 371   // follow all the roots that the application knows about.
 372   gct->evac_gen = 0;
 373   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 374                        rtsTrue/*prune sparks*/);
 375
 376 #if defined(RTS_USER_SIGNALS)
 377   // mark the signal handlers (signals should be already blocked)
 378   markSignalHandlers(mark_root, gct);
 379 #endif
 380
 381   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 382   markWeakPtrList();
 383   initWeakForGC();
 384
 385   // Mark the stable pointer table.
 386   markStablePtrTable(mark_root, gct);
 387
 388   /* -------------------------------------------------------------------------
 389    * Repeatedly scavenge all the areas we know about until there's no
 390    * more scavenging to be done.
 391    */
 392   for (;;)
 393   {
 394       scavenge_until_all_done();
 395       // The other threads are now stopped.  We might recurse back to
 396       // here, but from now on this is the only thread.
 397
 398       // must be last...  invariant is that everything is fully
 399       // scavenged at this point.
 400       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 401           inc_running();
 402           continue;
 403       }
 404
 405       // If we get to here, there's really nothing left to do.
 406       break;
 407   }
 408
 409   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 410
 411   // Now see which stable names are still alive.
 412   gcStablePtrTable();
 413
 414 #ifdef THREADED_RTS
 415   if (n_gc_threads == 1) {
 416       for (n = 0; n < n_capabilities; n++) {
 417           pruneSparkQueue(&capabilities[n]);
 418       }
 419   } else {
 420       pruneSparkQueue(&capabilities[gct->thread_index]);
 421   }
 422 #endif
 423
 424 #ifdef PROFILING
 425   // We call processHeapClosureForDead() on every closure destroyed during
 426   // the current garbage collection, so we invoke LdvCensusForDead().
 427   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 428       || RtsFlags.ProfFlags.bioSelector != NULL)
 429     LdvCensusForDead(N);
 430 #endif
 431
 432   // NO MORE EVACUATION AFTER THIS POINT!
 433
 434   // Two-space collector: free the old to-space.
 435   // g0->old_blocks is the old nursery
 436   // g0->blocks is to-space from the previous GC
 437   if (RtsFlags.GcFlags.generations == 1) {
 438       if (g0->blocks != NULL) {
 439           freeChain(g0->blocks);
 440           g0->blocks = NULL;
 441       }
 442   }
 443
 444   // For each workspace, in each thread, move the copied blocks to the step
 445   {
 446       gc_thread *thr;
 447       gen_workspace *ws;
 448       bdescr *prev, *next;
 449
 450       for (t = 0; t < n_gc_threads; t++) {
 451           thr = gc_threads[t];
 452
 453           for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 454               ws = &thr->gens[g];
 455
 456               // Push the final block
 457               if (ws->todo_bd) {
 458                   push_scanned_block(ws->todo_bd, ws);
 459               }
 460
 461               ASSERT(gct->scan_bd == NULL);
 462               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 463
 464               prev = NULL;
 465               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 466                   ws->gen->n_words += bd->free - bd->start;
 467                   prev = bd;
 468               }
 469               if (prev != NULL) {
 470                   prev->link = ws->gen->blocks;
 471                   ws->gen->blocks = ws->scavd_list;
 472               }
 473               ws->gen->n_blocks += ws->n_scavd_blocks;
 474           }
 475       }
 476
 477       // Add all the partial blocks *after* we've added all the full
 478       // blocks.  This is so that we can grab the partial blocks back
 479       // again and try to fill them up in the next GC.
 480       for (t = 0; t < n_gc_threads; t++) {
 481           thr = gc_threads[t];
 482
 483           for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 484               ws = &thr->gens[g];
 485
 486               prev = NULL;
 487               for (bd = ws->part_list; bd != NULL; bd = next) {
 488                   next = bd->link;
 489                   if (bd->free == bd->start) {
 490                       if (prev == NULL) {
 491                           ws->part_list = next;
 492                       } else {
 493                           prev->link = next;
 494                       }
 495                       freeGroup(bd);
 496                       ws->n_part_blocks--;
 497                   } else {
 498                       ws->gen->n_words += bd->free - bd->start;
 499                       prev = bd;
 500                   }
 501               }
 502               if (prev != NULL) {
 503                   prev->link = ws->gen->blocks;
 504                   ws->gen->blocks = ws->part_list;
 505               }
 506               ws->gen->n_blocks += ws->n_part_blocks;
 507
 508               ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks);
 509               ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words);
 510           }
 511       }
 512   }
 513
 514   // Finally: compact or sweep the oldest generation.
 515   if (major_gc && oldest_gen->mark) {
 516       if (oldest_gen->compact)
 517           compact(gct->scavenged_static_objects);
 518       else
 519           sweep(oldest_gen);
 520   }
 521
 522   /* run through all the generations/steps and tidy up
 523    */
 524   copied = 0;
 525   max_copied = 0;
 526   avg_copied = 0;
 527   {
 528       nat i;
 529       for (i=0; i < n_gc_threads; i++) {
 530           if (n_gc_threads > 1) {
 531               debugTrace(DEBUG_gc,"thread %d:", i);
 532               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 533               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 534               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 535               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 536               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 537           }
 538           copied += gc_threads[i]->copied;
 539           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 540       }
 541       if (n_gc_threads == 1) {
 542           max_copied = 0;
 543           avg_copied = 0;
 544       } else {
 545           avg_copied = copied;
 546       }
 547   }
 548
 549   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 550
 551     if (g == N) {
 552       generations[g].collections++; // for stats
 553       if (n_gc_threads > 1) generations[g].par_collections++;
 554     }
 555
 556     // Count the mutable list as bytes "copied" for the purposes of
 557     // stats.  Every mutable list is copied during every GC.
 558     if (g > 0) {
 559         nat mut_list_size = 0;
 560         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 561             mut_list_size += bd->free - bd->start;
 562         }
 563         for (n = 0; n < n_capabilities; n++) {
 564             for (bd = capabilities[n].mut_lists[g];
 565                  bd != NULL; bd = bd->link) {
 566                 mut_list_size += bd->free - bd->start;
 567             }
 568         }
 569         copied +=  mut_list_size;
 570
 571         debugTrace(DEBUG_gc,
 572                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 573                    (unsigned long)(mut_list_size * sizeof(W_)),
 574                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 575     }
 576
 577     bdescr *next, *prev;
 578     gen = &generations[g];
 579
 580     // for generations we collected...
 581     if (g <= N) {
 582
 583         /* free old memory and shift to-space into from-space for all
 584          * the collected steps (except the allocation area).  These
 585          * freed blocks will probaby be quickly recycled.
 586          */
 587         if (gen->mark)
 588         {
 589             // tack the new blocks on the end of the existing blocks
 590             if (gen->old_blocks != NULL) {
 591
 592                 prev = NULL;
 593                 for (bd = gen->old_blocks; bd != NULL; bd = next) {
 594
 595                     next = bd->link;
 596
 597                     if (!(bd->flags & BF_MARKED))
 598                     {
 599                         if (prev == NULL) {
 600                             gen->old_blocks = next;
 601                         } else {
 602                             prev->link = next;
 603                         }
 604                         freeGroup(bd);
 605                         gen->n_old_blocks--;
 606                     }
 607                     else
 608                     {
 609                         gen->n_words += bd->free - bd->start;
 610
 611                         // NB. this step might not be compacted next
 612                         // time, so reset the BF_MARKED flags.
 613                         // They are set before GC if we're going to
 614                         // compact.  (search for BF_MARKED above).
 615                         bd->flags &= ~BF_MARKED;
 616
 617                         // between GCs, all blocks in the heap except
 618                         // for the nursery have the BF_EVACUATED flag set.
 619                         bd->flags |= BF_EVACUATED;
 620
 621                         prev = bd;
 622                     }
 623                 }
 624
 625                 if (prev != NULL) {
 626                     prev->link = gen->blocks;
 627                     gen->blocks = gen->old_blocks;
 628                 }
 629             }
 630             // add the new blocks to the block tally
 631             gen->n_blocks += gen->n_old_blocks;
 632             ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
 633             ASSERT(countOccupied(gen->blocks) == gen->n_words);
 634         }
 635         else // not copacted
 636         {
 637             freeChain(gen->old_blocks);
 638         }
 639
 640         gen->old_blocks = NULL;
 641         gen->n_old_blocks = 0;
 642
 643         /* LARGE OBJECTS.  The current live large objects are chained on
 644          * scavenged_large, having been moved during garbage
 645          * collection from large_objects.  Any objects left on the
 646          * large_objects list are therefore dead, so we free them here.
 647          */
 648         freeChain(gen->large_objects);
 649         gen->large_objects  = gen->scavenged_large_objects;
 650         gen->n_large_blocks = gen->n_scavenged_large_blocks;
 651         gen->n_new_large_blocks = 0;
 652         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 653     }
 654     else // for generations > N
 655     {
 656         /* For older generations, we need to append the
 657          * scavenged_large_object list (i.e. large objects that have been
 658          * promoted during this GC) to the large_object list for that step.
 659          */
 660         for (bd = gen->scavenged_large_objects; bd; bd = next) {
 661             next = bd->link;
 662             dbl_link_onto(bd, &gen->large_objects);
 663         }
 664
 665         // add the new blocks we promoted during this GC
 666         gen->n_large_blocks += gen->n_scavenged_large_blocks;
 667         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 668     }
 669   } // for all generations
 670
 671   // update the max size of older generations after a major GC
 672   resize_generations();
 673
 674   // Calculate the amount of live data for stats.
 675   live = calcLiveWords();
 676
 677   // Free the small objects allocated via allocate(), since this will
 678   // all have been copied into G0S1 now.
 679   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 680
 681   // Start a new pinned_object_block
 682   for (n = 0; n < n_capabilities; n++) {
 683       capabilities[n].pinned_object_block = NULL;
 684   }
 685
 686   // Free the mark stack.
 687   if (mark_stack_top_bd != NULL) {
 688       debugTrace(DEBUG_gc, "mark stack: %d blocks",
 689                  countBlocks(mark_stack_top_bd));
 690       freeChain(mark_stack_top_bd);
 691   }
 692
 693   // Free any bitmaps.
 694   for (g = 0; g <= N; g++) {
 695       gen = &generations[g];
 696       if (gen->bitmap != NULL) {
 697           freeGroup(gen->bitmap);
 698           gen->bitmap = NULL;
 699       }
 700   }
 701
 702   resize_nursery();
 703
 704  // mark the garbage collected CAFs as dead
 705 #if 0 && defined(DEBUG) // doesn't work at the moment
 706   if (major_gc) { gcCAFs(); }
 707 #endif
 708
 709 #ifdef PROFILING
 710   // resetStaticObjectForRetainerProfiling() must be called before
 711   // zeroing below.
 712   if (n_gc_threads > 1) {
 713       barf("profiling is currently broken with multi-threaded GC");
 714       // ToDo: fix the gct->scavenged_static_objects below
 715   }
 716   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 717 #endif
 718
 719   // zero the scavenged static object list
 720   if (major_gc) {
 721       nat i;
 722       for (i = 0; i < n_gc_threads; i++) {
 723           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 724       }
 725   }
 726
 727   // Reset the nursery
 728   resetNurseries();
 729
 730   // send exceptions to any threads which were about to die
 731   RELEASE_SM_LOCK;
 732   resurrectThreads(resurrected_threads);
 733   ACQUIRE_SM_LOCK;
 734
 735   // Update the stable pointer hash table.
 736   updateStablePtrTable(major_gc);
 737
 738   // unlock the StablePtr table.  Must be before scheduleFinalizers(),
 739   // because a finalizer may call hs_free_fun_ptr() or
 740   // hs_free_stable_ptr(), both of which access the StablePtr table.
 741   stablePtrPostGC();
 742
 743   // Start any pending finalizers.  Must be after
 744   // updateStablePtrTable() and stablePtrPostGC() (see #4221).
 745   RELEASE_SM_LOCK;
 746   scheduleFinalizers(cap, old_weak_ptr_list);
 747   ACQUIRE_SM_LOCK;
 748
 749   // check sanity after GC
 750   IF_DEBUG(sanity, checkSanity(rtsTrue));
 751
 752   // extra GC trace info
 753   IF_DEBUG(gc, statDescribeGens());
 754
 755 #ifdef DEBUG
 756   // symbol-table based profiling
 757   /*  heapCensus(to_blocks); */ /* ToDo */
 758 #endif
 759
 760   // restore enclosing cost centre
 761 #ifdef PROFILING
 762   CCCS = prev_CCS;
 763 #endif
 764
 765 #ifdef DEBUG
 766   // check for memory leaks if DEBUG is on
 767   memInventory(DEBUG_gc);
 768 #endif
 769
 770 #ifdef RTS_GTK_FRONTPANEL
 771   if (RtsFlags.GcFlags.frontpanel) {
 772       updateFrontPanelAfterGC( N, live );
 773   }
 774 #endif
 775
 776   // ok, GC over: tell the stats department what happened.
 777   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 778   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 779
 780   // Guess which generation we'll collect *next* time
 781   initialise_N(force_major_gc);
 782
 783 #if defined(RTS_USER_SIGNALS)
 784   if (RtsFlags.MiscFlags.install_signal_handlers) {
 785     // unblock signals again
 786     unblockUserSignals();
 787   }
 788 #endif
 789
 790   RELEASE_SM_LOCK;
 791
 792   SET_GCT(saved_gct);
 793 }
 794
 795 /* -----------------------------------------------------------------------------
 796    Figure out which generation to collect, initialise N and major_gc.
 797
 798    Also returns the total number of blocks in generations that will be
 799    collected.
 800    -------------------------------------------------------------------------- */
 801
 802 static nat
 803 initialise_N (rtsBool force_major_gc)
 804 {
 805     int g;
 806     nat blocks, blocks_total;
 807
 808     blocks = 0;
 809     blocks_total = 0;
 810
 811     if (force_major_gc) {
 812         N = RtsFlags.GcFlags.generations - 1;
 813     } else {
 814         N = 0;
 815     }
 816
 817     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 818
 819         blocks = generations[g].n_words / BLOCK_SIZE_W
 820                + generations[g].n_large_blocks;
 821
 822         if (blocks >= generations[g].max_blocks) {
 823             N = stg_max(N,g);
 824         }
 825         if ((nat)g <= N) {
 826             blocks_total += blocks;
 827         }
 828     }
 829
 830     blocks_total += countNurseryBlocks();
 831
 832     major_gc = (N == RtsFlags.GcFlags.generations-1);
 833     return blocks_total;
 834 }
 835
 836 /* -----------------------------------------------------------------------------
 837    Initialise the gc_thread structures.
 838    -------------------------------------------------------------------------- */
 839
 840 #define GC_THREAD_INACTIVE             0
 841 #define GC_THREAD_STANDING_BY          1
 842 #define GC_THREAD_RUNNING              2
 843 #define GC_THREAD_WAITING_TO_CONTINUE  3
 844
 845 static void
 846 new_gc_thread (nat n, gc_thread *t)
 847 {
 848     nat g;
 849     gen_workspace *ws;
 850
 851 #ifdef THREADED_RTS
 852     t->id = 0;
 853     initSpinLock(&t->gc_spin);
 854     initSpinLock(&t->mut_spin);
 855     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 856     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 857                           // thread to start up, see wakeup_gc_threads
 858 #endif
 859
 860     t->thread_index = n;
 861     t->free_blocks = NULL;
 862     t->gc_count = 0;
 863
 864     init_gc_thread(t);
 865
 866 #ifdef USE_PAPI
 867     t->papi_events = -1;
 868 #endif
 869
 870     for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 871     {
 872         ws = &t->gens[g];
 873         ws->gen = &generations[g];
 874         ASSERT(g == ws->gen->no);
 875         ws->my_gct = t;
 876
 877         ws->todo_bd = NULL;
 878         ws->todo_q = newWSDeque(128);
 879         ws->todo_overflow = NULL;
 880         ws->n_todo_overflow = 0;
 881
 882         ws->part_list = NULL;
 883         ws->n_part_blocks = 0;
 884
 885         ws->scavd_list = NULL;
 886         ws->n_scavd_blocks = 0;
 887     }
 888 }
 889
 890
 891 void
 892 initGcThreads (void)
 893 {
 894     if (gc_threads == NULL) {
 895 #if defined(THREADED_RTS)
 896         nat i;
 897         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 898                                      sizeof(gc_thread*),
 899                                      "alloc_gc_threads");
 900
 901         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 902             gc_threads[i] =
 903                 stgMallocBytes(sizeof(gc_thread) +
 904                                RtsFlags.GcFlags.generations * sizeof(gen_workspace),
 905                                "alloc_gc_threads");
 906
 907             new_gc_thread(i, gc_threads[i]);
 908         }
 909 #else
 910         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 911         gc_threads[0] = gct;
 912         new_gc_thread(0,gc_threads[0]);
 913 #endif
 914     }
 915 }
 916
 917 void
 918 freeGcThreads (void)
 919 {
 920     nat g;
 921     if (gc_threads != NULL) {
 922 #if defined(THREADED_RTS)
 923         nat i;
 924         for (i = 0; i < n_capabilities; i++) {
 925             for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 926             {
 927                 freeWSDeque(gc_threads[i]->gens[g].todo_q);
 928             }
 929             stgFree (gc_threads[i]);
 930         }
 931         stgFree (gc_threads);
 932 #else
 933         for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 934         {
 935             freeWSDeque(gc_threads[0]->gens[g].todo_q);
 936         }
 937         stgFree (gc_threads);
 938 #endif
 939         gc_threads = NULL;
 940     }
 941 }
 942
 943 /* ----------------------------------------------------------------------------
 944    Start GC threads
 945    ------------------------------------------------------------------------- */
 946
 947 static volatile StgWord gc_running_threads;
 948
 949 static StgWord
 950 inc_running (void)
 951 {
 952     StgWord new;
 953     new = atomic_inc(&gc_running_threads);
 954     ASSERT(new <= n_gc_threads);
 955     return new;
 956 }
 957
 958 static StgWord
 959 dec_running (void)
 960 {
 961     ASSERT(gc_running_threads != 0);
 962     return atomic_dec(&gc_running_threads);
 963 }
 964
 965 static rtsBool
 966 any_work (void)
 967 {
 968     int g;
 969     gen_workspace *ws;
 970
 971     gct->any_work++;
 972
 973     write_barrier();
 974
 975     // scavenge objects in compacted generation
 976     if (mark_stack_bd != NULL && !mark_stack_empty()) {
 977         return rtsTrue;
 978     }
 979
 980     // Check for global work in any step.  We don't need to check for
 981     // local work, because we have already exited scavenge_loop(),
 982     // which means there is no local work for this thread.
 983     for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
 984         ws = &gct->gens[g];
 985         if (ws->todo_large_objects) return rtsTrue;
 986         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 987         if (ws->todo_overflow) return rtsTrue;
 988     }
 989
 990 #if defined(THREADED_RTS)
 991     if (work_stealing) {
 992         nat n;
 993         // look for work to steal
 994         for (n = 0; n < n_gc_threads; n++) {
 995             if (n == gct->thread_index) continue;
 996             for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
 997                 ws = &gc_threads[n]->gens[g];
 998                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 999             }
1000         }
1001     }
1002 #endif
1003
1004     gct->no_work++;
1005 #if defined(THREADED_RTS)
1006     yieldThread();
1007 #endif
1008
1009     return rtsFalse;
1010 }
1011
1012 static void
1013 scavenge_until_all_done (void)
1014 {
1015     nat r;
1016
1017
1018 loop:
1019     traceEventGcWork(&capabilities[gct->thread_index]);
1020
1021 #if defined(THREADED_RTS)
1022     if (n_gc_threads > 1) {
1023         scavenge_loop();
1024     } else {
1025         scavenge_loop1();
1026     }
1027 #else
1028     scavenge_loop();
1029 #endif
1030
1031     // scavenge_loop() only exits when there's no work to do
1032     r = dec_running();
1033
1034     traceEventGcIdle(&capabilities[gct->thread_index]);
1035
1036     debugTrace(DEBUG_gc, "%d GC threads still running", r);
1037
1038     while (gc_running_threads != 0) {
1039         // usleep(1);
1040         if (any_work()) {
1041             inc_running();
1042             goto loop;
1043         }
1044         // any_work() does not remove the work from the queue, it
1045         // just checks for the presence of work.  If we find any,
1046         // then we increment gc_running_threads and go back to
1047         // scavenge_loop() to perform any pending work.
1048     }
1049
1050     traceEventGcDone(&capabilities[gct->thread_index]);
1051 }
1052
1053 #if defined(THREADED_RTS)
1054
1055 void
1056 gcWorkerThread (Capability *cap)
1057 {
1058     gc_thread *saved_gct;
1059
1060     // necessary if we stole a callee-saves register for gct:
1061     saved_gct = gct;
1062
1063     gct = gc_threads[cap->no];
1064     gct->id = osThreadId();
1065
1066     // Wait until we're told to wake up
1067     RELEASE_SPIN_LOCK(&gct->mut_spin);
1068     gct->wakeup = GC_THREAD_STANDING_BY;
1069     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1070     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1071
1072 #ifdef USE_PAPI
1073     // start performance counters in this thread...
1074     if (gct->papi_events == -1) {
1075         papi_init_eventset(&gct->papi_events);
1076     }
1077     papi_thread_start_gc1_count(gct->papi_events);
1078 #endif
1079
1080     // Every thread evacuates some roots.
1081     gct->evac_gen = 0;
1082     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1083                          rtsTrue/*prune sparks*/);
1084     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1085
1086     scavenge_until_all_done();
1087
1088 #ifdef THREADED_RTS
1089     // Now that the whole heap is marked, we discard any sparks that
1090     // were found to be unreachable.  The main GC thread is currently
1091     // marking heap reachable via weak pointers, so it is
1092     // non-deterministic whether a spark will be retained if it is
1093     // only reachable via weak pointers.  To fix this problem would
1094     // require another GC barrier, which is too high a price.
1095     pruneSparkQueue(cap);
1096 #endif
1097
1098 #ifdef USE_PAPI
1099     // count events in this thread towards the GC totals
1100     papi_thread_stop_gc1_count(gct->papi_events);
1101 #endif
1102
1103     // Wait until we're told to continue
1104     RELEASE_SPIN_LOCK(&gct->gc_spin);
1105     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1106     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1107                gct->thread_index);
1108     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1109     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1110
1111     SET_GCT(saved_gct);
1112 }
1113
1114 #endif
1115
1116 #if defined(THREADED_RTS)
1117
1118 void
1119 waitForGcThreads (Capability *cap USED_IF_THREADS)
1120 {
1121     const nat n_threads = RtsFlags.ParFlags.nNodes;
1122     const nat me = cap->no;
1123     nat i, j;
1124     rtsBool retry = rtsTrue;
1125
1126     while(retry) {
1127         for (i=0; i < n_threads; i++) {
1128             if (i == me) continue;
1129             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1130                 prodCapability(&capabilities[i], cap->running_task);
1131             }
1132         }
1133         for (j=0; j < 10; j++) {
1134             retry = rtsFalse;
1135             for (i=0; i < n_threads; i++) {
1136                 if (i == me) continue;
1137                 write_barrier();
1138                 setContextSwitches();
1139                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1140                     retry = rtsTrue;
1141                 }
1142             }
1143             if (!retry) break;
1144             yieldThread();
1145         }
1146     }
1147 }
1148
1149 #endif // THREADED_RTS
1150
1151 static void
1152 start_gc_threads (void)
1153 {
1154 #if defined(THREADED_RTS)
1155     gc_running_threads = 0;
1156 #endif
1157 }
1158
1159 static void
1160 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1161 {
1162 #if defined(THREADED_RTS)
1163     nat i;
1164     for (i=0; i < n_threads; i++) {
1165         if (i == me) continue;
1166         inc_running();
1167         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1168         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1169
1170         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1171         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1172         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1173     }
1174 #endif
1175 }
1176
1177 // After GC is complete, we must wait for all GC threads to enter the
1178 // standby state, otherwise they may still be executing inside
1179 // any_work(), and may even remain awake until the next GC starts.
1180 static void
1181 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1182 {
1183 #if defined(THREADED_RTS)
1184     nat i;
1185     for (i=0; i < n_threads; i++) {
1186         if (i == me) continue;
1187         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1188     }
1189 #endif
1190 }
1191
1192 #if defined(THREADED_RTS)
1193 void
1194 releaseGCThreads (Capability *cap USED_IF_THREADS)
1195 {
1196     const nat n_threads = RtsFlags.ParFlags.nNodes;
1197     const nat me = cap->no;
1198     nat i;
1199     for (i=0; i < n_threads; i++) {
1200         if (i == me) continue;
1201         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1202             barf("releaseGCThreads");
1203
1204         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1205         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1206         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1207     }
1208 }
1209 #endif
1210
1211 /* ----------------------------------------------------------------------------
1212    Initialise a generation that is to be collected
1213    ------------------------------------------------------------------------- */
1214
1215 static void
1216 init_collected_gen (nat g, nat n_threads)
1217 {
1218     nat t, i;
1219     gen_workspace *ws;
1220     generation *gen;
1221     bdescr *bd;
1222
1223     // Throw away the current mutable list.  Invariant: the mutable
1224     // list always has at least one block; this means we can avoid a
1225     // check for NULL in recordMutable().
1226     if (g != 0) {
1227         freeChain(generations[g].mut_list);
1228         generations[g].mut_list = allocBlock();
1229         for (i = 0; i < n_capabilities; i++) {
1230             freeChain(capabilities[i].mut_lists[g]);
1231             capabilities[i].mut_lists[g] = allocBlock();
1232         }
1233     }
1234
1235     gen = &generations[g];
1236     ASSERT(gen->no == g);
1237
1238     // we'll construct a new list of threads in this step
1239     // during GC, throw away the current list.
1240     gen->old_threads = gen->threads;
1241     gen->threads = END_TSO_QUEUE;
1242
1243     // deprecate the existing blocks
1244     gen->old_blocks   = gen->blocks;
1245     gen->n_old_blocks = gen->n_blocks;
1246     gen->blocks       = NULL;
1247     gen->n_blocks     = 0;
1248     gen->n_words      = 0;
1249     gen->live_estimate = 0;
1250
1251     // initialise the large object queues.
1252     gen->scavenged_large_objects = NULL;
1253     gen->n_scavenged_large_blocks = 0;
1254
1255     // mark the small objects as from-space
1256     for (bd = gen->old_blocks; bd; bd = bd->link) {
1257         bd->flags &= ~BF_EVACUATED;
1258     }
1259
1260     // mark the large objects as from-space
1261     for (bd = gen->large_objects; bd; bd = bd->link) {
1262         bd->flags &= ~BF_EVACUATED;
1263     }
1264
1265     // for a compacted generation, we need to allocate the bitmap
1266     if (gen->mark) {
1267         nat bitmap_size; // in bytes
1268         bdescr *bitmap_bdescr;
1269         StgWord *bitmap;
1270
1271         bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1272
1273         if (bitmap_size > 0) {
1274             bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1275                                        / BLOCK_SIZE);
1276             gen->bitmap = bitmap_bdescr;
1277             bitmap = bitmap_bdescr->start;
1278
1279             debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1280                        bitmap_size, bitmap);
1281
1282             // don't forget to fill it with zeros!
1283             memset(bitmap, 0, bitmap_size);
1284
1285             // For each block in this step, point to its bitmap from the
1286             // block descriptor.
1287             for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1288                 bd->u.bitmap = bitmap;
1289                 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1290
1291                 // Also at this point we set the BF_MARKED flag
1292                 // for this block.  The invariant is that
1293                 // BF_MARKED is always unset, except during GC
1294                 // when it is set on those blocks which will be
1295                 // compacted.
1296                 if (!(bd->flags & BF_FRAGMENTED)) {
1297                     bd->flags |= BF_MARKED;
1298                 }
1299             }
1300         }
1301     }
1302
1303     // For each GC thread, for each step, allocate a "todo" block to
1304     // store evacuated objects to be scavenged, and a block to store
1305     // evacuated objects that do not need to be scavenged.
1306     for (t = 0; t < n_threads; t++) {
1307         ws = &gc_threads[t]->gens[g];
1308
1309         ws->todo_large_objects = NULL;
1310
1311         ws->part_list = NULL;
1312         ws->n_part_blocks = 0;
1313
1314         // allocate the first to-space block; extra blocks will be
1315         // chained on as necessary.
1316         ws->todo_bd = NULL;
1317         ASSERT(looksEmptyWSDeque(ws->todo_q));
1318         alloc_todo_block(ws,0);
1319
1320         ws->todo_overflow = NULL;
1321         ws->n_todo_overflow = 0;
1322
1323         ws->scavd_list = NULL;
1324         ws->n_scavd_blocks = 0;
1325     }
1326 }
1327
1328
1329 /* ----------------------------------------------------------------------------
1330    Initialise a generation that is *not* to be collected
1331    ------------------------------------------------------------------------- */
1332
1333 static void
1334 init_uncollected_gen (nat g, nat threads)
1335 {
1336     nat t, n;
1337     gen_workspace *ws;
1338     generation *gen;
1339     bdescr *bd;
1340
1341     // save the current mutable lists for this generation, and
1342     // allocate a fresh block for each one.  We'll traverse these
1343     // mutable lists as roots early on in the GC.
1344     generations[g].saved_mut_list = generations[g].mut_list;
1345     generations[g].mut_list = allocBlock();
1346     for (n = 0; n < n_capabilities; n++) {
1347         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
1348         capabilities[n].mut_lists[g] = allocBlock();
1349     }
1350
1351     gen = &generations[g];
1352
1353     gen->scavenged_large_objects = NULL;
1354     gen->n_scavenged_large_blocks = 0;
1355
1356     for (t = 0; t < threads; t++) {
1357         ws = &gc_threads[t]->gens[g];
1358
1359         ASSERT(looksEmptyWSDeque(ws->todo_q));
1360         ws->todo_large_objects = NULL;
1361
1362         ws->part_list = NULL;
1363         ws->n_part_blocks = 0;
1364
1365         ws->scavd_list = NULL;
1366         ws->n_scavd_blocks = 0;
1367
1368         // If the block at the head of the list in this generation
1369         // is less than 3/4 full, then use it as a todo block.
1370         if (gen->blocks && isPartiallyFull(gen->blocks))
1371         {
1372             ws->todo_bd = gen->blocks;
1373             ws->todo_free = ws->todo_bd->free;
1374             ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1375             gen->blocks = gen->blocks->link;
1376             gen->n_blocks -= 1;
1377             gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1378             ws->todo_bd->link = NULL;
1379             // we must scan from the current end point.
1380             ws->todo_bd->u.scan = ws->todo_bd->free;
1381         }
1382         else
1383         {
1384             ws->todo_bd = NULL;
1385             alloc_todo_block(ws,0);
1386         }
1387     }
1388
1389     // deal out any more partial blocks to the threads' part_lists
1390     t = 0;
1391     while (gen->blocks && isPartiallyFull(gen->blocks))
1392     {
1393         bd = gen->blocks;
1394         gen->blocks = bd->link;
1395         ws = &gc_threads[t]->gens[g];
1396         bd->link = ws->part_list;
1397         ws->part_list = bd;
1398         ws->n_part_blocks += 1;
1399         bd->u.scan = bd->free;
1400         gen->n_blocks -= 1;
1401         gen->n_words -= bd->free - bd->start;
1402         t++;
1403         if (t == n_gc_threads) t = 0;
1404     }
1405 }
1406
1407 /* -----------------------------------------------------------------------------
1408    Initialise a gc_thread before GC
1409    -------------------------------------------------------------------------- */
1410
1411 static void
1412 init_gc_thread (gc_thread *t)
1413 {
1414     t->static_objects = END_OF_STATIC_LIST;
1415     t->scavenged_static_objects = END_OF_STATIC_LIST;
1416     t->scan_bd = NULL;
1417     t->mut_lists = capabilities[t->thread_index].mut_lists;
1418     t->evac_gen = 0;
1419     t->failed_to_evac = rtsFalse;
1420     t->eager_promotion = rtsTrue;
1421     t->thunk_selector_depth = 0;
1422     t->copied = 0;
1423     t->scanned = 0;
1424     t->any_work = 0;
1425     t->no_work = 0;
1426     t->scav_find_work = 0;
1427 }
1428
1429 /* -----------------------------------------------------------------------------
1430    Function we pass to evacuate roots.
1431    -------------------------------------------------------------------------- */
1432
1433 static void
1434 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1435 {
1436     // we stole a register for gct, but this function is called from
1437     // *outside* the GC where the register variable is not in effect,
1438     // so we need to save and restore it here.  NB. only call
1439     // mark_root() from the main GC thread, otherwise gct will be
1440     // incorrect.
1441     gc_thread *saved_gct;
1442     saved_gct = gct;
1443     SET_GCT(user);
1444
1445     evacuate(root);
1446
1447     SET_GCT(saved_gct);
1448 }
1449
1450 /* -----------------------------------------------------------------------------
1451    Initialising the static object & mutable lists
1452    -------------------------------------------------------------------------- */
1453
1454 static void
1455 zero_static_object_list(StgClosure* first_static)
1456 {
1457   StgClosure* p;
1458   StgClosure* link;
1459   const StgInfoTable *info;
1460
1461   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1462     info = get_itbl(p);
1463     link = *STATIC_LINK(info, p);
1464     *STATIC_LINK(info,p) = NULL;
1465   }
1466 }
1467
1468 /* ----------------------------------------------------------------------------
1469    Reset the sizes of the older generations when we do a major
1470    collection.
1471
1472    CURRENT STRATEGY: make all generations except zero the same size.
1473    We have to stay within the maximum heap size, and leave a certain
1474    percentage of the maximum heap size available to allocate into.
1475    ------------------------------------------------------------------------- */
1476
1477 static void
1478 resize_generations (void)
1479 {
1480     nat g;
1481
1482     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1483         nat live, size, min_alloc, words;
1484         const nat max  = RtsFlags.GcFlags.maxHeapSize;
1485         const nat gens = RtsFlags.GcFlags.generations;
1486
1487         // live in the oldest generations
1488         if (oldest_gen->live_estimate != 0) {
1489             words = oldest_gen->live_estimate;
1490         } else {
1491             words = oldest_gen->n_words;
1492         }
1493         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1494             oldest_gen->n_large_blocks;
1495
1496         // default max size for all generations except zero
1497         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1498                        RtsFlags.GcFlags.minOldGenSize);
1499
1500         if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1501             RtsFlags.GcFlags.heapSizeSuggestion = size;
1502         }
1503
1504         // minimum size for generation zero
1505         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1506                             RtsFlags.GcFlags.minAllocAreaSize);
1507
1508         // Auto-enable compaction when the residency reaches a
1509         // certain percentage of the maximum heap size (default: 30%).
1510         if (RtsFlags.GcFlags.compact ||
1511             (max > 0 &&
1512              oldest_gen->n_blocks >
1513              (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1514             oldest_gen->mark = 1;
1515             oldest_gen->compact = 1;
1516 //        debugBelch("compaction: on\n", live);
1517         } else {
1518             oldest_gen->mark = 0;
1519             oldest_gen->compact = 0;
1520 //        debugBelch("compaction: off\n", live);
1521         }
1522
1523         if (RtsFlags.GcFlags.sweep) {
1524             oldest_gen->mark = 1;
1525         }
1526
1527         // if we're going to go over the maximum heap size, reduce the
1528         // size of the generations accordingly.  The calculation is
1529         // different if compaction is turned on, because we don't need
1530         // to double the space required to collect the old generation.
1531         if (max != 0) {
1532
1533             // this test is necessary to ensure that the calculations
1534             // below don't have any negative results - we're working
1535             // with unsigned values here.
1536             if (max < min_alloc) {
1537                 heapOverflow();
1538             }
1539
1540             if (oldest_gen->compact) {
1541                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1542                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1543                 }
1544             } else {
1545                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1546                     size = (max - min_alloc) / ((gens - 1) * 2);
1547                 }
1548             }
1549
1550             if (size < live) {
1551                 heapOverflow();
1552             }
1553         }
1554
1555 #if 0
1556         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1557                    min_alloc, size, max);
1558 #endif
1559
1560         for (g = 0; g < gens; g++) {
1561             generations[g].max_blocks = size;
1562         }
1563     }
1564 }
1565
1566 /* -----------------------------------------------------------------------------
1567    Calculate the new size of the nursery, and resize it.
1568    -------------------------------------------------------------------------- */
1569
1570 static void
1571 resize_nursery (void)
1572 {
1573     const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1574
1575     if (RtsFlags.GcFlags.generations == 1)
1576     {   // Two-space collector:
1577         nat blocks;
1578
1579         /* set up a new nursery.  Allocate a nursery size based on a
1580          * function of the amount of live data (by default a factor of 2)
1581          * Use the blocks from the old nursery if possible, freeing up any
1582          * left over blocks.
1583          *
1584          * If we get near the maximum heap size, then adjust our nursery
1585          * size accordingly.  If the nursery is the same size as the live
1586          * data (L), then we need 3L bytes.  We can reduce the size of the
1587          * nursery to bring the required memory down near 2L bytes.
1588          *
1589          * A normal 2-space collector would need 4L bytes to give the same
1590          * performance we get from 3L bytes, reducing to the same
1591          * performance at 2L bytes.
1592          */
1593         blocks = generations[0].n_blocks;
1594
1595         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1596              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1597              RtsFlags.GcFlags.maxHeapSize )
1598         {
1599             long adjusted_blocks;  // signed on purpose
1600             int pc_free;
1601
1602             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1603
1604             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1605                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1606
1607             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1608             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1609             {
1610                 heapOverflow();
1611             }
1612             blocks = adjusted_blocks;
1613         }
1614         else
1615         {
1616             blocks *= RtsFlags.GcFlags.oldGenFactor;
1617             if (blocks < min_nursery)
1618             {
1619                 blocks = min_nursery;
1620             }
1621         }
1622         resizeNurseries(blocks);
1623     }
1624     else  // Generational collector
1625     {
1626         /*
1627          * If the user has given us a suggested heap size, adjust our
1628          * allocation area to make best use of the memory available.
1629          */
1630         if (RtsFlags.GcFlags.heapSizeSuggestion)
1631         {
1632             long blocks;
1633             const nat needed = calcNeeded();    // approx blocks needed at next GC
1634
1635             /* Guess how much will be live in generation 0 step 0 next time.
1636              * A good approximation is obtained by finding the
1637              * percentage of g0 that was live at the last minor GC.
1638              *
1639              * We have an accurate figure for the amount of copied data in
1640              * 'copied', but we must convert this to a number of blocks, with
1641              * a small adjustment for estimated slop at the end of a block
1642              * (- 10 words).
1643              */
1644             if (N == 0)
1645             {
1646                 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1647                     / countNurseryBlocks();
1648             }
1649
1650             /* Estimate a size for the allocation area based on the
1651              * information available.  We might end up going slightly under
1652              * or over the suggested heap size, but we should be pretty
1653              * close on average.
1654              *
1655              * Formula:            suggested - needed
1656              *                ----------------------------
1657              *                    1 + g0_pcnt_kept/100
1658              *
1659              * where 'needed' is the amount of memory needed at the next
1660              * collection for collecting all gens except g0.
1661              */
1662             blocks =
1663                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1664                 (100 + (long)g0_pcnt_kept);
1665
1666             if (blocks < (long)min_nursery) {
1667                 blocks = min_nursery;
1668             }
1669
1670             resizeNurseries((nat)blocks);
1671         }
1672         else
1673         {
1674             // we might have added extra large blocks to the nursery, so
1675             // resize back to minAllocAreaSize again.
1676             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1677         }
1678     }
1679 }
1680
1681 /* -----------------------------------------------------------------------------
1682    Sanity code for CAF garbage collection.
1683
1684    With DEBUG turned on, we manage a CAF list in addition to the SRT
1685    mechanism.  After GC, we run down the CAF list and blackhole any
1686    CAFs which have been garbage collected.  This means we get an error
1687    whenever the program tries to enter a garbage collected CAF.
1688
1689    Any garbage collected CAFs are taken off the CAF list at the same
1690    time.
1691    -------------------------------------------------------------------------- */
1692
1693 #if 0 && defined(DEBUG)
1694
1695 static void
1696 gcCAFs(void)
1697 {
1698   StgClosure*  p;
1699   StgClosure** pp;
1700   const StgInfoTable *info;
1701   nat i;
1702
1703   i = 0;
1704   p = caf_list;
1705   pp = &caf_list;
1706
1707   while (p != NULL) {
1708
1709     info = get_itbl(p);
1710
1711     ASSERT(info->type == IND_STATIC);
1712
1713     if (STATIC_LINK(info,p) == NULL) {
1714         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1715         // black hole it
1716         SET_INFO(p,&stg_BLACKHOLE_info);
1717         p = STATIC_LINK2(info,p);
1718         *pp = p;
1719     }
1720     else {
1721       pp = &STATIC_LINK2(info,p);
1722       p = *pp;
1723       i++;
1724     }
1725
1726   }
1727
1728   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1729 }
1730 #endif