rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "Compact.h"
  44 #include "Evac.h"
  45 #include "Scav.h"
  46 #include "GCUtils.h"
  47 #include "MarkStack.h"
  48 #include "MarkWeak.h"
  49 #include "Sparks.h"
  50 #include "Sweep.h"
  51
  52 #include <string.h> // for memset()
  53 #include <unistd.h>
  54
  55 /* -----------------------------------------------------------------------------
  56    Global variables
  57    -------------------------------------------------------------------------- */
  58
  59 /* STATIC OBJECT LIST.
  60  *
  61  * During GC:
  62  * We maintain a linked list of static objects that are still live.
  63  * The requirements for this list are:
  64  *
  65  *  - we need to scan the list while adding to it, in order to
  66  *    scavenge all the static objects (in the same way that
  67  *    breadth-first scavenging works for dynamic objects).
  68  *
  69  *  - we need to be able to tell whether an object is already on
  70  *    the list, to break loops.
  71  *
  72  * Each static object has a "static link field", which we use for
  73  * linking objects on to the list.  We use a stack-type list, consing
  74  * objects on the front as they are added (this means that the
  75  * scavenge phase is depth-first, not breadth-first, but that
  76  * shouldn't matter).
  77  *
  78  * A separate list is kept for objects that have been scavenged
  79  * already - this is so that we can zero all the marks afterwards.
  80  *
  81  * An object is on the list if its static link field is non-zero; this
  82  * means that we have to mark the end of the list with '1', not NULL.
  83  *
  84  * Extra notes for generational GC:
  85  *
  86  * Each generation has a static object list associated with it.  When
  87  * collecting generations up to N, we treat the static object lists
  88  * from generations > N as roots.
  89  *
  90  * We build up a static object list while collecting generations 0..N,
  91  * which is then appended to the static object list of generation N+1.
  92  */
  93
  94 /* N is the oldest generation being collected, where the generations
  95  * are numbered starting at 0.  A major GC (indicated by the major_gc
  96  * flag) is when we're collecting all generations.  We only attempt to
  97  * deal with static objects and GC CAFs when doing a major GC.
  98  */
  99 nat N;
 100 rtsBool major_gc;
 101
 102 /* Data used for allocation area sizing.
 103  */
 104 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
 105
 106 /* Mut-list stats */
 107 #ifdef DEBUG
 108 nat mutlist_MUTVARS,
 109     mutlist_MUTARRS,
 110     mutlist_MVARS,
 111     mutlist_OTHERS;
 112 #endif
 113
 114 /* Thread-local data for each GC thread
 115  */
 116 gc_thread **gc_threads = NULL;
 117
 118 #if !defined(THREADED_RTS)
 119 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
 120 #endif
 121
 122 // Number of threads running in *this* GC.  Affects how many
 123 // step->todos[] lists we have to look in to find work.
 124 nat n_gc_threads;
 125
 126 // For stats:
 127 long copied;        // *words* copied & scavenged during this GC
 128
 129 rtsBool work_stealing;
 130
 131 DECLARE_GCT
 132
 133 /* -----------------------------------------------------------------------------
 134    Static function declarations
 135    -------------------------------------------------------------------------- */
 136
 137 static void mark_root               (void *user, StgClosure **root);
 138 static void zero_static_object_list (StgClosure* first_static);
 139 static nat  initialise_N            (rtsBool force_major_gc);
 140 static void init_collected_gen      (nat g, nat threads);
 141 static void init_uncollected_gen    (nat g, nat threads);
 142 static void init_gc_thread          (gc_thread *t);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void scavenge_until_all_done (void);
 147 static StgWord inc_running          (void);
 148 static StgWord dec_running          (void);
 149 static void wakeup_gc_threads       (nat n_threads, nat me);
 150 static void shutdown_gc_threads     (nat n_threads, nat me);
 151
 152 #if 0 && defined(DEBUG)
 153 static void gcCAFs                  (void);
 154 #endif
 155
 156 /* -----------------------------------------------------------------------------
 157    The mark stack.
 158    -------------------------------------------------------------------------- */
 159
 160 bdescr *mark_stack_top_bd; // topmost block in the mark stack
 161 bdescr *mark_stack_bd;     // current block in the mark stack
 162 StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
 163
 164 /* -----------------------------------------------------------------------------
 165    GarbageCollect: the main entry point to the garbage collector.
 166
 167    Locks held: all capabilities are held throughout GarbageCollect().
 168    -------------------------------------------------------------------------- */
 169
 170 void
 171 GarbageCollect (rtsBool force_major_gc,
 172                 nat gc_type USED_IF_THREADS,
 173                 Capability *cap)
 174 {
 175   bdescr *bd;
 176   generation *gen;
 177   lnat live, allocated, max_copied, avg_copied, slop;
 178   gc_thread *saved_gct;
 179   nat g, t, n;
 180
 181   // necessary if we stole a callee-saves register for gct:
 182   saved_gct = gct;
 183
 184 #ifdef PROFILING
 185   CostCentreStack *prev_CCS;
 186 #endif
 187
 188   ACQUIRE_SM_LOCK;
 189
 190 #if defined(RTS_USER_SIGNALS)
 191   if (RtsFlags.MiscFlags.install_signal_handlers) {
 192     // block signals
 193     blockUserSignals();
 194   }
 195 #endif
 196
 197   ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
 198   // otherwise adjust the padding in gen_workspace.
 199
 200   // tell the stats department that we've started a GC
 201   stat_startGC();
 202
 203   // tell the STM to discard any cached closures it's hoping to re-use
 204   stmPreGCHook();
 205
 206   // lock the StablePtr table
 207   stablePtrPreGC();
 208
 209 #ifdef DEBUG
 210   mutlist_MUTVARS = 0;
 211   mutlist_MUTARRS = 0;
 212   mutlist_OTHERS = 0;
 213 #endif
 214
 215   // attribute any costs to CCS_GC
 216 #ifdef PROFILING
 217   prev_CCS = CCCS;
 218   CCCS = CCS_GC;
 219 #endif
 220
 221   /* Approximate how much we allocated.
 222    * Todo: only when generating stats?
 223    */
 224   allocated = calcAllocated();
 225
 226   /* Figure out which generation to collect
 227    */
 228   n = initialise_N(force_major_gc);
 229
 230 #if defined(THREADED_RTS)
 231   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
 232                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
 233       // It's not always a good idea to do load balancing in parallel
 234       // GC.  In particular, for a parallel program we don't want to
 235       // lose locality by moving cached data into another CPU's cache
 236       // (this effect can be quite significant).
 237       //
 238       // We could have a more complex way to deterimine whether to do
 239       // work stealing or not, e.g. it might be a good idea to do it
 240       // if the heap is big.  For now, we just turn it on or off with
 241       // a flag.
 242 #endif
 243
 244   /* Start threads, so they can be spinning up while we finish initialisation.
 245    */
 246   start_gc_threads();
 247
 248 #if defined(THREADED_RTS)
 249   /* How many threads will be participating in this GC?
 250    * We don't try to parallelise minor GCs (unless the user asks for
 251    * it with +RTS -gn0), or mark/compact/sweep GC.
 252    */
 253   if (gc_type == PENDING_GC_PAR) {
 254       n_gc_threads = RtsFlags.ParFlags.nNodes;
 255   } else {
 256       n_gc_threads = 1;
 257   }
 258 #else
 259   n_gc_threads = 1;
 260 #endif
 261
 262   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 263         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 264
 265 #ifdef RTS_GTK_FRONTPANEL
 266   if (RtsFlags.GcFlags.frontpanel) {
 267       updateFrontPanelBeforeGC(N);
 268   }
 269 #endif
 270
 271 #ifdef DEBUG
 272   // check for memory leaks if DEBUG is on
 273   memInventory(DEBUG_gc);
 274 #endif
 275
 276   // check sanity *before* GC
 277   IF_DEBUG(sanity, checkSanity(rtsTrue));
 278
 279   // Initialise all our gc_thread structures
 280   for (t = 0; t < n_gc_threads; t++) {
 281       init_gc_thread(gc_threads[t]);
 282   }
 283
 284   // Initialise all the generations/steps that we're collecting.
 285   for (g = 0; g <= N; g++) {
 286       init_collected_gen(g,n_gc_threads);
 287   }
 288
 289   // Initialise all the generations/steps that we're *not* collecting.
 290   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 291       init_uncollected_gen(g,n_gc_threads);
 292   }
 293
 294   /* Allocate a mark stack if we're doing a major collection.
 295    */
 296   if (major_gc && oldest_gen->mark) {
 297       mark_stack_bd     = allocBlock();
 298       mark_stack_top_bd = mark_stack_bd;
 299       mark_stack_bd->link = NULL;
 300       mark_stack_bd->u.back = NULL;
 301       mark_sp           = mark_stack_bd->start;
 302   } else {
 303       mark_stack_bd     = NULL;
 304       mark_stack_top_bd = NULL;
 305       mark_sp           = NULL;
 306   }
 307
 308   // this is the main thread
 309 #ifdef THREADED_RTS
 310   if (n_gc_threads == 1) {
 311       SET_GCT(gc_threads[0]);
 312   } else {
 313       SET_GCT(gc_threads[cap->no]);
 314   }
 315 #else
 316 SET_GCT(gc_threads[0]);
 317 #endif
 318
 319   /* -----------------------------------------------------------------------
 320    * follow all the roots that we know about:
 321    */
 322
 323   // the main thread is running: this prevents any other threads from
 324   // exiting prematurely, so we can start them now.
 325   // NB. do this after the mutable lists have been saved above, otherwise
 326   // the other GC threads will be writing into the old mutable lists.
 327   inc_running();
 328   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 329
 330   // Mutable lists from each generation > N
 331   // we want to *scavenge* these roots, not evacuate them: they're not
 332   // going to move in this GC.
 333   // Also do them in reverse generation order, for the usual reason:
 334   // namely to reduce the likelihood of spurious old->new pointers.
 335   //
 336   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 337 #if defined(THREADED_RTS)
 338       if (n_gc_threads > 1) {
 339           scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 340       } else {
 341           scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]);
 342       }
 343 #else
 344       scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 345 #endif
 346       freeChain_sync(generations[g].saved_mut_list);
 347       generations[g].saved_mut_list = NULL;
 348
 349   }
 350
 351   // scavenge the capability-private mutable lists.  This isn't part
 352   // of markSomeCapabilities() because markSomeCapabilities() can only
 353   // call back into the GC via mark_root() (due to the gct register
 354   // variable).
 355   if (n_gc_threads == 1) {
 356       for (n = 0; n < n_capabilities; n++) {
 357 #if defined(THREADED_RTS)
 358           scavenge_capability_mut_Lists1(&capabilities[n]);
 359 #else
 360           scavenge_capability_mut_lists(&capabilities[n]);
 361 #endif
 362       }
 363   } else {
 364       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 365   }
 366
 367   // follow roots from the CAF list (used by GHCi)
 368   gct->evac_gen = 0;
 369   markCAFs(mark_root, gct);
 370
 371   // follow all the roots that the application knows about.
 372   gct->evac_gen = 0;
 373   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 374                        rtsTrue/*prune sparks*/);
 375
 376 #if defined(RTS_USER_SIGNALS)
 377   // mark the signal handlers (signals should be already blocked)
 378   markSignalHandlers(mark_root, gct);
 379 #endif
 380
 381   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 382   markWeakPtrList();
 383   initWeakForGC();
 384
 385   // Mark the stable pointer table.
 386   markStablePtrTable(mark_root, gct);
 387
 388   /* -------------------------------------------------------------------------
 389    * Repeatedly scavenge all the areas we know about until there's no
 390    * more scavenging to be done.
 391    */
 392   for (;;)
 393   {
 394       scavenge_until_all_done();
 395       // The other threads are now stopped.  We might recurse back to
 396       // here, but from now on this is the only thread.
 397
 398       // must be last...  invariant is that everything is fully
 399       // scavenged at this point.
 400       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 401           inc_running();
 402           continue;
 403       }
 404
 405       // If we get to here, there's really nothing left to do.
 406       break;
 407   }
 408
 409   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 410
 411   // Now see which stable names are still alive.
 412   gcStablePtrTable();
 413
 414 #ifdef THREADED_RTS
 415   if (n_gc_threads == 1) {
 416       for (n = 0; n < n_capabilities; n++) {
 417           pruneSparkQueue(&capabilities[n]);
 418       }
 419   } else {
 420       pruneSparkQueue(&capabilities[gct->thread_index]);
 421   }
 422 #endif
 423
 424 #ifdef PROFILING
 425   // We call processHeapClosureForDead() on every closure destroyed during
 426   // the current garbage collection, so we invoke LdvCensusForDead().
 427   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 428       || RtsFlags.ProfFlags.bioSelector != NULL)
 429     LdvCensusForDead(N);
 430 #endif
 431
 432   // NO MORE EVACUATION AFTER THIS POINT!
 433
 434   // Two-space collector: free the old to-space.
 435   // g0->old_blocks is the old nursery
 436   // g0->blocks is to-space from the previous GC
 437   if (RtsFlags.GcFlags.generations == 1) {
 438       if (g0->blocks != NULL) {
 439           freeChain(g0->blocks);
 440           g0->blocks = NULL;
 441       }
 442   }
 443
 444   // For each workspace, in each thread, move the copied blocks to the step
 445   {
 446       gc_thread *thr;
 447       gen_workspace *ws;
 448       bdescr *prev, *next;
 449
 450       for (t = 0; t < n_gc_threads; t++) {
 451           thr = gc_threads[t];
 452
 453           for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 454               ws = &thr->gens[g];
 455
 456               // Push the final block
 457               if (ws->todo_bd) {
 458                   push_scanned_block(ws->todo_bd, ws);
 459               }
 460
 461               ASSERT(gct->scan_bd == NULL);
 462               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 463
 464               prev = NULL;
 465               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 466                   ws->gen->n_words += bd->free - bd->start;
 467                   prev = bd;
 468               }
 469               if (prev != NULL) {
 470                   prev->link = ws->gen->blocks;
 471                   ws->gen->blocks = ws->scavd_list;
 472               }
 473               ws->gen->n_blocks += ws->n_scavd_blocks;
 474           }
 475       }
 476
 477       // Add all the partial blocks *after* we've added all the full
 478       // blocks.  This is so that we can grab the partial blocks back
 479       // again and try to fill them up in the next GC.
 480       for (t = 0; t < n_gc_threads; t++) {
 481           thr = gc_threads[t];
 482
 483           for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 484               ws = &thr->gens[g];
 485
 486               prev = NULL;
 487               for (bd = ws->part_list; bd != NULL; bd = next) {
 488                   next = bd->link;
 489                   if (bd->free == bd->start) {
 490                       if (prev == NULL) {
 491                           ws->part_list = next;
 492                       } else {
 493                           prev->link = next;
 494                       }
 495                       freeGroup(bd);
 496                       ws->n_part_blocks--;
 497                   } else {
 498                       ws->gen->n_words += bd->free - bd->start;
 499                       prev = bd;
 500                   }
 501               }
 502               if (prev != NULL) {
 503                   prev->link = ws->gen->blocks;
 504                   ws->gen->blocks = ws->part_list;
 505               }
 506               ws->gen->n_blocks += ws->n_part_blocks;
 507
 508               ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks);
 509               ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words);
 510           }
 511       }
 512   }
 513
 514   // Finally: compact or sweep the oldest generation.
 515   if (major_gc && oldest_gen->mark) {
 516       if (oldest_gen->compact)
 517           compact(gct->scavenged_static_objects);
 518       else
 519           sweep(oldest_gen);
 520   }
 521
 522   /* run through all the generations/steps and tidy up
 523    */
 524   copied = 0;
 525   max_copied = 0;
 526   avg_copied = 0;
 527   {
 528       nat i;
 529       for (i=0; i < n_gc_threads; i++) {
 530           if (n_gc_threads > 1) {
 531               debugTrace(DEBUG_gc,"thread %d:", i);
 532               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 533               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 534               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 535               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 536               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 537           }
 538           copied += gc_threads[i]->copied;
 539           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 540       }
 541       if (n_gc_threads == 1) {
 542           max_copied = 0;
 543           avg_copied = 0;
 544       } else {
 545           avg_copied = copied;
 546       }
 547   }
 548
 549   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 550
 551     if (g == N) {
 552       generations[g].collections++; // for stats
 553       if (n_gc_threads > 1) generations[g].par_collections++;
 554     }
 555
 556     // Count the mutable list as bytes "copied" for the purposes of
 557     // stats.  Every mutable list is copied during every GC.
 558     if (g > 0) {
 559         nat mut_list_size = 0;
 560         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 561             mut_list_size += bd->free - bd->start;
 562         }
 563         for (n = 0; n < n_capabilities; n++) {
 564             for (bd = capabilities[n].mut_lists[g];
 565                  bd != NULL; bd = bd->link) {
 566                 mut_list_size += bd->free - bd->start;
 567             }
 568         }
 569         copied +=  mut_list_size;
 570
 571         debugTrace(DEBUG_gc,
 572                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 573                    (unsigned long)(mut_list_size * sizeof(W_)),
 574                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 575     }
 576
 577     bdescr *next, *prev;
 578     gen = &generations[g];
 579
 580     // for generations we collected...
 581     if (g <= N) {
 582
 583         /* free old memory and shift to-space into from-space for all
 584          * the collected steps (except the allocation area).  These
 585          * freed blocks will probaby be quickly recycled.
 586          */
 587         if (gen->mark)
 588         {
 589             // tack the new blocks on the end of the existing blocks
 590             if (gen->old_blocks != NULL) {
 591
 592                 prev = NULL;
 593                 for (bd = gen->old_blocks; bd != NULL; bd = next) {
 594
 595                     next = bd->link;
 596
 597                     if (!(bd->flags & BF_MARKED))
 598                     {
 599                         if (prev == NULL) {
 600                             gen->old_blocks = next;
 601                         } else {
 602                             prev->link = next;
 603                         }
 604                         freeGroup(bd);
 605                         gen->n_old_blocks--;
 606                     }
 607                     else
 608                     {
 609                         gen->n_words += bd->free - bd->start;
 610
 611                         // NB. this step might not be compacted next
 612                         // time, so reset the BF_MARKED flags.
 613                         // They are set before GC if we're going to
 614                         // compact.  (search for BF_MARKED above).
 615                         bd->flags &= ~BF_MARKED;
 616
 617                         // between GCs, all blocks in the heap except
 618                         // for the nursery have the BF_EVACUATED flag set.
 619                         bd->flags |= BF_EVACUATED;
 620
 621                         prev = bd;
 622                     }
 623                 }
 624
 625                 if (prev != NULL) {
 626                     prev->link = gen->blocks;
 627                     gen->blocks = gen->old_blocks;
 628                 }
 629             }
 630             // add the new blocks to the block tally
 631             gen->n_blocks += gen->n_old_blocks;
 632             ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
 633             ASSERT(countOccupied(gen->blocks) == gen->n_words);
 634         }
 635         else // not copacted
 636         {
 637             freeChain(gen->old_blocks);
 638         }
 639
 640         gen->old_blocks = NULL;
 641         gen->n_old_blocks = 0;
 642
 643         /* LARGE OBJECTS.  The current live large objects are chained on
 644          * scavenged_large, having been moved during garbage
 645          * collection from large_objects.  Any objects left on the
 646          * large_objects list are therefore dead, so we free them here.
 647          */
 648         freeChain(gen->large_objects);
 649         gen->large_objects  = gen->scavenged_large_objects;
 650         gen->n_large_blocks = gen->n_scavenged_large_blocks;
 651         gen->n_new_large_blocks = 0;
 652         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 653     }
 654     else // for generations > N
 655     {
 656         /* For older generations, we need to append the
 657          * scavenged_large_object list (i.e. large objects that have been
 658          * promoted during this GC) to the large_object list for that step.
 659          */
 660         for (bd = gen->scavenged_large_objects; bd; bd = next) {
 661             next = bd->link;
 662             dbl_link_onto(bd, &gen->large_objects);
 663         }
 664
 665         // add the new blocks we promoted during this GC
 666         gen->n_large_blocks += gen->n_scavenged_large_blocks;
 667         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 668     }
 669   } // for all generations
 670
 671   // update the max size of older generations after a major GC
 672   resize_generations();
 673
 674   // Calculate the amount of live data for stats.
 675   live = calcLiveWords();
 676
 677   // Free the small objects allocated via allocate(), since this will
 678   // all have been copied into G0S1 now.
 679   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 680
 681   // Start a new pinned_object_block
 682   for (n = 0; n < n_capabilities; n++) {
 683       capabilities[n].pinned_object_block = NULL;
 684   }
 685
 686   // Free the mark stack.
 687   if (mark_stack_top_bd != NULL) {
 688       debugTrace(DEBUG_gc, "mark stack: %d blocks",
 689                  countBlocks(mark_stack_top_bd));
 690       freeChain(mark_stack_top_bd);
 691   }
 692
 693   // Free any bitmaps.
 694   for (g = 0; g <= N; g++) {
 695       gen = &generations[g];
 696       if (gen->bitmap != NULL) {
 697           freeGroup(gen->bitmap);
 698           gen->bitmap = NULL;
 699       }
 700   }
 701
 702   resize_nursery();
 703
 704  // mark the garbage collected CAFs as dead
 705 #if 0 && defined(DEBUG) // doesn't work at the moment
 706   if (major_gc) { gcCAFs(); }
 707 #endif
 708
 709 #ifdef PROFILING
 710   // resetStaticObjectForRetainerProfiling() must be called before
 711   // zeroing below.
 712   if (n_gc_threads > 1) {
 713       barf("profiling is currently broken with multi-threaded GC");
 714       // ToDo: fix the gct->scavenged_static_objects below
 715   }
 716   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 717 #endif
 718
 719   // zero the scavenged static object list
 720   if (major_gc) {
 721       nat i;
 722       for (i = 0; i < n_gc_threads; i++) {
 723           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 724       }
 725   }
 726
 727   // Reset the nursery
 728   resetNurseries();
 729
 730   // send exceptions to any threads which were about to die
 731   RELEASE_SM_LOCK;
 732   resurrectThreads(resurrected_threads);
 733   ACQUIRE_SM_LOCK;
 734
 735   // Update the stable pointer hash table.
 736   updateStablePtrTable(major_gc);
 737
 738   // unlock the StablePtr table.  Must be before scheduleFinalizers(),
 739   // because a finalizer may call hs_free_fun_ptr() or
 740   // hs_free_stable_ptr(), both of which access the StablePtr table.
 741   stablePtrPostGC();
 742
 743   // Start any pending finalizers.  Must be after
 744   // updateStablePtrTable() and stablePtrPostGC() (see #4221).
 745   RELEASE_SM_LOCK;
 746   scheduleFinalizers(cap, old_weak_ptr_list);
 747   ACQUIRE_SM_LOCK;
 748
 749   if (major_gc) {
 750       nat need, got;
 751       need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
 752       got = mblocks_allocated;
 753       /* If the amount of data remains constant, next major GC we'll
 754          require (F+1)*need. We leave (F+2)*need in order to reduce
 755          repeated deallocation and reallocation. */
 756       need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
 757       if (got > need) {
 758           returnMemoryToOS(got - need);
 759       }
 760   }
 761
 762   // check sanity after GC
 763   IF_DEBUG(sanity, checkSanity(rtsTrue));
 764
 765   // extra GC trace info
 766   IF_DEBUG(gc, statDescribeGens());
 767
 768 #ifdef DEBUG
 769   // symbol-table based profiling
 770   /*  heapCensus(to_blocks); */ /* ToDo */
 771 #endif
 772
 773   // restore enclosing cost centre
 774 #ifdef PROFILING
 775   CCCS = prev_CCS;
 776 #endif
 777
 778 #ifdef DEBUG
 779   // check for memory leaks if DEBUG is on
 780   memInventory(DEBUG_gc);
 781 #endif
 782
 783 #ifdef RTS_GTK_FRONTPANEL
 784   if (RtsFlags.GcFlags.frontpanel) {
 785       updateFrontPanelAfterGC( N, live );
 786   }
 787 #endif
 788
 789   // ok, GC over: tell the stats department what happened.
 790   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 791   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 792
 793   // Guess which generation we'll collect *next* time
 794   initialise_N(force_major_gc);
 795
 796 #if defined(RTS_USER_SIGNALS)
 797   if (RtsFlags.MiscFlags.install_signal_handlers) {
 798     // unblock signals again
 799     unblockUserSignals();
 800   }
 801 #endif
 802
 803   RELEASE_SM_LOCK;
 804
 805   SET_GCT(saved_gct);
 806 }
 807
 808 /* -----------------------------------------------------------------------------
 809    Figure out which generation to collect, initialise N and major_gc.
 810
 811    Also returns the total number of blocks in generations that will be
 812    collected.
 813    -------------------------------------------------------------------------- */
 814
 815 static nat
 816 initialise_N (rtsBool force_major_gc)
 817 {
 818     int g;
 819     nat blocks, blocks_total;
 820
 821     blocks = 0;
 822     blocks_total = 0;
 823
 824     if (force_major_gc) {
 825         N = RtsFlags.GcFlags.generations - 1;
 826     } else {
 827         N = 0;
 828     }
 829
 830     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 831
 832         blocks = generations[g].n_words / BLOCK_SIZE_W
 833                + generations[g].n_large_blocks;
 834
 835         if (blocks >= generations[g].max_blocks) {
 836             N = stg_max(N,g);
 837         }
 838         if ((nat)g <= N) {
 839             blocks_total += blocks;
 840         }
 841     }
 842
 843     blocks_total += countNurseryBlocks();
 844
 845     major_gc = (N == RtsFlags.GcFlags.generations-1);
 846     return blocks_total;
 847 }
 848
 849 /* -----------------------------------------------------------------------------
 850    Initialise the gc_thread structures.
 851    -------------------------------------------------------------------------- */
 852
 853 #define GC_THREAD_INACTIVE             0
 854 #define GC_THREAD_STANDING_BY          1
 855 #define GC_THREAD_RUNNING              2
 856 #define GC_THREAD_WAITING_TO_CONTINUE  3
 857
 858 static void
 859 new_gc_thread (nat n, gc_thread *t)
 860 {
 861     nat g;
 862     gen_workspace *ws;
 863
 864 #ifdef THREADED_RTS
 865     t->id = 0;
 866     initSpinLock(&t->gc_spin);
 867     initSpinLock(&t->mut_spin);
 868     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 869     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 870                           // thread to start up, see wakeup_gc_threads
 871 #endif
 872
 873     t->thread_index = n;
 874     t->free_blocks = NULL;
 875     t->gc_count = 0;
 876
 877     init_gc_thread(t);
 878
 879 #ifdef USE_PAPI
 880     t->papi_events = -1;
 881 #endif
 882
 883     for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 884     {
 885         ws = &t->gens[g];
 886         ws->gen = &generations[g];
 887         ASSERT(g == ws->gen->no);
 888         ws->my_gct = t;
 889
 890         ws->todo_bd = NULL;
 891         ws->todo_q = newWSDeque(128);
 892         ws->todo_overflow = NULL;
 893         ws->n_todo_overflow = 0;
 894
 895         ws->part_list = NULL;
 896         ws->n_part_blocks = 0;
 897
 898         ws->scavd_list = NULL;
 899         ws->n_scavd_blocks = 0;
 900     }
 901 }
 902
 903
 904 void
 905 initGcThreads (void)
 906 {
 907     if (gc_threads == NULL) {
 908 #if defined(THREADED_RTS)
 909         nat i;
 910         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 911                                      sizeof(gc_thread*),
 912                                      "alloc_gc_threads");
 913
 914         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 915             gc_threads[i] =
 916                 stgMallocBytes(sizeof(gc_thread) +
 917                                RtsFlags.GcFlags.generations * sizeof(gen_workspace),
 918                                "alloc_gc_threads");
 919
 920             new_gc_thread(i, gc_threads[i]);
 921         }
 922 #else
 923         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 924         gc_threads[0] = gct;
 925         new_gc_thread(0,gc_threads[0]);
 926 #endif
 927     }
 928 }
 929
 930 void
 931 freeGcThreads (void)
 932 {
 933     nat g;
 934     if (gc_threads != NULL) {
 935 #if defined(THREADED_RTS)
 936         nat i;
 937         for (i = 0; i < n_capabilities; i++) {
 938             for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 939             {
 940                 freeWSDeque(gc_threads[i]->gens[g].todo_q);
 941             }
 942             stgFree (gc_threads[i]);
 943         }
 944         stgFree (gc_threads);
 945 #else
 946         for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 947         {
 948             freeWSDeque(gc_threads[0]->gens[g].todo_q);
 949         }
 950         stgFree (gc_threads);
 951 #endif
 952         gc_threads = NULL;
 953     }
 954 }
 955
 956 /* ----------------------------------------------------------------------------
 957    Start GC threads
 958    ------------------------------------------------------------------------- */
 959
 960 static volatile StgWord gc_running_threads;
 961
 962 static StgWord
 963 inc_running (void)
 964 {
 965     StgWord new;
 966     new = atomic_inc(&gc_running_threads);
 967     ASSERT(new <= n_gc_threads);
 968     return new;
 969 }
 970
 971 static StgWord
 972 dec_running (void)
 973 {
 974     ASSERT(gc_running_threads != 0);
 975     return atomic_dec(&gc_running_threads);
 976 }
 977
 978 static rtsBool
 979 any_work (void)
 980 {
 981     int g;
 982     gen_workspace *ws;
 983
 984     gct->any_work++;
 985
 986     write_barrier();
 987
 988     // scavenge objects in compacted generation
 989     if (mark_stack_bd != NULL && !mark_stack_empty()) {
 990         return rtsTrue;
 991     }
 992
 993     // Check for global work in any step.  We don't need to check for
 994     // local work, because we have already exited scavenge_loop(),
 995     // which means there is no local work for this thread.
 996     for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
 997         ws = &gct->gens[g];
 998         if (ws->todo_large_objects) return rtsTrue;
 999         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1000         if (ws->todo_overflow) return rtsTrue;
1001     }
1002
1003 #if defined(THREADED_RTS)
1004     if (work_stealing) {
1005         nat n;
1006         // look for work to steal
1007         for (n = 0; n < n_gc_threads; n++) {
1008             if (n == gct->thread_index) continue;
1009             for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
1010                 ws = &gc_threads[n]->gens[g];
1011                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1012             }
1013         }
1014     }
1015 #endif
1016
1017     gct->no_work++;
1018 #if defined(THREADED_RTS)
1019     yieldThread();
1020 #endif
1021
1022     return rtsFalse;
1023 }
1024
1025 static void
1026 scavenge_until_all_done (void)
1027 {
1028     nat r;
1029
1030
1031 loop:
1032     traceEventGcWork(&capabilities[gct->thread_index]);
1033
1034 #if defined(THREADED_RTS)
1035     if (n_gc_threads > 1) {
1036         scavenge_loop();
1037     } else {
1038         scavenge_loop1();
1039     }
1040 #else
1041     scavenge_loop();
1042 #endif
1043
1044     // scavenge_loop() only exits when there's no work to do
1045     r = dec_running();
1046
1047     traceEventGcIdle(&capabilities[gct->thread_index]);
1048
1049     debugTrace(DEBUG_gc, "%d GC threads still running", r);
1050
1051     while (gc_running_threads != 0) {
1052         // usleep(1);
1053         if (any_work()) {
1054             inc_running();
1055             goto loop;
1056         }
1057         // any_work() does not remove the work from the queue, it
1058         // just checks for the presence of work.  If we find any,
1059         // then we increment gc_running_threads and go back to
1060         // scavenge_loop() to perform any pending work.
1061     }
1062
1063     traceEventGcDone(&capabilities[gct->thread_index]);
1064 }
1065
1066 #if defined(THREADED_RTS)
1067
1068 void
1069 gcWorkerThread (Capability *cap)
1070 {
1071     gc_thread *saved_gct;
1072
1073     // necessary if we stole a callee-saves register for gct:
1074     saved_gct = gct;
1075
1076     gct = gc_threads[cap->no];
1077     gct->id = osThreadId();
1078
1079     // Wait until we're told to wake up
1080     RELEASE_SPIN_LOCK(&gct->mut_spin);
1081     gct->wakeup = GC_THREAD_STANDING_BY;
1082     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1083     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1084
1085 #ifdef USE_PAPI
1086     // start performance counters in this thread...
1087     if (gct->papi_events == -1) {
1088         papi_init_eventset(&gct->papi_events);
1089     }
1090     papi_thread_start_gc1_count(gct->papi_events);
1091 #endif
1092
1093     // Every thread evacuates some roots.
1094     gct->evac_gen = 0;
1095     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1096                          rtsTrue/*prune sparks*/);
1097     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1098
1099     scavenge_until_all_done();
1100
1101 #ifdef THREADED_RTS
1102     // Now that the whole heap is marked, we discard any sparks that
1103     // were found to be unreachable.  The main GC thread is currently
1104     // marking heap reachable via weak pointers, so it is
1105     // non-deterministic whether a spark will be retained if it is
1106     // only reachable via weak pointers.  To fix this problem would
1107     // require another GC barrier, which is too high a price.
1108     pruneSparkQueue(cap);
1109 #endif
1110
1111 #ifdef USE_PAPI
1112     // count events in this thread towards the GC totals
1113     papi_thread_stop_gc1_count(gct->papi_events);
1114 #endif
1115
1116     // Wait until we're told to continue
1117     RELEASE_SPIN_LOCK(&gct->gc_spin);
1118     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1119     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1120                gct->thread_index);
1121     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1122     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1123
1124     SET_GCT(saved_gct);
1125 }
1126
1127 #endif
1128
1129 #if defined(THREADED_RTS)
1130
1131 void
1132 waitForGcThreads (Capability *cap USED_IF_THREADS)
1133 {
1134     const nat n_threads = RtsFlags.ParFlags.nNodes;
1135     const nat me = cap->no;
1136     nat i, j;
1137     rtsBool retry = rtsTrue;
1138
1139     while(retry) {
1140         for (i=0; i < n_threads; i++) {
1141             if (i == me) continue;
1142             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1143                 prodCapability(&capabilities[i], cap->running_task);
1144             }
1145         }
1146         for (j=0; j < 10; j++) {
1147             retry = rtsFalse;
1148             for (i=0; i < n_threads; i++) {
1149                 if (i == me) continue;
1150                 write_barrier();
1151                 setContextSwitches();
1152                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1153                     retry = rtsTrue;
1154                 }
1155             }
1156             if (!retry) break;
1157             yieldThread();
1158         }
1159     }
1160 }
1161
1162 #endif // THREADED_RTS
1163
1164 static void
1165 start_gc_threads (void)
1166 {
1167 #if defined(THREADED_RTS)
1168     gc_running_threads = 0;
1169 #endif
1170 }
1171
1172 static void
1173 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1174 {
1175 #if defined(THREADED_RTS)
1176     nat i;
1177     for (i=0; i < n_threads; i++) {
1178         if (i == me) continue;
1179         inc_running();
1180         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1181         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1182
1183         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1184         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1185         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1186     }
1187 #endif
1188 }
1189
1190 // After GC is complete, we must wait for all GC threads to enter the
1191 // standby state, otherwise they may still be executing inside
1192 // any_work(), and may even remain awake until the next GC starts.
1193 static void
1194 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1195 {
1196 #if defined(THREADED_RTS)
1197     nat i;
1198     for (i=0; i < n_threads; i++) {
1199         if (i == me) continue;
1200         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1201     }
1202 #endif
1203 }
1204
1205 #if defined(THREADED_RTS)
1206 void
1207 releaseGCThreads (Capability *cap USED_IF_THREADS)
1208 {
1209     const nat n_threads = RtsFlags.ParFlags.nNodes;
1210     const nat me = cap->no;
1211     nat i;
1212     for (i=0; i < n_threads; i++) {
1213         if (i == me) continue;
1214         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1215             barf("releaseGCThreads");
1216
1217         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1218         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1219         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1220     }
1221 }
1222 #endif
1223
1224 /* ----------------------------------------------------------------------------
1225    Initialise a generation that is to be collected
1226    ------------------------------------------------------------------------- */
1227
1228 static void
1229 init_collected_gen (nat g, nat n_threads)
1230 {
1231     nat t, i;
1232     gen_workspace *ws;
1233     generation *gen;
1234     bdescr *bd;
1235
1236     // Throw away the current mutable list.  Invariant: the mutable
1237     // list always has at least one block; this means we can avoid a
1238     // check for NULL in recordMutable().
1239     if (g != 0) {
1240         freeChain(generations[g].mut_list);
1241         generations[g].mut_list = allocBlock();
1242         for (i = 0; i < n_capabilities; i++) {
1243             freeChain(capabilities[i].mut_lists[g]);
1244             capabilities[i].mut_lists[g] = allocBlock();
1245         }
1246     }
1247
1248     gen = &generations[g];
1249     ASSERT(gen->no == g);
1250
1251     // we'll construct a new list of threads in this step
1252     // during GC, throw away the current list.
1253     gen->old_threads = gen->threads;
1254     gen->threads = END_TSO_QUEUE;
1255
1256     // deprecate the existing blocks
1257     gen->old_blocks   = gen->blocks;
1258     gen->n_old_blocks = gen->n_blocks;
1259     gen->blocks       = NULL;
1260     gen->n_blocks     = 0;
1261     gen->n_words      = 0;
1262     gen->live_estimate = 0;
1263
1264     // initialise the large object queues.
1265     gen->scavenged_large_objects = NULL;
1266     gen->n_scavenged_large_blocks = 0;
1267
1268     // mark the small objects as from-space
1269     for (bd = gen->old_blocks; bd; bd = bd->link) {
1270         bd->flags &= ~BF_EVACUATED;
1271     }
1272
1273     // mark the large objects as from-space
1274     for (bd = gen->large_objects; bd; bd = bd->link) {
1275         bd->flags &= ~BF_EVACUATED;
1276     }
1277
1278     // for a compacted generation, we need to allocate the bitmap
1279     if (gen->mark) {
1280         nat bitmap_size; // in bytes
1281         bdescr *bitmap_bdescr;
1282         StgWord *bitmap;
1283
1284         bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1285
1286         if (bitmap_size > 0) {
1287             bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1288                                        / BLOCK_SIZE);
1289             gen->bitmap = bitmap_bdescr;
1290             bitmap = bitmap_bdescr->start;
1291
1292             debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1293                        bitmap_size, bitmap);
1294
1295             // don't forget to fill it with zeros!
1296             memset(bitmap, 0, bitmap_size);
1297
1298             // For each block in this step, point to its bitmap from the
1299             // block descriptor.
1300             for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1301                 bd->u.bitmap = bitmap;
1302                 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1303
1304                 // Also at this point we set the BF_MARKED flag
1305                 // for this block.  The invariant is that
1306                 // BF_MARKED is always unset, except during GC
1307                 // when it is set on those blocks which will be
1308                 // compacted.
1309                 if (!(bd->flags & BF_FRAGMENTED)) {
1310                     bd->flags |= BF_MARKED;
1311                 }
1312
1313                 // BF_SWEPT should be marked only for blocks that are being
1314                 // collected in sweep()
1315                 bd->flags &= ~BF_SWEPT;
1316             }
1317         }
1318     }
1319
1320     // For each GC thread, for each step, allocate a "todo" block to
1321     // store evacuated objects to be scavenged, and a block to store
1322     // evacuated objects that do not need to be scavenged.
1323     for (t = 0; t < n_threads; t++) {
1324         ws = &gc_threads[t]->gens[g];
1325
1326         ws->todo_large_objects = NULL;
1327
1328         ws->part_list = NULL;
1329         ws->n_part_blocks = 0;
1330
1331         // allocate the first to-space block; extra blocks will be
1332         // chained on as necessary.
1333         ws->todo_bd = NULL;
1334         ASSERT(looksEmptyWSDeque(ws->todo_q));
1335         alloc_todo_block(ws,0);
1336
1337         ws->todo_overflow = NULL;
1338         ws->n_todo_overflow = 0;
1339
1340         ws->scavd_list = NULL;
1341         ws->n_scavd_blocks = 0;
1342     }
1343 }
1344
1345
1346 /* ----------------------------------------------------------------------------
1347    Initialise a generation that is *not* to be collected
1348    ------------------------------------------------------------------------- */
1349
1350 static void
1351 init_uncollected_gen (nat g, nat threads)
1352 {
1353     nat t, n;
1354     gen_workspace *ws;
1355     generation *gen;
1356     bdescr *bd;
1357
1358     // save the current mutable lists for this generation, and
1359     // allocate a fresh block for each one.  We'll traverse these
1360     // mutable lists as roots early on in the GC.
1361     generations[g].saved_mut_list = generations[g].mut_list;
1362     generations[g].mut_list = allocBlock();
1363     for (n = 0; n < n_capabilities; n++) {
1364         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
1365         capabilities[n].mut_lists[g] = allocBlock();
1366     }
1367
1368     gen = &generations[g];
1369
1370     gen->scavenged_large_objects = NULL;
1371     gen->n_scavenged_large_blocks = 0;
1372
1373     for (t = 0; t < threads; t++) {
1374         ws = &gc_threads[t]->gens[g];
1375
1376         ASSERT(looksEmptyWSDeque(ws->todo_q));
1377         ws->todo_large_objects = NULL;
1378
1379         ws->part_list = NULL;
1380         ws->n_part_blocks = 0;
1381
1382         ws->scavd_list = NULL;
1383         ws->n_scavd_blocks = 0;
1384
1385         // If the block at the head of the list in this generation
1386         // is less than 3/4 full, then use it as a todo block.
1387         if (gen->blocks && isPartiallyFull(gen->blocks))
1388         {
1389             ws->todo_bd = gen->blocks;
1390             ws->todo_free = ws->todo_bd->free;
1391             ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1392             gen->blocks = gen->blocks->link;
1393             gen->n_blocks -= 1;
1394             gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1395             ws->todo_bd->link = NULL;
1396             // we must scan from the current end point.
1397             ws->todo_bd->u.scan = ws->todo_bd->free;
1398         }
1399         else
1400         {
1401             ws->todo_bd = NULL;
1402             alloc_todo_block(ws,0);
1403         }
1404     }
1405
1406     // deal out any more partial blocks to the threads' part_lists
1407     t = 0;
1408     while (gen->blocks && isPartiallyFull(gen->blocks))
1409     {
1410         bd = gen->blocks;
1411         gen->blocks = bd->link;
1412         ws = &gc_threads[t]->gens[g];
1413         bd->link = ws->part_list;
1414         ws->part_list = bd;
1415         ws->n_part_blocks += 1;
1416         bd->u.scan = bd->free;
1417         gen->n_blocks -= 1;
1418         gen->n_words -= bd->free - bd->start;
1419         t++;
1420         if (t == n_gc_threads) t = 0;
1421     }
1422 }
1423
1424 /* -----------------------------------------------------------------------------
1425    Initialise a gc_thread before GC
1426    -------------------------------------------------------------------------- */
1427
1428 static void
1429 init_gc_thread (gc_thread *t)
1430 {
1431     t->static_objects = END_OF_STATIC_LIST;
1432     t->scavenged_static_objects = END_OF_STATIC_LIST;
1433     t->scan_bd = NULL;
1434     t->mut_lists = capabilities[t->thread_index].mut_lists;
1435     t->evac_gen = 0;
1436     t->failed_to_evac = rtsFalse;
1437     t->eager_promotion = rtsTrue;
1438     t->thunk_selector_depth = 0;
1439     t->copied = 0;
1440     t->scanned = 0;
1441     t->any_work = 0;
1442     t->no_work = 0;
1443     t->scav_find_work = 0;
1444 }
1445
1446 /* -----------------------------------------------------------------------------
1447    Function we pass to evacuate roots.
1448    -------------------------------------------------------------------------- */
1449
1450 static void
1451 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1452 {
1453     // we stole a register for gct, but this function is called from
1454     // *outside* the GC where the register variable is not in effect,
1455     // so we need to save and restore it here.  NB. only call
1456     // mark_root() from the main GC thread, otherwise gct will be
1457     // incorrect.
1458     gc_thread *saved_gct;
1459     saved_gct = gct;
1460     SET_GCT(user);
1461
1462     evacuate(root);
1463
1464     SET_GCT(saved_gct);
1465 }
1466
1467 /* -----------------------------------------------------------------------------
1468    Initialising the static object & mutable lists
1469    -------------------------------------------------------------------------- */
1470
1471 static void
1472 zero_static_object_list(StgClosure* first_static)
1473 {
1474   StgClosure* p;
1475   StgClosure* link;
1476   const StgInfoTable *info;
1477
1478   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1479     info = get_itbl(p);
1480     link = *STATIC_LINK(info, p);
1481     *STATIC_LINK(info,p) = NULL;
1482   }
1483 }
1484
1485 /* ----------------------------------------------------------------------------
1486    Reset the sizes of the older generations when we do a major
1487    collection.
1488
1489    CURRENT STRATEGY: make all generations except zero the same size.
1490    We have to stay within the maximum heap size, and leave a certain
1491    percentage of the maximum heap size available to allocate into.
1492    ------------------------------------------------------------------------- */
1493
1494 static void
1495 resize_generations (void)
1496 {
1497     nat g;
1498
1499     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1500         nat live, size, min_alloc, words;
1501         const nat max  = RtsFlags.GcFlags.maxHeapSize;
1502         const nat gens = RtsFlags.GcFlags.generations;
1503
1504         // live in the oldest generations
1505         if (oldest_gen->live_estimate != 0) {
1506             words = oldest_gen->live_estimate;
1507         } else {
1508             words = oldest_gen->n_words;
1509         }
1510         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1511             oldest_gen->n_large_blocks;
1512
1513         // default max size for all generations except zero
1514         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1515                        RtsFlags.GcFlags.minOldGenSize);
1516
1517         if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1518             RtsFlags.GcFlags.heapSizeSuggestion = size;
1519         }
1520
1521         // minimum size for generation zero
1522         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1523                             RtsFlags.GcFlags.minAllocAreaSize);
1524
1525         // Auto-enable compaction when the residency reaches a
1526         // certain percentage of the maximum heap size (default: 30%).
1527         if (RtsFlags.GcFlags.compact ||
1528             (max > 0 &&
1529              oldest_gen->n_blocks >
1530              (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1531             oldest_gen->mark = 1;
1532             oldest_gen->compact = 1;
1533 //        debugBelch("compaction: on\n", live);
1534         } else {
1535             oldest_gen->mark = 0;
1536             oldest_gen->compact = 0;
1537 //        debugBelch("compaction: off\n", live);
1538         }
1539
1540         if (RtsFlags.GcFlags.sweep) {
1541             oldest_gen->mark = 1;
1542         }
1543
1544         // if we're going to go over the maximum heap size, reduce the
1545         // size of the generations accordingly.  The calculation is
1546         // different if compaction is turned on, because we don't need
1547         // to double the space required to collect the old generation.
1548         if (max != 0) {
1549
1550             // this test is necessary to ensure that the calculations
1551             // below don't have any negative results - we're working
1552             // with unsigned values here.
1553             if (max < min_alloc) {
1554                 heapOverflow();
1555             }
1556
1557             if (oldest_gen->compact) {
1558                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1559                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1560                 }
1561             } else {
1562                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1563                     size = (max - min_alloc) / ((gens - 1) * 2);
1564                 }
1565             }
1566
1567             if (size < live) {
1568                 heapOverflow();
1569             }
1570         }
1571
1572 #if 0
1573         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1574                    min_alloc, size, max);
1575 #endif
1576
1577         for (g = 0; g < gens; g++) {
1578             generations[g].max_blocks = size;
1579         }
1580     }
1581 }
1582
1583 /* -----------------------------------------------------------------------------
1584    Calculate the new size of the nursery, and resize it.
1585    -------------------------------------------------------------------------- */
1586
1587 static void
1588 resize_nursery (void)
1589 {
1590     const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1591
1592     if (RtsFlags.GcFlags.generations == 1)
1593     {   // Two-space collector:
1594         nat blocks;
1595
1596         /* set up a new nursery.  Allocate a nursery size based on a
1597          * function of the amount of live data (by default a factor of 2)
1598          * Use the blocks from the old nursery if possible, freeing up any
1599          * left over blocks.
1600          *
1601          * If we get near the maximum heap size, then adjust our nursery
1602          * size accordingly.  If the nursery is the same size as the live
1603          * data (L), then we need 3L bytes.  We can reduce the size of the
1604          * nursery to bring the required memory down near 2L bytes.
1605          *
1606          * A normal 2-space collector would need 4L bytes to give the same
1607          * performance we get from 3L bytes, reducing to the same
1608          * performance at 2L bytes.
1609          */
1610         blocks = generations[0].n_blocks;
1611
1612         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1613              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1614              RtsFlags.GcFlags.maxHeapSize )
1615         {
1616             long adjusted_blocks;  // signed on purpose
1617             int pc_free;
1618
1619             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1620
1621             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1622                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1623
1624             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1625             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1626             {
1627                 heapOverflow();
1628             }
1629             blocks = adjusted_blocks;
1630         }
1631         else
1632         {
1633             blocks *= RtsFlags.GcFlags.oldGenFactor;
1634             if (blocks < min_nursery)
1635             {
1636                 blocks = min_nursery;
1637             }
1638         }
1639         resizeNurseries(blocks);
1640     }
1641     else  // Generational collector
1642     {
1643         /*
1644          * If the user has given us a suggested heap size, adjust our
1645          * allocation area to make best use of the memory available.
1646          */
1647         if (RtsFlags.GcFlags.heapSizeSuggestion)
1648         {
1649             long blocks;
1650             const nat needed = calcNeeded();    // approx blocks needed at next GC
1651
1652             /* Guess how much will be live in generation 0 step 0 next time.
1653              * A good approximation is obtained by finding the
1654              * percentage of g0 that was live at the last minor GC.
1655              *
1656              * We have an accurate figure for the amount of copied data in
1657              * 'copied', but we must convert this to a number of blocks, with
1658              * a small adjustment for estimated slop at the end of a block
1659              * (- 10 words).
1660              */
1661             if (N == 0)
1662             {
1663                 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1664                     / countNurseryBlocks();
1665             }
1666
1667             /* Estimate a size for the allocation area based on the
1668              * information available.  We might end up going slightly under
1669              * or over the suggested heap size, but we should be pretty
1670              * close on average.
1671              *
1672              * Formula:            suggested - needed
1673              *                ----------------------------
1674              *                    1 + g0_pcnt_kept/100
1675              *
1676              * where 'needed' is the amount of memory needed at the next
1677              * collection for collecting all gens except g0.
1678              */
1679             blocks =
1680                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1681                 (100 + (long)g0_pcnt_kept);
1682
1683             if (blocks < (long)min_nursery) {
1684                 blocks = min_nursery;
1685             }
1686
1687             resizeNurseries((nat)blocks);
1688         }
1689         else
1690         {
1691             // we might have added extra large blocks to the nursery, so
1692             // resize back to minAllocAreaSize again.
1693             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1694         }
1695     }
1696 }
1697
1698 /* -----------------------------------------------------------------------------
1699    Sanity code for CAF garbage collection.
1700
1701    With DEBUG turned on, we manage a CAF list in addition to the SRT
1702    mechanism.  After GC, we run down the CAF list and blackhole any
1703    CAFs which have been garbage collected.  This means we get an error
1704    whenever the program tries to enter a garbage collected CAF.
1705
1706    Any garbage collected CAFs are taken off the CAF list at the same
1707    time.
1708    -------------------------------------------------------------------------- */
1709
1710 #if 0 && defined(DEBUG)
1711
1712 static void
1713 gcCAFs(void)
1714 {
1715   StgClosure*  p;
1716   StgClosure** pp;
1717   const StgInfoTable *info;
1718   nat i;
1719
1720   i = 0;
1721   p = caf_list;
1722   pp = &caf_list;
1723
1724   while (p != NULL) {
1725
1726     info = get_itbl(p);
1727
1728     ASSERT(info->type == IND_STATIC);
1729
1730     if (STATIC_LINK(info,p) == NULL) {
1731         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1732         // black hole it
1733         SET_INFO(p,&stg_BLACKHOLE_info);
1734         p = STATIC_LINK2(info,p);
1735         *pp = p;
1736     }
1737     else {
1738       pp = &STATIC_LINK2(info,p);
1739       p = *pp;
1740       i++;
1741     }
1742
1743   }
1744
1745   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1746 }
1747 #endif