rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "Compact.h"
  44 #include "Evac.h"
  45 #include "Scav.h"
  46 #include "GCUtils.h"
  47 #include "MarkStack.h"
  48 #include "MarkWeak.h"
  49 #include "Sparks.h"
  50 #include "Sweep.h"
  51
  52 #include <string.h> // for memset()
  53 #include <unistd.h>
  54
  55 /* -----------------------------------------------------------------------------
  56    Global variables
  57    -------------------------------------------------------------------------- */
  58
  59 /* STATIC OBJECT LIST.
  60  *
  61  * During GC:
  62  * We maintain a linked list of static objects that are still live.
  63  * The requirements for this list are:
  64  *
  65  *  - we need to scan the list while adding to it, in order to
  66  *    scavenge all the static objects (in the same way that
  67  *    breadth-first scavenging works for dynamic objects).
  68  *
  69  *  - we need to be able to tell whether an object is already on
  70  *    the list, to break loops.
  71  *
  72  * Each static object has a "static link field", which we use for
  73  * linking objects on to the list.  We use a stack-type list, consing
  74  * objects on the front as they are added (this means that the
  75  * scavenge phase is depth-first, not breadth-first, but that
  76  * shouldn't matter).
  77  *
  78  * A separate list is kept for objects that have been scavenged
  79  * already - this is so that we can zero all the marks afterwards.
  80  *
  81  * An object is on the list if its static link field is non-zero; this
  82  * means that we have to mark the end of the list with '1', not NULL.
  83  *
  84  * Extra notes for generational GC:
  85  *
  86  * Each generation has a static object list associated with it.  When
  87  * collecting generations up to N, we treat the static object lists
  88  * from generations > N as roots.
  89  *
  90  * We build up a static object list while collecting generations 0..N,
  91  * which is then appended to the static object list of generation N+1.
  92  */
  93
  94 /* N is the oldest generation being collected, where the generations
  95  * are numbered starting at 0.  A major GC (indicated by the major_gc
  96  * flag) is when we're collecting all generations.  We only attempt to
  97  * deal with static objects and GC CAFs when doing a major GC.
  98  */
  99 nat N;
 100 rtsBool major_gc;
 101
 102 /* Data used for allocation area sizing.
 103  */
 104 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
 105
 106 /* Mut-list stats */
 107 #ifdef DEBUG
 108 nat mutlist_MUTVARS,
 109     mutlist_MUTARRS,
 110     mutlist_MVARS,
 111     mutlist_OTHERS;
 112 #endif
 113
 114 /* Thread-local data for each GC thread
 115  */
 116 gc_thread **gc_threads = NULL;
 117
 118 #if !defined(THREADED_RTS)
 119 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
 120 #endif
 121
 122 // Number of threads running in *this* GC.  Affects how many
 123 // step->todos[] lists we have to look in to find work.
 124 nat n_gc_threads;
 125
 126 // For stats:
 127 long copied;        // *words* copied & scavenged during this GC
 128
 129 rtsBool work_stealing;
 130
 131 DECLARE_GCT
 132
 133 /* -----------------------------------------------------------------------------
 134    Static function declarations
 135    -------------------------------------------------------------------------- */
 136
 137 static void mark_root               (void *user, StgClosure **root);
 138 static void zero_static_object_list (StgClosure* first_static);
 139 static nat  initialise_N            (rtsBool force_major_gc);
 140 static void prepare_collected_gen   (generation *gen);
 141 static void prepare_uncollected_gen (generation *gen);
 142 static void init_gc_thread          (gc_thread *t);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void scavenge_until_all_done (void);
 147 static StgWord inc_running          (void);
 148 static StgWord dec_running          (void);
 149 static void wakeup_gc_threads       (nat n_threads, nat me);
 150 static void shutdown_gc_threads     (nat n_threads, nat me);
 151 static void collect_gct_blocks      (void);
 152
 153 #if 0 && defined(DEBUG)
 154 static void gcCAFs                  (void);
 155 #endif
 156
 157 /* -----------------------------------------------------------------------------
 158    The mark stack.
 159    -------------------------------------------------------------------------- */
 160
 161 bdescr *mark_stack_top_bd; // topmost block in the mark stack
 162 bdescr *mark_stack_bd;     // current block in the mark stack
 163 StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
 164
 165 /* -----------------------------------------------------------------------------
 166    GarbageCollect: the main entry point to the garbage collector.
 167
 168    Locks held: all capabilities are held throughout GarbageCollect().
 169    -------------------------------------------------------------------------- */
 170
 171 void
 172 GarbageCollect (rtsBool force_major_gc,
 173                 nat gc_type USED_IF_THREADS,
 174                 Capability *cap)
 175 {
 176   bdescr *bd;
 177   generation *gen;
 178   lnat live_blocks, live_words, allocated, max_copied, avg_copied;
 179   gc_thread *saved_gct;
 180   nat g, t, n;
 181
 182   // necessary if we stole a callee-saves register for gct:
 183   saved_gct = gct;
 184
 185 #ifdef PROFILING
 186   CostCentreStack *prev_CCS;
 187 #endif
 188
 189   ACQUIRE_SM_LOCK;
 190
 191 #if defined(RTS_USER_SIGNALS)
 192   if (RtsFlags.MiscFlags.install_signal_handlers) {
 193     // block signals
 194     blockUserSignals();
 195   }
 196 #endif
 197
 198   ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
 199   // otherwise adjust the padding in gen_workspace.
 200
 201   // tell the stats department that we've started a GC
 202   stat_startGC();
 203
 204   // tell the STM to discard any cached closures it's hoping to re-use
 205   stmPreGCHook();
 206
 207   // lock the StablePtr table
 208   stablePtrPreGC();
 209
 210 #ifdef DEBUG
 211   mutlist_MUTVARS = 0;
 212   mutlist_MUTARRS = 0;
 213   mutlist_OTHERS = 0;
 214 #endif
 215
 216   // attribute any costs to CCS_GC
 217 #ifdef PROFILING
 218   prev_CCS = CCCS;
 219   CCCS = CCS_GC;
 220 #endif
 221
 222   /* Approximate how much we allocated.
 223    * Todo: only when generating stats?
 224    */
 225   allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
 226
 227   /* Figure out which generation to collect
 228    */
 229   n = initialise_N(force_major_gc);
 230
 231 #if defined(THREADED_RTS)
 232   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
 233                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
 234       // It's not always a good idea to do load balancing in parallel
 235       // GC.  In particular, for a parallel program we don't want to
 236       // lose locality by moving cached data into another CPU's cache
 237       // (this effect can be quite significant).
 238       //
 239       // We could have a more complex way to deterimine whether to do
 240       // work stealing or not, e.g. it might be a good idea to do it
 241       // if the heap is big.  For now, we just turn it on or off with
 242       // a flag.
 243 #endif
 244
 245   /* Start threads, so they can be spinning up while we finish initialisation.
 246    */
 247   start_gc_threads();
 248
 249 #if defined(THREADED_RTS)
 250   /* How many threads will be participating in this GC?
 251    * We don't try to parallelise minor GCs (unless the user asks for
 252    * it with +RTS -gn0), or mark/compact/sweep GC.
 253    */
 254   if (gc_type == PENDING_GC_PAR) {
 255       n_gc_threads = RtsFlags.ParFlags.nNodes;
 256   } else {
 257       n_gc_threads = 1;
 258   }
 259 #else
 260   n_gc_threads = 1;
 261 #endif
 262
 263   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 264         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 265
 266 #ifdef RTS_GTK_FRONTPANEL
 267   if (RtsFlags.GcFlags.frontpanel) {
 268       updateFrontPanelBeforeGC(N);
 269   }
 270 #endif
 271
 272 #ifdef DEBUG
 273   // check for memory leaks if DEBUG is on
 274   memInventory(DEBUG_gc);
 275 #endif
 276
 277   // check sanity *before* GC
 278   IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
 279
 280   // Initialise all our gc_thread structures
 281   for (t = 0; t < n_gc_threads; t++) {
 282       init_gc_thread(gc_threads[t]);
 283   }
 284
 285   // Initialise all the generations/steps that we're collecting.
 286   for (g = 0; g <= N; g++) {
 287       prepare_collected_gen(&generations[g]);
 288   }
 289   // Initialise all the generations/steps that we're *not* collecting.
 290   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 291       prepare_uncollected_gen(&generations[g]);
 292   }
 293
 294   /* Allocate a mark stack if we're doing a major collection.
 295    */
 296   if (major_gc && oldest_gen->mark) {
 297       mark_stack_bd     = allocBlock();
 298       mark_stack_top_bd = mark_stack_bd;
 299       mark_stack_bd->link = NULL;
 300       mark_stack_bd->u.back = NULL;
 301       mark_sp           = mark_stack_bd->start;
 302   } else {
 303       mark_stack_bd     = NULL;
 304       mark_stack_top_bd = NULL;
 305       mark_sp           = NULL;
 306   }
 307
 308   // this is the main thread
 309 #ifdef THREADED_RTS
 310   if (n_gc_threads == 1) {
 311       SET_GCT(gc_threads[0]);
 312   } else {
 313       SET_GCT(gc_threads[cap->no]);
 314   }
 315 #else
 316 SET_GCT(gc_threads[0]);
 317 #endif
 318
 319   /* -----------------------------------------------------------------------
 320    * follow all the roots that we know about:
 321    */
 322
 323   // the main thread is running: this prevents any other threads from
 324   // exiting prematurely, so we can start them now.
 325   // NB. do this after the mutable lists have been saved above, otherwise
 326   // the other GC threads will be writing into the old mutable lists.
 327   inc_running();
 328   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 329
 330   // scavenge the capability-private mutable lists.  This isn't part
 331   // of markSomeCapabilities() because markSomeCapabilities() can only
 332   // call back into the GC via mark_root() (due to the gct register
 333   // variable).
 334   if (n_gc_threads == 1) {
 335       for (n = 0; n < n_capabilities; n++) {
 336 #if defined(THREADED_RTS)
 337           scavenge_capability_mut_Lists1(&capabilities[n]);
 338 #else
 339           scavenge_capability_mut_lists(&capabilities[n]);
 340 #endif
 341       }
 342   } else {
 343       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 344   }
 345
 346   // follow roots from the CAF list (used by GHCi)
 347   gct->evac_gen_no = 0;
 348   markCAFs(mark_root, gct);
 349
 350   // follow all the roots that the application knows about.
 351   gct->evac_gen_no = 0;
 352   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 353                        rtsTrue/*prune sparks*/);
 354
 355 #if defined(RTS_USER_SIGNALS)
 356   // mark the signal handlers (signals should be already blocked)
 357   markSignalHandlers(mark_root, gct);
 358 #endif
 359
 360   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 361   markWeakPtrList();
 362   initWeakForGC();
 363
 364   // Mark the stable pointer table.
 365   markStablePtrTable(mark_root, gct);
 366
 367   /* -------------------------------------------------------------------------
 368    * Repeatedly scavenge all the areas we know about until there's no
 369    * more scavenging to be done.
 370    */
 371   for (;;)
 372   {
 373       scavenge_until_all_done();
 374       // The other threads are now stopped.  We might recurse back to
 375       // here, but from now on this is the only thread.
 376
 377       // must be last...  invariant is that everything is fully
 378       // scavenged at this point.
 379       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 380           inc_running();
 381           continue;
 382       }
 383
 384       // If we get to here, there's really nothing left to do.
 385       break;
 386   }
 387
 388   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 389
 390   // Now see which stable names are still alive.
 391   gcStablePtrTable();
 392
 393 #ifdef THREADED_RTS
 394   if (n_gc_threads == 1) {
 395       for (n = 0; n < n_capabilities; n++) {
 396           pruneSparkQueue(&capabilities[n]);
 397       }
 398   } else {
 399       pruneSparkQueue(&capabilities[gct->thread_index]);
 400   }
 401 #endif
 402
 403 #ifdef PROFILING
 404   // We call processHeapClosureForDead() on every closure destroyed during
 405   // the current garbage collection, so we invoke LdvCensusForDead().
 406   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 407       || RtsFlags.ProfFlags.bioSelector != NULL)
 408     LdvCensusForDead(N);
 409 #endif
 410
 411   // NO MORE EVACUATION AFTER THIS POINT!
 412
 413   // Two-space collector: free the old to-space.
 414   // g0->old_blocks is the old nursery
 415   // g0->blocks is to-space from the previous GC
 416   if (RtsFlags.GcFlags.generations == 1) {
 417       if (g0->blocks != NULL) {
 418           freeChain(g0->blocks);
 419           g0->blocks = NULL;
 420       }
 421   }
 422
 423   // Finally: compact or sweep the oldest generation.
 424   if (major_gc && oldest_gen->mark) {
 425       if (oldest_gen->compact)
 426           compact(gct->scavenged_static_objects);
 427       else
 428           sweep(oldest_gen);
 429   }
 430
 431   copied = 0;
 432   max_copied = 0;
 433   avg_copied = 0;
 434   {
 435       nat i;
 436       for (i=0; i < n_gc_threads; i++) {
 437           if (n_gc_threads > 1) {
 438               debugTrace(DEBUG_gc,"thread %d:", i);
 439               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 440               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 441               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 442               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 443               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 444           }
 445           copied += gc_threads[i]->copied;
 446           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 447       }
 448       if (n_gc_threads == 1) {
 449           max_copied = 0;
 450           avg_copied = 0;
 451       } else {
 452           avg_copied = copied;
 453       }
 454   }
 455
 456   // Run through all the generations/steps and tidy up.
 457   // We're going to:
 458   //   - count the amount of "live" data (live_words, live_blocks)
 459   //   - count the amount of "copied" data in this GC (copied)
 460   //   - free from-space
 461   //   - make to-space the new from-space (set BF_EVACUATED on all blocks)
 462   //
 463   live_words = 0;
 464   live_blocks = 0;
 465
 466   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 467
 468     if (g == N) {
 469       generations[g].collections++; // for stats
 470       if (n_gc_threads > 1) generations[g].par_collections++;
 471     }
 472
 473     // Count the mutable list as bytes "copied" for the purposes of
 474     // stats.  Every mutable list is copied during every GC.
 475     if (g > 0) {
 476         nat mut_list_size = 0;
 477         for (n = 0; n < n_capabilities; n++) {
 478             mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
 479         }
 480         copied +=  mut_list_size;
 481
 482         debugTrace(DEBUG_gc,
 483                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 484                    (unsigned long)(mut_list_size * sizeof(W_)),
 485                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 486     }
 487
 488     bdescr *next, *prev;
 489     gen = &generations[g];
 490
 491     // for generations we collected...
 492     if (g <= N) {
 493
 494         /* free old memory and shift to-space into from-space for all
 495          * the collected steps (except the allocation area).  These
 496          * freed blocks will probaby be quickly recycled.
 497          */
 498         if (gen->mark)
 499         {
 500             // tack the new blocks on the end of the existing blocks
 501             if (gen->old_blocks != NULL) {
 502
 503                 prev = NULL;
 504                 for (bd = gen->old_blocks; bd != NULL; bd = next) {
 505
 506                     next = bd->link;
 507
 508                     if (!(bd->flags & BF_MARKED))
 509                     {
 510                         if (prev == NULL) {
 511                             gen->old_blocks = next;
 512                         } else {
 513                             prev->link = next;
 514                         }
 515                         freeGroup(bd);
 516                         gen->n_old_blocks--;
 517                     }
 518                     else
 519                     {
 520                         gen->n_words += bd->free - bd->start;
 521
 522                         // NB. this step might not be compacted next
 523                         // time, so reset the BF_MARKED flags.
 524                         // They are set before GC if we're going to
 525                         // compact.  (search for BF_MARKED above).
 526                         bd->flags &= ~BF_MARKED;
 527
 528                         // between GCs, all blocks in the heap except
 529                         // for the nursery have the BF_EVACUATED flag set.
 530                         bd->flags |= BF_EVACUATED;
 531
 532                         prev = bd;
 533                     }
 534                 }
 535
 536                 if (prev != NULL) {
 537                     prev->link = gen->blocks;
 538                     gen->blocks = gen->old_blocks;
 539                 }
 540             }
 541             // add the new blocks to the block tally
 542             gen->n_blocks += gen->n_old_blocks;
 543             ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
 544             ASSERT(countOccupied(gen->blocks) == gen->n_words);
 545         }
 546         else // not copacted
 547         {
 548             freeChain(gen->old_blocks);
 549         }
 550
 551         gen->old_blocks = NULL;
 552         gen->n_old_blocks = 0;
 553
 554         /* LARGE OBJECTS.  The current live large objects are chained on
 555          * scavenged_large, having been moved during garbage
 556          * collection from large_objects.  Any objects left on the
 557          * large_objects list are therefore dead, so we free them here.
 558          */
 559         freeChain(gen->large_objects);
 560         gen->large_objects  = gen->scavenged_large_objects;
 561         gen->n_large_blocks = gen->n_scavenged_large_blocks;
 562         gen->n_new_large_words = 0;
 563     }
 564     else // for generations > N
 565     {
 566         /* For older generations, we need to append the
 567          * scavenged_large_object list (i.e. large objects that have been
 568          * promoted during this GC) to the large_object list for that step.
 569          */
 570         for (bd = gen->scavenged_large_objects; bd; bd = next) {
 571             next = bd->link;
 572             dbl_link_onto(bd, &gen->large_objects);
 573         }
 574
 575         // add the new blocks we promoted during this GC
 576         gen->n_large_blocks += gen->n_scavenged_large_blocks;
 577     }
 578
 579     ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 580
 581     gen->scavenged_large_objects = NULL;
 582     gen->n_scavenged_large_blocks = 0;
 583
 584     // Count "live" data
 585     live_words  += genLiveWords(gen);
 586     live_blocks += genLiveBlocks(gen);
 587
 588     // add in the partial blocks in the gen_workspaces, but ignore gen 0
 589     // if this is a local GC (we can't count another capability's part_list)
 590     {
 591         nat i;
 592         for (i = 0; i < n_capabilities; i++) {
 593             live_words  += gcThreadLiveWords(i, gen->no);
 594             live_blocks += gcThreadLiveBlocks(i, gen->no);
 595         }
 596     }
 597   } // for all generations
 598
 599   // update the max size of older generations after a major GC
 600   resize_generations();
 601
 602   // Start a new pinned_object_block
 603   for (n = 0; n < n_capabilities; n++) {
 604       capabilities[n].pinned_object_block = NULL;
 605   }
 606
 607   // Free the mark stack.
 608   if (mark_stack_top_bd != NULL) {
 609       debugTrace(DEBUG_gc, "mark stack: %d blocks",
 610                  countBlocks(mark_stack_top_bd));
 611       freeChain(mark_stack_top_bd);
 612   }
 613
 614   // Free any bitmaps.
 615   for (g = 0; g <= N; g++) {
 616       gen = &generations[g];
 617       if (gen->bitmap != NULL) {
 618           freeGroup(gen->bitmap);
 619           gen->bitmap = NULL;
 620       }
 621   }
 622
 623   // Reset the nursery: make the blocks empty
 624   allocated += clearNurseries();
 625
 626   resize_nursery();
 627
 628   resetNurseries();
 629
 630  // mark the garbage collected CAFs as dead
 631 #if 0 && defined(DEBUG) // doesn't work at the moment
 632   if (major_gc) { gcCAFs(); }
 633 #endif
 634
 635 #ifdef PROFILING
 636   // resetStaticObjectForRetainerProfiling() must be called before
 637   // zeroing below.
 638   if (n_gc_threads > 1) {
 639       barf("profiling is currently broken with multi-threaded GC");
 640       // ToDo: fix the gct->scavenged_static_objects below
 641   }
 642   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 643 #endif
 644
 645   // zero the scavenged static object list
 646   if (major_gc) {
 647       nat i;
 648       for (i = 0; i < n_gc_threads; i++) {
 649           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 650       }
 651   }
 652
 653   // Update the stable pointer hash table.
 654   updateStablePtrTable(major_gc);
 655
 656   // unlock the StablePtr table.  Must be before scheduleFinalizers(),
 657   // because a finalizer may call hs_free_fun_ptr() or
 658   // hs_free_stable_ptr(), both of which access the StablePtr table.
 659   stablePtrPostGC();
 660
 661   // Start any pending finalizers.  Must be after
 662   // updateStablePtrTable() and stablePtrPostGC() (see #4221).
 663   RELEASE_SM_LOCK;
 664   scheduleFinalizers(cap, old_weak_ptr_list);
 665   ACQUIRE_SM_LOCK;
 666
 667   // check sanity after GC
 668   // before resurrectThreads(), because that might overwrite some
 669   // closures, which will cause problems with THREADED where we don't
 670   // fill slop.
 671   IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
 672
 673   // send exceptions to any threads which were about to die
 674   RELEASE_SM_LOCK;
 675   resurrectThreads(resurrected_threads);
 676   ACQUIRE_SM_LOCK;
 677
 678   if (major_gc) {
 679       nat need, got;
 680       need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
 681       got = mblocks_allocated;
 682       /* If the amount of data remains constant, next major GC we'll
 683          require (F+1)*need. We leave (F+2)*need in order to reduce
 684          repeated deallocation and reallocation. */
 685       need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
 686       if (got > need) {
 687           returnMemoryToOS(got - need);
 688       }
 689   }
 690
 691   // extra GC trace info
 692   IF_DEBUG(gc, statDescribeGens());
 693
 694 #ifdef DEBUG
 695   // symbol-table based profiling
 696   /*  heapCensus(to_blocks); */ /* ToDo */
 697 #endif
 698
 699   // restore enclosing cost centre
 700 #ifdef PROFILING
 701   CCCS = prev_CCS;
 702 #endif
 703
 704 #ifdef DEBUG
 705   // check for memory leaks if DEBUG is on
 706   memInventory(DEBUG_gc);
 707 #endif
 708
 709 #ifdef RTS_GTK_FRONTPANEL
 710   if (RtsFlags.GcFlags.frontpanel) {
 711       updateFrontPanelAfterGC( N, live );
 712   }
 713 #endif
 714
 715   // ok, GC over: tell the stats department what happened.
 716   stat_endGC(allocated, live_words, copied, N, max_copied, avg_copied,
 717              live_blocks * BLOCK_SIZE_W - live_words /* slop */);
 718
 719   // Guess which generation we'll collect *next* time
 720   initialise_N(force_major_gc);
 721
 722 #if defined(RTS_USER_SIGNALS)
 723   if (RtsFlags.MiscFlags.install_signal_handlers) {
 724     // unblock signals again
 725     unblockUserSignals();
 726   }
 727 #endif
 728
 729   RELEASE_SM_LOCK;
 730
 731   SET_GCT(saved_gct);
 732 }
 733
 734 /* -----------------------------------------------------------------------------
 735    Figure out which generation to collect, initialise N and major_gc.
 736
 737    Also returns the total number of blocks in generations that will be
 738    collected.
 739    -------------------------------------------------------------------------- */
 740
 741 static nat
 742 initialise_N (rtsBool force_major_gc)
 743 {
 744     int g;
 745     nat blocks, blocks_total;
 746
 747     blocks = 0;
 748     blocks_total = 0;
 749
 750     if (force_major_gc) {
 751         N = RtsFlags.GcFlags.generations - 1;
 752     } else {
 753         N = 0;
 754     }
 755
 756     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 757
 758         blocks = generations[g].n_words / BLOCK_SIZE_W
 759                + generations[g].n_large_blocks;
 760
 761         if (blocks >= generations[g].max_blocks) {
 762             N = stg_max(N,g);
 763         }
 764         if ((nat)g <= N) {
 765             blocks_total += blocks;
 766         }
 767     }
 768
 769     blocks_total += countNurseryBlocks();
 770
 771     major_gc = (N == RtsFlags.GcFlags.generations-1);
 772     return blocks_total;
 773 }
 774
 775 /* -----------------------------------------------------------------------------
 776    Initialise the gc_thread structures.
 777    -------------------------------------------------------------------------- */
 778
 779 #define GC_THREAD_INACTIVE             0
 780 #define GC_THREAD_STANDING_BY          1
 781 #define GC_THREAD_RUNNING              2
 782 #define GC_THREAD_WAITING_TO_CONTINUE  3
 783
 784 static void
 785 new_gc_thread (nat n, gc_thread *t)
 786 {
 787     nat g;
 788     gen_workspace *ws;
 789
 790 #ifdef THREADED_RTS
 791     t->id = 0;
 792     initSpinLock(&t->gc_spin);
 793     initSpinLock(&t->mut_spin);
 794     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 795     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 796                           // thread to start up, see wakeup_gc_threads
 797 #endif
 798
 799     t->thread_index = n;
 800     t->free_blocks = NULL;
 801     t->gc_count = 0;
 802
 803     init_gc_thread(t);
 804
 805 #ifdef USE_PAPI
 806     t->papi_events = -1;
 807 #endif
 808
 809     for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 810     {
 811         ws = &t->gens[g];
 812         ws->gen = &generations[g];
 813         ASSERT(g == ws->gen->no);
 814         ws->my_gct = t;
 815
 816         // We want to call
 817         //   alloc_todo_block(ws,0);
 818         // but can't, because it uses gct which isn't set up at this point.
 819         // Hence, allocate a block for todo_bd manually:
 820         {
 821             bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
 822             initBdescr(bd, ws->gen, ws->gen->to);
 823             bd->flags = BF_EVACUATED;
 824             bd->u.scan = bd->free = bd->start;
 825
 826             ws->todo_bd = bd;
 827             ws->todo_free = bd->free;
 828             ws->todo_lim = bd->start + BLOCK_SIZE_W;
 829         }
 830
 831         ws->todo_q = newWSDeque(128);
 832         ws->todo_overflow = NULL;
 833         ws->n_todo_overflow = 0;
 834
 835         ws->part_list = NULL;
 836         ws->n_part_blocks = 0;
 837
 838         ws->scavd_list = NULL;
 839         ws->n_scavd_blocks = 0;
 840     }
 841 }
 842
 843
 844 void
 845 initGcThreads (void)
 846 {
 847     if (gc_threads == NULL) {
 848 #if defined(THREADED_RTS)
 849         nat i;
 850         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 851                                      sizeof(gc_thread*),
 852                                      "alloc_gc_threads");
 853
 854         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 855             gc_threads[i] =
 856                 stgMallocBytes(sizeof(gc_thread) +
 857                                RtsFlags.GcFlags.generations * sizeof(gen_workspace),
 858                                "alloc_gc_threads");
 859
 860             new_gc_thread(i, gc_threads[i]);
 861         }
 862 #else
 863         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 864         gc_threads[0] = gct;
 865         new_gc_thread(0,gc_threads[0]);
 866 #endif
 867     }
 868 }
 869
 870 void
 871 freeGcThreads (void)
 872 {
 873     nat g;
 874     if (gc_threads != NULL) {
 875 #if defined(THREADED_RTS)
 876         nat i;
 877         for (i = 0; i < n_capabilities; i++) {
 878             for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 879             {
 880                 freeWSDeque(gc_threads[i]->gens[g].todo_q);
 881             }
 882             stgFree (gc_threads[i]);
 883         }
 884         stgFree (gc_threads);
 885 #else
 886         for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 887         {
 888             freeWSDeque(gc_threads[0]->gens[g].todo_q);
 889         }
 890         stgFree (gc_threads);
 891 #endif
 892         gc_threads = NULL;
 893     }
 894 }
 895
 896 /* ----------------------------------------------------------------------------
 897    Start GC threads
 898    ------------------------------------------------------------------------- */
 899
 900 static volatile StgWord gc_running_threads;
 901
 902 static StgWord
 903 inc_running (void)
 904 {
 905     StgWord new;
 906     new = atomic_inc(&gc_running_threads);
 907     ASSERT(new <= n_gc_threads);
 908     return new;
 909 }
 910
 911 static StgWord
 912 dec_running (void)
 913 {
 914     ASSERT(gc_running_threads != 0);
 915     return atomic_dec(&gc_running_threads);
 916 }
 917
 918 static rtsBool
 919 any_work (void)
 920 {
 921     int g;
 922     gen_workspace *ws;
 923
 924     gct->any_work++;
 925
 926     write_barrier();
 927
 928     // scavenge objects in compacted generation
 929     if (mark_stack_bd != NULL && !mark_stack_empty()) {
 930         return rtsTrue;
 931     }
 932
 933     // Check for global work in any step.  We don't need to check for
 934     // local work, because we have already exited scavenge_loop(),
 935     // which means there is no local work for this thread.
 936     for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
 937         ws = &gct->gens[g];
 938         if (ws->todo_large_objects) return rtsTrue;
 939         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 940         if (ws->todo_overflow) return rtsTrue;
 941     }
 942
 943 #if defined(THREADED_RTS)
 944     if (work_stealing) {
 945         nat n;
 946         // look for work to steal
 947         for (n = 0; n < n_gc_threads; n++) {
 948             if (n == gct->thread_index) continue;
 949             for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
 950                 ws = &gc_threads[n]->gens[g];
 951                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 952             }
 953         }
 954     }
 955 #endif
 956
 957     gct->no_work++;
 958 #if defined(THREADED_RTS)
 959     yieldThread();
 960 #endif
 961
 962     return rtsFalse;
 963 }
 964
 965 static void
 966 scavenge_until_all_done (void)
 967 {
 968     nat r;
 969
 970
 971 loop:
 972     traceEventGcWork(&capabilities[gct->thread_index]);
 973
 974 #if defined(THREADED_RTS)
 975     if (n_gc_threads > 1) {
 976         scavenge_loop();
 977     } else {
 978         scavenge_loop1();
 979     }
 980 #else
 981     scavenge_loop();
 982 #endif
 983
 984     collect_gct_blocks();
 985
 986     // scavenge_loop() only exits when there's no work to do
 987     r = dec_running();
 988
 989     traceEventGcIdle(&capabilities[gct->thread_index]);
 990
 991     debugTrace(DEBUG_gc, "%d GC threads still running", r);
 992
 993     while (gc_running_threads != 0) {
 994         // usleep(1);
 995         if (any_work()) {
 996             inc_running();
 997             goto loop;
 998         }
 999         // any_work() does not remove the work from the queue, it
1000         // just checks for the presence of work.  If we find any,
1001         // then we increment gc_running_threads and go back to
1002         // scavenge_loop() to perform any pending work.
1003     }
1004
1005     traceEventGcDone(&capabilities[gct->thread_index]);
1006 }
1007
1008 #if defined(THREADED_RTS)
1009
1010 void
1011 gcWorkerThread (Capability *cap)
1012 {
1013     gc_thread *saved_gct;
1014
1015     // necessary if we stole a callee-saves register for gct:
1016     saved_gct = gct;
1017
1018     gct = gc_threads[cap->no];
1019     gct->id = osThreadId();
1020
1021     // Wait until we're told to wake up
1022     RELEASE_SPIN_LOCK(&gct->mut_spin);
1023     gct->wakeup = GC_THREAD_STANDING_BY;
1024     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1025     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1026
1027 #ifdef USE_PAPI
1028     // start performance counters in this thread...
1029     if (gct->papi_events == -1) {
1030         papi_init_eventset(&gct->papi_events);
1031     }
1032     papi_thread_start_gc1_count(gct->papi_events);
1033 #endif
1034
1035     // Every thread evacuates some roots.
1036     gct->evac_gen_no = 0;
1037     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1038                          rtsTrue/*prune sparks*/);
1039     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1040
1041     scavenge_until_all_done();
1042
1043 #ifdef THREADED_RTS
1044     // Now that the whole heap is marked, we discard any sparks that
1045     // were found to be unreachable.  The main GC thread is currently
1046     // marking heap reachable via weak pointers, so it is
1047     // non-deterministic whether a spark will be retained if it is
1048     // only reachable via weak pointers.  To fix this problem would
1049     // require another GC barrier, which is too high a price.
1050     pruneSparkQueue(cap);
1051 #endif
1052
1053 #ifdef USE_PAPI
1054     // count events in this thread towards the GC totals
1055     papi_thread_stop_gc1_count(gct->papi_events);
1056 #endif
1057
1058     // Wait until we're told to continue
1059     RELEASE_SPIN_LOCK(&gct->gc_spin);
1060     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1061     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1062                gct->thread_index);
1063     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1064     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1065
1066     SET_GCT(saved_gct);
1067 }
1068
1069 #endif
1070
1071 #if defined(THREADED_RTS)
1072
1073 void
1074 waitForGcThreads (Capability *cap USED_IF_THREADS)
1075 {
1076     const nat n_threads = RtsFlags.ParFlags.nNodes;
1077     const nat me = cap->no;
1078     nat i, j;
1079     rtsBool retry = rtsTrue;
1080
1081     while(retry) {
1082         for (i=0; i < n_threads; i++) {
1083             if (i == me) continue;
1084             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1085                 prodCapability(&capabilities[i], cap->running_task);
1086             }
1087         }
1088         for (j=0; j < 10; j++) {
1089             retry = rtsFalse;
1090             for (i=0; i < n_threads; i++) {
1091                 if (i == me) continue;
1092                 write_barrier();
1093                 setContextSwitches();
1094                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1095                     retry = rtsTrue;
1096                 }
1097             }
1098             if (!retry) break;
1099             yieldThread();
1100         }
1101     }
1102 }
1103
1104 #endif // THREADED_RTS
1105
1106 static void
1107 start_gc_threads (void)
1108 {
1109 #if defined(THREADED_RTS)
1110     gc_running_threads = 0;
1111 #endif
1112 }
1113
1114 static void
1115 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1116 {
1117 #if defined(THREADED_RTS)
1118     nat i;
1119     for (i=0; i < n_threads; i++) {
1120         if (i == me) continue;
1121         inc_running();
1122         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1123         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1124
1125         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1126         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1127         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1128     }
1129 #endif
1130 }
1131
1132 // After GC is complete, we must wait for all GC threads to enter the
1133 // standby state, otherwise they may still be executing inside
1134 // any_work(), and may even remain awake until the next GC starts.
1135 static void
1136 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1137 {
1138 #if defined(THREADED_RTS)
1139     nat i;
1140     for (i=0; i < n_threads; i++) {
1141         if (i == me) continue;
1142         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1143     }
1144 #endif
1145 }
1146
1147 #if defined(THREADED_RTS)
1148 void
1149 releaseGCThreads (Capability *cap USED_IF_THREADS)
1150 {
1151     const nat n_threads = RtsFlags.ParFlags.nNodes;
1152     const nat me = cap->no;
1153     nat i;
1154     for (i=0; i < n_threads; i++) {
1155         if (i == me) continue;
1156         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1157             barf("releaseGCThreads");
1158
1159         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1160         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1161         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1162     }
1163 }
1164 #endif
1165
1166 /* ----------------------------------------------------------------------------
1167    Initialise a generation that is to be collected
1168    ------------------------------------------------------------------------- */
1169
1170 static void
1171 prepare_collected_gen (generation *gen)
1172 {
1173     nat i, g, n;
1174     gen_workspace *ws;
1175     bdescr *bd, *next;
1176
1177     // Throw away the current mutable list.  Invariant: the mutable
1178     // list always has at least one block; this means we can avoid a
1179     // check for NULL in recordMutable().
1180     g = gen->no;
1181     if (g != 0) {
1182         for (i = 0; i < n_capabilities; i++) {
1183             freeChain(capabilities[i].mut_lists[g]);
1184             capabilities[i].mut_lists[g] = allocBlock();
1185         }
1186     }
1187
1188     gen = &generations[g];
1189     ASSERT(gen->no == g);
1190
1191     // we'll construct a new list of threads in this step
1192     // during GC, throw away the current list.
1193     gen->old_threads = gen->threads;
1194     gen->threads = END_TSO_QUEUE;
1195
1196     // deprecate the existing blocks
1197     gen->old_blocks   = gen->blocks;
1198     gen->n_old_blocks = gen->n_blocks;
1199     gen->blocks       = NULL;
1200     gen->n_blocks     = 0;
1201     gen->n_words      = 0;
1202     gen->live_estimate = 0;
1203
1204     // initialise the large object queues.
1205     ASSERT(gen->scavenged_large_objects == NULL);
1206     ASSERT(gen->n_scavenged_large_blocks == 0);
1207
1208     // grab all the partial blocks stashed in the gc_thread workspaces and
1209     // move them to the old_blocks list of this gen.
1210     for (n = 0; n < n_capabilities; n++) {
1211         ws = &gc_threads[n]->gens[gen->no];
1212
1213         for (bd = ws->part_list; bd != NULL; bd = next) {
1214             next = bd->link;
1215             bd->link = gen->old_blocks;
1216             gen->old_blocks = bd;
1217             gen->n_old_blocks += bd->blocks;
1218         }
1219         ws->part_list = NULL;
1220         ws->n_part_blocks = 0;
1221
1222         ASSERT(ws->scavd_list == NULL);
1223         ASSERT(ws->n_scavd_blocks == 0);
1224
1225         if (ws->todo_free != ws->todo_bd->start) {
1226             ws->todo_bd->free = ws->todo_free;
1227             ws->todo_bd->link = gen->old_blocks;
1228             gen->old_blocks = ws->todo_bd;
1229             gen->n_old_blocks += ws->todo_bd->blocks;
1230             alloc_todo_block(ws,0); // always has one block.
1231         }
1232     }
1233
1234     // mark the small objects as from-space
1235     for (bd = gen->old_blocks; bd; bd = bd->link) {
1236         bd->flags &= ~BF_EVACUATED;
1237     }
1238
1239     // mark the large objects as from-space
1240     for (bd = gen->large_objects; bd; bd = bd->link) {
1241         bd->flags &= ~BF_EVACUATED;
1242     }
1243
1244     // for a compacted generation, we need to allocate the bitmap
1245     if (gen->mark) {
1246         nat bitmap_size; // in bytes
1247         bdescr *bitmap_bdescr;
1248         StgWord *bitmap;
1249
1250         bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1251
1252         if (bitmap_size > 0) {
1253             bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1254                                        / BLOCK_SIZE);
1255             gen->bitmap = bitmap_bdescr;
1256             bitmap = bitmap_bdescr->start;
1257
1258             debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1259                        bitmap_size, bitmap);
1260
1261             // don't forget to fill it with zeros!
1262             memset(bitmap, 0, bitmap_size);
1263
1264             // For each block in this step, point to its bitmap from the
1265             // block descriptor.
1266             for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1267                 bd->u.bitmap = bitmap;
1268                 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1269
1270                 // Also at this point we set the BF_MARKED flag
1271                 // for this block.  The invariant is that
1272                 // BF_MARKED is always unset, except during GC
1273                 // when it is set on those blocks which will be
1274                 // compacted.
1275                 if (!(bd->flags & BF_FRAGMENTED)) {
1276                     bd->flags |= BF_MARKED;
1277                 }
1278
1279                 // BF_SWEPT should be marked only for blocks that are being
1280                 // collected in sweep()
1281                 bd->flags &= ~BF_SWEPT;
1282             }
1283         }
1284     }
1285 }
1286
1287
1288 /* ----------------------------------------------------------------------------
1289    Save the mutable lists in saved_mut_lists
1290    ------------------------------------------------------------------------- */
1291
1292 static void
1293 stash_mut_list (Capability *cap, nat gen_no)
1294 {
1295     cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1296     cap->mut_lists[gen_no] = allocBlock_sync();
1297 }
1298
1299 /* ----------------------------------------------------------------------------
1300    Initialise a generation that is *not* to be collected
1301    ------------------------------------------------------------------------- */
1302
1303 static void
1304 prepare_uncollected_gen (generation *gen)
1305 {
1306     nat i;
1307
1308
1309     ASSERT(gen->no > 0);
1310
1311     // save the current mutable lists for this generation, and
1312     // allocate a fresh block for each one.  We'll traverse these
1313     // mutable lists as roots early on in the GC.
1314     for (i = 0; i < n_capabilities; i++) {
1315         stash_mut_list(&capabilities[i], gen->no);
1316     }
1317
1318     ASSERT(gen->scavenged_large_objects == NULL);
1319     ASSERT(gen->n_scavenged_large_blocks == 0);
1320 }
1321
1322 /* -----------------------------------------------------------------------------
1323    Collect the completed blocks from a GC thread and attach them to
1324    the generation.
1325    -------------------------------------------------------------------------- */
1326
1327 static void
1328 collect_gct_blocks (void)
1329 {
1330     nat g;
1331     gen_workspace *ws;
1332     bdescr *bd, *prev;
1333
1334     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1335         ws = &gct->gens[g];
1336
1337         // there may still be a block attached to ws->todo_bd;
1338         // leave it there to use next time.
1339
1340         if (ws->scavd_list != NULL) {
1341             ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1342
1343             ASSERT(gct->scan_bd == NULL);
1344             ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1345
1346             prev = NULL;
1347             for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1348                 ws->gen->n_words += bd->free - bd->start;
1349                 prev = bd;
1350             }
1351             if (prev != NULL) {
1352                 prev->link = ws->gen->blocks;
1353                 ws->gen->blocks = ws->scavd_list;
1354             }
1355             ws->gen->n_blocks += ws->n_scavd_blocks;
1356
1357             ws->scavd_list = NULL;
1358             ws->n_scavd_blocks = 0;
1359
1360             RELEASE_SPIN_LOCK(&ws->gen->sync);
1361         }
1362     }
1363 }
1364
1365 /* -----------------------------------------------------------------------------
1366    Initialise a gc_thread before GC
1367    -------------------------------------------------------------------------- */
1368
1369 static void
1370 init_gc_thread (gc_thread *t)
1371 {
1372     t->static_objects = END_OF_STATIC_LIST;
1373     t->scavenged_static_objects = END_OF_STATIC_LIST;
1374     t->scan_bd = NULL;
1375     t->mut_lists = capabilities[t->thread_index].mut_lists;
1376     t->evac_gen_no = 0;
1377     t->failed_to_evac = rtsFalse;
1378     t->eager_promotion = rtsTrue;
1379     t->thunk_selector_depth = 0;
1380     t->copied = 0;
1381     t->scanned = 0;
1382     t->any_work = 0;
1383     t->no_work = 0;
1384     t->scav_find_work = 0;
1385 }
1386
1387 /* -----------------------------------------------------------------------------
1388    Function we pass to evacuate roots.
1389    -------------------------------------------------------------------------- */
1390
1391 static void
1392 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1393 {
1394     // we stole a register for gct, but this function is called from
1395     // *outside* the GC where the register variable is not in effect,
1396     // so we need to save and restore it here.  NB. only call
1397     // mark_root() from the main GC thread, otherwise gct will be
1398     // incorrect.
1399     gc_thread *saved_gct;
1400     saved_gct = gct;
1401     SET_GCT(user);
1402
1403     evacuate(root);
1404
1405     SET_GCT(saved_gct);
1406 }
1407
1408 /* -----------------------------------------------------------------------------
1409    Initialising the static object & mutable lists
1410    -------------------------------------------------------------------------- */
1411
1412 static void
1413 zero_static_object_list(StgClosure* first_static)
1414 {
1415   StgClosure* p;
1416   StgClosure* link;
1417   const StgInfoTable *info;
1418
1419   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1420     info = get_itbl(p);
1421     link = *STATIC_LINK(info, p);
1422     *STATIC_LINK(info,p) = NULL;
1423   }
1424 }
1425
1426 /* ----------------------------------------------------------------------------
1427    Reset the sizes of the older generations when we do a major
1428    collection.
1429
1430    CURRENT STRATEGY: make all generations except zero the same size.
1431    We have to stay within the maximum heap size, and leave a certain
1432    percentage of the maximum heap size available to allocate into.
1433    ------------------------------------------------------------------------- */
1434
1435 static void
1436 resize_generations (void)
1437 {
1438     nat g;
1439
1440     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1441         nat live, size, min_alloc, words;
1442         const nat max  = RtsFlags.GcFlags.maxHeapSize;
1443         const nat gens = RtsFlags.GcFlags.generations;
1444
1445         // live in the oldest generations
1446         if (oldest_gen->live_estimate != 0) {
1447             words = oldest_gen->live_estimate;
1448         } else {
1449             words = oldest_gen->n_words;
1450         }
1451         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1452             oldest_gen->n_large_blocks;
1453
1454         // default max size for all generations except zero
1455         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1456                        RtsFlags.GcFlags.minOldGenSize);
1457
1458         if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1459             RtsFlags.GcFlags.heapSizeSuggestion = size;
1460         }
1461
1462         // minimum size for generation zero
1463         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1464                             RtsFlags.GcFlags.minAllocAreaSize);
1465
1466         // Auto-enable compaction when the residency reaches a
1467         // certain percentage of the maximum heap size (default: 30%).
1468         if (RtsFlags.GcFlags.compact ||
1469             (max > 0 &&
1470              oldest_gen->n_blocks >
1471              (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1472             oldest_gen->mark = 1;
1473             oldest_gen->compact = 1;
1474 //        debugBelch("compaction: on\n", live);
1475         } else {
1476             oldest_gen->mark = 0;
1477             oldest_gen->compact = 0;
1478 //        debugBelch("compaction: off\n", live);
1479         }
1480
1481         if (RtsFlags.GcFlags.sweep) {
1482             oldest_gen->mark = 1;
1483         }
1484
1485         // if we're going to go over the maximum heap size, reduce the
1486         // size of the generations accordingly.  The calculation is
1487         // different if compaction is turned on, because we don't need
1488         // to double the space required to collect the old generation.
1489         if (max != 0) {
1490
1491             // this test is necessary to ensure that the calculations
1492             // below don't have any negative results - we're working
1493             // with unsigned values here.
1494             if (max < min_alloc) {
1495                 heapOverflow();
1496             }
1497
1498             if (oldest_gen->compact) {
1499                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1500                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1501                 }
1502             } else {
1503                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1504                     size = (max - min_alloc) / ((gens - 1) * 2);
1505                 }
1506             }
1507
1508             if (size < live) {
1509                 heapOverflow();
1510             }
1511         }
1512
1513 #if 0
1514         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1515                    min_alloc, size, max);
1516 #endif
1517
1518         for (g = 0; g < gens; g++) {
1519             generations[g].max_blocks = size;
1520         }
1521     }
1522 }
1523
1524 /* -----------------------------------------------------------------------------
1525    Calculate the new size of the nursery, and resize it.
1526    -------------------------------------------------------------------------- */
1527
1528 static void
1529 resize_nursery (void)
1530 {
1531     const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1532
1533     if (RtsFlags.GcFlags.generations == 1)
1534     {   // Two-space collector:
1535         nat blocks;
1536
1537         /* set up a new nursery.  Allocate a nursery size based on a
1538          * function of the amount of live data (by default a factor of 2)
1539          * Use the blocks from the old nursery if possible, freeing up any
1540          * left over blocks.
1541          *
1542          * If we get near the maximum heap size, then adjust our nursery
1543          * size accordingly.  If the nursery is the same size as the live
1544          * data (L), then we need 3L bytes.  We can reduce the size of the
1545          * nursery to bring the required memory down near 2L bytes.
1546          *
1547          * A normal 2-space collector would need 4L bytes to give the same
1548          * performance we get from 3L bytes, reducing to the same
1549          * performance at 2L bytes.
1550          */
1551         blocks = generations[0].n_blocks;
1552
1553         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1554              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1555              RtsFlags.GcFlags.maxHeapSize )
1556         {
1557             long adjusted_blocks;  // signed on purpose
1558             int pc_free;
1559
1560             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1561
1562             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1563                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1564
1565             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1566             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1567             {
1568                 heapOverflow();
1569             }
1570             blocks = adjusted_blocks;
1571         }
1572         else
1573         {
1574             blocks *= RtsFlags.GcFlags.oldGenFactor;
1575             if (blocks < min_nursery)
1576             {
1577                 blocks = min_nursery;
1578             }
1579         }
1580         resizeNurseries(blocks);
1581     }
1582     else  // Generational collector
1583     {
1584         /*
1585          * If the user has given us a suggested heap size, adjust our
1586          * allocation area to make best use of the memory available.
1587          */
1588         if (RtsFlags.GcFlags.heapSizeSuggestion)
1589         {
1590             long blocks;
1591             const nat needed = calcNeeded();    // approx blocks needed at next GC
1592
1593             /* Guess how much will be live in generation 0 step 0 next time.
1594              * A good approximation is obtained by finding the
1595              * percentage of g0 that was live at the last minor GC.
1596              *
1597              * We have an accurate figure for the amount of copied data in
1598              * 'copied', but we must convert this to a number of blocks, with
1599              * a small adjustment for estimated slop at the end of a block
1600              * (- 10 words).
1601              */
1602             if (N == 0)
1603             {
1604                 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1605                     / countNurseryBlocks();
1606             }
1607
1608             /* Estimate a size for the allocation area based on the
1609              * information available.  We might end up going slightly under
1610              * or over the suggested heap size, but we should be pretty
1611              * close on average.
1612              *
1613              * Formula:            suggested - needed
1614              *                ----------------------------
1615              *                    1 + g0_pcnt_kept/100
1616              *
1617              * where 'needed' is the amount of memory needed at the next
1618              * collection for collecting all gens except g0.
1619              */
1620             blocks =
1621                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1622                 (100 + (long)g0_pcnt_kept);
1623
1624             if (blocks < (long)min_nursery) {
1625                 blocks = min_nursery;
1626             }
1627
1628             resizeNurseries((nat)blocks);
1629         }
1630         else
1631         {
1632             // we might have added extra large blocks to the nursery, so
1633             // resize back to minAllocAreaSize again.
1634             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1635         }
1636     }
1637 }
1638
1639 /* -----------------------------------------------------------------------------
1640    Sanity code for CAF garbage collection.
1641
1642    With DEBUG turned on, we manage a CAF list in addition to the SRT
1643    mechanism.  After GC, we run down the CAF list and blackhole any
1644    CAFs which have been garbage collected.  This means we get an error
1645    whenever the program tries to enter a garbage collected CAF.
1646
1647    Any garbage collected CAFs are taken off the CAF list at the same
1648    time.
1649    -------------------------------------------------------------------------- */
1650
1651 #if 0 && defined(DEBUG)
1652
1653 static void
1654 gcCAFs(void)
1655 {
1656   StgClosure*  p;
1657   StgClosure** pp;
1658   const StgInfoTable *info;
1659   nat i;
1660
1661   i = 0;
1662   p = caf_list;
1663   pp = &caf_list;
1664
1665   while (p != NULL) {
1666
1667     info = get_itbl(p);
1668
1669     ASSERT(info->type == IND_STATIC);
1670
1671     if (STATIC_LINK(info,p) == NULL) {
1672         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1673         // black hole it
1674         SET_INFO(p,&stg_BLACKHOLE_info);
1675         p = STATIC_LINK2(info,p);
1676         *pp = p;
1677     }
1678     else {
1679       pp = &STATIC_LINK2(info,p);
1680       p = *pp;
1681       i++;
1682     }
1683
1684   }
1685
1686   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1687 }
1688 #endif