rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "Compact.h"
  44 #include "Evac.h"
  45 #include "Scav.h"
  46 #include "GCUtils.h"
  47 #include "MarkStack.h"
  48 #include "MarkWeak.h"
  49 #include "Sparks.h"
  50 #include "Sweep.h"
  51
  52 #include <string.h> // for memset()
  53 #include <unistd.h>
  54
  55 /* -----------------------------------------------------------------------------
  56    Global variables
  57    -------------------------------------------------------------------------- */
  58
  59 /* STATIC OBJECT LIST.
  60  *
  61  * During GC:
  62  * We maintain a linked list of static objects that are still live.
  63  * The requirements for this list are:
  64  *
  65  *  - we need to scan the list while adding to it, in order to
  66  *    scavenge all the static objects (in the same way that
  67  *    breadth-first scavenging works for dynamic objects).
  68  *
  69  *  - we need to be able to tell whether an object is already on
  70  *    the list, to break loops.
  71  *
  72  * Each static object has a "static link field", which we use for
  73  * linking objects on to the list.  We use a stack-type list, consing
  74  * objects on the front as they are added (this means that the
  75  * scavenge phase is depth-first, not breadth-first, but that
  76  * shouldn't matter).
  77  *
  78  * A separate list is kept for objects that have been scavenged
  79  * already - this is so that we can zero all the marks afterwards.
  80  *
  81  * An object is on the list if its static link field is non-zero; this
  82  * means that we have to mark the end of the list with '1', not NULL.
  83  *
  84  * Extra notes for generational GC:
  85  *
  86  * Each generation has a static object list associated with it.  When
  87  * collecting generations up to N, we treat the static object lists
  88  * from generations > N as roots.
  89  *
  90  * We build up a static object list while collecting generations 0..N,
  91  * which is then appended to the static object list of generation N+1.
  92  */
  93
  94 /* N is the oldest generation being collected, where the generations
  95  * are numbered starting at 0.  A major GC (indicated by the major_gc
  96  * flag) is when we're collecting all generations.  We only attempt to
  97  * deal with static objects and GC CAFs when doing a major GC.
  98  */
  99 nat N;
 100 rtsBool major_gc;
 101
 102 /* Data used for allocation area sizing.
 103  */
 104 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
 105
 106 /* Mut-list stats */
 107 #ifdef DEBUG
 108 nat mutlist_MUTVARS,
 109     mutlist_MUTARRS,
 110     mutlist_MVARS,
 111     mutlist_OTHERS;
 112 #endif
 113
 114 /* Thread-local data for each GC thread
 115  */
 116 gc_thread **gc_threads = NULL;
 117
 118 #if !defined(THREADED_RTS)
 119 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
 120 #endif
 121
 122 // Number of threads running in *this* GC.  Affects how many
 123 // step->todos[] lists we have to look in to find work.
 124 nat n_gc_threads;
 125
 126 // For stats:
 127 long copied;        // *words* copied & scavenged during this GC
 128
 129 rtsBool work_stealing;
 130
 131 DECLARE_GCT
 132
 133 /* -----------------------------------------------------------------------------
 134    Static function declarations
 135    -------------------------------------------------------------------------- */
 136
 137 static void mark_root               (void *user, StgClosure **root);
 138 static void zero_static_object_list (StgClosure* first_static);
 139 static nat  initialise_N            (rtsBool force_major_gc);
 140 static void prepare_collected_gen   (generation *gen);
 141 static void prepare_uncollected_gen (generation *gen);
 142 static void init_gc_thread          (gc_thread *t);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void scavenge_until_all_done (void);
 147 static StgWord inc_running          (void);
 148 static StgWord dec_running          (void);
 149 static void wakeup_gc_threads       (nat n_threads, nat me);
 150 static void shutdown_gc_threads     (nat n_threads, nat me);
 151 static void collect_gct_blocks      (void);
 152
 153 #if 0 && defined(DEBUG)
 154 static void gcCAFs                  (void);
 155 #endif
 156
 157 /* -----------------------------------------------------------------------------
 158    The mark stack.
 159    -------------------------------------------------------------------------- */
 160
 161 bdescr *mark_stack_top_bd; // topmost block in the mark stack
 162 bdescr *mark_stack_bd;     // current block in the mark stack
 163 StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
 164
 165 /* -----------------------------------------------------------------------------
 166    GarbageCollect: the main entry point to the garbage collector.
 167
 168    Locks held: all capabilities are held throughout GarbageCollect().
 169    -------------------------------------------------------------------------- */
 170
 171 void
 172 GarbageCollect (rtsBool force_major_gc,
 173                 nat gc_type USED_IF_THREADS,
 174                 Capability *cap)
 175 {
 176   bdescr *bd;
 177   generation *gen;
 178   lnat live_blocks, live_words, allocated, max_copied, avg_copied;
 179   gc_thread *saved_gct;
 180   nat g, t, n;
 181
 182   // necessary if we stole a callee-saves register for gct:
 183   saved_gct = gct;
 184
 185 #ifdef PROFILING
 186   CostCentreStack *prev_CCS;
 187 #endif
 188
 189   ACQUIRE_SM_LOCK;
 190
 191 #if defined(RTS_USER_SIGNALS)
 192   if (RtsFlags.MiscFlags.install_signal_handlers) {
 193     // block signals
 194     blockUserSignals();
 195   }
 196 #endif
 197
 198   ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
 199   // otherwise adjust the padding in gen_workspace.
 200
 201   // tell the stats department that we've started a GC
 202   stat_startGC();
 203
 204   // tell the STM to discard any cached closures it's hoping to re-use
 205   stmPreGCHook();
 206
 207   // lock the StablePtr table
 208   stablePtrPreGC();
 209
 210 #ifdef DEBUG
 211   mutlist_MUTVARS = 0;
 212   mutlist_MUTARRS = 0;
 213   mutlist_OTHERS = 0;
 214 #endif
 215
 216   // attribute any costs to CCS_GC
 217 #ifdef PROFILING
 218   prev_CCS = CCCS;
 219   CCCS = CCS_GC;
 220 #endif
 221
 222   /* Approximate how much we allocated.
 223    * Todo: only when generating stats?
 224    */
 225   allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
 226
 227   /* Figure out which generation to collect
 228    */
 229   n = initialise_N(force_major_gc);
 230
 231 #if defined(THREADED_RTS)
 232   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
 233                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
 234       // It's not always a good idea to do load balancing in parallel
 235       // GC.  In particular, for a parallel program we don't want to
 236       // lose locality by moving cached data into another CPU's cache
 237       // (this effect can be quite significant).
 238       //
 239       // We could have a more complex way to deterimine whether to do
 240       // work stealing or not, e.g. it might be a good idea to do it
 241       // if the heap is big.  For now, we just turn it on or off with
 242       // a flag.
 243 #endif
 244
 245   /* Start threads, so they can be spinning up while we finish initialisation.
 246    */
 247   start_gc_threads();
 248
 249 #if defined(THREADED_RTS)
 250   /* How many threads will be participating in this GC?
 251    * We don't try to parallelise minor GCs (unless the user asks for
 252    * it with +RTS -gn0), or mark/compact/sweep GC.
 253    */
 254   if (gc_type == PENDING_GC_PAR) {
 255       n_gc_threads = RtsFlags.ParFlags.nNodes;
 256   } else {
 257       n_gc_threads = 1;
 258   }
 259 #else
 260   n_gc_threads = 1;
 261 #endif
 262
 263   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 264         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 265
 266 #ifdef RTS_GTK_FRONTPANEL
 267   if (RtsFlags.GcFlags.frontpanel) {
 268       updateFrontPanelBeforeGC(N);
 269   }
 270 #endif
 271
 272 #ifdef DEBUG
 273   // check for memory leaks if DEBUG is on
 274   memInventory(DEBUG_gc);
 275 #endif
 276
 277   // check sanity *before* GC
 278   IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
 279
 280   // Initialise all our gc_thread structures
 281   for (t = 0; t < n_gc_threads; t++) {
 282       init_gc_thread(gc_threads[t]);
 283   }
 284
 285   // Initialise all the generations/steps that we're collecting.
 286   for (g = 0; g <= N; g++) {
 287       prepare_collected_gen(&generations[g]);
 288   }
 289   // Initialise all the generations/steps that we're *not* collecting.
 290   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 291       prepare_uncollected_gen(&generations[g]);
 292   }
 293
 294   /* Allocate a mark stack if we're doing a major collection.
 295    */
 296   if (major_gc && oldest_gen->mark) {
 297       mark_stack_bd     = allocBlock();
 298       mark_stack_top_bd = mark_stack_bd;
 299       mark_stack_bd->link = NULL;
 300       mark_stack_bd->u.back = NULL;
 301       mark_sp           = mark_stack_bd->start;
 302   } else {
 303       mark_stack_bd     = NULL;
 304       mark_stack_top_bd = NULL;
 305       mark_sp           = NULL;
 306   }
 307
 308   // this is the main thread
 309 #ifdef THREADED_RTS
 310   if (n_gc_threads == 1) {
 311       SET_GCT(gc_threads[0]);
 312   } else {
 313       SET_GCT(gc_threads[cap->no]);
 314   }
 315 #else
 316 SET_GCT(gc_threads[0]);
 317 #endif
 318
 319   /* -----------------------------------------------------------------------
 320    * follow all the roots that we know about:
 321    */
 322
 323   // the main thread is running: this prevents any other threads from
 324   // exiting prematurely, so we can start them now.
 325   // NB. do this after the mutable lists have been saved above, otherwise
 326   // the other GC threads will be writing into the old mutable lists.
 327   inc_running();
 328   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 329
 330   // scavenge the capability-private mutable lists.  This isn't part
 331   // of markSomeCapabilities() because markSomeCapabilities() can only
 332   // call back into the GC via mark_root() (due to the gct register
 333   // variable).
 334   if (n_gc_threads == 1) {
 335       for (n = 0; n < n_capabilities; n++) {
 336 #if defined(THREADED_RTS)
 337           scavenge_capability_mut_Lists1(&capabilities[n]);
 338 #else
 339           scavenge_capability_mut_lists(&capabilities[n]);
 340 #endif
 341       }
 342   } else {
 343       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 344   }
 345
 346   // follow roots from the CAF list (used by GHCi)
 347   gct->evac_gen_no = 0;
 348   markCAFs(mark_root, gct);
 349
 350   // follow all the roots that the application knows about.
 351   gct->evac_gen_no = 0;
 352   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 353                        rtsTrue/*prune sparks*/);
 354
 355 #if defined(RTS_USER_SIGNALS)
 356   // mark the signal handlers (signals should be already blocked)
 357   markSignalHandlers(mark_root, gct);
 358 #endif
 359
 360   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 361   markWeakPtrList();
 362   initWeakForGC();
 363
 364   // Mark the stable pointer table.
 365   markStablePtrTable(mark_root, gct);
 366
 367   /* -------------------------------------------------------------------------
 368    * Repeatedly scavenge all the areas we know about until there's no
 369    * more scavenging to be done.
 370    */
 371   for (;;)
 372   {
 373       scavenge_until_all_done();
 374       // The other threads are now stopped.  We might recurse back to
 375       // here, but from now on this is the only thread.
 376
 377       // must be last...  invariant is that everything is fully
 378       // scavenged at this point.
 379       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 380           inc_running();
 381           continue;
 382       }
 383
 384       // If we get to here, there's really nothing left to do.
 385       break;
 386   }
 387
 388   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 389
 390   // Now see which stable names are still alive.
 391   gcStablePtrTable();
 392
 393 #ifdef THREADED_RTS
 394   if (n_gc_threads == 1) {
 395       for (n = 0; n < n_capabilities; n++) {
 396           pruneSparkQueue(&capabilities[n]);
 397       }
 398   } else {
 399       pruneSparkQueue(&capabilities[gct->thread_index]);
 400   }
 401 #endif
 402
 403 #ifdef PROFILING
 404   // We call processHeapClosureForDead() on every closure destroyed during
 405   // the current garbage collection, so we invoke LdvCensusForDead().
 406   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 407       || RtsFlags.ProfFlags.bioSelector != NULL)
 408     LdvCensusForDead(N);
 409 #endif
 410
 411   // NO MORE EVACUATION AFTER THIS POINT!
 412
 413   // Two-space collector: free the old to-space.
 414   // g0->old_blocks is the old nursery
 415   // g0->blocks is to-space from the previous GC
 416   if (RtsFlags.GcFlags.generations == 1) {
 417       if (g0->blocks != NULL) {
 418           freeChain(g0->blocks);
 419           g0->blocks = NULL;
 420       }
 421   }
 422
 423   // Finally: compact or sweep the oldest generation.
 424   if (major_gc && oldest_gen->mark) {
 425       if (oldest_gen->compact)
 426           compact(gct->scavenged_static_objects);
 427       else
 428           sweep(oldest_gen);
 429   }
 430
 431   copied = 0;
 432   max_copied = 0;
 433   avg_copied = 0;
 434   {
 435       nat i;
 436       for (i=0; i < n_gc_threads; i++) {
 437           if (n_gc_threads > 1) {
 438               debugTrace(DEBUG_gc,"thread %d:", i);
 439               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 440               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 441               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 442               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 443               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 444           }
 445           copied += gc_threads[i]->copied;
 446           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 447       }
 448       if (n_gc_threads == 1) {
 449           max_copied = 0;
 450           avg_copied = 0;
 451       } else {
 452           avg_copied = copied;
 453       }
 454   }
 455
 456   // Run through all the generations/steps and tidy up.
 457   // We're going to:
 458   //   - count the amount of "live" data (live_words, live_blocks)
 459   //   - count the amount of "copied" data in this GC (copied)
 460   //   - free from-space
 461   //   - make to-space the new from-space (set BF_EVACUATED on all blocks)
 462   //
 463   live_words = 0;
 464   live_blocks = 0;
 465
 466   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 467
 468     if (g == N) {
 469       generations[g].collections++; // for stats
 470       if (n_gc_threads > 1) generations[g].par_collections++;
 471     }
 472
 473     // Count the mutable list as bytes "copied" for the purposes of
 474     // stats.  Every mutable list is copied during every GC.
 475     if (g > 0) {
 476         nat mut_list_size = 0;
 477         for (n = 0; n < n_capabilities; n++) {
 478             mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
 479         }
 480         copied +=  mut_list_size;
 481
 482         debugTrace(DEBUG_gc,
 483                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 484                    (unsigned long)(mut_list_size * sizeof(W_)),
 485                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 486     }
 487
 488     bdescr *next, *prev;
 489     gen = &generations[g];
 490
 491     // for generations we collected...
 492     if (g <= N) {
 493
 494         /* free old memory and shift to-space into from-space for all
 495          * the collected steps (except the allocation area).  These
 496          * freed blocks will probaby be quickly recycled.
 497          */
 498         if (gen->mark)
 499         {
 500             // tack the new blocks on the end of the existing blocks
 501             if (gen->old_blocks != NULL) {
 502
 503                 prev = NULL;
 504                 for (bd = gen->old_blocks; bd != NULL; bd = next) {
 505
 506                     next = bd->link;
 507
 508                     if (!(bd->flags & BF_MARKED))
 509                     {
 510                         if (prev == NULL) {
 511                             gen->old_blocks = next;
 512                         } else {
 513                             prev->link = next;
 514                         }
 515                         freeGroup(bd);
 516                         gen->n_old_blocks--;
 517                     }
 518                     else
 519                     {
 520                         gen->n_words += bd->free - bd->start;
 521
 522                         // NB. this step might not be compacted next
 523                         // time, so reset the BF_MARKED flags.
 524                         // They are set before GC if we're going to
 525                         // compact.  (search for BF_MARKED above).
 526                         bd->flags &= ~BF_MARKED;
 527
 528                         // between GCs, all blocks in the heap except
 529                         // for the nursery have the BF_EVACUATED flag set.
 530                         bd->flags |= BF_EVACUATED;
 531
 532                         prev = bd;
 533                     }
 534                 }
 535
 536                 if (prev != NULL) {
 537                     prev->link = gen->blocks;
 538                     gen->blocks = gen->old_blocks;
 539                 }
 540             }
 541             // add the new blocks to the block tally
 542             gen->n_blocks += gen->n_old_blocks;
 543             ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
 544             ASSERT(countOccupied(gen->blocks) == gen->n_words);
 545         }
 546         else // not copacted
 547         {
 548             freeChain(gen->old_blocks);
 549         }
 550
 551         gen->old_blocks = NULL;
 552         gen->n_old_blocks = 0;
 553
 554         /* LARGE OBJECTS.  The current live large objects are chained on
 555          * scavenged_large, having been moved during garbage
 556          * collection from large_objects.  Any objects left on the
 557          * large_objects list are therefore dead, so we free them here.
 558          */
 559         freeChain(gen->large_objects);
 560         gen->large_objects  = gen->scavenged_large_objects;
 561         gen->n_large_blocks = gen->n_scavenged_large_blocks;
 562         gen->n_new_large_words = 0;
 563     }
 564     else // for generations > N
 565     {
 566         /* For older generations, we need to append the
 567          * scavenged_large_object list (i.e. large objects that have been
 568          * promoted during this GC) to the large_object list for that step.
 569          */
 570         for (bd = gen->scavenged_large_objects; bd; bd = next) {
 571             next = bd->link;
 572             dbl_link_onto(bd, &gen->large_objects);
 573         }
 574
 575         // add the new blocks we promoted during this GC
 576         gen->n_large_blocks += gen->n_scavenged_large_blocks;
 577     }
 578
 579     ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 580
 581     gen->scavenged_large_objects = NULL;
 582     gen->n_scavenged_large_blocks = 0;
 583
 584     // Count "live" data
 585     live_words  += genLiveWords(gen);
 586     live_blocks += genLiveBlocks(gen);
 587
 588     // add in the partial blocks in the gen_workspaces, but ignore gen 0
 589     // if this is a local GC (we can't count another capability's part_list)
 590     {
 591         nat i;
 592         for (i = 0; i < n_capabilities; i++) {
 593             live_words  += gcThreadLiveWords(i, gen->no);
 594             live_blocks += gcThreadLiveBlocks(i, gen->no);
 595         }
 596     }
 597   } // for all generations
 598
 599   // update the max size of older generations after a major GC
 600   resize_generations();
 601
 602   // Start a new pinned_object_block
 603   for (n = 0; n < n_capabilities; n++) {
 604       capabilities[n].pinned_object_block = NULL;
 605   }
 606
 607   // Free the mark stack.
 608   if (mark_stack_top_bd != NULL) {
 609       debugTrace(DEBUG_gc, "mark stack: %d blocks",
 610                  countBlocks(mark_stack_top_bd));
 611       freeChain(mark_stack_top_bd);
 612   }
 613
 614   // Free any bitmaps.
 615   for (g = 0; g <= N; g++) {
 616       gen = &generations[g];
 617       if (gen->bitmap != NULL) {
 618           freeGroup(gen->bitmap);
 619           gen->bitmap = NULL;
 620       }
 621   }
 622
 623   // Reset the nursery: make the blocks empty
 624   allocated += clearNurseries();
 625
 626   resize_nursery();
 627
 628   resetNurseries();
 629
 630  // mark the garbage collected CAFs as dead
 631 #if 0 && defined(DEBUG) // doesn't work at the moment
 632   if (major_gc) { gcCAFs(); }
 633 #endif
 634
 635 #ifdef PROFILING
 636   // resetStaticObjectForRetainerProfiling() must be called before
 637   // zeroing below.
 638   if (n_gc_threads > 1) {
 639       barf("profiling is currently broken with multi-threaded GC");
 640       // ToDo: fix the gct->scavenged_static_objects below
 641   }
 642   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 643 #endif
 644
 645   // zero the scavenged static object list
 646   if (major_gc) {
 647       nat i;
 648       for (i = 0; i < n_gc_threads; i++) {
 649           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 650       }
 651   }
 652
 653   // Update the stable pointer hash table.
 654   updateStablePtrTable(major_gc);
 655
 656   // unlock the StablePtr table.  Must be before scheduleFinalizers(),
 657   // because a finalizer may call hs_free_fun_ptr() or
 658   // hs_free_stable_ptr(), both of which access the StablePtr table.
 659   stablePtrPostGC();
 660
 661   // Start any pending finalizers.  Must be after
 662   // updateStablePtrTable() and stablePtrPostGC() (see #4221).
 663   RELEASE_SM_LOCK;
 664   scheduleFinalizers(cap, old_weak_ptr_list);
 665   ACQUIRE_SM_LOCK;
 666
 667   // check sanity after GC
 668   // before resurrectThreads(), because that might overwrite some
 669   // closures, which will cause problems with THREADED where we don't
 670   // fill slop.
 671   IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
 672
 673   // send exceptions to any threads which were about to die
 674   RELEASE_SM_LOCK;
 675   resurrectThreads(resurrected_threads);
 676   ACQUIRE_SM_LOCK;
 677
 678   if (major_gc) {
 679       nat need, got;
 680       need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
 681       got = mblocks_allocated;
 682       /* If the amount of data remains constant, next major GC we'll
 683          require (F+1)*need. We leave (F+2)*need in order to reduce
 684          repeated deallocation and reallocation. */
 685       need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
 686       if (got > need) {
 687           returnMemoryToOS(got - need);
 688       }
 689   }
 690
 691   // extra GC trace info
 692   IF_DEBUG(gc, statDescribeGens());
 693
 694 #ifdef DEBUG
 695   // symbol-table based profiling
 696   /*  heapCensus(to_blocks); */ /* ToDo */
 697 #endif
 698
 699   // restore enclosing cost centre
 700 #ifdef PROFILING
 701   CCCS = prev_CCS;
 702 #endif
 703
 704 #ifdef DEBUG
 705   // check for memory leaks if DEBUG is on
 706   memInventory(DEBUG_gc);
 707 #endif
 708
 709 #ifdef RTS_GTK_FRONTPANEL
 710   if (RtsFlags.GcFlags.frontpanel) {
 711       updateFrontPanelAfterGC( N, live );
 712   }
 713 #endif
 714
 715   // ok, GC over: tell the stats department what happened.
 716   stat_endGC(allocated, live_words, copied, N, max_copied, avg_copied,
 717              live_blocks * BLOCK_SIZE_W - live_words /* slop */);
 718
 719   // Guess which generation we'll collect *next* time
 720   initialise_N(force_major_gc);
 721
 722 #if defined(RTS_USER_SIGNALS)
 723   if (RtsFlags.MiscFlags.install_signal_handlers) {
 724     // unblock signals again
 725     unblockUserSignals();
 726   }
 727 #endif
 728
 729   RELEASE_SM_LOCK;
 730
 731   SET_GCT(saved_gct);
 732 }
 733
 734 /* -----------------------------------------------------------------------------
 735    Figure out which generation to collect, initialise N and major_gc.
 736
 737    Also returns the total number of blocks in generations that will be
 738    collected.
 739    -------------------------------------------------------------------------- */
 740
 741 static nat
 742 initialise_N (rtsBool force_major_gc)
 743 {
 744     int g;
 745     nat blocks, blocks_total;
 746
 747     blocks = 0;
 748     blocks_total = 0;
 749
 750     if (force_major_gc) {
 751         N = RtsFlags.GcFlags.generations - 1;
 752     } else {
 753         N = 0;
 754     }
 755
 756     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 757
 758         blocks = generations[g].n_words / BLOCK_SIZE_W
 759                + generations[g].n_large_blocks;
 760
 761         if (blocks >= generations[g].max_blocks) {
 762             N = stg_max(N,g);
 763         }
 764         if ((nat)g <= N) {
 765             blocks_total += blocks;
 766         }
 767     }
 768
 769     blocks_total += countNurseryBlocks();
 770
 771     major_gc = (N == RtsFlags.GcFlags.generations-1);
 772     return blocks_total;
 773 }
 774
 775 /* -----------------------------------------------------------------------------
 776    Initialise the gc_thread structures.
 777    -------------------------------------------------------------------------- */
 778
 779 #define GC_THREAD_INACTIVE             0
 780 #define GC_THREAD_STANDING_BY          1
 781 #define GC_THREAD_RUNNING              2
 782 #define GC_THREAD_WAITING_TO_CONTINUE  3
 783
 784 static void
 785 new_gc_thread (nat n, gc_thread *t)
 786 {
 787     nat g;
 788     gen_workspace *ws;
 789
 790 #ifdef THREADED_RTS
 791     t->id = 0;
 792     initSpinLock(&t->gc_spin);
 793     initSpinLock(&t->mut_spin);
 794     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 795     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 796                           // thread to start up, see wakeup_gc_threads
 797 #endif
 798
 799     t->thread_index = n;
 800     t->free_blocks = NULL;
 801     t->gc_count = 0;
 802
 803     init_gc_thread(t);
 804
 805 #ifdef USE_PAPI
 806     t->papi_events = -1;
 807 #endif
 808
 809     for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 810     {
 811         ws = &t->gens[g];
 812         ws->gen = &generations[g];
 813         ASSERT(g == ws->gen->no);
 814         ws->my_gct = t;
 815
 816         // We want to call
 817         //   alloc_todo_block(ws,0);
 818         // but can't, because it uses gct which isn't set up at this point.
 819         // Hence, allocate a block for todo_bd manually:
 820         {
 821             bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
 822             initBdescr(bd, ws->gen, ws->gen->to);
 823             bd->flags = BF_EVACUATED;
 824             bd->u.scan = bd->free = bd->start;
 825
 826             ws->todo_bd = bd;
 827             ws->todo_free = bd->free;
 828             ws->todo_lim = bd->start + BLOCK_SIZE_W;
 829         }
 830
 831         ws->todo_q = newWSDeque(128);
 832         ws->todo_overflow = NULL;
 833         ws->n_todo_overflow = 0;
 834         ws->todo_large_objects = NULL;
 835
 836         ws->part_list = NULL;
 837         ws->n_part_blocks = 0;
 838
 839         ws->scavd_list = NULL;
 840         ws->n_scavd_blocks = 0;
 841     }
 842 }
 843
 844
 845 void
 846 initGcThreads (void)
 847 {
 848     if (gc_threads == NULL) {
 849 #if defined(THREADED_RTS)
 850         nat i;
 851         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 852                                      sizeof(gc_thread*),
 853                                      "alloc_gc_threads");
 854
 855         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 856             gc_threads[i] =
 857                 stgMallocBytes(sizeof(gc_thread) +
 858                                RtsFlags.GcFlags.generations * sizeof(gen_workspace),
 859                                "alloc_gc_threads");
 860
 861             new_gc_thread(i, gc_threads[i]);
 862         }
 863 #else
 864         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 865         gc_threads[0] = gct;
 866         new_gc_thread(0,gc_threads[0]);
 867 #endif
 868     }
 869 }
 870
 871 void
 872 freeGcThreads (void)
 873 {
 874     nat g;
 875     if (gc_threads != NULL) {
 876 #if defined(THREADED_RTS)
 877         nat i;
 878         for (i = 0; i < n_capabilities; i++) {
 879             for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 880             {
 881                 freeWSDeque(gc_threads[i]->gens[g].todo_q);
 882             }
 883             stgFree (gc_threads[i]);
 884         }
 885         stgFree (gc_threads);
 886 #else
 887         for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 888         {
 889             freeWSDeque(gc_threads[0]->gens[g].todo_q);
 890         }
 891         stgFree (gc_threads);
 892 #endif
 893         gc_threads = NULL;
 894     }
 895 }
 896
 897 /* ----------------------------------------------------------------------------
 898    Start GC threads
 899    ------------------------------------------------------------------------- */
 900
 901 static volatile StgWord gc_running_threads;
 902
 903 static StgWord
 904 inc_running (void)
 905 {
 906     StgWord new;
 907     new = atomic_inc(&gc_running_threads);
 908     ASSERT(new <= n_gc_threads);
 909     return new;
 910 }
 911
 912 static StgWord
 913 dec_running (void)
 914 {
 915     ASSERT(gc_running_threads != 0);
 916     return atomic_dec(&gc_running_threads);
 917 }
 918
 919 static rtsBool
 920 any_work (void)
 921 {
 922     int g;
 923     gen_workspace *ws;
 924
 925     gct->any_work++;
 926
 927     write_barrier();
 928
 929     // scavenge objects in compacted generation
 930     if (mark_stack_bd != NULL && !mark_stack_empty()) {
 931         return rtsTrue;
 932     }
 933
 934     // Check for global work in any step.  We don't need to check for
 935     // local work, because we have already exited scavenge_loop(),
 936     // which means there is no local work for this thread.
 937     for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
 938         ws = &gct->gens[g];
 939         if (ws->todo_large_objects) return rtsTrue;
 940         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 941         if (ws->todo_overflow) return rtsTrue;
 942     }
 943
 944 #if defined(THREADED_RTS)
 945     if (work_stealing) {
 946         nat n;
 947         // look for work to steal
 948         for (n = 0; n < n_gc_threads; n++) {
 949             if (n == gct->thread_index) continue;
 950             for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
 951                 ws = &gc_threads[n]->gens[g];
 952                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 953             }
 954         }
 955     }
 956 #endif
 957
 958     gct->no_work++;
 959 #if defined(THREADED_RTS)
 960     yieldThread();
 961 #endif
 962
 963     return rtsFalse;
 964 }
 965
 966 static void
 967 scavenge_until_all_done (void)
 968 {
 969     nat r;
 970
 971
 972 loop:
 973     traceEventGcWork(&capabilities[gct->thread_index]);
 974
 975 #if defined(THREADED_RTS)
 976     if (n_gc_threads > 1) {
 977         scavenge_loop();
 978     } else {
 979         scavenge_loop1();
 980     }
 981 #else
 982     scavenge_loop();
 983 #endif
 984
 985     collect_gct_blocks();
 986
 987     // scavenge_loop() only exits when there's no work to do
 988     r = dec_running();
 989
 990     traceEventGcIdle(&capabilities[gct->thread_index]);
 991
 992     debugTrace(DEBUG_gc, "%d GC threads still running", r);
 993
 994     while (gc_running_threads != 0) {
 995         // usleep(1);
 996         if (any_work()) {
 997             inc_running();
 998             goto loop;
 999         }
1000         // any_work() does not remove the work from the queue, it
1001         // just checks for the presence of work.  If we find any,
1002         // then we increment gc_running_threads and go back to
1003         // scavenge_loop() to perform any pending work.
1004     }
1005
1006     traceEventGcDone(&capabilities[gct->thread_index]);
1007 }
1008
1009 #if defined(THREADED_RTS)
1010
1011 void
1012 gcWorkerThread (Capability *cap)
1013 {
1014     gc_thread *saved_gct;
1015
1016     // necessary if we stole a callee-saves register for gct:
1017     saved_gct = gct;
1018
1019     gct = gc_threads[cap->no];
1020     gct->id = osThreadId();
1021
1022     // Wait until we're told to wake up
1023     RELEASE_SPIN_LOCK(&gct->mut_spin);
1024     gct->wakeup = GC_THREAD_STANDING_BY;
1025     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1026     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1027
1028 #ifdef USE_PAPI
1029     // start performance counters in this thread...
1030     if (gct->papi_events == -1) {
1031         papi_init_eventset(&gct->papi_events);
1032     }
1033     papi_thread_start_gc1_count(gct->papi_events);
1034 #endif
1035
1036     // Every thread evacuates some roots.
1037     gct->evac_gen_no = 0;
1038     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1039                          rtsTrue/*prune sparks*/);
1040     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1041
1042     scavenge_until_all_done();
1043
1044 #ifdef THREADED_RTS
1045     // Now that the whole heap is marked, we discard any sparks that
1046     // were found to be unreachable.  The main GC thread is currently
1047     // marking heap reachable via weak pointers, so it is
1048     // non-deterministic whether a spark will be retained if it is
1049     // only reachable via weak pointers.  To fix this problem would
1050     // require another GC barrier, which is too high a price.
1051     pruneSparkQueue(cap);
1052 #endif
1053
1054 #ifdef USE_PAPI
1055     // count events in this thread towards the GC totals
1056     papi_thread_stop_gc1_count(gct->papi_events);
1057 #endif
1058
1059     // Wait until we're told to continue
1060     RELEASE_SPIN_LOCK(&gct->gc_spin);
1061     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1062     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1063                gct->thread_index);
1064     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1065     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1066
1067     SET_GCT(saved_gct);
1068 }
1069
1070 #endif
1071
1072 #if defined(THREADED_RTS)
1073
1074 void
1075 waitForGcThreads (Capability *cap USED_IF_THREADS)
1076 {
1077     const nat n_threads = RtsFlags.ParFlags.nNodes;
1078     const nat me = cap->no;
1079     nat i, j;
1080     rtsBool retry = rtsTrue;
1081
1082     while(retry) {
1083         for (i=0; i < n_threads; i++) {
1084             if (i == me) continue;
1085             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1086                 prodCapability(&capabilities[i], cap->running_task);
1087             }
1088         }
1089         for (j=0; j < 10; j++) {
1090             retry = rtsFalse;
1091             for (i=0; i < n_threads; i++) {
1092                 if (i == me) continue;
1093                 write_barrier();
1094                 setContextSwitches();
1095                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1096                     retry = rtsTrue;
1097                 }
1098             }
1099             if (!retry) break;
1100             yieldThread();
1101         }
1102     }
1103 }
1104
1105 #endif // THREADED_RTS
1106
1107 static void
1108 start_gc_threads (void)
1109 {
1110 #if defined(THREADED_RTS)
1111     gc_running_threads = 0;
1112 #endif
1113 }
1114
1115 static void
1116 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1117 {
1118 #if defined(THREADED_RTS)
1119     nat i;
1120     for (i=0; i < n_threads; i++) {
1121         if (i == me) continue;
1122         inc_running();
1123         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1124         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1125
1126         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1127         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1128         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1129     }
1130 #endif
1131 }
1132
1133 // After GC is complete, we must wait for all GC threads to enter the
1134 // standby state, otherwise they may still be executing inside
1135 // any_work(), and may even remain awake until the next GC starts.
1136 static void
1137 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1138 {
1139 #if defined(THREADED_RTS)
1140     nat i;
1141     for (i=0; i < n_threads; i++) {
1142         if (i == me) continue;
1143         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1144     }
1145 #endif
1146 }
1147
1148 #if defined(THREADED_RTS)
1149 void
1150 releaseGCThreads (Capability *cap USED_IF_THREADS)
1151 {
1152     const nat n_threads = RtsFlags.ParFlags.nNodes;
1153     const nat me = cap->no;
1154     nat i;
1155     for (i=0; i < n_threads; i++) {
1156         if (i == me) continue;
1157         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1158             barf("releaseGCThreads");
1159
1160         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1161         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1162         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1163     }
1164 }
1165 #endif
1166
1167 /* ----------------------------------------------------------------------------
1168    Initialise a generation that is to be collected
1169    ------------------------------------------------------------------------- */
1170
1171 static void
1172 prepare_collected_gen (generation *gen)
1173 {
1174     nat i, g, n;
1175     gen_workspace *ws;
1176     bdescr *bd, *next;
1177
1178     // Throw away the current mutable list.  Invariant: the mutable
1179     // list always has at least one block; this means we can avoid a
1180     // check for NULL in recordMutable().
1181     g = gen->no;
1182     if (g != 0) {
1183         for (i = 0; i < n_capabilities; i++) {
1184             freeChain(capabilities[i].mut_lists[g]);
1185             capabilities[i].mut_lists[g] = allocBlock();
1186         }
1187     }
1188
1189     gen = &generations[g];
1190     ASSERT(gen->no == g);
1191
1192     // we'll construct a new list of threads in this step
1193     // during GC, throw away the current list.
1194     gen->old_threads = gen->threads;
1195     gen->threads = END_TSO_QUEUE;
1196
1197     // deprecate the existing blocks
1198     gen->old_blocks   = gen->blocks;
1199     gen->n_old_blocks = gen->n_blocks;
1200     gen->blocks       = NULL;
1201     gen->n_blocks     = 0;
1202     gen->n_words      = 0;
1203     gen->live_estimate = 0;
1204
1205     // initialise the large object queues.
1206     ASSERT(gen->scavenged_large_objects == NULL);
1207     ASSERT(gen->n_scavenged_large_blocks == 0);
1208
1209     // grab all the partial blocks stashed in the gc_thread workspaces and
1210     // move them to the old_blocks list of this gen.
1211     for (n = 0; n < n_capabilities; n++) {
1212         ws = &gc_threads[n]->gens[gen->no];
1213
1214         for (bd = ws->part_list; bd != NULL; bd = next) {
1215             next = bd->link;
1216             bd->link = gen->old_blocks;
1217             gen->old_blocks = bd;
1218             gen->n_old_blocks += bd->blocks;
1219         }
1220         ws->part_list = NULL;
1221         ws->n_part_blocks = 0;
1222
1223         ASSERT(ws->scavd_list == NULL);
1224         ASSERT(ws->n_scavd_blocks == 0);
1225
1226         if (ws->todo_free != ws->todo_bd->start) {
1227             ws->todo_bd->free = ws->todo_free;
1228             ws->todo_bd->link = gen->old_blocks;
1229             gen->old_blocks = ws->todo_bd;
1230             gen->n_old_blocks += ws->todo_bd->blocks;
1231             alloc_todo_block(ws,0); // always has one block.
1232         }
1233     }
1234
1235     // mark the small objects as from-space
1236     for (bd = gen->old_blocks; bd; bd = bd->link) {
1237         bd->flags &= ~BF_EVACUATED;
1238     }
1239
1240     // mark the large objects as from-space
1241     for (bd = gen->large_objects; bd; bd = bd->link) {
1242         bd->flags &= ~BF_EVACUATED;
1243     }
1244
1245     // for a compacted generation, we need to allocate the bitmap
1246     if (gen->mark) {
1247         nat bitmap_size; // in bytes
1248         bdescr *bitmap_bdescr;
1249         StgWord *bitmap;
1250
1251         bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1252
1253         if (bitmap_size > 0) {
1254             bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1255                                        / BLOCK_SIZE);
1256             gen->bitmap = bitmap_bdescr;
1257             bitmap = bitmap_bdescr->start;
1258
1259             debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1260                        bitmap_size, bitmap);
1261
1262             // don't forget to fill it with zeros!
1263             memset(bitmap, 0, bitmap_size);
1264
1265             // For each block in this step, point to its bitmap from the
1266             // block descriptor.
1267             for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1268                 bd->u.bitmap = bitmap;
1269                 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1270
1271                 // Also at this point we set the BF_MARKED flag
1272                 // for this block.  The invariant is that
1273                 // BF_MARKED is always unset, except during GC
1274                 // when it is set on those blocks which will be
1275                 // compacted.
1276                 if (!(bd->flags & BF_FRAGMENTED)) {
1277                     bd->flags |= BF_MARKED;
1278                 }
1279
1280                 // BF_SWEPT should be marked only for blocks that are being
1281                 // collected in sweep()
1282                 bd->flags &= ~BF_SWEPT;
1283             }
1284         }
1285     }
1286 }
1287
1288
1289 /* ----------------------------------------------------------------------------
1290    Save the mutable lists in saved_mut_lists
1291    ------------------------------------------------------------------------- */
1292
1293 static void
1294 stash_mut_list (Capability *cap, nat gen_no)
1295 {
1296     cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1297     cap->mut_lists[gen_no] = allocBlock_sync();
1298 }
1299
1300 /* ----------------------------------------------------------------------------
1301    Initialise a generation that is *not* to be collected
1302    ------------------------------------------------------------------------- */
1303
1304 static void
1305 prepare_uncollected_gen (generation *gen)
1306 {
1307     nat i;
1308
1309
1310     ASSERT(gen->no > 0);
1311
1312     // save the current mutable lists for this generation, and
1313     // allocate a fresh block for each one.  We'll traverse these
1314     // mutable lists as roots early on in the GC.
1315     for (i = 0; i < n_capabilities; i++) {
1316         stash_mut_list(&capabilities[i], gen->no);
1317     }
1318
1319     ASSERT(gen->scavenged_large_objects == NULL);
1320     ASSERT(gen->n_scavenged_large_blocks == 0);
1321 }
1322
1323 /* -----------------------------------------------------------------------------
1324    Collect the completed blocks from a GC thread and attach them to
1325    the generation.
1326    -------------------------------------------------------------------------- */
1327
1328 static void
1329 collect_gct_blocks (void)
1330 {
1331     nat g;
1332     gen_workspace *ws;
1333     bdescr *bd, *prev;
1334
1335     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1336         ws = &gct->gens[g];
1337
1338         // there may still be a block attached to ws->todo_bd;
1339         // leave it there to use next time.
1340
1341         if (ws->scavd_list != NULL) {
1342             ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1343
1344             ASSERT(gct->scan_bd == NULL);
1345             ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1346
1347             prev = NULL;
1348             for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1349                 ws->gen->n_words += bd->free - bd->start;
1350                 prev = bd;
1351             }
1352             if (prev != NULL) {
1353                 prev->link = ws->gen->blocks;
1354                 ws->gen->blocks = ws->scavd_list;
1355             }
1356             ws->gen->n_blocks += ws->n_scavd_blocks;
1357
1358             ws->scavd_list = NULL;
1359             ws->n_scavd_blocks = 0;
1360
1361             RELEASE_SPIN_LOCK(&ws->gen->sync);
1362         }
1363     }
1364 }
1365
1366 /* -----------------------------------------------------------------------------
1367    Initialise a gc_thread before GC
1368    -------------------------------------------------------------------------- */
1369
1370 static void
1371 init_gc_thread (gc_thread *t)
1372 {
1373     t->static_objects = END_OF_STATIC_LIST;
1374     t->scavenged_static_objects = END_OF_STATIC_LIST;
1375     t->scan_bd = NULL;
1376     t->mut_lists = capabilities[t->thread_index].mut_lists;
1377     t->evac_gen_no = 0;
1378     t->failed_to_evac = rtsFalse;
1379     t->eager_promotion = rtsTrue;
1380     t->thunk_selector_depth = 0;
1381     t->copied = 0;
1382     t->scanned = 0;
1383     t->any_work = 0;
1384     t->no_work = 0;
1385     t->scav_find_work = 0;
1386 }
1387
1388 /* -----------------------------------------------------------------------------
1389    Function we pass to evacuate roots.
1390    -------------------------------------------------------------------------- */
1391
1392 static void
1393 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1394 {
1395     // we stole a register for gct, but this function is called from
1396     // *outside* the GC where the register variable is not in effect,
1397     // so we need to save and restore it here.  NB. only call
1398     // mark_root() from the main GC thread, otherwise gct will be
1399     // incorrect.
1400     gc_thread *saved_gct;
1401     saved_gct = gct;
1402     SET_GCT(user);
1403
1404     evacuate(root);
1405
1406     SET_GCT(saved_gct);
1407 }
1408
1409 /* -----------------------------------------------------------------------------
1410    Initialising the static object & mutable lists
1411    -------------------------------------------------------------------------- */
1412
1413 static void
1414 zero_static_object_list(StgClosure* first_static)
1415 {
1416   StgClosure* p;
1417   StgClosure* link;
1418   const StgInfoTable *info;
1419
1420   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1421     info = get_itbl(p);
1422     link = *STATIC_LINK(info, p);
1423     *STATIC_LINK(info,p) = NULL;
1424   }
1425 }
1426
1427 /* ----------------------------------------------------------------------------
1428    Reset the sizes of the older generations when we do a major
1429    collection.
1430
1431    CURRENT STRATEGY: make all generations except zero the same size.
1432    We have to stay within the maximum heap size, and leave a certain
1433    percentage of the maximum heap size available to allocate into.
1434    ------------------------------------------------------------------------- */
1435
1436 static void
1437 resize_generations (void)
1438 {
1439     nat g;
1440
1441     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1442         nat live, size, min_alloc, words;
1443         const nat max  = RtsFlags.GcFlags.maxHeapSize;
1444         const nat gens = RtsFlags.GcFlags.generations;
1445
1446         // live in the oldest generations
1447         if (oldest_gen->live_estimate != 0) {
1448             words = oldest_gen->live_estimate;
1449         } else {
1450             words = oldest_gen->n_words;
1451         }
1452         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1453             oldest_gen->n_large_blocks;
1454
1455         // default max size for all generations except zero
1456         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1457                        RtsFlags.GcFlags.minOldGenSize);
1458
1459         if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1460             RtsFlags.GcFlags.heapSizeSuggestion = size;
1461         }
1462
1463         // minimum size for generation zero
1464         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1465                             RtsFlags.GcFlags.minAllocAreaSize);
1466
1467         // Auto-enable compaction when the residency reaches a
1468         // certain percentage of the maximum heap size (default: 30%).
1469         if (RtsFlags.GcFlags.compact ||
1470             (max > 0 &&
1471              oldest_gen->n_blocks >
1472              (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1473             oldest_gen->mark = 1;
1474             oldest_gen->compact = 1;
1475 //        debugBelch("compaction: on\n", live);
1476         } else {
1477             oldest_gen->mark = 0;
1478             oldest_gen->compact = 0;
1479 //        debugBelch("compaction: off\n", live);
1480         }
1481
1482         if (RtsFlags.GcFlags.sweep) {
1483             oldest_gen->mark = 1;
1484         }
1485
1486         // if we're going to go over the maximum heap size, reduce the
1487         // size of the generations accordingly.  The calculation is
1488         // different if compaction is turned on, because we don't need
1489         // to double the space required to collect the old generation.
1490         if (max != 0) {
1491
1492             // this test is necessary to ensure that the calculations
1493             // below don't have any negative results - we're working
1494             // with unsigned values here.
1495             if (max < min_alloc) {
1496                 heapOverflow();
1497             }
1498
1499             if (oldest_gen->compact) {
1500                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1501                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1502                 }
1503             } else {
1504                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1505                     size = (max - min_alloc) / ((gens - 1) * 2);
1506                 }
1507             }
1508
1509             if (size < live) {
1510                 heapOverflow();
1511             }
1512         }
1513
1514 #if 0
1515         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1516                    min_alloc, size, max);
1517 #endif
1518
1519         for (g = 0; g < gens; g++) {
1520             generations[g].max_blocks = size;
1521         }
1522     }
1523 }
1524
1525 /* -----------------------------------------------------------------------------
1526    Calculate the new size of the nursery, and resize it.
1527    -------------------------------------------------------------------------- */
1528
1529 static void
1530 resize_nursery (void)
1531 {
1532     const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1533
1534     if (RtsFlags.GcFlags.generations == 1)
1535     {   // Two-space collector:
1536         nat blocks;
1537
1538         /* set up a new nursery.  Allocate a nursery size based on a
1539          * function of the amount of live data (by default a factor of 2)
1540          * Use the blocks from the old nursery if possible, freeing up any
1541          * left over blocks.
1542          *
1543          * If we get near the maximum heap size, then adjust our nursery
1544          * size accordingly.  If the nursery is the same size as the live
1545          * data (L), then we need 3L bytes.  We can reduce the size of the
1546          * nursery to bring the required memory down near 2L bytes.
1547          *
1548          * A normal 2-space collector would need 4L bytes to give the same
1549          * performance we get from 3L bytes, reducing to the same
1550          * performance at 2L bytes.
1551          */
1552         blocks = generations[0].n_blocks;
1553
1554         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1555              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1556              RtsFlags.GcFlags.maxHeapSize )
1557         {
1558             long adjusted_blocks;  // signed on purpose
1559             int pc_free;
1560
1561             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1562
1563             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1564                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1565
1566             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1567             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1568             {
1569                 heapOverflow();
1570             }
1571             blocks = adjusted_blocks;
1572         }
1573         else
1574         {
1575             blocks *= RtsFlags.GcFlags.oldGenFactor;
1576             if (blocks < min_nursery)
1577             {
1578                 blocks = min_nursery;
1579             }
1580         }
1581         resizeNurseries(blocks);
1582     }
1583     else  // Generational collector
1584     {
1585         /*
1586          * If the user has given us a suggested heap size, adjust our
1587          * allocation area to make best use of the memory available.
1588          */
1589         if (RtsFlags.GcFlags.heapSizeSuggestion)
1590         {
1591             long blocks;
1592             const nat needed = calcNeeded();    // approx blocks needed at next GC
1593
1594             /* Guess how much will be live in generation 0 step 0 next time.
1595              * A good approximation is obtained by finding the
1596              * percentage of g0 that was live at the last minor GC.
1597              *
1598              * We have an accurate figure for the amount of copied data in
1599              * 'copied', but we must convert this to a number of blocks, with
1600              * a small adjustment for estimated slop at the end of a block
1601              * (- 10 words).
1602              */
1603             if (N == 0)
1604             {
1605                 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1606                     / countNurseryBlocks();
1607             }
1608
1609             /* Estimate a size for the allocation area based on the
1610              * information available.  We might end up going slightly under
1611              * or over the suggested heap size, but we should be pretty
1612              * close on average.
1613              *
1614              * Formula:            suggested - needed
1615              *                ----------------------------
1616              *                    1 + g0_pcnt_kept/100
1617              *
1618              * where 'needed' is the amount of memory needed at the next
1619              * collection for collecting all gens except g0.
1620              */
1621             blocks =
1622                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1623                 (100 + (long)g0_pcnt_kept);
1624
1625             if (blocks < (long)min_nursery) {
1626                 blocks = min_nursery;
1627             }
1628
1629             resizeNurseries((nat)blocks);
1630         }
1631         else
1632         {
1633             // we might have added extra large blocks to the nursery, so
1634             // resize back to minAllocAreaSize again.
1635             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1636         }
1637     }
1638 }
1639
1640 /* -----------------------------------------------------------------------------
1641    Sanity code for CAF garbage collection.
1642
1643    With DEBUG turned on, we manage a CAF list in addition to the SRT
1644    mechanism.  After GC, we run down the CAF list and blackhole any
1645    CAFs which have been garbage collected.  This means we get an error
1646    whenever the program tries to enter a garbage collected CAF.
1647
1648    Any garbage collected CAFs are taken off the CAF list at the same
1649    time.
1650    -------------------------------------------------------------------------- */
1651
1652 #if 0 && defined(DEBUG)
1653
1654 static void
1655 gcCAFs(void)
1656 {
1657   StgClosure*  p;
1658   StgClosure** pp;
1659   const StgInfoTable *info;
1660   nat i;
1661
1662   i = 0;
1663   p = caf_list;
1664   pp = &caf_list;
1665
1666   while (p != NULL) {
1667
1668     info = get_itbl(p);
1669
1670     ASSERT(info->type == IND_STATIC);
1671
1672     if (STATIC_LINK(info,p) == NULL) {
1673         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1674         // black hole it
1675         SET_INFO(p,&stg_BLACKHOLE_info);
1676         p = STATIC_LINK2(info,p);
1677         *pp = p;
1678     }
1679     else {
1680       pp = &STATIC_LINK2(info,p);
1681       p = *pp;
1682       i++;
1683     }
1684
1685   }
1686
1687   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1688 }
1689 #endif