rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "Compact.h"
  44 #include "Evac.h"
  45 #include "Scav.h"
  46 #include "GCUtils.h"
  47 #include "MarkStack.h"
  48 #include "MarkWeak.h"
  49 #include "Sparks.h"
  50 #include "Sweep.h"
  51
  52 #include <string.h> // for memset()
  53 #include <unistd.h>
  54
  55 /* -----------------------------------------------------------------------------
  56    Global variables
  57    -------------------------------------------------------------------------- */
  58
  59 /* STATIC OBJECT LIST.
  60  *
  61  * During GC:
  62  * We maintain a linked list of static objects that are still live.
  63  * The requirements for this list are:
  64  *
  65  *  - we need to scan the list while adding to it, in order to
  66  *    scavenge all the static objects (in the same way that
  67  *    breadth-first scavenging works for dynamic objects).
  68  *
  69  *  - we need to be able to tell whether an object is already on
  70  *    the list, to break loops.
  71  *
  72  * Each static object has a "static link field", which we use for
  73  * linking objects on to the list.  We use a stack-type list, consing
  74  * objects on the front as they are added (this means that the
  75  * scavenge phase is depth-first, not breadth-first, but that
  76  * shouldn't matter).
  77  *
  78  * A separate list is kept for objects that have been scavenged
  79  * already - this is so that we can zero all the marks afterwards.
  80  *
  81  * An object is on the list if its static link field is non-zero; this
  82  * means that we have to mark the end of the list with '1', not NULL.
  83  *
  84  * Extra notes for generational GC:
  85  *
  86  * Each generation has a static object list associated with it.  When
  87  * collecting generations up to N, we treat the static object lists
  88  * from generations > N as roots.
  89  *
  90  * We build up a static object list while collecting generations 0..N,
  91  * which is then appended to the static object list of generation N+1.
  92  */
  93
  94 /* N is the oldest generation being collected, where the generations
  95  * are numbered starting at 0.  A major GC (indicated by the major_gc
  96  * flag) is when we're collecting all generations.  We only attempt to
  97  * deal with static objects and GC CAFs when doing a major GC.
  98  */
  99 nat N;
 100 rtsBool major_gc;
 101
 102 /* Data used for allocation area sizing.
 103  */
 104 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
 105
 106 /* Mut-list stats */
 107 #ifdef DEBUG
 108 nat mutlist_MUTVARS,
 109     mutlist_MUTARRS,
 110     mutlist_MVARS,
 111     mutlist_OTHERS;
 112 #endif
 113
 114 /* Thread-local data for each GC thread
 115  */
 116 gc_thread **gc_threads = NULL;
 117
 118 #if !defined(THREADED_RTS)
 119 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
 120 #endif
 121
 122 // Number of threads running in *this* GC.  Affects how many
 123 // step->todos[] lists we have to look in to find work.
 124 nat n_gc_threads;
 125
 126 // For stats:
 127 long copied;        // *words* copied & scavenged during this GC
 128
 129 rtsBool work_stealing;
 130
 131 DECLARE_GCT
 132
 133 /* -----------------------------------------------------------------------------
 134    Static function declarations
 135    -------------------------------------------------------------------------- */
 136
 137 static void mark_root               (void *user, StgClosure **root);
 138 static void zero_static_object_list (StgClosure* first_static);
 139 static nat  initialise_N            (rtsBool force_major_gc);
 140 static void init_collected_gen      (nat g, nat threads);
 141 static void init_uncollected_gen    (nat g, nat threads);
 142 static void init_gc_thread          (gc_thread *t);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void scavenge_until_all_done (void);
 147 static StgWord inc_running          (void);
 148 static StgWord dec_running          (void);
 149 static void wakeup_gc_threads       (nat n_threads, nat me);
 150 static void shutdown_gc_threads     (nat n_threads, nat me);
 151
 152 #if 0 && defined(DEBUG)
 153 static void gcCAFs                  (void);
 154 #endif
 155
 156 /* -----------------------------------------------------------------------------
 157    The mark stack.
 158    -------------------------------------------------------------------------- */
 159
 160 bdescr *mark_stack_top_bd; // topmost block in the mark stack
 161 bdescr *mark_stack_bd;     // current block in the mark stack
 162 StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
 163
 164 /* -----------------------------------------------------------------------------
 165    GarbageCollect: the main entry point to the garbage collector.
 166
 167    Locks held: all capabilities are held throughout GarbageCollect().
 168    -------------------------------------------------------------------------- */
 169
 170 void
 171 GarbageCollect (rtsBool force_major_gc,
 172                 nat gc_type USED_IF_THREADS,
 173                 Capability *cap)
 174 {
 175   bdescr *bd;
 176   generation *gen;
 177   lnat live, allocated, max_copied, avg_copied, slop;
 178   gc_thread *saved_gct;
 179   nat g, t, n;
 180
 181   // necessary if we stole a callee-saves register for gct:
 182   saved_gct = gct;
 183
 184 #ifdef PROFILING
 185   CostCentreStack *prev_CCS;
 186 #endif
 187
 188   ACQUIRE_SM_LOCK;
 189
 190 #if defined(RTS_USER_SIGNALS)
 191   if (RtsFlags.MiscFlags.install_signal_handlers) {
 192     // block signals
 193     blockUserSignals();
 194   }
 195 #endif
 196
 197   ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
 198   // otherwise adjust the padding in gen_workspace.
 199
 200   // tell the stats department that we've started a GC
 201   stat_startGC();
 202
 203   // tell the STM to discard any cached closures it's hoping to re-use
 204   stmPreGCHook();
 205
 206   // lock the StablePtr table
 207   stablePtrPreGC();
 208
 209 #ifdef DEBUG
 210   mutlist_MUTVARS = 0;
 211   mutlist_MUTARRS = 0;
 212   mutlist_OTHERS = 0;
 213 #endif
 214
 215   // attribute any costs to CCS_GC
 216 #ifdef PROFILING
 217   prev_CCS = CCCS;
 218   CCCS = CCS_GC;
 219 #endif
 220
 221   /* Approximate how much we allocated.
 222    * Todo: only when generating stats?
 223    */
 224   allocated = calcAllocated();
 225
 226   /* Figure out which generation to collect
 227    */
 228   n = initialise_N(force_major_gc);
 229
 230 #if defined(THREADED_RTS)
 231   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
 232                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
 233       // It's not always a good idea to do load balancing in parallel
 234       // GC.  In particular, for a parallel program we don't want to
 235       // lose locality by moving cached data into another CPU's cache
 236       // (this effect can be quite significant).
 237       //
 238       // We could have a more complex way to deterimine whether to do
 239       // work stealing or not, e.g. it might be a good idea to do it
 240       // if the heap is big.  For now, we just turn it on or off with
 241       // a flag.
 242 #endif
 243
 244   /* Start threads, so they can be spinning up while we finish initialisation.
 245    */
 246   start_gc_threads();
 247
 248 #if defined(THREADED_RTS)
 249   /* How many threads will be participating in this GC?
 250    * We don't try to parallelise minor GCs (unless the user asks for
 251    * it with +RTS -gn0), or mark/compact/sweep GC.
 252    */
 253   if (gc_type == PENDING_GC_PAR) {
 254       n_gc_threads = RtsFlags.ParFlags.nNodes;
 255   } else {
 256       n_gc_threads = 1;
 257   }
 258 #else
 259   n_gc_threads = 1;
 260 #endif
 261
 262   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 263         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 264
 265 #ifdef RTS_GTK_FRONTPANEL
 266   if (RtsFlags.GcFlags.frontpanel) {
 267       updateFrontPanelBeforeGC(N);
 268   }
 269 #endif
 270
 271 #ifdef DEBUG
 272   // check for memory leaks if DEBUG is on
 273   memInventory(DEBUG_gc);
 274 #endif
 275
 276   // check sanity *before* GC
 277   IF_DEBUG(sanity, checkSanity(rtsTrue));
 278
 279   // Initialise all our gc_thread structures
 280   for (t = 0; t < n_gc_threads; t++) {
 281       init_gc_thread(gc_threads[t]);
 282   }
 283
 284   // Initialise all the generations/steps that we're collecting.
 285   for (g = 0; g <= N; g++) {
 286       init_collected_gen(g,n_gc_threads);
 287   }
 288
 289   // Initialise all the generations/steps that we're *not* collecting.
 290   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 291       init_uncollected_gen(g,n_gc_threads);
 292   }
 293
 294   /* Allocate a mark stack if we're doing a major collection.
 295    */
 296   if (major_gc && oldest_gen->mark) {
 297       mark_stack_bd     = allocBlock();
 298       mark_stack_top_bd = mark_stack_bd;
 299       mark_stack_bd->link = NULL;
 300       mark_stack_bd->u.back = NULL;
 301       mark_sp           = mark_stack_bd->start;
 302   } else {
 303       mark_stack_bd     = NULL;
 304       mark_stack_top_bd = NULL;
 305       mark_sp           = NULL;
 306   }
 307
 308   // this is the main thread
 309 #ifdef THREADED_RTS
 310   if (n_gc_threads == 1) {
 311       SET_GCT(gc_threads[0]);
 312   } else {
 313       SET_GCT(gc_threads[cap->no]);
 314   }
 315 #else
 316 SET_GCT(gc_threads[0]);
 317 #endif
 318
 319   /* -----------------------------------------------------------------------
 320    * follow all the roots that we know about:
 321    */
 322
 323   // the main thread is running: this prevents any other threads from
 324   // exiting prematurely, so we can start them now.
 325   // NB. do this after the mutable lists have been saved above, otherwise
 326   // the other GC threads will be writing into the old mutable lists.
 327   inc_running();
 328   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 329
 330   // Mutable lists from each generation > N
 331   // we want to *scavenge* these roots, not evacuate them: they're not
 332   // going to move in this GC.
 333   // Also do them in reverse generation order, for the usual reason:
 334   // namely to reduce the likelihood of spurious old->new pointers.
 335   //
 336   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 337 #if defined(THREADED_RTS)
 338       if (n_gc_threads > 1) {
 339           scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 340       } else {
 341           scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]);
 342       }
 343 #else
 344       scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 345 #endif
 346       freeChain_sync(generations[g].saved_mut_list);
 347       generations[g].saved_mut_list = NULL;
 348
 349   }
 350
 351   // scavenge the capability-private mutable lists.  This isn't part
 352   // of markSomeCapabilities() because markSomeCapabilities() can only
 353   // call back into the GC via mark_root() (due to the gct register
 354   // variable).
 355   if (n_gc_threads == 1) {
 356       for (n = 0; n < n_capabilities; n++) {
 357 #if defined(THREADED_RTS)
 358           scavenge_capability_mut_Lists1(&capabilities[n]);
 359 #else
 360           scavenge_capability_mut_lists(&capabilities[n]);
 361 #endif
 362       }
 363   } else {
 364       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 365   }
 366
 367   // follow roots from the CAF list (used by GHCi)
 368   gct->evac_gen = 0;
 369   markCAFs(mark_root, gct);
 370
 371   // follow all the roots that the application knows about.
 372   gct->evac_gen = 0;
 373   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 374                        rtsTrue/*prune sparks*/);
 375
 376 #if defined(RTS_USER_SIGNALS)
 377   // mark the signal handlers (signals should be already blocked)
 378   markSignalHandlers(mark_root, gct);
 379 #endif
 380
 381   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 382   markWeakPtrList();
 383   initWeakForGC();
 384
 385   // Mark the stable pointer table.
 386   markStablePtrTable(mark_root, gct);
 387
 388   /* -------------------------------------------------------------------------
 389    * Repeatedly scavenge all the areas we know about until there's no
 390    * more scavenging to be done.
 391    */
 392   for (;;)
 393   {
 394       scavenge_until_all_done();
 395       // The other threads are now stopped.  We might recurse back to
 396       // here, but from now on this is the only thread.
 397
 398       // must be last...  invariant is that everything is fully
 399       // scavenged at this point.
 400       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 401           inc_running();
 402           continue;
 403       }
 404
 405       // If we get to here, there's really nothing left to do.
 406       break;
 407   }
 408
 409   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 410
 411   // Now see which stable names are still alive.
 412   gcStablePtrTable();
 413
 414 #ifdef PROFILING
 415   // We call processHeapClosureForDead() on every closure destroyed during
 416   // the current garbage collection, so we invoke LdvCensusForDead().
 417   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 418       || RtsFlags.ProfFlags.bioSelector != NULL)
 419     LdvCensusForDead(N);
 420 #endif
 421
 422   // NO MORE EVACUATION AFTER THIS POINT!
 423
 424   // Two-space collector: free the old to-space.
 425   // g0->old_blocks is the old nursery
 426   // g0->blocks is to-space from the previous GC
 427   if (RtsFlags.GcFlags.generations == 1) {
 428       if (g0->blocks != NULL) {
 429           freeChain(g0->blocks);
 430           g0->blocks = NULL;
 431       }
 432   }
 433
 434   // For each workspace, in each thread, move the copied blocks to the step
 435   {
 436       gc_thread *thr;
 437       gen_workspace *ws;
 438       bdescr *prev, *next;
 439
 440       for (t = 0; t < n_gc_threads; t++) {
 441           thr = gc_threads[t];
 442
 443           for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 444               ws = &thr->gens[g];
 445
 446               // Push the final block
 447               if (ws->todo_bd) {
 448                   push_scanned_block(ws->todo_bd, ws);
 449               }
 450
 451               ASSERT(gct->scan_bd == NULL);
 452               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 453
 454               prev = NULL;
 455               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 456                   ws->gen->n_words += bd->free - bd->start;
 457                   prev = bd;
 458               }
 459               if (prev != NULL) {
 460                   prev->link = ws->gen->blocks;
 461                   ws->gen->blocks = ws->scavd_list;
 462               }
 463               ws->gen->n_blocks += ws->n_scavd_blocks;
 464           }
 465       }
 466
 467       // Add all the partial blocks *after* we've added all the full
 468       // blocks.  This is so that we can grab the partial blocks back
 469       // again and try to fill them up in the next GC.
 470       for (t = 0; t < n_gc_threads; t++) {
 471           thr = gc_threads[t];
 472
 473           for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 474               ws = &thr->gens[g];
 475
 476               prev = NULL;
 477               for (bd = ws->part_list; bd != NULL; bd = next) {
 478                   next = bd->link;
 479                   if (bd->free == bd->start) {
 480                       if (prev == NULL) {
 481                           ws->part_list = next;
 482                       } else {
 483                           prev->link = next;
 484                       }
 485                       freeGroup(bd);
 486                       ws->n_part_blocks--;
 487                   } else {
 488                       ws->gen->n_words += bd->free - bd->start;
 489                       prev = bd;
 490                   }
 491               }
 492               if (prev != NULL) {
 493                   prev->link = ws->gen->blocks;
 494                   ws->gen->blocks = ws->part_list;
 495               }
 496               ws->gen->n_blocks += ws->n_part_blocks;
 497
 498               ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks);
 499               ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words);
 500           }
 501       }
 502   }
 503
 504   // Finally: compact or sweep the oldest generation.
 505   if (major_gc && oldest_gen->mark) {
 506       if (oldest_gen->compact)
 507           compact(gct->scavenged_static_objects);
 508       else
 509           sweep(oldest_gen);
 510   }
 511
 512   /* run through all the generations/steps and tidy up
 513    */
 514   copied = 0;
 515   max_copied = 0;
 516   avg_copied = 0;
 517   {
 518       nat i;
 519       for (i=0; i < n_gc_threads; i++) {
 520           if (n_gc_threads > 1) {
 521               debugTrace(DEBUG_gc,"thread %d:", i);
 522               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 523               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 524               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 525               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 526               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 527           }
 528           copied += gc_threads[i]->copied;
 529           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 530       }
 531       if (n_gc_threads == 1) {
 532           max_copied = 0;
 533           avg_copied = 0;
 534       } else {
 535           avg_copied = copied;
 536       }
 537   }
 538
 539   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 540
 541     if (g == N) {
 542       generations[g].collections++; // for stats
 543       if (n_gc_threads > 1) generations[g].par_collections++;
 544     }
 545
 546     // Count the mutable list as bytes "copied" for the purposes of
 547     // stats.  Every mutable list is copied during every GC.
 548     if (g > 0) {
 549         nat mut_list_size = 0;
 550         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 551             mut_list_size += bd->free - bd->start;
 552         }
 553         for (n = 0; n < n_capabilities; n++) {
 554             for (bd = capabilities[n].mut_lists[g];
 555                  bd != NULL; bd = bd->link) {
 556                 mut_list_size += bd->free - bd->start;
 557             }
 558         }
 559         copied +=  mut_list_size;
 560
 561         debugTrace(DEBUG_gc,
 562                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 563                    (unsigned long)(mut_list_size * sizeof(W_)),
 564                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 565     }
 566
 567     bdescr *next, *prev;
 568     gen = &generations[g];
 569
 570     // for generations we collected...
 571     if (g <= N) {
 572
 573         /* free old memory and shift to-space into from-space for all
 574          * the collected steps (except the allocation area).  These
 575          * freed blocks will probaby be quickly recycled.
 576          */
 577         if (gen->mark)
 578         {
 579             // tack the new blocks on the end of the existing blocks
 580             if (gen->old_blocks != NULL) {
 581
 582                 prev = NULL;
 583                 for (bd = gen->old_blocks; bd != NULL; bd = next) {
 584
 585                     next = bd->link;
 586
 587                     if (!(bd->flags & BF_MARKED))
 588                     {
 589                         if (prev == NULL) {
 590                             gen->old_blocks = next;
 591                         } else {
 592                             prev->link = next;
 593                         }
 594                         freeGroup(bd);
 595                         gen->n_old_blocks--;
 596                     }
 597                     else
 598                     {
 599                         gen->n_words += bd->free - bd->start;
 600
 601                         // NB. this step might not be compacted next
 602                         // time, so reset the BF_MARKED flags.
 603                         // They are set before GC if we're going to
 604                         // compact.  (search for BF_MARKED above).
 605                         bd->flags &= ~BF_MARKED;
 606
 607                         // between GCs, all blocks in the heap except
 608                         // for the nursery have the BF_EVACUATED flag set.
 609                         bd->flags |= BF_EVACUATED;
 610
 611                         prev = bd;
 612                     }
 613                 }
 614
 615                 if (prev != NULL) {
 616                     prev->link = gen->blocks;
 617                     gen->blocks = gen->old_blocks;
 618                 }
 619             }
 620             // add the new blocks to the block tally
 621             gen->n_blocks += gen->n_old_blocks;
 622             ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
 623             ASSERT(countOccupied(gen->blocks) == gen->n_words);
 624         }
 625         else // not copacted
 626         {
 627             freeChain(gen->old_blocks);
 628         }
 629
 630         gen->old_blocks = NULL;
 631         gen->n_old_blocks = 0;
 632
 633         /* LARGE OBJECTS.  The current live large objects are chained on
 634          * scavenged_large, having been moved during garbage
 635          * collection from large_objects.  Any objects left on the
 636          * large_objects list are therefore dead, so we free them here.
 637          */
 638         freeChain(gen->large_objects);
 639         gen->large_objects  = gen->scavenged_large_objects;
 640         gen->n_large_blocks = gen->n_scavenged_large_blocks;
 641         gen->n_new_large_blocks = 0;
 642         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 643     }
 644     else // for generations > N
 645     {
 646         /* For older generations, we need to append the
 647          * scavenged_large_object list (i.e. large objects that have been
 648          * promoted during this GC) to the large_object list for that step.
 649          */
 650         for (bd = gen->scavenged_large_objects; bd; bd = next) {
 651             next = bd->link;
 652             dbl_link_onto(bd, &gen->large_objects);
 653         }
 654
 655         // add the new blocks we promoted during this GC
 656         gen->n_large_blocks += gen->n_scavenged_large_blocks;
 657         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 658     }
 659   } // for all generations
 660
 661   // update the max size of older generations after a major GC
 662   resize_generations();
 663
 664   // Calculate the amount of live data for stats.
 665   live = calcLiveWords();
 666
 667   // Free the small objects allocated via allocate(), since this will
 668   // all have been copied into G0S1 now.
 669   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 670
 671   // Start a new pinned_object_block
 672   for (n = 0; n < n_capabilities; n++) {
 673       capabilities[n].pinned_object_block = NULL;
 674   }
 675
 676   // Free the mark stack.
 677   if (mark_stack_top_bd != NULL) {
 678       debugTrace(DEBUG_gc, "mark stack: %d blocks",
 679                  countBlocks(mark_stack_top_bd));
 680       freeChain(mark_stack_top_bd);
 681   }
 682
 683   // Free any bitmaps.
 684   for (g = 0; g <= N; g++) {
 685       gen = &generations[g];
 686       if (gen->bitmap != NULL) {
 687           freeGroup(gen->bitmap);
 688           gen->bitmap = NULL;
 689       }
 690   }
 691
 692   resize_nursery();
 693
 694  // mark the garbage collected CAFs as dead
 695 #if 0 && defined(DEBUG) // doesn't work at the moment
 696   if (major_gc) { gcCAFs(); }
 697 #endif
 698
 699 #ifdef PROFILING
 700   // resetStaticObjectForRetainerProfiling() must be called before
 701   // zeroing below.
 702   if (n_gc_threads > 1) {
 703       barf("profiling is currently broken with multi-threaded GC");
 704       // ToDo: fix the gct->scavenged_static_objects below
 705   }
 706   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 707 #endif
 708
 709   // zero the scavenged static object list
 710   if (major_gc) {
 711       nat i;
 712       for (i = 0; i < n_gc_threads; i++) {
 713           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 714       }
 715   }
 716
 717   // Reset the nursery
 718   resetNurseries();
 719
 720   // start any pending finalizers
 721   RELEASE_SM_LOCK;
 722   scheduleFinalizers(cap, old_weak_ptr_list);
 723   ACQUIRE_SM_LOCK;
 724
 725   // send exceptions to any threads which were about to die
 726   RELEASE_SM_LOCK;
 727   resurrectThreads(resurrected_threads);
 728   ACQUIRE_SM_LOCK;
 729
 730   // Update the stable pointer hash table.
 731   updateStablePtrTable(major_gc);
 732
 733   // check sanity after GC
 734   IF_DEBUG(sanity, checkSanity(rtsTrue));
 735
 736   // extra GC trace info
 737   IF_DEBUG(gc, statDescribeGens());
 738
 739 #ifdef DEBUG
 740   // symbol-table based profiling
 741   /*  heapCensus(to_blocks); */ /* ToDo */
 742 #endif
 743
 744   // restore enclosing cost centre
 745 #ifdef PROFILING
 746   CCCS = prev_CCS;
 747 #endif
 748
 749 #ifdef DEBUG
 750   // check for memory leaks if DEBUG is on
 751   memInventory(DEBUG_gc);
 752 #endif
 753
 754 #ifdef RTS_GTK_FRONTPANEL
 755   if (RtsFlags.GcFlags.frontpanel) {
 756       updateFrontPanelAfterGC( N, live );
 757   }
 758 #endif
 759
 760   // ok, GC over: tell the stats department what happened.
 761   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 762   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 763
 764   // unlock the StablePtr table
 765   stablePtrPostGC();
 766
 767   // Guess which generation we'll collect *next* time
 768   initialise_N(force_major_gc);
 769
 770 #if defined(RTS_USER_SIGNALS)
 771   if (RtsFlags.MiscFlags.install_signal_handlers) {
 772     // unblock signals again
 773     unblockUserSignals();
 774   }
 775 #endif
 776
 777   RELEASE_SM_LOCK;
 778
 779   SET_GCT(saved_gct);
 780 }
 781
 782 /* -----------------------------------------------------------------------------
 783    Figure out which generation to collect, initialise N and major_gc.
 784
 785    Also returns the total number of blocks in generations that will be
 786    collected.
 787    -------------------------------------------------------------------------- */
 788
 789 static nat
 790 initialise_N (rtsBool force_major_gc)
 791 {
 792     int g;
 793     nat blocks, blocks_total;
 794
 795     blocks = 0;
 796     blocks_total = 0;
 797
 798     if (force_major_gc) {
 799         N = RtsFlags.GcFlags.generations - 1;
 800     } else {
 801         N = 0;
 802     }
 803
 804     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 805
 806         blocks = generations[g].n_words / BLOCK_SIZE_W
 807                + generations[g].n_large_blocks;
 808
 809         if (blocks >= generations[g].max_blocks) {
 810             N = stg_max(N,g);
 811         }
 812         if ((nat)g <= N) {
 813             blocks_total += blocks;
 814         }
 815     }
 816
 817     blocks_total += countNurseryBlocks();
 818
 819     major_gc = (N == RtsFlags.GcFlags.generations-1);
 820     return blocks_total;
 821 }
 822
 823 /* -----------------------------------------------------------------------------
 824    Initialise the gc_thread structures.
 825    -------------------------------------------------------------------------- */
 826
 827 #define GC_THREAD_INACTIVE             0
 828 #define GC_THREAD_STANDING_BY          1
 829 #define GC_THREAD_RUNNING              2
 830 #define GC_THREAD_WAITING_TO_CONTINUE  3
 831
 832 static void
 833 new_gc_thread (nat n, gc_thread *t)
 834 {
 835     nat g;
 836     gen_workspace *ws;
 837
 838 #ifdef THREADED_RTS
 839     t->id = 0;
 840     initSpinLock(&t->gc_spin);
 841     initSpinLock(&t->mut_spin);
 842     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 843     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 844                           // thread to start up, see wakeup_gc_threads
 845 #endif
 846
 847     t->thread_index = n;
 848     t->free_blocks = NULL;
 849     t->gc_count = 0;
 850
 851     init_gc_thread(t);
 852
 853 #ifdef USE_PAPI
 854     t->papi_events = -1;
 855 #endif
 856
 857     for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 858     {
 859         ws = &t->gens[g];
 860         ws->gen = &generations[g];
 861         ASSERT(g == ws->gen->no);
 862         ws->my_gct = t;
 863
 864         ws->todo_bd = NULL;
 865         ws->todo_q = newWSDeque(128);
 866         ws->todo_overflow = NULL;
 867         ws->n_todo_overflow = 0;
 868
 869         ws->part_list = NULL;
 870         ws->n_part_blocks = 0;
 871
 872         ws->scavd_list = NULL;
 873         ws->n_scavd_blocks = 0;
 874     }
 875 }
 876
 877
 878 void
 879 initGcThreads (void)
 880 {
 881     if (gc_threads == NULL) {
 882 #if defined(THREADED_RTS)
 883         nat i;
 884         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 885                                      sizeof(gc_thread*),
 886                                      "alloc_gc_threads");
 887
 888         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 889             gc_threads[i] =
 890                 stgMallocBytes(sizeof(gc_thread) +
 891                                RtsFlags.GcFlags.generations * sizeof(gen_workspace),
 892                                "alloc_gc_threads");
 893
 894             new_gc_thread(i, gc_threads[i]);
 895         }
 896 #else
 897         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 898         gc_threads[0] = gct;
 899         new_gc_thread(0,gc_threads[0]);
 900 #endif
 901     }
 902 }
 903
 904 void
 905 freeGcThreads (void)
 906 {
 907     nat g;
 908     if (gc_threads != NULL) {
 909 #if defined(THREADED_RTS)
 910         nat i;
 911         for (i = 0; i < n_capabilities; i++) {
 912             for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 913             {
 914                 freeWSDeque(gc_threads[i]->gens[g].todo_q);
 915             }
 916             stgFree (gc_threads[i]);
 917         }
 918         stgFree (gc_threads);
 919 #else
 920         for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 921         {
 922             freeWSDeque(gc_threads[0]->gens[g].todo_q);
 923         }
 924         stgFree (gc_threads);
 925 #endif
 926         gc_threads = NULL;
 927     }
 928 }
 929
 930 /* ----------------------------------------------------------------------------
 931    Start GC threads
 932    ------------------------------------------------------------------------- */
 933
 934 static volatile StgWord gc_running_threads;
 935
 936 static StgWord
 937 inc_running (void)
 938 {
 939     StgWord new;
 940     new = atomic_inc(&gc_running_threads);
 941     ASSERT(new <= n_gc_threads);
 942     return new;
 943 }
 944
 945 static StgWord
 946 dec_running (void)
 947 {
 948     ASSERT(gc_running_threads != 0);
 949     return atomic_dec(&gc_running_threads);
 950 }
 951
 952 static rtsBool
 953 any_work (void)
 954 {
 955     int g;
 956     gen_workspace *ws;
 957
 958     gct->any_work++;
 959
 960     write_barrier();
 961
 962     // scavenge objects in compacted generation
 963     if (mark_stack_bd != NULL && !mark_stack_empty()) {
 964         return rtsTrue;
 965     }
 966
 967     // Check for global work in any step.  We don't need to check for
 968     // local work, because we have already exited scavenge_loop(),
 969     // which means there is no local work for this thread.
 970     for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
 971         ws = &gct->gens[g];
 972         if (ws->todo_large_objects) return rtsTrue;
 973         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 974         if (ws->todo_overflow) return rtsTrue;
 975     }
 976
 977 #if defined(THREADED_RTS)
 978     if (work_stealing) {
 979         nat n;
 980         // look for work to steal
 981         for (n = 0; n < n_gc_threads; n++) {
 982             if (n == gct->thread_index) continue;
 983             for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
 984                 ws = &gc_threads[n]->gens[g];
 985                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 986             }
 987         }
 988     }
 989 #endif
 990
 991     gct->no_work++;
 992 #if defined(THREADED_RTS)
 993     yieldThread();
 994 #endif
 995
 996     return rtsFalse;
 997 }
 998
 999 static void
1000 scavenge_until_all_done (void)
1001 {
1002     nat r;
1003
1004
1005 loop:
1006     traceEventGcWork(&capabilities[gct->thread_index]);
1007
1008 #if defined(THREADED_RTS)
1009     if (n_gc_threads > 1) {
1010         scavenge_loop();
1011     } else {
1012         scavenge_loop1();
1013     }
1014 #else
1015     scavenge_loop();
1016 #endif
1017
1018     // scavenge_loop() only exits when there's no work to do
1019     r = dec_running();
1020
1021     traceEventGcIdle(&capabilities[gct->thread_index]);
1022
1023     debugTrace(DEBUG_gc, "%d GC threads still running", r);
1024
1025     while (gc_running_threads != 0) {
1026         // usleep(1);
1027         if (any_work()) {
1028             inc_running();
1029             goto loop;
1030         }
1031         // any_work() does not remove the work from the queue, it
1032         // just checks for the presence of work.  If we find any,
1033         // then we increment gc_running_threads and go back to
1034         // scavenge_loop() to perform any pending work.
1035     }
1036
1037     traceEventGcDone(&capabilities[gct->thread_index]);
1038 }
1039
1040 #if defined(THREADED_RTS)
1041
1042 void
1043 gcWorkerThread (Capability *cap)
1044 {
1045     gc_thread *saved_gct;
1046
1047     // necessary if we stole a callee-saves register for gct:
1048     saved_gct = gct;
1049
1050     gct = gc_threads[cap->no];
1051     gct->id = osThreadId();
1052
1053     // Wait until we're told to wake up
1054     RELEASE_SPIN_LOCK(&gct->mut_spin);
1055     gct->wakeup = GC_THREAD_STANDING_BY;
1056     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1057     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1058
1059 #ifdef USE_PAPI
1060     // start performance counters in this thread...
1061     if (gct->papi_events == -1) {
1062         papi_init_eventset(&gct->papi_events);
1063     }
1064     papi_thread_start_gc1_count(gct->papi_events);
1065 #endif
1066
1067     // Every thread evacuates some roots.
1068     gct->evac_gen = 0;
1069     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1070                          rtsTrue/*prune sparks*/);
1071     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1072
1073     scavenge_until_all_done();
1074
1075 #ifdef USE_PAPI
1076     // count events in this thread towards the GC totals
1077     papi_thread_stop_gc1_count(gct->papi_events);
1078 #endif
1079
1080     // Wait until we're told to continue
1081     RELEASE_SPIN_LOCK(&gct->gc_spin);
1082     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1083     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1084                gct->thread_index);
1085     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1086     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1087
1088     SET_GCT(saved_gct);
1089 }
1090
1091 #endif
1092
1093 #if defined(THREADED_RTS)
1094
1095 void
1096 waitForGcThreads (Capability *cap USED_IF_THREADS)
1097 {
1098     nat n_threads = RtsFlags.ParFlags.nNodes;
1099     nat me = cap->no;
1100     nat i, j;
1101     rtsBool retry = rtsTrue;
1102
1103     while(retry) {
1104         for (i=0; i < n_threads; i++) {
1105             if (i == me) continue;
1106             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1107                 prodCapability(&capabilities[i], cap->running_task);
1108             }
1109         }
1110         for (j=0; j < 10; j++) {
1111             retry = rtsFalse;
1112             for (i=0; i < n_threads; i++) {
1113                 if (i == me) continue;
1114                 write_barrier();
1115                 setContextSwitches();
1116                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1117                     retry = rtsTrue;
1118                 }
1119             }
1120             if (!retry) break;
1121             yieldThread();
1122         }
1123     }
1124 }
1125
1126 #endif // THREADED_RTS
1127
1128 static void
1129 start_gc_threads (void)
1130 {
1131 #if defined(THREADED_RTS)
1132     gc_running_threads = 0;
1133 #endif
1134 }
1135
1136 static void
1137 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1138 {
1139 #if defined(THREADED_RTS)
1140     nat i;
1141     for (i=0; i < n_threads; i++) {
1142         if (i == me) continue;
1143         inc_running();
1144         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1145         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1146
1147         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1148         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1149         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1150     }
1151 #endif
1152 }
1153
1154 // After GC is complete, we must wait for all GC threads to enter the
1155 // standby state, otherwise they may still be executing inside
1156 // any_work(), and may even remain awake until the next GC starts.
1157 static void
1158 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1159 {
1160 #if defined(THREADED_RTS)
1161     nat i;
1162     for (i=0; i < n_threads; i++) {
1163         if (i == me) continue;
1164         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1165     }
1166 #endif
1167 }
1168
1169 #if defined(THREADED_RTS)
1170 void
1171 releaseGCThreads (Capability *cap USED_IF_THREADS)
1172 {
1173     nat n_threads = RtsFlags.ParFlags.nNodes;
1174     nat me = cap->no;
1175     nat i;
1176     for (i=0; i < n_threads; i++) {
1177         if (i == me) continue;
1178         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1179             barf("releaseGCThreads");
1180
1181         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1182         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1183         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1184     }
1185 }
1186 #endif
1187
1188 /* ----------------------------------------------------------------------------
1189    Initialise a generation that is to be collected
1190    ------------------------------------------------------------------------- */
1191
1192 static void
1193 init_collected_gen (nat g, nat n_threads)
1194 {
1195     nat t, i;
1196     gen_workspace *ws;
1197     generation *gen;
1198     bdescr *bd;
1199
1200     // Throw away the current mutable list.  Invariant: the mutable
1201     // list always has at least one block; this means we can avoid a
1202     // check for NULL in recordMutable().
1203     if (g != 0) {
1204         freeChain(generations[g].mut_list);
1205         generations[g].mut_list = allocBlock();
1206         for (i = 0; i < n_capabilities; i++) {
1207             freeChain(capabilities[i].mut_lists[g]);
1208             capabilities[i].mut_lists[g] = allocBlock();
1209         }
1210     }
1211
1212     gen = &generations[g];
1213     ASSERT(gen->no == g);
1214
1215     // we'll construct a new list of threads in this step
1216     // during GC, throw away the current list.
1217     gen->old_threads = gen->threads;
1218     gen->threads = END_TSO_QUEUE;
1219
1220     // deprecate the existing blocks
1221     gen->old_blocks   = gen->blocks;
1222     gen->n_old_blocks = gen->n_blocks;
1223     gen->blocks       = NULL;
1224     gen->n_blocks     = 0;
1225     gen->n_words      = 0;
1226     gen->live_estimate = 0;
1227
1228     // initialise the large object queues.
1229     gen->scavenged_large_objects = NULL;
1230     gen->n_scavenged_large_blocks = 0;
1231
1232     // mark the small objects as from-space
1233     for (bd = gen->old_blocks; bd; bd = bd->link) {
1234         bd->flags &= ~BF_EVACUATED;
1235     }
1236
1237     // mark the large objects as from-space
1238     for (bd = gen->large_objects; bd; bd = bd->link) {
1239         bd->flags &= ~BF_EVACUATED;
1240     }
1241
1242     // for a compacted generation, we need to allocate the bitmap
1243     if (gen->mark) {
1244         nat bitmap_size; // in bytes
1245         bdescr *bitmap_bdescr;
1246         StgWord *bitmap;
1247
1248         bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1249
1250         if (bitmap_size > 0) {
1251             bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1252                                        / BLOCK_SIZE);
1253             gen->bitmap = bitmap_bdescr;
1254             bitmap = bitmap_bdescr->start;
1255
1256             debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1257                        bitmap_size, bitmap);
1258
1259             // don't forget to fill it with zeros!
1260             memset(bitmap, 0, bitmap_size);
1261
1262             // For each block in this step, point to its bitmap from the
1263             // block descriptor.
1264             for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1265                 bd->u.bitmap = bitmap;
1266                 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1267
1268                 // Also at this point we set the BF_MARKED flag
1269                 // for this block.  The invariant is that
1270                 // BF_MARKED is always unset, except during GC
1271                 // when it is set on those blocks which will be
1272                 // compacted.
1273                 if (!(bd->flags & BF_FRAGMENTED)) {
1274                     bd->flags |= BF_MARKED;
1275                 }
1276             }
1277         }
1278     }
1279
1280     // For each GC thread, for each step, allocate a "todo" block to
1281     // store evacuated objects to be scavenged, and a block to store
1282     // evacuated objects that do not need to be scavenged.
1283     for (t = 0; t < n_threads; t++) {
1284         ws = &gc_threads[t]->gens[g];
1285
1286         ws->todo_large_objects = NULL;
1287
1288         ws->part_list = NULL;
1289         ws->n_part_blocks = 0;
1290
1291         // allocate the first to-space block; extra blocks will be
1292         // chained on as necessary.
1293         ws->todo_bd = NULL;
1294         ASSERT(looksEmptyWSDeque(ws->todo_q));
1295         alloc_todo_block(ws,0);
1296
1297         ws->todo_overflow = NULL;
1298         ws->n_todo_overflow = 0;
1299
1300         ws->scavd_list = NULL;
1301         ws->n_scavd_blocks = 0;
1302     }
1303 }
1304
1305
1306 /* ----------------------------------------------------------------------------
1307    Initialise a generation that is *not* to be collected
1308    ------------------------------------------------------------------------- */
1309
1310 static void
1311 init_uncollected_gen (nat g, nat threads)
1312 {
1313     nat t, n;
1314     gen_workspace *ws;
1315     generation *gen;
1316     bdescr *bd;
1317
1318     // save the current mutable lists for this generation, and
1319     // allocate a fresh block for each one.  We'll traverse these
1320     // mutable lists as roots early on in the GC.
1321     generations[g].saved_mut_list = generations[g].mut_list;
1322     generations[g].mut_list = allocBlock();
1323     for (n = 0; n < n_capabilities; n++) {
1324         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
1325         capabilities[n].mut_lists[g] = allocBlock();
1326     }
1327
1328     gen = &generations[g];
1329
1330     gen->scavenged_large_objects = NULL;
1331     gen->n_scavenged_large_blocks = 0;
1332
1333     for (t = 0; t < threads; t++) {
1334         ws = &gc_threads[t]->gens[g];
1335
1336         ASSERT(looksEmptyWSDeque(ws->todo_q));
1337         ws->todo_large_objects = NULL;
1338
1339         ws->part_list = NULL;
1340         ws->n_part_blocks = 0;
1341
1342         ws->scavd_list = NULL;
1343         ws->n_scavd_blocks = 0;
1344
1345         // If the block at the head of the list in this generation
1346         // is less than 3/4 full, then use it as a todo block.
1347         if (gen->blocks && isPartiallyFull(gen->blocks))
1348         {
1349             ws->todo_bd = gen->blocks;
1350             ws->todo_free = ws->todo_bd->free;
1351             ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1352             gen->blocks = gen->blocks->link;
1353             gen->n_blocks -= 1;
1354             gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1355             ws->todo_bd->link = NULL;
1356             // we must scan from the current end point.
1357             ws->todo_bd->u.scan = ws->todo_bd->free;
1358         }
1359         else
1360         {
1361             ws->todo_bd = NULL;
1362             alloc_todo_block(ws,0);
1363         }
1364     }
1365
1366     // deal out any more partial blocks to the threads' part_lists
1367     t = 0;
1368     while (gen->blocks && isPartiallyFull(gen->blocks))
1369     {
1370         bd = gen->blocks;
1371         gen->blocks = bd->link;
1372         ws = &gc_threads[t]->gens[g];
1373         bd->link = ws->part_list;
1374         ws->part_list = bd;
1375         ws->n_part_blocks += 1;
1376         bd->u.scan = bd->free;
1377         gen->n_blocks -= 1;
1378         gen->n_words -= bd->free - bd->start;
1379         t++;
1380         if (t == n_gc_threads) t = 0;
1381     }
1382 }
1383
1384 /* -----------------------------------------------------------------------------
1385    Initialise a gc_thread before GC
1386    -------------------------------------------------------------------------- */
1387
1388 static void
1389 init_gc_thread (gc_thread *t)
1390 {
1391     t->static_objects = END_OF_STATIC_LIST;
1392     t->scavenged_static_objects = END_OF_STATIC_LIST;
1393     t->scan_bd = NULL;
1394     t->mut_lists = capabilities[t->thread_index].mut_lists;
1395     t->evac_gen = 0;
1396     t->failed_to_evac = rtsFalse;
1397     t->eager_promotion = rtsTrue;
1398     t->thunk_selector_depth = 0;
1399     t->copied = 0;
1400     t->scanned = 0;
1401     t->any_work = 0;
1402     t->no_work = 0;
1403     t->scav_find_work = 0;
1404 }
1405
1406 /* -----------------------------------------------------------------------------
1407    Function we pass to evacuate roots.
1408    -------------------------------------------------------------------------- */
1409
1410 static void
1411 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1412 {
1413     // we stole a register for gct, but this function is called from
1414     // *outside* the GC where the register variable is not in effect,
1415     // so we need to save and restore it here.  NB. only call
1416     // mark_root() from the main GC thread, otherwise gct will be
1417     // incorrect.
1418     gc_thread *saved_gct;
1419     saved_gct = gct;
1420     SET_GCT(user);
1421
1422     evacuate(root);
1423
1424     SET_GCT(saved_gct);
1425 }
1426
1427 /* -----------------------------------------------------------------------------
1428    Initialising the static object & mutable lists
1429    -------------------------------------------------------------------------- */
1430
1431 static void
1432 zero_static_object_list(StgClosure* first_static)
1433 {
1434   StgClosure* p;
1435   StgClosure* link;
1436   const StgInfoTable *info;
1437
1438   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1439     info = get_itbl(p);
1440     link = *STATIC_LINK(info, p);
1441     *STATIC_LINK(info,p) = NULL;
1442   }
1443 }
1444
1445 /* ----------------------------------------------------------------------------
1446    Reset the sizes of the older generations when we do a major
1447    collection.
1448
1449    CURRENT STRATEGY: make all generations except zero the same size.
1450    We have to stay within the maximum heap size, and leave a certain
1451    percentage of the maximum heap size available to allocate into.
1452    ------------------------------------------------------------------------- */
1453
1454 static void
1455 resize_generations (void)
1456 {
1457     nat g;
1458
1459     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1460         nat live, size, min_alloc, words;
1461         nat max  = RtsFlags.GcFlags.maxHeapSize;
1462         nat gens = RtsFlags.GcFlags.generations;
1463
1464         // live in the oldest generations
1465         if (oldest_gen->live_estimate != 0) {
1466             words = oldest_gen->live_estimate;
1467         } else {
1468             words = oldest_gen->n_words;
1469         }
1470         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1471             oldest_gen->n_large_blocks;
1472
1473         // default max size for all generations except zero
1474         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1475                        RtsFlags.GcFlags.minOldGenSize);
1476
1477         if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1478             RtsFlags.GcFlags.heapSizeSuggestion = size;
1479         }
1480
1481         // minimum size for generation zero
1482         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1483                             RtsFlags.GcFlags.minAllocAreaSize);
1484
1485         // Auto-enable compaction when the residency reaches a
1486         // certain percentage of the maximum heap size (default: 30%).
1487         if (RtsFlags.GcFlags.generations > 1 &&
1488             (RtsFlags.GcFlags.compact ||
1489              (max > 0 &&
1490               oldest_gen->n_blocks >
1491               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1492             oldest_gen->mark = 1;
1493             oldest_gen->compact = 1;
1494 //        debugBelch("compaction: on\n", live);
1495         } else {
1496             oldest_gen->mark = 0;
1497             oldest_gen->compact = 0;
1498 //        debugBelch("compaction: off\n", live);
1499         }
1500
1501         if (RtsFlags.GcFlags.sweep) {
1502             oldest_gen->mark = 1;
1503         }
1504
1505         // if we're going to go over the maximum heap size, reduce the
1506         // size of the generations accordingly.  The calculation is
1507         // different if compaction is turned on, because we don't need
1508         // to double the space required to collect the old generation.
1509         if (max != 0) {
1510
1511             // this test is necessary to ensure that the calculations
1512             // below don't have any negative results - we're working
1513             // with unsigned values here.
1514             if (max < min_alloc) {
1515                 heapOverflow();
1516             }
1517
1518             if (oldest_gen->compact) {
1519                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1520                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1521                 }
1522             } else {
1523                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1524                     size = (max - min_alloc) / ((gens - 1) * 2);
1525                 }
1526             }
1527
1528             if (size < live) {
1529                 heapOverflow();
1530             }
1531         }
1532
1533 #if 0
1534         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1535                    min_alloc, size, max);
1536 #endif
1537
1538         for (g = 0; g < gens; g++) {
1539             generations[g].max_blocks = size;
1540         }
1541     }
1542 }
1543
1544 /* -----------------------------------------------------------------------------
1545    Calculate the new size of the nursery, and resize it.
1546    -------------------------------------------------------------------------- */
1547
1548 static void
1549 resize_nursery (void)
1550 {
1551     lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1552
1553     if (RtsFlags.GcFlags.generations == 1)
1554     {   // Two-space collector:
1555         nat blocks;
1556
1557         /* set up a new nursery.  Allocate a nursery size based on a
1558          * function of the amount of live data (by default a factor of 2)
1559          * Use the blocks from the old nursery if possible, freeing up any
1560          * left over blocks.
1561          *
1562          * If we get near the maximum heap size, then adjust our nursery
1563          * size accordingly.  If the nursery is the same size as the live
1564          * data (L), then we need 3L bytes.  We can reduce the size of the
1565          * nursery to bring the required memory down near 2L bytes.
1566          *
1567          * A normal 2-space collector would need 4L bytes to give the same
1568          * performance we get from 3L bytes, reducing to the same
1569          * performance at 2L bytes.
1570          */
1571         blocks = generations[0].n_blocks;
1572
1573         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1574              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1575              RtsFlags.GcFlags.maxHeapSize )
1576         {
1577             long adjusted_blocks;  // signed on purpose
1578             int pc_free;
1579
1580             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1581
1582             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1583                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1584
1585             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1586             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1587             {
1588                 heapOverflow();
1589             }
1590             blocks = adjusted_blocks;
1591         }
1592         else
1593         {
1594             blocks *= RtsFlags.GcFlags.oldGenFactor;
1595             if (blocks < min_nursery)
1596             {
1597                 blocks = min_nursery;
1598             }
1599         }
1600         resizeNurseries(blocks);
1601     }
1602     else  // Generational collector
1603     {
1604         /*
1605          * If the user has given us a suggested heap size, adjust our
1606          * allocation area to make best use of the memory available.
1607          */
1608         if (RtsFlags.GcFlags.heapSizeSuggestion)
1609         {
1610             long blocks;
1611             nat needed = calcNeeded();  // approx blocks needed at next GC
1612
1613             /* Guess how much will be live in generation 0 step 0 next time.
1614              * A good approximation is obtained by finding the
1615              * percentage of g0 that was live at the last minor GC.
1616              *
1617              * We have an accurate figure for the amount of copied data in
1618              * 'copied', but we must convert this to a number of blocks, with
1619              * a small adjustment for estimated slop at the end of a block
1620              * (- 10 words).
1621              */
1622             if (N == 0)
1623             {
1624                 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1625                     / countNurseryBlocks();
1626             }
1627
1628             /* Estimate a size for the allocation area based on the
1629              * information available.  We might end up going slightly under
1630              * or over the suggested heap size, but we should be pretty
1631              * close on average.
1632              *
1633              * Formula:            suggested - needed
1634              *                ----------------------------
1635              *                    1 + g0_pcnt_kept/100
1636              *
1637              * where 'needed' is the amount of memory needed at the next
1638              * collection for collecting all gens except g0.
1639              */
1640             blocks =
1641                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1642                 (100 + (long)g0_pcnt_kept);
1643
1644             if (blocks < (long)min_nursery) {
1645                 blocks = min_nursery;
1646             }
1647
1648             resizeNurseries((nat)blocks);
1649         }
1650         else
1651         {
1652             // we might have added extra large blocks to the nursery, so
1653             // resize back to minAllocAreaSize again.
1654             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1655         }
1656     }
1657 }
1658
1659 /* -----------------------------------------------------------------------------
1660    Sanity code for CAF garbage collection.
1661
1662    With DEBUG turned on, we manage a CAF list in addition to the SRT
1663    mechanism.  After GC, we run down the CAF list and blackhole any
1664    CAFs which have been garbage collected.  This means we get an error
1665    whenever the program tries to enter a garbage collected CAF.
1666
1667    Any garbage collected CAFs are taken off the CAF list at the same
1668    time.
1669    -------------------------------------------------------------------------- */
1670
1671 #if 0 && defined(DEBUG)
1672
1673 static void
1674 gcCAFs(void)
1675 {
1676   StgClosure*  p;
1677   StgClosure** pp;
1678   const StgInfoTable *info;
1679   nat i;
1680
1681   i = 0;
1682   p = caf_list;
1683   pp = &caf_list;
1684
1685   while (p != NULL) {
1686
1687     info = get_itbl(p);
1688
1689     ASSERT(info->type == IND_STATIC);
1690
1691     if (STATIC_LINK(info,p) == NULL) {
1692         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1693         // black hole it
1694         SET_INFO(p,&stg_BLACKHOLE_info);
1695         p = STATIC_LINK2(info,p);
1696         *pp = p;
1697     }
1698     else {
1699       pp = &STATIC_LINK2(info,p);
1700       p = *pp;
1701       i++;
1702     }
1703
1704   }
1705
1706   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1707 }
1708 #endif