rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 // #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsFlags.h"
  17 #include "RtsUtils.h"
  18 #include "Apply.h"
  19 #include "OSThreads.h"
  20 #include "LdvProfile.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "MBlock.h"
  27 #include "ProfHeap.h"
  28 #include "SchedAPI.h"
  29 #include "Weak.h"
  30 #include "Prelude.h"
  31 #include "ParTicky.h"           // ToDo: move into Rts.h
  32 #include "RtsSignals.h"
  33 #include "STM.h"
  34 #include "HsFFI.h"
  35 #include "Linker.h"
  36 #if defined(RTS_GTK_FRONTPANEL)
  37 #include "FrontPanel.h"
  38 #endif
  39 #include "Trace.h"
  40 #include "RetainerProfile.h"
  41 #include "RaiseAsync.h"
  42 #include "Papi.h"
  43
  44 #include "GC.h"
  45 #include "GCThread.h"
  46 #include "Compact.h"
  47 #include "Evac.h"
  48 #include "Scav.h"
  49 #include "GCUtils.h"
  50 #include "MarkWeak.h"
  51 #include "Sparks.h"
  52 #include "Sweep.h"
  53
  54 #include <string.h> // for memset()
  55 #include <unistd.h>
  56
  57 /* -----------------------------------------------------------------------------
  58    Global variables
  59    -------------------------------------------------------------------------- */
  60
  61 /* STATIC OBJECT LIST.
  62  *
  63  * During GC:
  64  * We maintain a linked list of static objects that are still live.
  65  * The requirements for this list are:
  66  *
  67  *  - we need to scan the list while adding to it, in order to
  68  *    scavenge all the static objects (in the same way that
  69  *    breadth-first scavenging works for dynamic objects).
  70  *
  71  *  - we need to be able to tell whether an object is already on
  72  *    the list, to break loops.
  73  *
  74  * Each static object has a "static link field", which we use for
  75  * linking objects on to the list.  We use a stack-type list, consing
  76  * objects on the front as they are added (this means that the
  77  * scavenge phase is depth-first, not breadth-first, but that
  78  * shouldn't matter).
  79  *
  80  * A separate list is kept for objects that have been scavenged
  81  * already - this is so that we can zero all the marks afterwards.
  82  *
  83  * An object is on the list if its static link field is non-zero; this
  84  * means that we have to mark the end of the list with '1', not NULL.
  85  *
  86  * Extra notes for generational GC:
  87  *
  88  * Each generation has a static object list associated with it.  When
  89  * collecting generations up to N, we treat the static object lists
  90  * from generations > N as roots.
  91  *
  92  * We build up a static object list while collecting generations 0..N,
  93  * which is then appended to the static object list of generation N+1.
  94  */
  95
  96 /* N is the oldest generation being collected, where the generations
  97  * are numbered starting at 0.  A major GC (indicated by the major_gc
  98  * flag) is when we're collecting all generations.  We only attempt to
  99  * deal with static objects and GC CAFs when doing a major GC.
 100  */
 101 nat N;
 102 rtsBool major_gc;
 103
 104 /* Data used for allocation area sizing.
 105  */
 106 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 107
 108 /* Mut-list stats */
 109 #ifdef DEBUG
 110 nat mutlist_MUTVARS,
 111     mutlist_MUTARRS,
 112     mutlist_MVARS,
 113     mutlist_OTHERS;
 114 #endif
 115
 116 /* Thread-local data for each GC thread
 117  */
 118 gc_thread **gc_threads = NULL;
 119 // gc_thread *gct = NULL;  // this thread's gct TODO: make thread-local
 120
 121 // Number of threads running in *this* GC.  Affects how many
 122 // step->todos[] lists we have to look in to find work.
 123 nat n_gc_threads;
 124
 125 // For stats:
 126 long copied;        // *words* copied & scavenged during this GC
 127
 128 #ifdef THREADED_RTS
 129 SpinLock recordMutableGen_sync;
 130 #endif
 131
 132 DECLARE_GCT
 133
 134 /* -----------------------------------------------------------------------------
 135    Static function declarations
 136    -------------------------------------------------------------------------- */
 137
 138 static void mark_root               (void *user, StgClosure **root);
 139 static void zero_static_object_list (StgClosure* first_static);
 140 static nat  initialise_N            (rtsBool force_major_gc);
 141 static void init_collected_gen      (nat g, nat threads);
 142 static void init_uncollected_gen    (nat g, nat threads);
 143 static void init_gc_thread          (gc_thread *t);
 144 static void update_task_list        (void);
 145 static void resize_generations      (void);
 146 static void resize_nursery          (void);
 147 static void start_gc_threads        (void);
 148 static void scavenge_until_all_done (void);
 149 static nat  inc_running             (void);
 150 static nat  dec_running             (void);
 151 static void wakeup_gc_threads       (nat n_threads, nat me);
 152 static void shutdown_gc_threads     (nat n_threads, nat me);
 153 static void continue_gc_threads     (nat n_threads, nat me);
 154
 155 #if 0 && defined(DEBUG)
 156 static void gcCAFs                  (void);
 157 #endif
 158
 159 /* -----------------------------------------------------------------------------
 160    The mark bitmap & stack.
 161    -------------------------------------------------------------------------- */
 162
 163 #define MARK_STACK_BLOCKS 4
 164
 165 bdescr *mark_stack_bdescr;
 166 StgPtr *mark_stack;
 167 StgPtr *mark_sp;
 168 StgPtr *mark_splim;
 169
 170 // Flag and pointers used for falling back to a linear scan when the
 171 // mark stack overflows.
 172 rtsBool mark_stack_overflowed;
 173 bdescr *oldgen_scan_bd;
 174 StgPtr  oldgen_scan;
 175
 176 /* -----------------------------------------------------------------------------
 177    GarbageCollect: the main entry point to the garbage collector.
 178
 179    Locks held: all capabilities are held throughout GarbageCollect().
 180    -------------------------------------------------------------------------- */
 181
 182 void
 183 GarbageCollect (rtsBool force_major_gc,
 184                 nat gc_type USED_IF_THREADS,
 185                 Capability *cap USED_IF_THREADS)
 186 {
 187   bdescr *bd;
 188   step *stp;
 189   lnat live, allocated, max_copied, avg_copied, slop;
 190   gc_thread *saved_gct;
 191   nat g, s, t, n;
 192
 193   // necessary if we stole a callee-saves register for gct:
 194   saved_gct = gct;
 195
 196 #ifdef PROFILING
 197   CostCentreStack *prev_CCS;
 198 #endif
 199
 200   ACQUIRE_SM_LOCK;
 201
 202 #if defined(RTS_USER_SIGNALS)
 203   if (RtsFlags.MiscFlags.install_signal_handlers) {
 204     // block signals
 205     blockUserSignals();
 206   }
 207 #endif
 208
 209   ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
 210   // otherwise adjust the padding in step_workspace.
 211
 212   // tell the stats department that we've started a GC
 213   stat_startGC();
 214
 215   // tell the STM to discard any cached closures it's hoping to re-use
 216   stmPreGCHook();
 217
 218 #ifdef DEBUG
 219   mutlist_MUTVARS = 0;
 220   mutlist_MUTARRS = 0;
 221   mutlist_OTHERS = 0;
 222 #endif
 223
 224   // attribute any costs to CCS_GC
 225 #ifdef PROFILING
 226   prev_CCS = CCCS;
 227   CCCS = CCS_GC;
 228 #endif
 229
 230   /* Approximate how much we allocated.
 231    * Todo: only when generating stats?
 232    */
 233   allocated = calcAllocated();
 234
 235   /* Figure out which generation to collect
 236    */
 237   n = initialise_N(force_major_gc);
 238
 239   /* Start threads, so they can be spinning up while we finish initialisation.
 240    */
 241   start_gc_threads();
 242
 243 #if defined(THREADED_RTS)
 244   /* How many threads will be participating in this GC?
 245    * We don't try to parallelise minor GCs (unless the user asks for
 246    * it with +RTS -gn0), or mark/compact/sweep GC.
 247    */
 248   if (gc_type == PENDING_GC_PAR) {
 249       n_gc_threads = RtsFlags.ParFlags.nNodes;
 250   } else {
 251       n_gc_threads = 1;
 252   }
 253 #else
 254   n_gc_threads = 1;
 255 #endif
 256
 257   trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 258         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 259
 260 #ifdef RTS_GTK_FRONTPANEL
 261   if (RtsFlags.GcFlags.frontpanel) {
 262       updateFrontPanelBeforeGC(N);
 263   }
 264 #endif
 265
 266 #ifdef DEBUG
 267   // check for memory leaks if DEBUG is on
 268   memInventory(traceClass(DEBUG_gc));
 269 #endif
 270
 271   // check stack sanity *before* GC
 272   IF_DEBUG(sanity, checkFreeListSanity());
 273   IF_DEBUG(sanity, checkMutableLists());
 274
 275   // Initialise all our gc_thread structures
 276   for (t = 0; t < n_gc_threads; t++) {
 277       init_gc_thread(gc_threads[t]);
 278   }
 279
 280   // Initialise all the generations/steps that we're collecting.
 281   for (g = 0; g <= N; g++) {
 282       init_collected_gen(g,n_gc_threads);
 283   }
 284
 285   // Initialise all the generations/steps that we're *not* collecting.
 286   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 287       init_uncollected_gen(g,n_gc_threads);
 288   }
 289
 290   /* Allocate a mark stack if we're doing a major collection.
 291    */
 292   if (major_gc && oldest_gen->steps[0].mark) {
 293       nat mark_stack_blocks;
 294       mark_stack_blocks = stg_max(MARK_STACK_BLOCKS,
 295                                   oldest_gen->steps[0].n_old_blocks / 100);
 296       mark_stack_bdescr = allocGroup(mark_stack_blocks);
 297       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 298       mark_sp    = mark_stack;
 299       mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W);
 300   } else {
 301       mark_stack_bdescr = NULL;
 302   }
 303
 304   // this is the main thread
 305 #ifdef THREADED_RTS
 306   if (n_gc_threads == 1) {
 307       gct = gc_threads[0];
 308   } else {
 309       gct = gc_threads[cap->no];
 310   }
 311 #else
 312   gct = gc_threads[0];
 313 #endif
 314
 315   /* -----------------------------------------------------------------------
 316    * follow all the roots that we know about:
 317    *   - mutable lists from each generation > N
 318    * we want to *scavenge* these roots, not evacuate them: they're not
 319    * going to move in this GC.
 320    * Also do them in reverse generation order, for the usual reason:
 321    * namely to reduce the likelihood of spurious old->new pointers.
 322    */
 323   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 324       generations[g].saved_mut_list = generations[g].mut_list;
 325       generations[g].mut_list = allocBlock();
 326       // mut_list always has at least one block.
 327   }
 328
 329   // the main thread is running: this prevents any other threads from
 330   // exiting prematurely, so we can start them now.
 331   // NB. do this after the mutable lists have been saved above, otherwise
 332   // the other GC threads will be writing into the old mutable lists.
 333   inc_running();
 334   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 335
 336   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 337       scavenge_mutable_list(&generations[g]);
 338   }
 339
 340   // follow roots from the CAF list (used by GHCi)
 341   gct->evac_step = 0;
 342   markCAFs(mark_root, gct);
 343
 344   // follow all the roots that the application knows about.
 345   gct->evac_step = 0;
 346   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 347                        rtsTrue/*prune sparks*/);
 348
 349 #if defined(RTS_USER_SIGNALS)
 350   // mark the signal handlers (signals should be already blocked)
 351   markSignalHandlers(mark_root, gct);
 352 #endif
 353
 354   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 355   markWeakPtrList();
 356   initWeakForGC();
 357
 358   // Mark the stable pointer table.
 359   markStablePtrTable(mark_root, gct);
 360
 361   /* -------------------------------------------------------------------------
 362    * Repeatedly scavenge all the areas we know about until there's no
 363    * more scavenging to be done.
 364    */
 365   for (;;)
 366   {
 367       scavenge_until_all_done();
 368       // The other threads are now stopped.  We might recurse back to
 369       // here, but from now on this is the only thread.
 370
 371       // if any blackholes are alive, make the threads that wait on
 372       // them alive too.
 373       if (traverseBlackholeQueue()) {
 374           inc_running();
 375           continue;
 376       }
 377
 378       // must be last...  invariant is that everything is fully
 379       // scavenged at this point.
 380       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 381           inc_running();
 382           continue;
 383       }
 384
 385       // If we get to here, there's really nothing left to do.
 386       break;
 387   }
 388
 389   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 390
 391   // Update pointers from the Task list
 392   update_task_list();
 393
 394   // Now see which stable names are still alive.
 395   gcStablePtrTable();
 396
 397 #ifdef PROFILING
 398   // We call processHeapClosureForDead() on every closure destroyed during
 399   // the current garbage collection, so we invoke LdvCensusForDead().
 400   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 401       || RtsFlags.ProfFlags.bioSelector != NULL)
 402     LdvCensusForDead(N);
 403 #endif
 404
 405   // NO MORE EVACUATION AFTER THIS POINT!
 406
 407   // Two-space collector: free the old to-space.
 408   // g0s0->old_blocks is the old nursery
 409   // g0s0->blocks is to-space from the previous GC
 410   if (RtsFlags.GcFlags.generations == 1) {
 411       if (g0s0->blocks != NULL) {
 412           freeChain(g0s0->blocks);
 413           g0s0->blocks = NULL;
 414       }
 415   }
 416
 417   // For each workspace, in each thread, move the copied blocks to the step
 418   {
 419       gc_thread *thr;
 420       step_workspace *ws;
 421       bdescr *prev, *next;
 422
 423       for (t = 0; t < n_gc_threads; t++) {
 424           thr = gc_threads[t];
 425
 426           // not step 0
 427           if (RtsFlags.GcFlags.generations == 1) {
 428               s = 0;
 429           } else {
 430               s = 1;
 431           }
 432           for (; s < total_steps; s++) {
 433               ws = &thr->steps[s];
 434
 435               // Push the final block
 436               if (ws->todo_bd) {
 437                   push_scanned_block(ws->todo_bd, ws);
 438               }
 439
 440               ASSERT(gct->scan_bd == NULL);
 441               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 442
 443               prev = NULL;
 444               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 445                   ws->step->n_words += bd->free - bd->start;
 446                   prev = bd;
 447               }
 448               if (prev != NULL) {
 449                   prev->link = ws->step->blocks;
 450                   ws->step->blocks = ws->scavd_list;
 451               }
 452               ws->step->n_blocks += ws->n_scavd_blocks;
 453           }
 454       }
 455
 456       // Add all the partial blocks *after* we've added all the full
 457       // blocks.  This is so that we can grab the partial blocks back
 458       // again and try to fill them up in the next GC.
 459       for (t = 0; t < n_gc_threads; t++) {
 460           thr = gc_threads[t];
 461
 462           // not step 0
 463           if (RtsFlags.GcFlags.generations == 1) {
 464               s = 0;
 465           } else {
 466               s = 1;
 467           }
 468           for (; s < total_steps; s++) {
 469               ws = &thr->steps[s];
 470
 471               prev = NULL;
 472               for (bd = ws->part_list; bd != NULL; bd = next) {
 473                   next = bd->link;
 474                   if (bd->free == bd->start) {
 475                       if (prev == NULL) {
 476                           ws->part_list = next;
 477                       } else {
 478                           prev->link = next;
 479                       }
 480                       freeGroup(bd);
 481                       ws->n_part_blocks--;
 482                   } else {
 483                       ws->step->n_words += bd->free - bd->start;
 484                       prev = bd;
 485                   }
 486               }
 487               if (prev != NULL) {
 488                   prev->link = ws->step->blocks;
 489                   ws->step->blocks = ws->part_list;
 490               }
 491               ws->step->n_blocks += ws->n_part_blocks;
 492
 493               ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
 494               ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
 495           }
 496       }
 497   }
 498
 499   // Finally: compact or sweep the oldest generation.
 500   if (major_gc && oldest_gen->steps[0].mark) {
 501       if (oldest_gen->steps[0].compact)
 502           compact(gct->scavenged_static_objects);
 503       else
 504           sweep(&oldest_gen->steps[0]);
 505   }
 506
 507   /* run through all the generations/steps and tidy up
 508    */
 509   copied = 0;
 510   max_copied = 0;
 511   avg_copied = 0;
 512   {
 513       nat i;
 514       for (i=0; i < n_gc_threads; i++) {
 515           if (n_gc_threads > 1) {
 516               trace(TRACE_gc,"thread %d:", i);
 517               trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 518               trace(TRACE_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 519               trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
 520               trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
 521               trace(TRACE_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 522           }
 523           copied += gc_threads[i]->copied;
 524           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 525       }
 526       if (n_gc_threads == 1) {
 527           max_copied = 0;
 528           avg_copied = 0;
 529       } else {
 530           avg_copied = copied;
 531       }
 532   }
 533
 534   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 535
 536     if (g == N) {
 537       generations[g].collections++; // for stats
 538       if (n_gc_threads > 1) generations[g].par_collections++;
 539     }
 540
 541     // Count the mutable list as bytes "copied" for the purposes of
 542     // stats.  Every mutable list is copied during every GC.
 543     if (g > 0) {
 544         nat mut_list_size = 0;
 545         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 546             mut_list_size += bd->free - bd->start;
 547         }
 548         copied +=  mut_list_size;
 549
 550         debugTrace(DEBUG_gc,
 551                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 552                    (unsigned long)(mut_list_size * sizeof(W_)),
 553                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 554     }
 555
 556     for (s = 0; s < generations[g].n_steps; s++) {
 557       bdescr *next, *prev;
 558       stp = &generations[g].steps[s];
 559
 560       // for generations we collected...
 561       if (g <= N) {
 562
 563         /* free old memory and shift to-space into from-space for all
 564          * the collected steps (except the allocation area).  These
 565          * freed blocks will probaby be quickly recycled.
 566          */
 567         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 568             if (stp->mark)
 569             {
 570                 // tack the new blocks on the end of the existing blocks
 571                 if (stp->old_blocks != NULL) {
 572
 573                     prev = NULL;
 574                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 575
 576                         next = bd->link;
 577
 578                         if (!(bd->flags & BF_MARKED))
 579                         {
 580                             if (prev == NULL) {
 581                                 stp->old_blocks = next;
 582                             } else {
 583                                 prev->link = next;
 584                             }
 585                             freeGroup(bd);
 586                             stp->n_old_blocks--;
 587                         }
 588                         else
 589                         {
 590                             stp->n_words += bd->free - bd->start;
 591
 592                             // NB. this step might not be compacted next
 593                             // time, so reset the BF_MARKED flags.
 594                             // They are set before GC if we're going to
 595                             // compact.  (search for BF_MARKED above).
 596                             bd->flags &= ~BF_MARKED;
 597
 598                             // between GCs, all blocks in the heap except
 599                             // for the nursery have the BF_EVACUATED flag set.
 600                             bd->flags |= BF_EVACUATED;
 601
 602                             prev = bd;
 603                         }
 604                     }
 605
 606                     if (prev != NULL) {
 607                         prev->link = stp->blocks;
 608                         stp->blocks = stp->old_blocks;
 609                     }
 610                 }
 611                 // add the new blocks to the block tally
 612                 stp->n_blocks += stp->n_old_blocks;
 613                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 614                 ASSERT(countOccupied(stp->blocks) == stp->n_words);
 615             }
 616             else // not copacted
 617             {
 618                 freeChain(stp->old_blocks);
 619             }
 620             stp->old_blocks = NULL;
 621             stp->n_old_blocks = 0;
 622         }
 623
 624         /* LARGE OBJECTS.  The current live large objects are chained on
 625          * scavenged_large, having been moved during garbage
 626          * collection from large_objects.  Any objects left on
 627          * large_objects list are therefore dead, so we free them here.
 628          */
 629         for (bd = stp->large_objects; bd != NULL; bd = next) {
 630           next = bd->link;
 631           freeGroup(bd);
 632           bd = next;
 633         }
 634
 635         stp->large_objects  = stp->scavenged_large_objects;
 636         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 637
 638       }
 639       else // for older generations...
 640       {
 641         /* For older generations, we need to append the
 642          * scavenged_large_object list (i.e. large objects that have been
 643          * promoted during this GC) to the large_object list for that step.
 644          */
 645         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 646           next = bd->link;
 647           dbl_link_onto(bd, &stp->large_objects);
 648         }
 649
 650         // add the new blocks we promoted during this GC
 651         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 652       }
 653     }
 654   }
 655
 656   // update the max size of older generations after a major GC
 657   resize_generations();
 658
 659   // Calculate the amount of live data for stats.
 660   live = calcLiveWords();
 661
 662   // Free the small objects allocated via allocate(), since this will
 663   // all have been copied into G0S1 now.
 664   if (RtsFlags.GcFlags.generations > 1) {
 665       if (g0s0->blocks != NULL) {
 666           freeChain(g0s0->blocks);
 667           g0s0->blocks = NULL;
 668       }
 669       g0s0->n_blocks = 0;
 670       g0s0->n_words = 0;
 671   }
 672   alloc_blocks = 0;
 673   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 674
 675   // Start a new pinned_object_block
 676   pinned_object_block = NULL;
 677
 678   // Free the mark stack.
 679   if (mark_stack_bdescr != NULL) {
 680       freeGroup(mark_stack_bdescr);
 681   }
 682
 683   // Free any bitmaps.
 684   for (g = 0; g <= N; g++) {
 685       for (s = 0; s < generations[g].n_steps; s++) {
 686           stp = &generations[g].steps[s];
 687           if (stp->bitmap != NULL) {
 688               freeGroup(stp->bitmap);
 689               stp->bitmap = NULL;
 690           }
 691       }
 692   }
 693
 694   resize_nursery();
 695
 696  // mark the garbage collected CAFs as dead
 697 #if 0 && defined(DEBUG) // doesn't work at the moment
 698   if (major_gc) { gcCAFs(); }
 699 #endif
 700
 701 #ifdef PROFILING
 702   // resetStaticObjectForRetainerProfiling() must be called before
 703   // zeroing below.
 704   if (n_gc_threads > 1) {
 705       barf("profiling is currently broken with multi-threaded GC");
 706       // ToDo: fix the gct->scavenged_static_objects below
 707   }
 708   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 709 #endif
 710
 711   // zero the scavenged static object list
 712   if (major_gc) {
 713       nat i;
 714       for (i = 0; i < n_gc_threads; i++) {
 715           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 716       }
 717   }
 718
 719   // Reset the nursery
 720   resetNurseries();
 721
 722   // start any pending finalizers
 723   RELEASE_SM_LOCK;
 724   scheduleFinalizers(last_free_capability, old_weak_ptr_list);
 725   ACQUIRE_SM_LOCK;
 726
 727   // send exceptions to any threads which were about to die
 728   RELEASE_SM_LOCK;
 729   resurrectThreads(resurrected_threads);
 730   performPendingThrowTos(exception_threads);
 731   ACQUIRE_SM_LOCK;
 732
 733   // Update the stable pointer hash table.
 734   updateStablePtrTable(major_gc);
 735
 736   // check sanity after GC
 737   IF_DEBUG(sanity, checkSanity());
 738
 739   // extra GC trace info
 740   if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
 741
 742 #ifdef DEBUG
 743   // symbol-table based profiling
 744   /*  heapCensus(to_blocks); */ /* ToDo */
 745 #endif
 746
 747   // restore enclosing cost centre
 748 #ifdef PROFILING
 749   CCCS = prev_CCS;
 750 #endif
 751
 752 #ifdef DEBUG
 753   // check for memory leaks if DEBUG is on
 754   memInventory(traceClass(DEBUG_gc));
 755 #endif
 756
 757 #ifdef RTS_GTK_FRONTPANEL
 758   if (RtsFlags.GcFlags.frontpanel) {
 759       updateFrontPanelAfterGC( N, live );
 760   }
 761 #endif
 762
 763   // ok, GC over: tell the stats department what happened.
 764   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 765   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 766
 767   // Guess which generation we'll collect *next* time
 768   initialise_N(force_major_gc);
 769
 770 #if defined(RTS_USER_SIGNALS)
 771   if (RtsFlags.MiscFlags.install_signal_handlers) {
 772     // unblock signals again
 773     unblockUserSignals();
 774   }
 775 #endif
 776
 777   continue_gc_threads(n_gc_threads, gct->thread_index);
 778
 779   RELEASE_SM_LOCK;
 780
 781   gct = saved_gct;
 782 }
 783
 784 /* -----------------------------------------------------------------------------
 785    Figure out which generation to collect, initialise N and major_gc.
 786
 787    Also returns the total number of blocks in generations that will be
 788    collected.
 789    -------------------------------------------------------------------------- */
 790
 791 static nat
 792 initialise_N (rtsBool force_major_gc)
 793 {
 794     int g;
 795     nat s, blocks, blocks_total;
 796
 797     blocks = 0;
 798     blocks_total = 0;
 799
 800     if (force_major_gc) {
 801         N = RtsFlags.GcFlags.generations - 1;
 802     } else {
 803         N = 0;
 804     }
 805
 806     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 807         blocks = 0;
 808         for (s = 0; s < generations[g].n_steps; s++) {
 809             blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
 810             blocks += generations[g].steps[s].n_large_blocks;
 811         }
 812         if (blocks >= generations[g].max_blocks) {
 813             N = stg_max(N,g);
 814         }
 815         if ((nat)g <= N) {
 816             blocks_total += blocks;
 817         }
 818     }
 819
 820     blocks_total += countNurseryBlocks();
 821
 822     major_gc = (N == RtsFlags.GcFlags.generations-1);
 823     return blocks_total;
 824 }
 825
 826 /* -----------------------------------------------------------------------------
 827    Initialise the gc_thread structures.
 828    -------------------------------------------------------------------------- */
 829
 830 #define GC_THREAD_INACTIVE             0
 831 #define GC_THREAD_STANDING_BY          1
 832 #define GC_THREAD_RUNNING              2
 833 #define GC_THREAD_WAITING_TO_CONTINUE  3
 834
 835 static gc_thread *
 836 alloc_gc_thread (int n)
 837 {
 838     nat s;
 839     step_workspace *ws;
 840     gc_thread *t;
 841
 842     t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 843                        "alloc_gc_thread");
 844
 845 #ifdef THREADED_RTS
 846     t->id = 0;
 847     initSpinLock(&t->gc_spin);
 848     initSpinLock(&t->mut_spin);
 849     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 850     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 851                           // thread to start up, see wakeup_gc_threads
 852 #endif
 853
 854     t->thread_index = n;
 855     t->free_blocks = NULL;
 856     t->gc_count = 0;
 857
 858     init_gc_thread(t);
 859
 860 #ifdef USE_PAPI
 861     t->papi_events = -1;
 862 #endif
 863
 864     for (s = 0; s < total_steps; s++)
 865     {
 866         ws = &t->steps[s];
 867         ws->step = &all_steps[s];
 868         ASSERT(s == ws->step->abs_no);
 869         ws->gct = t;
 870
 871         ws->todo_bd = NULL;
 872         ws->buffer_todo_bd = NULL;
 873
 874         ws->part_list = NULL;
 875         ws->n_part_blocks = 0;
 876
 877         ws->scavd_list = NULL;
 878         ws->n_scavd_blocks = 0;
 879     }
 880
 881     return t;
 882 }
 883
 884
 885 void
 886 initGcThreads (void)
 887 {
 888     if (gc_threads == NULL) {
 889 #if defined(THREADED_RTS)
 890         nat i;
 891         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 892                                      sizeof(gc_thread*),
 893                                      "alloc_gc_threads");
 894
 895         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 896             gc_threads[i] = alloc_gc_thread(i);
 897         }
 898 #else
 899         gc_threads = stgMallocBytes (sizeof(gc_thread*),
 900                                      "alloc_gc_threads");
 901
 902         gc_threads[0] = alloc_gc_thread(0);
 903 #endif
 904     }
 905 }
 906
 907 /* ----------------------------------------------------------------------------
 908    Start GC threads
 909    ------------------------------------------------------------------------- */
 910
 911 static nat gc_running_threads;
 912
 913 #if defined(THREADED_RTS)
 914 static Mutex gc_running_mutex;
 915 #endif
 916
 917 static nat
 918 inc_running (void)
 919 {
 920     nat n_running;
 921     ACQUIRE_LOCK(&gc_running_mutex);
 922     n_running = ++gc_running_threads;
 923     RELEASE_LOCK(&gc_running_mutex);
 924     ASSERT(n_running <= n_gc_threads);
 925     return n_running;
 926 }
 927
 928 static nat
 929 dec_running (void)
 930 {
 931     nat n_running;
 932     ACQUIRE_LOCK(&gc_running_mutex);
 933     ASSERT(n_gc_threads != 0);
 934     n_running = --gc_running_threads;
 935     RELEASE_LOCK(&gc_running_mutex);
 936     return n_running;
 937 }
 938
 939 static rtsBool
 940 any_work (void)
 941 {
 942     int s;
 943     step_workspace *ws;
 944
 945     gct->any_work++;
 946
 947     write_barrier();
 948
 949     // scavenge objects in compacted generation
 950     if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
 951         (mark_stack_bdescr != NULL && !mark_stack_empty())) {
 952         return rtsTrue;
 953     }
 954
 955     // Check for global work in any step.  We don't need to check for
 956     // local work, because we have already exited scavenge_loop(),
 957     // which means there is no local work for this thread.
 958     for (s = total_steps-1; s >= 0; s--) {
 959         if (s == 0 && RtsFlags.GcFlags.generations > 1) {
 960             continue;
 961         }
 962         ws = &gct->steps[s];
 963         if (ws->todo_large_objects) return rtsTrue;
 964         if (ws->step->todos) return rtsTrue;
 965     }
 966
 967     gct->no_work++;
 968
 969     return rtsFalse;
 970 }
 971
 972 static void
 973 scavenge_until_all_done (void)
 974 {
 975     nat r;
 976
 977     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
 978
 979 loop:
 980 #if defined(THREADED_RTS)
 981     if (n_gc_threads > 1) {
 982         scavenge_loop();
 983     } else {
 984         scavenge_loop1();
 985     }
 986 #else
 987     scavenge_loop();
 988 #endif
 989
 990     // scavenge_loop() only exits when there's no work to do
 991     r = dec_running();
 992
 993     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
 994                gct->thread_index, r);
 995
 996     while (gc_running_threads != 0) {
 997         // usleep(1);
 998         if (any_work()) {
 999             inc_running();
1000             goto loop;
1001         }
1002         // any_work() does not remove the work from the queue, it
1003         // just checks for the presence of work.  If we find any,
1004         // then we increment gc_running_threads and go back to
1005         // scavenge_loop() to perform any pending work.
1006     }
1007
1008     // All threads are now stopped
1009     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
1010 }
1011
1012 #if defined(THREADED_RTS)
1013
1014 void
1015 gcWorkerThread (Capability *cap)
1016 {
1017     cap->in_gc = rtsTrue;
1018
1019     gct = gc_threads[cap->no];
1020     gct->id = osThreadId();
1021
1022     // Wait until we're told to wake up
1023     RELEASE_SPIN_LOCK(&gct->mut_spin);
1024     gct->wakeup = GC_THREAD_STANDING_BY;
1025     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1026     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1027
1028 #ifdef USE_PAPI
1029     // start performance counters in this thread...
1030     if (gct->papi_events == -1) {
1031         papi_init_eventset(&gct->papi_events);
1032     }
1033     papi_thread_start_gc1_count(gct->papi_events);
1034 #endif
1035
1036     // Every thread evacuates some roots.
1037     gct->evac_step = 0;
1038     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1039                          rtsTrue/*prune sparks*/);
1040
1041     scavenge_until_all_done();
1042
1043 #ifdef USE_PAPI
1044     // count events in this thread towards the GC totals
1045     papi_thread_stop_gc1_count(gct->papi_events);
1046 #endif
1047
1048     // Wait until we're told to continue
1049     RELEASE_SPIN_LOCK(&gct->gc_spin);
1050     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1051     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1052                gct->thread_index);
1053     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1054     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1055 }
1056
1057 #endif
1058
1059 void
1060 waitForGcThreads (Capability *cap USED_IF_THREADS)
1061 {
1062 #if defined(THREADED_RTS)
1063     nat n_threads = RtsFlags.ParFlags.nNodes;
1064     nat me = cap->no;
1065     nat i, j;
1066     rtsBool retry = rtsTrue;
1067
1068     while(retry) {
1069         for (i=0; i < n_threads; i++) {
1070             if (i == me) continue;
1071             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1072                 prodCapability(&capabilities[i], cap->running_task);
1073             }
1074         }
1075         for (j=0; j < 10000000; j++) {
1076             retry = rtsFalse;
1077             for (i=0; i < n_threads; i++) {
1078                 if (i == me) continue;
1079                 write_barrier();
1080                 setContextSwitches();
1081                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1082                     retry = rtsTrue;
1083                 }
1084             }
1085             if (!retry) break;
1086         }
1087     }
1088 #endif
1089 }
1090
1091 static void
1092 start_gc_threads (void)
1093 {
1094 #if defined(THREADED_RTS)
1095     gc_running_threads = 0;
1096     initMutex(&gc_running_mutex);
1097 #endif
1098 }
1099
1100 static void
1101 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1102 {
1103 #if defined(THREADED_RTS)
1104     nat i;
1105     for (i=0; i < n_threads; i++) {
1106         if (i == me) continue;
1107         inc_running();
1108         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1109         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1110
1111         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1112         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1113         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1114     }
1115 #endif
1116 }
1117
1118 // After GC is complete, we must wait for all GC threads to enter the
1119 // standby state, otherwise they may still be executing inside
1120 // any_work(), and may even remain awake until the next GC starts.
1121 static void
1122 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1123 {
1124 #if defined(THREADED_RTS)
1125     nat i;
1126     for (i=0; i < n_threads; i++) {
1127         if (i == me) continue;
1128         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1129     }
1130 #endif
1131 }
1132
1133 static void
1134 continue_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1135 {
1136 #if defined(THREADED_RTS)
1137     nat i;
1138     for (i=0; i < n_threads; i++) {
1139         if (i == me) continue;
1140         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) barf("continue_gc_threads");
1141
1142         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1143         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1144         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1145     }
1146 #endif
1147 }
1148
1149 /* ----------------------------------------------------------------------------
1150    Initialise a generation that is to be collected
1151    ------------------------------------------------------------------------- */
1152
1153 static void
1154 init_collected_gen (nat g, nat n_threads)
1155 {
1156     nat s, t, i;
1157     step_workspace *ws;
1158     step *stp;
1159     bdescr *bd;
1160
1161     // Throw away the current mutable list.  Invariant: the mutable
1162     // list always has at least one block; this means we can avoid a
1163     // check for NULL in recordMutable().
1164     if (g != 0) {
1165         freeChain(generations[g].mut_list);
1166         generations[g].mut_list = allocBlock();
1167         for (i = 0; i < n_capabilities; i++) {
1168             freeChain(capabilities[i].mut_lists[g]);
1169             capabilities[i].mut_lists[g] = allocBlock();
1170         }
1171     }
1172
1173     for (s = 0; s < generations[g].n_steps; s++) {
1174
1175         stp = &generations[g].steps[s];
1176         ASSERT(stp->gen_no == g);
1177
1178         // we'll construct a new list of threads in this step
1179         // during GC, throw away the current list.
1180         stp->old_threads = stp->threads;
1181         stp->threads = END_TSO_QUEUE;
1182
1183         // generation 0, step 0 doesn't need to-space
1184         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1185             continue;
1186         }
1187
1188         // deprecate the existing blocks
1189         stp->old_blocks   = stp->blocks;
1190         stp->n_old_blocks = stp->n_blocks;
1191         stp->blocks       = NULL;
1192         stp->n_blocks     = 0;
1193         stp->n_words      = 0;
1194         stp->live_estimate = 0;
1195
1196         // we don't have any to-be-scavenged blocks yet
1197         stp->todos = NULL;
1198         stp->todos_last = NULL;
1199         stp->n_todos = 0;
1200
1201         // initialise the large object queues.
1202         stp->scavenged_large_objects = NULL;
1203         stp->n_scavenged_large_blocks = 0;
1204
1205         // mark the small objects as from-space
1206         for (bd = stp->old_blocks; bd; bd = bd->link) {
1207             bd->flags &= ~BF_EVACUATED;
1208         }
1209
1210         // mark the large objects as from-space
1211         for (bd = stp->large_objects; bd; bd = bd->link) {
1212             bd->flags &= ~BF_EVACUATED;
1213         }
1214
1215         // for a compacted step, we need to allocate the bitmap
1216         if (stp->mark) {
1217             nat bitmap_size; // in bytes
1218             bdescr *bitmap_bdescr;
1219             StgWord *bitmap;
1220
1221             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1222
1223             if (bitmap_size > 0) {
1224                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1225                                            / BLOCK_SIZE);
1226                 stp->bitmap = bitmap_bdescr;
1227                 bitmap = bitmap_bdescr->start;
1228
1229                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1230                            bitmap_size, bitmap);
1231
1232                 // don't forget to fill it with zeros!
1233                 memset(bitmap, 0, bitmap_size);
1234
1235                 // For each block in this step, point to its bitmap from the
1236                 // block descriptor.
1237                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1238                     bd->u.bitmap = bitmap;
1239                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1240
1241                     // Also at this point we set the BF_MARKED flag
1242                     // for this block.  The invariant is that
1243                     // BF_MARKED is always unset, except during GC
1244                     // when it is set on those blocks which will be
1245                     // compacted.
1246                     if (!(bd->flags & BF_FRAGMENTED)) {
1247                         bd->flags |= BF_MARKED;
1248                     }
1249                 }
1250             }
1251         }
1252     }
1253
1254     // For each GC thread, for each step, allocate a "todo" block to
1255     // store evacuated objects to be scavenged, and a block to store
1256     // evacuated objects that do not need to be scavenged.
1257     for (t = 0; t < n_threads; t++) {
1258         for (s = 0; s < generations[g].n_steps; s++) {
1259
1260             // we don't copy objects into g0s0, unless -G0
1261             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1262
1263             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1264
1265             ws->todo_large_objects = NULL;
1266
1267             ws->part_list = NULL;
1268             ws->n_part_blocks = 0;
1269
1270             // allocate the first to-space block; extra blocks will be
1271             // chained on as necessary.
1272             ws->todo_bd = NULL;
1273             ws->buffer_todo_bd = NULL;
1274             alloc_todo_block(ws,0);
1275
1276             ws->scavd_list = NULL;
1277             ws->n_scavd_blocks = 0;
1278         }
1279     }
1280 }
1281
1282
1283 /* ----------------------------------------------------------------------------
1284    Initialise a generation that is *not* to be collected
1285    ------------------------------------------------------------------------- */
1286
1287 static void
1288 init_uncollected_gen (nat g, nat threads)
1289 {
1290     nat s, t, i;
1291     step_workspace *ws;
1292     step *stp;
1293     bdescr *bd;
1294
1295     for (s = 0; s < generations[g].n_steps; s++) {
1296         stp = &generations[g].steps[s];
1297         stp->scavenged_large_objects = NULL;
1298         stp->n_scavenged_large_blocks = 0;
1299     }
1300
1301     for (s = 0; s < generations[g].n_steps; s++) {
1302
1303         stp = &generations[g].steps[s];
1304
1305         for (t = 0; t < threads; t++) {
1306             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1307
1308             ws->buffer_todo_bd = NULL;
1309             ws->todo_large_objects = NULL;
1310
1311             ws->part_list = NULL;
1312             ws->n_part_blocks = 0;
1313
1314             ws->scavd_list = NULL;
1315             ws->n_scavd_blocks = 0;
1316
1317             // If the block at the head of the list in this generation
1318             // is less than 3/4 full, then use it as a todo block.
1319             if (stp->blocks && isPartiallyFull(stp->blocks))
1320             {
1321                 ws->todo_bd = stp->blocks;
1322                 ws->todo_free = ws->todo_bd->free;
1323                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1324                 stp->blocks = stp->blocks->link;
1325                 stp->n_blocks -= 1;
1326                 stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1327                 ws->todo_bd->link = NULL;
1328                 // we must scan from the current end point.
1329                 ws->todo_bd->u.scan = ws->todo_bd->free;
1330             }
1331             else
1332             {
1333                 ws->todo_bd = NULL;
1334                 alloc_todo_block(ws,0);
1335             }
1336         }
1337
1338         // deal out any more partial blocks to the threads' part_lists
1339         t = 0;
1340         while (stp->blocks && isPartiallyFull(stp->blocks))
1341         {
1342             bd = stp->blocks;
1343             stp->blocks = bd->link;
1344             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1345             bd->link = ws->part_list;
1346             ws->part_list = bd;
1347             ws->n_part_blocks += 1;
1348             bd->u.scan = bd->free;
1349             stp->n_blocks -= 1;
1350             stp->n_words -= bd->free - bd->start;
1351             t++;
1352             if (t == n_gc_threads) t = 0;
1353         }
1354     }
1355
1356
1357     // Move the private mutable lists from each capability onto the
1358     // main mutable list for the generation.
1359     for (i = 0; i < n_capabilities; i++) {
1360         for (bd = capabilities[i].mut_lists[g];
1361              bd->link != NULL; bd = bd->link) {
1362             /* nothing */
1363         }
1364         bd->link = generations[g].mut_list;
1365         generations[g].mut_list = capabilities[i].mut_lists[g];
1366         capabilities[i].mut_lists[g] = allocBlock();
1367     }
1368 }
1369
1370 /* -----------------------------------------------------------------------------
1371    Initialise a gc_thread before GC
1372    -------------------------------------------------------------------------- */
1373
1374 static void
1375 init_gc_thread (gc_thread *t)
1376 {
1377     t->static_objects = END_OF_STATIC_LIST;
1378     t->scavenged_static_objects = END_OF_STATIC_LIST;
1379     t->scan_bd = NULL;
1380     t->evac_step = 0;
1381     t->failed_to_evac = rtsFalse;
1382     t->eager_promotion = rtsTrue;
1383     t->thunk_selector_depth = 0;
1384     t->copied = 0;
1385     t->scanned = 0;
1386     t->any_work = 0;
1387     t->no_work = 0;
1388     t->scav_find_work = 0;
1389 }
1390
1391 /* -----------------------------------------------------------------------------
1392    Function we pass to evacuate roots.
1393    -------------------------------------------------------------------------- */
1394
1395 static void
1396 mark_root(void *user, StgClosure **root)
1397 {
1398     // we stole a register for gct, but this function is called from
1399     // *outside* the GC where the register variable is not in effect,
1400     // so we need to save and restore it here.  NB. only call
1401     // mark_root() from the main GC thread, otherwise gct will be
1402     // incorrect.
1403     gc_thread *saved_gct;
1404     saved_gct = gct;
1405     gct = user;
1406
1407     evacuate(root);
1408
1409     gct = saved_gct;
1410 }
1411
1412 /* -----------------------------------------------------------------------------
1413    Initialising the static object & mutable lists
1414    -------------------------------------------------------------------------- */
1415
1416 static void
1417 zero_static_object_list(StgClosure* first_static)
1418 {
1419   StgClosure* p;
1420   StgClosure* link;
1421   const StgInfoTable *info;
1422
1423   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1424     info = get_itbl(p);
1425     link = *STATIC_LINK(info, p);
1426     *STATIC_LINK(info,p) = NULL;
1427   }
1428 }
1429
1430 /* ----------------------------------------------------------------------------
1431    Update the pointers from the task list
1432
1433    These are treated as weak pointers because we want to allow a main
1434    thread to get a BlockedOnDeadMVar exception in the same way as any
1435    other thread.  Note that the threads should all have been retained
1436    by GC by virtue of being on the all_threads list, we're just
1437    updating pointers here.
1438    ------------------------------------------------------------------------- */
1439
1440 static void
1441 update_task_list (void)
1442 {
1443     Task *task;
1444     StgTSO *tso;
1445     for (task = all_tasks; task != NULL; task = task->all_link) {
1446         if (!task->stopped && task->tso) {
1447             ASSERT(task->tso->bound == task);
1448             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1449             if (tso == NULL) {
1450                 barf("task %p: main thread %d has been GC'd",
1451 #ifdef THREADED_RTS
1452                      (void *)task->id,
1453 #else
1454                      (void *)task,
1455 #endif
1456                      task->tso->id);
1457             }
1458             task->tso = tso;
1459         }
1460     }
1461 }
1462
1463 /* ----------------------------------------------------------------------------
1464    Reset the sizes of the older generations when we do a major
1465    collection.
1466
1467    CURRENT STRATEGY: make all generations except zero the same size.
1468    We have to stay within the maximum heap size, and leave a certain
1469    percentage of the maximum heap size available to allocate into.
1470    ------------------------------------------------------------------------- */
1471
1472 static void
1473 resize_generations (void)
1474 {
1475     nat g;
1476
1477     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1478         nat live, size, min_alloc, words;
1479         nat max  = RtsFlags.GcFlags.maxHeapSize;
1480         nat gens = RtsFlags.GcFlags.generations;
1481
1482         // live in the oldest generations
1483         if (oldest_gen->steps[0].live_estimate != 0) {
1484             words = oldest_gen->steps[0].live_estimate;
1485         } else {
1486             words = oldest_gen->steps[0].n_words;
1487         }
1488         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1489             oldest_gen->steps[0].n_large_blocks;
1490
1491         // default max size for all generations except zero
1492         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1493                        RtsFlags.GcFlags.minOldGenSize);
1494
1495         // minimum size for generation zero
1496         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1497                             RtsFlags.GcFlags.minAllocAreaSize);
1498
1499         // Auto-enable compaction when the residency reaches a
1500         // certain percentage of the maximum heap size (default: 30%).
1501         if (RtsFlags.GcFlags.generations > 1 &&
1502             (RtsFlags.GcFlags.compact ||
1503              (max > 0 &&
1504               oldest_gen->steps[0].n_blocks >
1505               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1506             oldest_gen->steps[0].mark = 1;
1507             oldest_gen->steps[0].compact = 1;
1508 //        debugBelch("compaction: on\n", live);
1509         } else {
1510             oldest_gen->steps[0].mark = 0;
1511             oldest_gen->steps[0].compact = 0;
1512 //        debugBelch("compaction: off\n", live);
1513         }
1514
1515         if (RtsFlags.GcFlags.sweep) {
1516             oldest_gen->steps[0].mark = 1;
1517         }
1518
1519         // if we're going to go over the maximum heap size, reduce the
1520         // size of the generations accordingly.  The calculation is
1521         // different if compaction is turned on, because we don't need
1522         // to double the space required to collect the old generation.
1523         if (max != 0) {
1524
1525             // this test is necessary to ensure that the calculations
1526             // below don't have any negative results - we're working
1527             // with unsigned values here.
1528             if (max < min_alloc) {
1529                 heapOverflow();
1530             }
1531
1532             if (oldest_gen->steps[0].compact) {
1533                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1534                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1535                 }
1536             } else {
1537                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1538                     size = (max - min_alloc) / ((gens - 1) * 2);
1539                 }
1540             }
1541
1542             if (size < live) {
1543                 heapOverflow();
1544             }
1545         }
1546
1547 #if 0
1548         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1549                    min_alloc, size, max);
1550 #endif
1551
1552         for (g = 0; g < gens; g++) {
1553             generations[g].max_blocks = size;
1554         }
1555     }
1556 }
1557
1558 /* -----------------------------------------------------------------------------
1559    Calculate the new size of the nursery, and resize it.
1560    -------------------------------------------------------------------------- */
1561
1562 static void
1563 resize_nursery (void)
1564 {
1565     if (RtsFlags.GcFlags.generations == 1)
1566     {   // Two-space collector:
1567         nat blocks;
1568
1569         /* set up a new nursery.  Allocate a nursery size based on a
1570          * function of the amount of live data (by default a factor of 2)
1571          * Use the blocks from the old nursery if possible, freeing up any
1572          * left over blocks.
1573          *
1574          * If we get near the maximum heap size, then adjust our nursery
1575          * size accordingly.  If the nursery is the same size as the live
1576          * data (L), then we need 3L bytes.  We can reduce the size of the
1577          * nursery to bring the required memory down near 2L bytes.
1578          *
1579          * A normal 2-space collector would need 4L bytes to give the same
1580          * performance we get from 3L bytes, reducing to the same
1581          * performance at 2L bytes.
1582          */
1583         blocks = g0s0->n_blocks;
1584
1585         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1586              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1587              RtsFlags.GcFlags.maxHeapSize )
1588         {
1589             long adjusted_blocks;  // signed on purpose
1590             int pc_free;
1591
1592             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1593
1594             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1595                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1596
1597             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1598             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1599             {
1600                 heapOverflow();
1601             }
1602             blocks = adjusted_blocks;
1603         }
1604         else
1605         {
1606             blocks *= RtsFlags.GcFlags.oldGenFactor;
1607             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1608             {
1609                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1610             }
1611         }
1612         resizeNurseries(blocks);
1613     }
1614     else  // Generational collector
1615     {
1616         /*
1617          * If the user has given us a suggested heap size, adjust our
1618          * allocation area to make best use of the memory available.
1619          */
1620         if (RtsFlags.GcFlags.heapSizeSuggestion)
1621         {
1622             long blocks;
1623             nat needed = calcNeeded();  // approx blocks needed at next GC
1624
1625             /* Guess how much will be live in generation 0 step 0 next time.
1626              * A good approximation is obtained by finding the
1627              * percentage of g0s0 that was live at the last minor GC.
1628              *
1629              * We have an accurate figure for the amount of copied data in
1630              * 'copied', but we must convert this to a number of blocks, with
1631              * a small adjustment for estimated slop at the end of a block
1632              * (- 10 words).
1633              */
1634             if (N == 0)
1635             {
1636                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1637                     / countNurseryBlocks();
1638             }
1639
1640             /* Estimate a size for the allocation area based on the
1641              * information available.  We might end up going slightly under
1642              * or over the suggested heap size, but we should be pretty
1643              * close on average.
1644              *
1645              * Formula:            suggested - needed
1646              *                ----------------------------
1647              *                    1 + g0s0_pcnt_kept/100
1648              *
1649              * where 'needed' is the amount of memory needed at the next
1650              * collection for collecting all steps except g0s0.
1651              */
1652             blocks =
1653                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1654                 (100 + (long)g0s0_pcnt_kept);
1655
1656             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1657                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1658             }
1659
1660             resizeNurseries((nat)blocks);
1661         }
1662         else
1663         {
1664             // we might have added extra large blocks to the nursery, so
1665             // resize back to minAllocAreaSize again.
1666             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1667         }
1668     }
1669 }
1670
1671 /* -----------------------------------------------------------------------------
1672    Sanity code for CAF garbage collection.
1673
1674    With DEBUG turned on, we manage a CAF list in addition to the SRT
1675    mechanism.  After GC, we run down the CAF list and blackhole any
1676    CAFs which have been garbage collected.  This means we get an error
1677    whenever the program tries to enter a garbage collected CAF.
1678
1679    Any garbage collected CAFs are taken off the CAF list at the same
1680    time.
1681    -------------------------------------------------------------------------- */
1682
1683 #if 0 && defined(DEBUG)
1684
1685 static void
1686 gcCAFs(void)
1687 {
1688   StgClosure*  p;
1689   StgClosure** pp;
1690   const StgInfoTable *info;
1691   nat i;
1692
1693   i = 0;
1694   p = caf_list;
1695   pp = &caf_list;
1696
1697   while (p != NULL) {
1698
1699     info = get_itbl(p);
1700
1701     ASSERT(info->type == IND_STATIC);
1702
1703     if (STATIC_LINK(info,p) == NULL) {
1704         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1705         // black hole it
1706         SET_INFO(p,&stg_BLACKHOLE_info);
1707         p = STATIC_LINK2(info,p);
1708         *pp = p;
1709     }
1710     else {
1711       pp = &STATIC_LINK2(info,p);
1712       p = *pp;
1713       i++;
1714     }
1715
1716   }
1717
1718   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1719 }
1720 #endif