rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "Compact.h"
  44 #include "Evac.h"
  45 #include "Scav.h"
  46 #include "GCUtils.h"
  47 #include "MarkWeak.h"
  48 #include "Sparks.h"
  49 #include "Sweep.h"
  50
  51 #include <string.h> // for memset()
  52 #include <unistd.h>
  53
  54 /* -----------------------------------------------------------------------------
  55    Global variables
  56    -------------------------------------------------------------------------- */
  57
  58 /* STATIC OBJECT LIST.
  59  *
  60  * During GC:
  61  * We maintain a linked list of static objects that are still live.
  62  * The requirements for this list are:
  63  *
  64  *  - we need to scan the list while adding to it, in order to
  65  *    scavenge all the static objects (in the same way that
  66  *    breadth-first scavenging works for dynamic objects).
  67  *
  68  *  - we need to be able to tell whether an object is already on
  69  *    the list, to break loops.
  70  *
  71  * Each static object has a "static link field", which we use for
  72  * linking objects on to the list.  We use a stack-type list, consing
  73  * objects on the front as they are added (this means that the
  74  * scavenge phase is depth-first, not breadth-first, but that
  75  * shouldn't matter).
  76  *
  77  * A separate list is kept for objects that have been scavenged
  78  * already - this is so that we can zero all the marks afterwards.
  79  *
  80  * An object is on the list if its static link field is non-zero; this
  81  * means that we have to mark the end of the list with '1', not NULL.
  82  *
  83  * Extra notes for generational GC:
  84  *
  85  * Each generation has a static object list associated with it.  When
  86  * collecting generations up to N, we treat the static object lists
  87  * from generations > N as roots.
  88  *
  89  * We build up a static object list while collecting generations 0..N,
  90  * which is then appended to the static object list of generation N+1.
  91  */
  92
  93 /* N is the oldest generation being collected, where the generations
  94  * are numbered starting at 0.  A major GC (indicated by the major_gc
  95  * flag) is when we're collecting all generations.  We only attempt to
  96  * deal with static objects and GC CAFs when doing a major GC.
  97  */
  98 nat N;
  99 rtsBool major_gc;
 100
 101 /* Data used for allocation area sizing.
 102  */
 103 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 104
 105 /* Mut-list stats */
 106 #ifdef DEBUG
 107 nat mutlist_MUTVARS,
 108     mutlist_MUTARRS,
 109     mutlist_MVARS,
 110     mutlist_OTHERS;
 111 #endif
 112
 113 /* Thread-local data for each GC thread
 114  */
 115 gc_thread **gc_threads = NULL;
 116
 117 #if !defined(THREADED_RTS)
 118 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)];
 119 #endif
 120
 121 // Number of threads running in *this* GC.  Affects how many
 122 // step->todos[] lists we have to look in to find work.
 123 nat n_gc_threads;
 124
 125 // For stats:
 126 long copied;        // *words* copied & scavenged during this GC
 127
 128 rtsBool work_stealing;
 129
 130 DECLARE_GCT
 131
 132 /* -----------------------------------------------------------------------------
 133    Static function declarations
 134    -------------------------------------------------------------------------- */
 135
 136 static void mark_root               (void *user, StgClosure **root);
 137 static void zero_static_object_list (StgClosure* first_static);
 138 static nat  initialise_N            (rtsBool force_major_gc);
 139 static void init_collected_gen      (nat g, nat threads);
 140 static void init_uncollected_gen    (nat g, nat threads);
 141 static void init_gc_thread          (gc_thread *t);
 142 static void update_task_list        (void);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void scavenge_until_all_done (void);
 147 static StgWord inc_running          (void);
 148 static StgWord dec_running          (void);
 149 static void wakeup_gc_threads       (nat n_threads, nat me);
 150 static void shutdown_gc_threads     (nat n_threads, nat me);
 151
 152 #if 0 && defined(DEBUG)
 153 static void gcCAFs                  (void);
 154 #endif
 155
 156 /* -----------------------------------------------------------------------------
 157    The mark bitmap & stack.
 158    -------------------------------------------------------------------------- */
 159
 160 #define MARK_STACK_BLOCKS 4
 161
 162 bdescr *mark_stack_bdescr;
 163 StgPtr *mark_stack;
 164 StgPtr *mark_sp;
 165 StgPtr *mark_splim;
 166
 167 // Flag and pointers used for falling back to a linear scan when the
 168 // mark stack overflows.
 169 rtsBool mark_stack_overflowed;
 170 bdescr *oldgen_scan_bd;
 171 StgPtr  oldgen_scan;
 172
 173 /* -----------------------------------------------------------------------------
 174    GarbageCollect: the main entry point to the garbage collector.
 175
 176    Locks held: all capabilities are held throughout GarbageCollect().
 177    -------------------------------------------------------------------------- */
 178
 179 void
 180 GarbageCollect (rtsBool force_major_gc,
 181                 nat gc_type USED_IF_THREADS,
 182                 Capability *cap)
 183 {
 184   bdescr *bd;
 185   step *stp;
 186   lnat live, allocated, max_copied, avg_copied, slop;
 187   gc_thread *saved_gct;
 188   nat g, s, t, n;
 189
 190   // necessary if we stole a callee-saves register for gct:
 191   saved_gct = gct;
 192
 193 #ifdef PROFILING
 194   CostCentreStack *prev_CCS;
 195 #endif
 196
 197   ACQUIRE_SM_LOCK;
 198
 199 #if defined(RTS_USER_SIGNALS)
 200   if (RtsFlags.MiscFlags.install_signal_handlers) {
 201     // block signals
 202     blockUserSignals();
 203   }
 204 #endif
 205
 206   ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
 207   // otherwise adjust the padding in step_workspace.
 208
 209   // tell the stats department that we've started a GC
 210   stat_startGC();
 211
 212   // tell the STM to discard any cached closures it's hoping to re-use
 213   stmPreGCHook();
 214
 215   // lock the StablePtr table
 216   stablePtrPreGC();
 217
 218 #ifdef DEBUG
 219   mutlist_MUTVARS = 0;
 220   mutlist_MUTARRS = 0;
 221   mutlist_OTHERS = 0;
 222 #endif
 223
 224   // attribute any costs to CCS_GC
 225 #ifdef PROFILING
 226   prev_CCS = CCCS;
 227   CCCS = CCS_GC;
 228 #endif
 229
 230   /* Approximate how much we allocated.
 231    * Todo: only when generating stats?
 232    */
 233   allocated = calcAllocated();
 234
 235   /* Figure out which generation to collect
 236    */
 237   n = initialise_N(force_major_gc);
 238
 239 #if defined(THREADED_RTS)
 240   work_stealing = RtsFlags.ParFlags.parGcLoadBalancing;
 241       // It's not always a good idea to do load balancing in parallel
 242       // GC.  In particular, for a parallel program we don't want to
 243       // lose locality by moving cached data into another CPU's cache
 244       // (this effect can be quite significant).
 245       //
 246       // We could have a more complex way to deterimine whether to do
 247       // work stealing or not, e.g. it might be a good idea to do it
 248       // if the heap is big.  For now, we just turn it on or off with
 249       // a flag.
 250 #endif
 251
 252   /* Start threads, so they can be spinning up while we finish initialisation.
 253    */
 254   start_gc_threads();
 255
 256 #if defined(THREADED_RTS)
 257   /* How many threads will be participating in this GC?
 258    * We don't try to parallelise minor GCs (unless the user asks for
 259    * it with +RTS -gn0), or mark/compact/sweep GC.
 260    */
 261   if (gc_type == PENDING_GC_PAR) {
 262       n_gc_threads = RtsFlags.ParFlags.nNodes;
 263   } else {
 264       n_gc_threads = 1;
 265   }
 266 #else
 267   n_gc_threads = 1;
 268 #endif
 269
 270   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 271         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 272
 273 #ifdef RTS_GTK_FRONTPANEL
 274   if (RtsFlags.GcFlags.frontpanel) {
 275       updateFrontPanelBeforeGC(N);
 276   }
 277 #endif
 278
 279 #ifdef DEBUG
 280   // check for memory leaks if DEBUG is on
 281   memInventory(traceClass(DEBUG_gc));
 282 #endif
 283
 284   // check stack sanity *before* GC
 285   IF_DEBUG(sanity, checkFreeListSanity());
 286   IF_DEBUG(sanity, checkMutableLists(rtsTrue));
 287
 288   // Initialise all our gc_thread structures
 289   for (t = 0; t < n_gc_threads; t++) {
 290       init_gc_thread(gc_threads[t]);
 291   }
 292
 293   // Initialise all the generations/steps that we're collecting.
 294   for (g = 0; g <= N; g++) {
 295       init_collected_gen(g,n_gc_threads);
 296   }
 297
 298   // Initialise all the generations/steps that we're *not* collecting.
 299   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 300       init_uncollected_gen(g,n_gc_threads);
 301   }
 302
 303   /* Allocate a mark stack if we're doing a major collection.
 304    */
 305   if (major_gc && oldest_gen->steps[0].mark) {
 306       nat mark_stack_blocks;
 307       mark_stack_blocks = stg_max(MARK_STACK_BLOCKS,
 308                                   oldest_gen->steps[0].n_old_blocks / 100);
 309       mark_stack_bdescr = allocGroup(mark_stack_blocks);
 310       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 311       mark_sp    = mark_stack;
 312       mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W);
 313   } else {
 314       mark_stack_bdescr = NULL;
 315   }
 316
 317   // this is the main thread
 318 #ifdef THREADED_RTS
 319   if (n_gc_threads == 1) {
 320       SET_GCT(gc_threads[0]);
 321   } else {
 322       SET_GCT(gc_threads[cap->no]);
 323   }
 324 #else
 325 SET_GCT(gc_threads[0]);
 326 #endif
 327
 328   /* -----------------------------------------------------------------------
 329    * follow all the roots that we know about:
 330    */
 331
 332   // the main thread is running: this prevents any other threads from
 333   // exiting prematurely, so we can start them now.
 334   // NB. do this after the mutable lists have been saved above, otherwise
 335   // the other GC threads will be writing into the old mutable lists.
 336   inc_running();
 337   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 338
 339   // Mutable lists from each generation > N
 340   // we want to *scavenge* these roots, not evacuate them: they're not
 341   // going to move in this GC.
 342   // Also do them in reverse generation order, for the usual reason:
 343   // namely to reduce the likelihood of spurious old->new pointers.
 344   //
 345   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 346       scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 347       freeChain_sync(generations[g].saved_mut_list);
 348       generations[g].saved_mut_list = NULL;
 349
 350   }
 351
 352   // scavenge the capability-private mutable lists.  This isn't part
 353   // of markSomeCapabilities() because markSomeCapabilities() can only
 354   // call back into the GC via mark_root() (due to the gct register
 355   // variable).
 356   if (n_gc_threads == 1) {
 357       for (n = 0; n < n_capabilities; n++) {
 358           scavenge_capability_mut_lists(&capabilities[n]);
 359       }
 360   } else {
 361       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 362   }
 363
 364   // follow roots from the CAF list (used by GHCi)
 365   gct->evac_step = 0;
 366   markCAFs(mark_root, gct);
 367
 368   // follow all the roots that the application knows about.
 369   gct->evac_step = 0;
 370   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 371                        rtsTrue/*prune sparks*/);
 372
 373 #if defined(RTS_USER_SIGNALS)
 374   // mark the signal handlers (signals should be already blocked)
 375   markSignalHandlers(mark_root, gct);
 376 #endif
 377
 378   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 379   markWeakPtrList();
 380   initWeakForGC();
 381
 382   // Mark the stable pointer table.
 383   markStablePtrTable(mark_root, gct);
 384
 385   /* -------------------------------------------------------------------------
 386    * Repeatedly scavenge all the areas we know about until there's no
 387    * more scavenging to be done.
 388    */
 389   for (;;)
 390   {
 391       scavenge_until_all_done();
 392       // The other threads are now stopped.  We might recurse back to
 393       // here, but from now on this is the only thread.
 394
 395       // if any blackholes are alive, make the threads that wait on
 396       // them alive too.
 397       if (traverseBlackholeQueue()) {
 398           inc_running();
 399           continue;
 400       }
 401
 402       // must be last...  invariant is that everything is fully
 403       // scavenged at this point.
 404       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 405           inc_running();
 406           continue;
 407       }
 408
 409       // If we get to here, there's really nothing left to do.
 410       break;
 411   }
 412
 413   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 414
 415   // Update pointers from the Task list
 416   update_task_list();
 417
 418   // Now see which stable names are still alive.
 419   gcStablePtrTable();
 420
 421 #ifdef PROFILING
 422   // We call processHeapClosureForDead() on every closure destroyed during
 423   // the current garbage collection, so we invoke LdvCensusForDead().
 424   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 425       || RtsFlags.ProfFlags.bioSelector != NULL)
 426     LdvCensusForDead(N);
 427 #endif
 428
 429   // NO MORE EVACUATION AFTER THIS POINT!
 430
 431   // Two-space collector: free the old to-space.
 432   // g0s0->old_blocks is the old nursery
 433   // g0s0->blocks is to-space from the previous GC
 434   if (RtsFlags.GcFlags.generations == 1) {
 435       if (g0s0->blocks != NULL) {
 436           freeChain(g0s0->blocks);
 437           g0s0->blocks = NULL;
 438       }
 439   }
 440
 441   // For each workspace, in each thread, move the copied blocks to the step
 442   {
 443       gc_thread *thr;
 444       step_workspace *ws;
 445       bdescr *prev, *next;
 446
 447       for (t = 0; t < n_gc_threads; t++) {
 448           thr = gc_threads[t];
 449
 450           // not step 0
 451           if (RtsFlags.GcFlags.generations == 1) {
 452               s = 0;
 453           } else {
 454               s = 1;
 455           }
 456           for (; s < total_steps; s++) {
 457               ws = &thr->steps[s];
 458
 459               // Push the final block
 460               if (ws->todo_bd) {
 461                   push_scanned_block(ws->todo_bd, ws);
 462               }
 463
 464               ASSERT(gct->scan_bd == NULL);
 465               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 466
 467               prev = NULL;
 468               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 469                   ws->step->n_words += bd->free - bd->start;
 470                   prev = bd;
 471               }
 472               if (prev != NULL) {
 473                   prev->link = ws->step->blocks;
 474                   ws->step->blocks = ws->scavd_list;
 475               }
 476               ws->step->n_blocks += ws->n_scavd_blocks;
 477           }
 478       }
 479
 480       // Add all the partial blocks *after* we've added all the full
 481       // blocks.  This is so that we can grab the partial blocks back
 482       // again and try to fill them up in the next GC.
 483       for (t = 0; t < n_gc_threads; t++) {
 484           thr = gc_threads[t];
 485
 486           // not step 0
 487           if (RtsFlags.GcFlags.generations == 1) {
 488               s = 0;
 489           } else {
 490               s = 1;
 491           }
 492           for (; s < total_steps; s++) {
 493               ws = &thr->steps[s];
 494
 495               prev = NULL;
 496               for (bd = ws->part_list; bd != NULL; bd = next) {
 497                   next = bd->link;
 498                   if (bd->free == bd->start) {
 499                       if (prev == NULL) {
 500                           ws->part_list = next;
 501                       } else {
 502                           prev->link = next;
 503                       }
 504                       freeGroup(bd);
 505                       ws->n_part_blocks--;
 506                   } else {
 507                       ws->step->n_words += bd->free - bd->start;
 508                       prev = bd;
 509                   }
 510               }
 511               if (prev != NULL) {
 512                   prev->link = ws->step->blocks;
 513                   ws->step->blocks = ws->part_list;
 514               }
 515               ws->step->n_blocks += ws->n_part_blocks;
 516
 517               ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
 518               ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
 519           }
 520       }
 521   }
 522
 523   // Finally: compact or sweep the oldest generation.
 524   if (major_gc && oldest_gen->steps[0].mark) {
 525       if (oldest_gen->steps[0].compact)
 526           compact(gct->scavenged_static_objects);
 527       else
 528           sweep(&oldest_gen->steps[0]);
 529   }
 530
 531   /* run through all the generations/steps and tidy up
 532    */
 533   copied = 0;
 534   max_copied = 0;
 535   avg_copied = 0;
 536   {
 537       nat i;
 538       for (i=0; i < n_gc_threads; i++) {
 539           if (n_gc_threads > 1) {
 540               debugTrace(DEBUG_gc,"thread %d:", i);
 541               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 542               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 543               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 544               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 545               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 546           }
 547           copied += gc_threads[i]->copied;
 548           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 549       }
 550       if (n_gc_threads == 1) {
 551           max_copied = 0;
 552           avg_copied = 0;
 553       } else {
 554           avg_copied = copied;
 555       }
 556   }
 557
 558   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 559
 560     if (g == N) {
 561       generations[g].collections++; // for stats
 562       if (n_gc_threads > 1) generations[g].par_collections++;
 563     }
 564
 565     // Count the mutable list as bytes "copied" for the purposes of
 566     // stats.  Every mutable list is copied during every GC.
 567     if (g > 0) {
 568         nat mut_list_size = 0;
 569         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 570             mut_list_size += bd->free - bd->start;
 571         }
 572         for (n = 0; n < n_capabilities; n++) {
 573             for (bd = capabilities[n].mut_lists[g];
 574                  bd != NULL; bd = bd->link) {
 575                 mut_list_size += bd->free - bd->start;
 576             }
 577         }
 578         copied +=  mut_list_size;
 579
 580         debugTrace(DEBUG_gc,
 581                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 582                    (unsigned long)(mut_list_size * sizeof(W_)),
 583                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 584     }
 585
 586     for (s = 0; s < generations[g].n_steps; s++) {
 587       bdescr *next, *prev;
 588       stp = &generations[g].steps[s];
 589
 590       // for generations we collected...
 591       if (g <= N) {
 592
 593         /* free old memory and shift to-space into from-space for all
 594          * the collected steps (except the allocation area).  These
 595          * freed blocks will probaby be quickly recycled.
 596          */
 597         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 598             if (stp->mark)
 599             {
 600                 // tack the new blocks on the end of the existing blocks
 601                 if (stp->old_blocks != NULL) {
 602
 603                     prev = NULL;
 604                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 605
 606                         next = bd->link;
 607
 608                         if (!(bd->flags & BF_MARKED))
 609                         {
 610                             if (prev == NULL) {
 611                                 stp->old_blocks = next;
 612                             } else {
 613                                 prev->link = next;
 614                             }
 615                             freeGroup(bd);
 616                             stp->n_old_blocks--;
 617                         }
 618                         else
 619                         {
 620                             stp->n_words += bd->free - bd->start;
 621
 622                             // NB. this step might not be compacted next
 623                             // time, so reset the BF_MARKED flags.
 624                             // They are set before GC if we're going to
 625                             // compact.  (search for BF_MARKED above).
 626                             bd->flags &= ~BF_MARKED;
 627
 628                             // between GCs, all blocks in the heap except
 629                             // for the nursery have the BF_EVACUATED flag set.
 630                             bd->flags |= BF_EVACUATED;
 631
 632                             prev = bd;
 633                         }
 634                     }
 635
 636                     if (prev != NULL) {
 637                         prev->link = stp->blocks;
 638                         stp->blocks = stp->old_blocks;
 639                     }
 640                 }
 641                 // add the new blocks to the block tally
 642                 stp->n_blocks += stp->n_old_blocks;
 643                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 644                 ASSERT(countOccupied(stp->blocks) == stp->n_words);
 645             }
 646             else // not copacted
 647             {
 648                 freeChain(stp->old_blocks);
 649             }
 650             stp->old_blocks = NULL;
 651             stp->n_old_blocks = 0;
 652         }
 653
 654         /* LARGE OBJECTS.  The current live large objects are chained on
 655          * scavenged_large, having been moved during garbage
 656          * collection from large_objects.  Any objects left on
 657          * large_objects list are therefore dead, so we free them here.
 658          */
 659         for (bd = stp->large_objects; bd != NULL; bd = next) {
 660           next = bd->link;
 661           freeGroup(bd);
 662           bd = next;
 663         }
 664
 665         stp->large_objects  = stp->scavenged_large_objects;
 666         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 667
 668       }
 669       else // for older generations...
 670       {
 671         /* For older generations, we need to append the
 672          * scavenged_large_object list (i.e. large objects that have been
 673          * promoted during this GC) to the large_object list for that step.
 674          */
 675         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 676           next = bd->link;
 677           dbl_link_onto(bd, &stp->large_objects);
 678         }
 679
 680         // add the new blocks we promoted during this GC
 681         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 682       }
 683     }
 684   }
 685
 686   // update the max size of older generations after a major GC
 687   resize_generations();
 688
 689   // Calculate the amount of live data for stats.
 690   live = calcLiveWords();
 691
 692   // Free the small objects allocated via allocate(), since this will
 693   // all have been copied into G0S1 now.
 694   if (RtsFlags.GcFlags.generations > 1) {
 695       if (g0s0->blocks != NULL) {
 696           freeChain(g0s0->blocks);
 697           g0s0->blocks = NULL;
 698       }
 699       g0s0->n_blocks = 0;
 700       g0s0->n_words = 0;
 701   }
 702   alloc_blocks = 0;
 703   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 704
 705   // Start a new pinned_object_block
 706   pinned_object_block = NULL;
 707
 708   // Free the mark stack.
 709   if (mark_stack_bdescr != NULL) {
 710       freeGroup(mark_stack_bdescr);
 711   }
 712
 713   // Free any bitmaps.
 714   for (g = 0; g <= N; g++) {
 715       for (s = 0; s < generations[g].n_steps; s++) {
 716           stp = &generations[g].steps[s];
 717           if (stp->bitmap != NULL) {
 718               freeGroup(stp->bitmap);
 719               stp->bitmap = NULL;
 720           }
 721       }
 722   }
 723
 724   resize_nursery();
 725
 726  // mark the garbage collected CAFs as dead
 727 #if 0 && defined(DEBUG) // doesn't work at the moment
 728   if (major_gc) { gcCAFs(); }
 729 #endif
 730
 731 #ifdef PROFILING
 732   // resetStaticObjectForRetainerProfiling() must be called before
 733   // zeroing below.
 734   if (n_gc_threads > 1) {
 735       barf("profiling is currently broken with multi-threaded GC");
 736       // ToDo: fix the gct->scavenged_static_objects below
 737   }
 738   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 739 #endif
 740
 741   // zero the scavenged static object list
 742   if (major_gc) {
 743       nat i;
 744       for (i = 0; i < n_gc_threads; i++) {
 745           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 746       }
 747   }
 748
 749   // Reset the nursery
 750   resetNurseries();
 751
 752   // start any pending finalizers
 753   RELEASE_SM_LOCK;
 754   scheduleFinalizers(cap, old_weak_ptr_list);
 755   ACQUIRE_SM_LOCK;
 756
 757   // send exceptions to any threads which were about to die
 758   RELEASE_SM_LOCK;
 759   resurrectThreads(resurrected_threads);
 760   performPendingThrowTos(exception_threads);
 761   ACQUIRE_SM_LOCK;
 762
 763   // Update the stable pointer hash table.
 764   updateStablePtrTable(major_gc);
 765
 766   // check sanity after GC
 767   IF_DEBUG(sanity, checkSanity());
 768
 769   // extra GC trace info
 770   IF_DEBUG(gc, statDescribeGens());
 771
 772 #ifdef DEBUG
 773   // symbol-table based profiling
 774   /*  heapCensus(to_blocks); */ /* ToDo */
 775 #endif
 776
 777   // restore enclosing cost centre
 778 #ifdef PROFILING
 779   CCCS = prev_CCS;
 780 #endif
 781
 782 #ifdef DEBUG
 783   // check for memory leaks if DEBUG is on
 784   memInventory(traceClass(DEBUG_gc));
 785 #endif
 786
 787 #ifdef RTS_GTK_FRONTPANEL
 788   if (RtsFlags.GcFlags.frontpanel) {
 789       updateFrontPanelAfterGC( N, live );
 790   }
 791 #endif
 792
 793   // ok, GC over: tell the stats department what happened.
 794   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 795   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 796
 797   // unlock the StablePtr table
 798   stablePtrPostGC();
 799
 800   // Guess which generation we'll collect *next* time
 801   initialise_N(force_major_gc);
 802
 803 #if defined(RTS_USER_SIGNALS)
 804   if (RtsFlags.MiscFlags.install_signal_handlers) {
 805     // unblock signals again
 806     unblockUserSignals();
 807   }
 808 #endif
 809
 810   RELEASE_SM_LOCK;
 811
 812   SET_GCT(saved_gct);
 813 }
 814
 815 /* -----------------------------------------------------------------------------
 816    Figure out which generation to collect, initialise N and major_gc.
 817
 818    Also returns the total number of blocks in generations that will be
 819    collected.
 820    -------------------------------------------------------------------------- */
 821
 822 static nat
 823 initialise_N (rtsBool force_major_gc)
 824 {
 825     int g;
 826     nat s, blocks, blocks_total;
 827
 828     blocks = 0;
 829     blocks_total = 0;
 830
 831     if (force_major_gc) {
 832         N = RtsFlags.GcFlags.generations - 1;
 833     } else {
 834         N = 0;
 835     }
 836
 837     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 838         blocks = 0;
 839         for (s = 0; s < generations[g].n_steps; s++) {
 840             blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
 841             blocks += generations[g].steps[s].n_large_blocks;
 842         }
 843         if (blocks >= generations[g].max_blocks) {
 844             N = stg_max(N,g);
 845         }
 846         if ((nat)g <= N) {
 847             blocks_total += blocks;
 848         }
 849     }
 850
 851     blocks_total += countNurseryBlocks();
 852
 853     major_gc = (N == RtsFlags.GcFlags.generations-1);
 854     return blocks_total;
 855 }
 856
 857 /* -----------------------------------------------------------------------------
 858    Initialise the gc_thread structures.
 859    -------------------------------------------------------------------------- */
 860
 861 #define GC_THREAD_INACTIVE             0
 862 #define GC_THREAD_STANDING_BY          1
 863 #define GC_THREAD_RUNNING              2
 864 #define GC_THREAD_WAITING_TO_CONTINUE  3
 865
 866 static void
 867 new_gc_thread (nat n, gc_thread *t)
 868 {
 869     nat s;
 870     step_workspace *ws;
 871
 872 #ifdef THREADED_RTS
 873     t->id = 0;
 874     initSpinLock(&t->gc_spin);
 875     initSpinLock(&t->mut_spin);
 876     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 877     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 878                           // thread to start up, see wakeup_gc_threads
 879 #endif
 880
 881     t->thread_index = n;
 882     t->free_blocks = NULL;
 883     t->gc_count = 0;
 884
 885     init_gc_thread(t);
 886
 887 #ifdef USE_PAPI
 888     t->papi_events = -1;
 889 #endif
 890
 891     for (s = 0; s < total_steps; s++)
 892     {
 893         ws = &t->steps[s];
 894         ws->step = &all_steps[s];
 895         ASSERT(s == ws->step->abs_no);
 896         ws->my_gct = t;
 897
 898         ws->todo_bd = NULL;
 899         ws->todo_q = newWSDeque(128);
 900         ws->todo_overflow = NULL;
 901         ws->n_todo_overflow = 0;
 902
 903         ws->part_list = NULL;
 904         ws->n_part_blocks = 0;
 905
 906         ws->scavd_list = NULL;
 907         ws->n_scavd_blocks = 0;
 908     }
 909 }
 910
 911
 912 void
 913 initGcThreads (void)
 914 {
 915     if (gc_threads == NULL) {
 916 #if defined(THREADED_RTS)
 917         nat i;
 918         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 919                                      sizeof(gc_thread*),
 920                                      "alloc_gc_threads");
 921
 922         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 923             gc_threads[i] =
 924                 stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 925                                "alloc_gc_threads");
 926
 927             new_gc_thread(i, gc_threads[i]);
 928         }
 929 #else
 930         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 931         gc_threads[0] = gct;
 932         new_gc_thread(0,gc_threads[0]);
 933 #endif
 934     }
 935 }
 936
 937 void
 938 freeGcThreads (void)
 939 {
 940     if (gc_threads != NULL) {
 941 #if defined(THREADED_RTS)
 942         nat i;
 943         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 944             stgFree (gc_threads[i]);
 945         }
 946         stgFree (gc_threads);
 947 #else
 948         stgFree (gc_threads);
 949 #endif
 950         gc_threads = NULL;
 951     }
 952 }
 953
 954 /* ----------------------------------------------------------------------------
 955    Start GC threads
 956    ------------------------------------------------------------------------- */
 957
 958 static volatile StgWord gc_running_threads;
 959
 960 static StgWord
 961 inc_running (void)
 962 {
 963     StgWord new;
 964     new = atomic_inc(&gc_running_threads);
 965     ASSERT(new <= n_gc_threads);
 966     return new;
 967 }
 968
 969 static StgWord
 970 dec_running (void)
 971 {
 972     ASSERT(gc_running_threads != 0);
 973     return atomic_dec(&gc_running_threads);
 974 }
 975
 976 static rtsBool
 977 any_work (void)
 978 {
 979     int s;
 980     step_workspace *ws;
 981
 982     gct->any_work++;
 983
 984     write_barrier();
 985
 986     // scavenge objects in compacted generation
 987     if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
 988         (mark_stack_bdescr != NULL && !mark_stack_empty())) {
 989         return rtsTrue;
 990     }
 991
 992     // Check for global work in any step.  We don't need to check for
 993     // local work, because we have already exited scavenge_loop(),
 994     // which means there is no local work for this thread.
 995     for (s = total_steps-1; s >= 0; s--) {
 996         if (s == 0 && RtsFlags.GcFlags.generations > 1) {
 997             continue;
 998         }
 999         ws = &gct->steps[s];
1000         if (ws->todo_large_objects) return rtsTrue;
1001         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1002         if (ws->todo_overflow) return rtsTrue;
1003     }
1004
1005 #if defined(THREADED_RTS)
1006     if (work_stealing) {
1007         nat n;
1008         // look for work to steal
1009         for (n = 0; n < n_gc_threads; n++) {
1010             if (n == gct->thread_index) continue;
1011             for (s = total_steps-1; s >= 0; s--) {
1012                 ws = &gc_threads[n]->steps[s];
1013                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1014             }
1015         }
1016     }
1017 #endif
1018
1019     gct->no_work++;
1020
1021     return rtsFalse;
1022 }
1023
1024 static void
1025 scavenge_until_all_done (void)
1026 {
1027     nat r;
1028
1029     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
1030
1031 loop:
1032 #if defined(THREADED_RTS)
1033     if (n_gc_threads > 1) {
1034         scavenge_loop();
1035     } else {
1036         scavenge_loop1();
1037     }
1038 #else
1039     scavenge_loop();
1040 #endif
1041
1042     // scavenge_loop() only exits when there's no work to do
1043     r = dec_running();
1044
1045     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
1046                gct->thread_index, r);
1047
1048     while (gc_running_threads != 0) {
1049         // usleep(1);
1050         if (any_work()) {
1051             inc_running();
1052             goto loop;
1053         }
1054         // any_work() does not remove the work from the queue, it
1055         // just checks for the presence of work.  If we find any,
1056         // then we increment gc_running_threads and go back to
1057         // scavenge_loop() to perform any pending work.
1058     }
1059
1060     // All threads are now stopped
1061     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
1062 }
1063
1064 #if defined(THREADED_RTS)
1065
1066 void
1067 gcWorkerThread (Capability *cap)
1068 {
1069     gc_thread *saved_gct;
1070
1071     // necessary if we stole a callee-saves register for gct:
1072     saved_gct = gct;
1073
1074     cap->in_gc = rtsTrue;
1075
1076     gct = gc_threads[cap->no];
1077     gct->id = osThreadId();
1078
1079     // Wait until we're told to wake up
1080     RELEASE_SPIN_LOCK(&gct->mut_spin);
1081     gct->wakeup = GC_THREAD_STANDING_BY;
1082     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1083     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1084
1085 #ifdef USE_PAPI
1086     // start performance counters in this thread...
1087     if (gct->papi_events == -1) {
1088         papi_init_eventset(&gct->papi_events);
1089     }
1090     papi_thread_start_gc1_count(gct->papi_events);
1091 #endif
1092
1093     // Every thread evacuates some roots.
1094     gct->evac_step = 0;
1095     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1096                          rtsTrue/*prune sparks*/);
1097     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1098
1099     scavenge_until_all_done();
1100
1101 #ifdef USE_PAPI
1102     // count events in this thread towards the GC totals
1103     papi_thread_stop_gc1_count(gct->papi_events);
1104 #endif
1105
1106     // Wait until we're told to continue
1107     RELEASE_SPIN_LOCK(&gct->gc_spin);
1108     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1109     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1110                gct->thread_index);
1111     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1112     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1113
1114     SET_GCT(saved_gct);
1115 }
1116
1117 #endif
1118
1119 #if defined(THREADED_RTS)
1120
1121 void
1122 waitForGcThreads (Capability *cap USED_IF_THREADS)
1123 {
1124     nat n_threads = RtsFlags.ParFlags.nNodes;
1125     nat me = cap->no;
1126     nat i, j;
1127     rtsBool retry = rtsTrue;
1128
1129     while(retry) {
1130         for (i=0; i < n_threads; i++) {
1131             if (i == me) continue;
1132             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1133                 prodCapability(&capabilities[i], cap->running_task);
1134             }
1135         }
1136         for (j=0; j < 10000000; j++) {
1137             retry = rtsFalse;
1138             for (i=0; i < n_threads; i++) {
1139                 if (i == me) continue;
1140                 write_barrier();
1141                 setContextSwitches();
1142                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1143                     retry = rtsTrue;
1144                 }
1145             }
1146             if (!retry) break;
1147         }
1148     }
1149 }
1150
1151 #endif // THREADED_RTS
1152
1153 static void
1154 start_gc_threads (void)
1155 {
1156 #if defined(THREADED_RTS)
1157     gc_running_threads = 0;
1158 #endif
1159 }
1160
1161 static void
1162 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1163 {
1164 #if defined(THREADED_RTS)
1165     nat i;
1166     for (i=0; i < n_threads; i++) {
1167         if (i == me) continue;
1168         inc_running();
1169         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1170         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1171
1172         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1173         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1174         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1175     }
1176 #endif
1177 }
1178
1179 // After GC is complete, we must wait for all GC threads to enter the
1180 // standby state, otherwise they may still be executing inside
1181 // any_work(), and may even remain awake until the next GC starts.
1182 static void
1183 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1184 {
1185 #if defined(THREADED_RTS)
1186     nat i;
1187     for (i=0; i < n_threads; i++) {
1188         if (i == me) continue;
1189         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1190     }
1191 #endif
1192 }
1193
1194 #if defined(THREADED_RTS)
1195 void
1196 releaseGCThreads (Capability *cap USED_IF_THREADS)
1197 {
1198     nat n_threads = RtsFlags.ParFlags.nNodes;
1199     nat me = cap->no;
1200     nat i;
1201     for (i=0; i < n_threads; i++) {
1202         if (i == me) continue;
1203         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1204             barf("releaseGCThreads");
1205
1206         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1207         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1208         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1209     }
1210 }
1211 #endif
1212
1213 /* ----------------------------------------------------------------------------
1214    Initialise a generation that is to be collected
1215    ------------------------------------------------------------------------- */
1216
1217 static void
1218 init_collected_gen (nat g, nat n_threads)
1219 {
1220     nat s, t, i;
1221     step_workspace *ws;
1222     step *stp;
1223     bdescr *bd;
1224
1225     // Throw away the current mutable list.  Invariant: the mutable
1226     // list always has at least one block; this means we can avoid a
1227     // check for NULL in recordMutable().
1228     if (g != 0) {
1229         freeChain(generations[g].mut_list);
1230         generations[g].mut_list = allocBlock();
1231         for (i = 0; i < n_capabilities; i++) {
1232             freeChain(capabilities[i].mut_lists[g]);
1233             capabilities[i].mut_lists[g] = allocBlock();
1234         }
1235     }
1236
1237     for (s = 0; s < generations[g].n_steps; s++) {
1238
1239         stp = &generations[g].steps[s];
1240         ASSERT(stp->gen_no == g);
1241
1242         // we'll construct a new list of threads in this step
1243         // during GC, throw away the current list.
1244         stp->old_threads = stp->threads;
1245         stp->threads = END_TSO_QUEUE;
1246
1247         // generation 0, step 0 doesn't need to-space
1248         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1249             continue;
1250         }
1251
1252         // deprecate the existing blocks
1253         stp->old_blocks   = stp->blocks;
1254         stp->n_old_blocks = stp->n_blocks;
1255         stp->blocks       = NULL;
1256         stp->n_blocks     = 0;
1257         stp->n_words      = 0;
1258         stp->live_estimate = 0;
1259
1260         // initialise the large object queues.
1261         stp->scavenged_large_objects = NULL;
1262         stp->n_scavenged_large_blocks = 0;
1263
1264         // mark the small objects as from-space
1265         for (bd = stp->old_blocks; bd; bd = bd->link) {
1266             bd->flags &= ~BF_EVACUATED;
1267         }
1268
1269         // mark the large objects as from-space
1270         for (bd = stp->large_objects; bd; bd = bd->link) {
1271             bd->flags &= ~BF_EVACUATED;
1272         }
1273
1274         // for a compacted step, we need to allocate the bitmap
1275         if (stp->mark) {
1276             nat bitmap_size; // in bytes
1277             bdescr *bitmap_bdescr;
1278             StgWord *bitmap;
1279
1280             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1281
1282             if (bitmap_size > 0) {
1283                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1284                                            / BLOCK_SIZE);
1285                 stp->bitmap = bitmap_bdescr;
1286                 bitmap = bitmap_bdescr->start;
1287
1288                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1289                            bitmap_size, bitmap);
1290
1291                 // don't forget to fill it with zeros!
1292                 memset(bitmap, 0, bitmap_size);
1293
1294                 // For each block in this step, point to its bitmap from the
1295                 // block descriptor.
1296                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1297                     bd->u.bitmap = bitmap;
1298                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1299
1300                     // Also at this point we set the BF_MARKED flag
1301                     // for this block.  The invariant is that
1302                     // BF_MARKED is always unset, except during GC
1303                     // when it is set on those blocks which will be
1304                     // compacted.
1305                     if (!(bd->flags & BF_FRAGMENTED)) {
1306                         bd->flags |= BF_MARKED;
1307                     }
1308                 }
1309             }
1310         }
1311     }
1312
1313     // For each GC thread, for each step, allocate a "todo" block to
1314     // store evacuated objects to be scavenged, and a block to store
1315     // evacuated objects that do not need to be scavenged.
1316     for (t = 0; t < n_threads; t++) {
1317         for (s = 0; s < generations[g].n_steps; s++) {
1318
1319             // we don't copy objects into g0s0, unless -G0
1320             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1321
1322             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1323
1324             ws->todo_large_objects = NULL;
1325
1326             ws->part_list = NULL;
1327             ws->n_part_blocks = 0;
1328
1329             // allocate the first to-space block; extra blocks will be
1330             // chained on as necessary.
1331             ws->todo_bd = NULL;
1332             ASSERT(looksEmptyWSDeque(ws->todo_q));
1333             alloc_todo_block(ws,0);
1334
1335             ws->todo_overflow = NULL;
1336             ws->n_todo_overflow = 0;
1337
1338             ws->scavd_list = NULL;
1339             ws->n_scavd_blocks = 0;
1340         }
1341     }
1342 }
1343
1344
1345 /* ----------------------------------------------------------------------------
1346    Initialise a generation that is *not* to be collected
1347    ------------------------------------------------------------------------- */
1348
1349 static void
1350 init_uncollected_gen (nat g, nat threads)
1351 {
1352     nat s, t, n;
1353     step_workspace *ws;
1354     step *stp;
1355     bdescr *bd;
1356
1357     // save the current mutable lists for this generation, and
1358     // allocate a fresh block for each one.  We'll traverse these
1359     // mutable lists as roots early on in the GC.
1360     generations[g].saved_mut_list = generations[g].mut_list;
1361     generations[g].mut_list = allocBlock();
1362     for (n = 0; n < n_capabilities; n++) {
1363         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
1364         capabilities[n].mut_lists[g] = allocBlock();
1365     }
1366
1367     for (s = 0; s < generations[g].n_steps; s++) {
1368         stp = &generations[g].steps[s];
1369         stp->scavenged_large_objects = NULL;
1370         stp->n_scavenged_large_blocks = 0;
1371     }
1372
1373     for (s = 0; s < generations[g].n_steps; s++) {
1374
1375         stp = &generations[g].steps[s];
1376
1377         for (t = 0; t < threads; t++) {
1378             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1379
1380             ASSERT(looksEmptyWSDeque(ws->todo_q));
1381             ws->todo_large_objects = NULL;
1382
1383             ws->part_list = NULL;
1384             ws->n_part_blocks = 0;
1385
1386             ws->scavd_list = NULL;
1387             ws->n_scavd_blocks = 0;
1388
1389             // If the block at the head of the list in this generation
1390             // is less than 3/4 full, then use it as a todo block.
1391             if (stp->blocks && isPartiallyFull(stp->blocks))
1392             {
1393                 ws->todo_bd = stp->blocks;
1394                 ws->todo_free = ws->todo_bd->free;
1395                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1396                 stp->blocks = stp->blocks->link;
1397                 stp->n_blocks -= 1;
1398                 stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1399                 ws->todo_bd->link = NULL;
1400                 // we must scan from the current end point.
1401                 ws->todo_bd->u.scan = ws->todo_bd->free;
1402             }
1403             else
1404             {
1405                 ws->todo_bd = NULL;
1406                 alloc_todo_block(ws,0);
1407             }
1408         }
1409
1410         // deal out any more partial blocks to the threads' part_lists
1411         t = 0;
1412         while (stp->blocks && isPartiallyFull(stp->blocks))
1413         {
1414             bd = stp->blocks;
1415             stp->blocks = bd->link;
1416             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1417             bd->link = ws->part_list;
1418             ws->part_list = bd;
1419             ws->n_part_blocks += 1;
1420             bd->u.scan = bd->free;
1421             stp->n_blocks -= 1;
1422             stp->n_words -= bd->free - bd->start;
1423             t++;
1424             if (t == n_gc_threads) t = 0;
1425         }
1426     }
1427 }
1428
1429 /* -----------------------------------------------------------------------------
1430    Initialise a gc_thread before GC
1431    -------------------------------------------------------------------------- */
1432
1433 static void
1434 init_gc_thread (gc_thread *t)
1435 {
1436     t->static_objects = END_OF_STATIC_LIST;
1437     t->scavenged_static_objects = END_OF_STATIC_LIST;
1438     t->scan_bd = NULL;
1439     t->mut_lists = capabilities[t->thread_index].mut_lists;
1440     t->evac_step = 0;
1441     t->failed_to_evac = rtsFalse;
1442     t->eager_promotion = rtsTrue;
1443     t->thunk_selector_depth = 0;
1444     t->copied = 0;
1445     t->scanned = 0;
1446     t->any_work = 0;
1447     t->no_work = 0;
1448     t->scav_find_work = 0;
1449 }
1450
1451 /* -----------------------------------------------------------------------------
1452    Function we pass to evacuate roots.
1453    -------------------------------------------------------------------------- */
1454
1455 static void
1456 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1457 {
1458     // we stole a register for gct, but this function is called from
1459     // *outside* the GC where the register variable is not in effect,
1460     // so we need to save and restore it here.  NB. only call
1461     // mark_root() from the main GC thread, otherwise gct will be
1462     // incorrect.
1463     gc_thread *saved_gct;
1464     saved_gct = gct;
1465     SET_GCT(user);
1466
1467     evacuate(root);
1468
1469     SET_GCT(saved_gct);
1470 }
1471
1472 /* -----------------------------------------------------------------------------
1473    Initialising the static object & mutable lists
1474    -------------------------------------------------------------------------- */
1475
1476 static void
1477 zero_static_object_list(StgClosure* first_static)
1478 {
1479   StgClosure* p;
1480   StgClosure* link;
1481   const StgInfoTable *info;
1482
1483   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1484     info = get_itbl(p);
1485     link = *STATIC_LINK(info, p);
1486     *STATIC_LINK(info,p) = NULL;
1487   }
1488 }
1489
1490 /* ----------------------------------------------------------------------------
1491    Update the pointers from the task list
1492
1493    These are treated as weak pointers because we want to allow a main
1494    thread to get a BlockedOnDeadMVar exception in the same way as any
1495    other thread.  Note that the threads should all have been retained
1496    by GC by virtue of being on the all_threads list, we're just
1497    updating pointers here.
1498    ------------------------------------------------------------------------- */
1499
1500 static void
1501 update_task_list (void)
1502 {
1503     Task *task;
1504     StgTSO *tso;
1505     for (task = all_tasks; task != NULL; task = task->all_link) {
1506         if (!task->stopped && task->tso) {
1507             ASSERT(task->tso->bound == task);
1508             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1509             if (tso == NULL) {
1510                 barf("task %p: main thread %d has been GC'd",
1511 #ifdef THREADED_RTS
1512                      (void *)task->id,
1513 #else
1514                      (void *)task,
1515 #endif
1516                      task->tso->id);
1517             }
1518             task->tso = tso;
1519         }
1520     }
1521 }
1522
1523 /* ----------------------------------------------------------------------------
1524    Reset the sizes of the older generations when we do a major
1525    collection.
1526
1527    CURRENT STRATEGY: make all generations except zero the same size.
1528    We have to stay within the maximum heap size, and leave a certain
1529    percentage of the maximum heap size available to allocate into.
1530    ------------------------------------------------------------------------- */
1531
1532 static void
1533 resize_generations (void)
1534 {
1535     nat g;
1536
1537     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1538         nat live, size, min_alloc, words;
1539         nat max  = RtsFlags.GcFlags.maxHeapSize;
1540         nat gens = RtsFlags.GcFlags.generations;
1541
1542         // live in the oldest generations
1543         if (oldest_gen->steps[0].live_estimate != 0) {
1544             words = oldest_gen->steps[0].live_estimate;
1545         } else {
1546             words = oldest_gen->steps[0].n_words;
1547         }
1548         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1549             oldest_gen->steps[0].n_large_blocks;
1550
1551         // default max size for all generations except zero
1552         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1553                        RtsFlags.GcFlags.minOldGenSize);
1554
1555         // minimum size for generation zero
1556         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1557                             RtsFlags.GcFlags.minAllocAreaSize);
1558
1559         // Auto-enable compaction when the residency reaches a
1560         // certain percentage of the maximum heap size (default: 30%).
1561         if (RtsFlags.GcFlags.generations > 1 &&
1562             (RtsFlags.GcFlags.compact ||
1563              (max > 0 &&
1564               oldest_gen->steps[0].n_blocks >
1565               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1566             oldest_gen->steps[0].mark = 1;
1567             oldest_gen->steps[0].compact = 1;
1568 //        debugBelch("compaction: on\n", live);
1569         } else {
1570             oldest_gen->steps[0].mark = 0;
1571             oldest_gen->steps[0].compact = 0;
1572 //        debugBelch("compaction: off\n", live);
1573         }
1574
1575         if (RtsFlags.GcFlags.sweep) {
1576             oldest_gen->steps[0].mark = 1;
1577         }
1578
1579         // if we're going to go over the maximum heap size, reduce the
1580         // size of the generations accordingly.  The calculation is
1581         // different if compaction is turned on, because we don't need
1582         // to double the space required to collect the old generation.
1583         if (max != 0) {
1584
1585             // this test is necessary to ensure that the calculations
1586             // below don't have any negative results - we're working
1587             // with unsigned values here.
1588             if (max < min_alloc) {
1589                 heapOverflow();
1590             }
1591
1592             if (oldest_gen->steps[0].compact) {
1593                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1594                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1595                 }
1596             } else {
1597                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1598                     size = (max - min_alloc) / ((gens - 1) * 2);
1599                 }
1600             }
1601
1602             if (size < live) {
1603                 heapOverflow();
1604             }
1605         }
1606
1607 #if 0
1608         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1609                    min_alloc, size, max);
1610 #endif
1611
1612         for (g = 0; g < gens; g++) {
1613             generations[g].max_blocks = size;
1614         }
1615     }
1616 }
1617
1618 /* -----------------------------------------------------------------------------
1619    Calculate the new size of the nursery, and resize it.
1620    -------------------------------------------------------------------------- */
1621
1622 static void
1623 resize_nursery (void)
1624 {
1625     if (RtsFlags.GcFlags.generations == 1)
1626     {   // Two-space collector:
1627         nat blocks;
1628
1629         /* set up a new nursery.  Allocate a nursery size based on a
1630          * function of the amount of live data (by default a factor of 2)
1631          * Use the blocks from the old nursery if possible, freeing up any
1632          * left over blocks.
1633          *
1634          * If we get near the maximum heap size, then adjust our nursery
1635          * size accordingly.  If the nursery is the same size as the live
1636          * data (L), then we need 3L bytes.  We can reduce the size of the
1637          * nursery to bring the required memory down near 2L bytes.
1638          *
1639          * A normal 2-space collector would need 4L bytes to give the same
1640          * performance we get from 3L bytes, reducing to the same
1641          * performance at 2L bytes.
1642          */
1643         blocks = g0s0->n_blocks;
1644
1645         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1646              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1647              RtsFlags.GcFlags.maxHeapSize )
1648         {
1649             long adjusted_blocks;  // signed on purpose
1650             int pc_free;
1651
1652             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1653
1654             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1655                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1656
1657             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1658             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1659             {
1660                 heapOverflow();
1661             }
1662             blocks = adjusted_blocks;
1663         }
1664         else
1665         {
1666             blocks *= RtsFlags.GcFlags.oldGenFactor;
1667             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1668             {
1669                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1670             }
1671         }
1672         resizeNurseries(blocks);
1673     }
1674     else  // Generational collector
1675     {
1676         /*
1677          * If the user has given us a suggested heap size, adjust our
1678          * allocation area to make best use of the memory available.
1679          */
1680         if (RtsFlags.GcFlags.heapSizeSuggestion)
1681         {
1682             long blocks;
1683             nat needed = calcNeeded();  // approx blocks needed at next GC
1684
1685             /* Guess how much will be live in generation 0 step 0 next time.
1686              * A good approximation is obtained by finding the
1687              * percentage of g0s0 that was live at the last minor GC.
1688              *
1689              * We have an accurate figure for the amount of copied data in
1690              * 'copied', but we must convert this to a number of blocks, with
1691              * a small adjustment for estimated slop at the end of a block
1692              * (- 10 words).
1693              */
1694             if (N == 0)
1695             {
1696                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1697                     / countNurseryBlocks();
1698             }
1699
1700             /* Estimate a size for the allocation area based on the
1701              * information available.  We might end up going slightly under
1702              * or over the suggested heap size, but we should be pretty
1703              * close on average.
1704              *
1705              * Formula:            suggested - needed
1706              *                ----------------------------
1707              *                    1 + g0s0_pcnt_kept/100
1708              *
1709              * where 'needed' is the amount of memory needed at the next
1710              * collection for collecting all steps except g0s0.
1711              */
1712             blocks =
1713                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1714                 (100 + (long)g0s0_pcnt_kept);
1715
1716             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1717                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1718             }
1719
1720             resizeNurseries((nat)blocks);
1721         }
1722         else
1723         {
1724             // we might have added extra large blocks to the nursery, so
1725             // resize back to minAllocAreaSize again.
1726             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1727         }
1728     }
1729 }
1730
1731 /* -----------------------------------------------------------------------------
1732    Sanity code for CAF garbage collection.
1733
1734    With DEBUG turned on, we manage a CAF list in addition to the SRT
1735    mechanism.  After GC, we run down the CAF list and blackhole any
1736    CAFs which have been garbage collected.  This means we get an error
1737    whenever the program tries to enter a garbage collected CAF.
1738
1739    Any garbage collected CAFs are taken off the CAF list at the same
1740    time.
1741    -------------------------------------------------------------------------- */
1742
1743 #if 0 && defined(DEBUG)
1744
1745 static void
1746 gcCAFs(void)
1747 {
1748   StgClosure*  p;
1749   StgClosure** pp;
1750   const StgInfoTable *info;
1751   nat i;
1752
1753   i = 0;
1754   p = caf_list;
1755   pp = &caf_list;
1756
1757   while (p != NULL) {
1758
1759     info = get_itbl(p);
1760
1761     ASSERT(info->type == IND_STATIC);
1762
1763     if (STATIC_LINK(info,p) == NULL) {
1764         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1765         // black hole it
1766         SET_INFO(p,&stg_BLACKHOLE_info);
1767         p = STATIC_LINK2(info,p);
1768         *pp = p;
1769     }
1770     else {
1771       pp = &STATIC_LINK2(info,p);
1772       p = *pp;
1773       i++;
1774     }
1775
1776   }
1777
1778   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1779 }
1780 #endif