rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "Compact.h"
  44 #include "Evac.h"
  45 #include "Scav.h"
  46 #include "GCUtils.h"
  47 #include "MarkStack.h"
  48 #include "MarkWeak.h"
  49 #include "Sparks.h"
  50 #include "Sweep.h"
  51
  52 #include <string.h> // for memset()
  53 #include <unistd.h>
  54
  55 /* -----------------------------------------------------------------------------
  56    Global variables
  57    -------------------------------------------------------------------------- */
  58
  59 /* STATIC OBJECT LIST.
  60  *
  61  * During GC:
  62  * We maintain a linked list of static objects that are still live.
  63  * The requirements for this list are:
  64  *
  65  *  - we need to scan the list while adding to it, in order to
  66  *    scavenge all the static objects (in the same way that
  67  *    breadth-first scavenging works for dynamic objects).
  68  *
  69  *  - we need to be able to tell whether an object is already on
  70  *    the list, to break loops.
  71  *
  72  * Each static object has a "static link field", which we use for
  73  * linking objects on to the list.  We use a stack-type list, consing
  74  * objects on the front as they are added (this means that the
  75  * scavenge phase is depth-first, not breadth-first, but that
  76  * shouldn't matter).
  77  *
  78  * A separate list is kept for objects that have been scavenged
  79  * already - this is so that we can zero all the marks afterwards.
  80  *
  81  * An object is on the list if its static link field is non-zero; this
  82  * means that we have to mark the end of the list with '1', not NULL.
  83  *
  84  * Extra notes for generational GC:
  85  *
  86  * Each generation has a static object list associated with it.  When
  87  * collecting generations up to N, we treat the static object lists
  88  * from generations > N as roots.
  89  *
  90  * We build up a static object list while collecting generations 0..N,
  91  * which is then appended to the static object list of generation N+1.
  92  */
  93
  94 /* N is the oldest generation being collected, where the generations
  95  * are numbered starting at 0.  A major GC (indicated by the major_gc
  96  * flag) is when we're collecting all generations.  We only attempt to
  97  * deal with static objects and GC CAFs when doing a major GC.
  98  */
  99 nat N;
 100 rtsBool major_gc;
 101
 102 /* Data used for allocation area sizing.
 103  */
 104 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
 105
 106 /* Mut-list stats */
 107 #ifdef DEBUG
 108 nat mutlist_MUTVARS,
 109     mutlist_MUTARRS,
 110     mutlist_MVARS,
 111     mutlist_OTHERS;
 112 #endif
 113
 114 /* Thread-local data for each GC thread
 115  */
 116 gc_thread **gc_threads = NULL;
 117
 118 #if !defined(THREADED_RTS)
 119 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
 120 #endif
 121
 122 // Number of threads running in *this* GC.  Affects how many
 123 // step->todos[] lists we have to look in to find work.
 124 nat n_gc_threads;
 125
 126 // For stats:
 127 long copied;        // *words* copied & scavenged during this GC
 128
 129 rtsBool work_stealing;
 130
 131 DECLARE_GCT
 132
 133 /* -----------------------------------------------------------------------------
 134    Static function declarations
 135    -------------------------------------------------------------------------- */
 136
 137 static void mark_root               (void *user, StgClosure **root);
 138 static void zero_static_object_list (StgClosure* first_static);
 139 static nat  initialise_N            (rtsBool force_major_gc);
 140 static void init_collected_gen      (nat g, nat threads);
 141 static void init_uncollected_gen    (nat g, nat threads);
 142 static void init_gc_thread          (gc_thread *t);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void scavenge_until_all_done (void);
 147 static StgWord inc_running          (void);
 148 static StgWord dec_running          (void);
 149 static void wakeup_gc_threads       (nat n_threads, nat me);
 150 static void shutdown_gc_threads     (nat n_threads, nat me);
 151
 152 #if 0 && defined(DEBUG)
 153 static void gcCAFs                  (void);
 154 #endif
 155
 156 /* -----------------------------------------------------------------------------
 157    The mark stack.
 158    -------------------------------------------------------------------------- */
 159
 160 bdescr *mark_stack_top_bd; // topmost block in the mark stack
 161 bdescr *mark_stack_bd;     // current block in the mark stack
 162 StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
 163
 164 /* -----------------------------------------------------------------------------
 165    GarbageCollect: the main entry point to the garbage collector.
 166
 167    Locks held: all capabilities are held throughout GarbageCollect().
 168    -------------------------------------------------------------------------- */
 169
 170 void
 171 GarbageCollect (rtsBool force_major_gc,
 172                 nat gc_type USED_IF_THREADS,
 173                 Capability *cap)
 174 {
 175   bdescr *bd;
 176   generation *gen;
 177   lnat live, allocated, max_copied, avg_copied, slop;
 178   gc_thread *saved_gct;
 179   nat g, t, n;
 180
 181   // necessary if we stole a callee-saves register for gct:
 182   saved_gct = gct;
 183
 184 #ifdef PROFILING
 185   CostCentreStack *prev_CCS;
 186 #endif
 187
 188   ACQUIRE_SM_LOCK;
 189
 190 #if defined(RTS_USER_SIGNALS)
 191   if (RtsFlags.MiscFlags.install_signal_handlers) {
 192     // block signals
 193     blockUserSignals();
 194   }
 195 #endif
 196
 197   ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
 198   // otherwise adjust the padding in gen_workspace.
 199
 200   // tell the stats department that we've started a GC
 201   stat_startGC();
 202
 203   // tell the STM to discard any cached closures it's hoping to re-use
 204   stmPreGCHook();
 205
 206   // lock the StablePtr table
 207   stablePtrPreGC();
 208
 209 #ifdef DEBUG
 210   mutlist_MUTVARS = 0;
 211   mutlist_MUTARRS = 0;
 212   mutlist_OTHERS = 0;
 213 #endif
 214
 215   // attribute any costs to CCS_GC
 216 #ifdef PROFILING
 217   prev_CCS = CCCS;
 218   CCCS = CCS_GC;
 219 #endif
 220
 221   /* Approximate how much we allocated.
 222    * Todo: only when generating stats?
 223    */
 224   allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
 225
 226   /* Figure out which generation to collect
 227    */
 228   n = initialise_N(force_major_gc);
 229
 230 #if defined(THREADED_RTS)
 231   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
 232                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
 233       // It's not always a good idea to do load balancing in parallel
 234       // GC.  In particular, for a parallel program we don't want to
 235       // lose locality by moving cached data into another CPU's cache
 236       // (this effect can be quite significant).
 237       //
 238       // We could have a more complex way to deterimine whether to do
 239       // work stealing or not, e.g. it might be a good idea to do it
 240       // if the heap is big.  For now, we just turn it on or off with
 241       // a flag.
 242 #endif
 243
 244   /* Start threads, so they can be spinning up while we finish initialisation.
 245    */
 246   start_gc_threads();
 247
 248 #if defined(THREADED_RTS)
 249   /* How many threads will be participating in this GC?
 250    * We don't try to parallelise minor GCs (unless the user asks for
 251    * it with +RTS -gn0), or mark/compact/sweep GC.
 252    */
 253   if (gc_type == PENDING_GC_PAR) {
 254       n_gc_threads = RtsFlags.ParFlags.nNodes;
 255   } else {
 256       n_gc_threads = 1;
 257   }
 258 #else
 259   n_gc_threads = 1;
 260 #endif
 261
 262   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 263         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 264
 265 #ifdef RTS_GTK_FRONTPANEL
 266   if (RtsFlags.GcFlags.frontpanel) {
 267       updateFrontPanelBeforeGC(N);
 268   }
 269 #endif
 270
 271 #ifdef DEBUG
 272   // check for memory leaks if DEBUG is on
 273   memInventory(DEBUG_gc);
 274 #endif
 275
 276   // check sanity *before* GC
 277   IF_DEBUG(sanity, checkSanity(rtsTrue));
 278
 279   // Initialise all our gc_thread structures
 280   for (t = 0; t < n_gc_threads; t++) {
 281       init_gc_thread(gc_threads[t]);
 282   }
 283
 284   // Initialise all the generations/steps that we're collecting.
 285   for (g = 0; g <= N; g++) {
 286       init_collected_gen(g,n_gc_threads);
 287   }
 288
 289   // Initialise all the generations/steps that we're *not* collecting.
 290   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 291       init_uncollected_gen(g,n_gc_threads);
 292   }
 293
 294   /* Allocate a mark stack if we're doing a major collection.
 295    */
 296   if (major_gc && oldest_gen->mark) {
 297       mark_stack_bd     = allocBlock();
 298       mark_stack_top_bd = mark_stack_bd;
 299       mark_stack_bd->link = NULL;
 300       mark_stack_bd->u.back = NULL;
 301       mark_sp           = mark_stack_bd->start;
 302   } else {
 303       mark_stack_bd     = NULL;
 304       mark_stack_top_bd = NULL;
 305       mark_sp           = NULL;
 306   }
 307
 308   // this is the main thread
 309 #ifdef THREADED_RTS
 310   if (n_gc_threads == 1) {
 311       SET_GCT(gc_threads[0]);
 312   } else {
 313       SET_GCT(gc_threads[cap->no]);
 314   }
 315 #else
 316 SET_GCT(gc_threads[0]);
 317 #endif
 318
 319   /* -----------------------------------------------------------------------
 320    * follow all the roots that we know about:
 321    */
 322
 323   // the main thread is running: this prevents any other threads from
 324   // exiting prematurely, so we can start them now.
 325   // NB. do this after the mutable lists have been saved above, otherwise
 326   // the other GC threads will be writing into the old mutable lists.
 327   inc_running();
 328   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 329
 330   // Mutable lists from each generation > N
 331   // we want to *scavenge* these roots, not evacuate them: they're not
 332   // going to move in this GC.
 333   // Also do them in reverse generation order, for the usual reason:
 334   // namely to reduce the likelihood of spurious old->new pointers.
 335   //
 336   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 337 #if defined(THREADED_RTS)
 338       if (n_gc_threads > 1) {
 339           scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 340       } else {
 341           scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]);
 342       }
 343 #else
 344       scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 345 #endif
 346       freeChain_sync(generations[g].saved_mut_list);
 347       generations[g].saved_mut_list = NULL;
 348
 349   }
 350
 351   // scavenge the capability-private mutable lists.  This isn't part
 352   // of markSomeCapabilities() because markSomeCapabilities() can only
 353   // call back into the GC via mark_root() (due to the gct register
 354   // variable).
 355   if (n_gc_threads == 1) {
 356       for (n = 0; n < n_capabilities; n++) {
 357 #if defined(THREADED_RTS)
 358           scavenge_capability_mut_Lists1(&capabilities[n]);
 359 #else
 360           scavenge_capability_mut_lists(&capabilities[n]);
 361 #endif
 362       }
 363   } else {
 364       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 365   }
 366
 367   // follow roots from the CAF list (used by GHCi)
 368   gct->evac_gen = 0;
 369   markCAFs(mark_root, gct);
 370
 371   // follow all the roots that the application knows about.
 372   gct->evac_gen = 0;
 373   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 374                        rtsTrue/*prune sparks*/);
 375
 376 #if defined(RTS_USER_SIGNALS)
 377   // mark the signal handlers (signals should be already blocked)
 378   markSignalHandlers(mark_root, gct);
 379 #endif
 380
 381   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 382   markWeakPtrList();
 383   initWeakForGC();
 384
 385   // Mark the stable pointer table.
 386   markStablePtrTable(mark_root, gct);
 387
 388   /* -------------------------------------------------------------------------
 389    * Repeatedly scavenge all the areas we know about until there's no
 390    * more scavenging to be done.
 391    */
 392   for (;;)
 393   {
 394       scavenge_until_all_done();
 395       // The other threads are now stopped.  We might recurse back to
 396       // here, but from now on this is the only thread.
 397
 398       // must be last...  invariant is that everything is fully
 399       // scavenged at this point.
 400       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 401           inc_running();
 402           continue;
 403       }
 404
 405       // If we get to here, there's really nothing left to do.
 406       break;
 407   }
 408
 409   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 410
 411   // Now see which stable names are still alive.
 412   gcStablePtrTable();
 413
 414 #ifdef THREADED_RTS
 415   if (n_gc_threads == 1) {
 416       for (n = 0; n < n_capabilities; n++) {
 417           pruneSparkQueue(&capabilities[n]);
 418       }
 419   } else {
 420       pruneSparkQueue(&capabilities[gct->thread_index]);
 421   }
 422 #endif
 423
 424 #ifdef PROFILING
 425   // We call processHeapClosureForDead() on every closure destroyed during
 426   // the current garbage collection, so we invoke LdvCensusForDead().
 427   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 428       || RtsFlags.ProfFlags.bioSelector != NULL)
 429     LdvCensusForDead(N);
 430 #endif
 431
 432   // NO MORE EVACUATION AFTER THIS POINT!
 433
 434   // Two-space collector: free the old to-space.
 435   // g0->old_blocks is the old nursery
 436   // g0->blocks is to-space from the previous GC
 437   if (RtsFlags.GcFlags.generations == 1) {
 438       if (g0->blocks != NULL) {
 439           freeChain(g0->blocks);
 440           g0->blocks = NULL;
 441       }
 442   }
 443
 444   // For each workspace, in each thread, move the copied blocks to the step
 445   {
 446       gc_thread *thr;
 447       gen_workspace *ws;
 448       bdescr *prev, *next;
 449
 450       for (t = 0; t < n_gc_threads; t++) {
 451           thr = gc_threads[t];
 452
 453           for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 454               ws = &thr->gens[g];
 455
 456               // Push the final block
 457               if (ws->todo_bd) {
 458                   push_scanned_block(ws->todo_bd, ws);
 459               }
 460
 461               ASSERT(gct->scan_bd == NULL);
 462               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 463
 464               prev = NULL;
 465               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 466                   ws->gen->n_words += bd->free - bd->start;
 467                   prev = bd;
 468               }
 469               if (prev != NULL) {
 470                   prev->link = ws->gen->blocks;
 471                   ws->gen->blocks = ws->scavd_list;
 472               }
 473               ws->gen->n_blocks += ws->n_scavd_blocks;
 474           }
 475       }
 476
 477       // Add all the partial blocks *after* we've added all the full
 478       // blocks.  This is so that we can grab the partial blocks back
 479       // again and try to fill them up in the next GC.
 480       for (t = 0; t < n_gc_threads; t++) {
 481           thr = gc_threads[t];
 482
 483           for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 484               ws = &thr->gens[g];
 485
 486               prev = NULL;
 487               for (bd = ws->part_list; bd != NULL; bd = next) {
 488                   next = bd->link;
 489                   if (bd->free == bd->start) {
 490                       if (prev == NULL) {
 491                           ws->part_list = next;
 492                       } else {
 493                           prev->link = next;
 494                       }
 495                       freeGroup(bd);
 496                       ws->n_part_blocks--;
 497                   } else {
 498                       ws->gen->n_words += bd->free - bd->start;
 499                       prev = bd;
 500                   }
 501               }
 502               if (prev != NULL) {
 503                   prev->link = ws->gen->blocks;
 504                   ws->gen->blocks = ws->part_list;
 505               }
 506               ws->gen->n_blocks += ws->n_part_blocks;
 507
 508               ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks);
 509               ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words);
 510           }
 511       }
 512   }
 513
 514   // Finally: compact or sweep the oldest generation.
 515   if (major_gc && oldest_gen->mark) {
 516       if (oldest_gen->compact)
 517           compact(gct->scavenged_static_objects);
 518       else
 519           sweep(oldest_gen);
 520   }
 521
 522   /* run through all the generations/steps and tidy up
 523    */
 524   copied = 0;
 525   max_copied = 0;
 526   avg_copied = 0;
 527   {
 528       nat i;
 529       for (i=0; i < n_gc_threads; i++) {
 530           if (n_gc_threads > 1) {
 531               debugTrace(DEBUG_gc,"thread %d:", i);
 532               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 533               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 534               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 535               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 536               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 537           }
 538           copied += gc_threads[i]->copied;
 539           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 540       }
 541       if (n_gc_threads == 1) {
 542           max_copied = 0;
 543           avg_copied = 0;
 544       } else {
 545           avg_copied = copied;
 546       }
 547   }
 548
 549   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 550
 551     if (g == N) {
 552       generations[g].collections++; // for stats
 553       if (n_gc_threads > 1) generations[g].par_collections++;
 554     }
 555
 556     // Count the mutable list as bytes "copied" for the purposes of
 557     // stats.  Every mutable list is copied during every GC.
 558     if (g > 0) {
 559         nat mut_list_size = 0;
 560         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 561             mut_list_size += bd->free - bd->start;
 562         }
 563         for (n = 0; n < n_capabilities; n++) {
 564             for (bd = capabilities[n].mut_lists[g];
 565                  bd != NULL; bd = bd->link) {
 566                 mut_list_size += bd->free - bd->start;
 567             }
 568         }
 569         copied +=  mut_list_size;
 570
 571         debugTrace(DEBUG_gc,
 572                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 573                    (unsigned long)(mut_list_size * sizeof(W_)),
 574                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 575     }
 576
 577     bdescr *next, *prev;
 578     gen = &generations[g];
 579
 580     // for generations we collected...
 581     if (g <= N) {
 582
 583         /* free old memory and shift to-space into from-space for all
 584          * the collected steps (except the allocation area).  These
 585          * freed blocks will probaby be quickly recycled.
 586          */
 587         if (gen->mark)
 588         {
 589             // tack the new blocks on the end of the existing blocks
 590             if (gen->old_blocks != NULL) {
 591
 592                 prev = NULL;
 593                 for (bd = gen->old_blocks; bd != NULL; bd = next) {
 594
 595                     next = bd->link;
 596
 597                     if (!(bd->flags & BF_MARKED))
 598                     {
 599                         if (prev == NULL) {
 600                             gen->old_blocks = next;
 601                         } else {
 602                             prev->link = next;
 603                         }
 604                         freeGroup(bd);
 605                         gen->n_old_blocks--;
 606                     }
 607                     else
 608                     {
 609                         gen->n_words += bd->free - bd->start;
 610
 611                         // NB. this step might not be compacted next
 612                         // time, so reset the BF_MARKED flags.
 613                         // They are set before GC if we're going to
 614                         // compact.  (search for BF_MARKED above).
 615                         bd->flags &= ~BF_MARKED;
 616
 617                         // between GCs, all blocks in the heap except
 618                         // for the nursery have the BF_EVACUATED flag set.
 619                         bd->flags |= BF_EVACUATED;
 620
 621                         prev = bd;
 622                     }
 623                 }
 624
 625                 if (prev != NULL) {
 626                     prev->link = gen->blocks;
 627                     gen->blocks = gen->old_blocks;
 628                 }
 629             }
 630             // add the new blocks to the block tally
 631             gen->n_blocks += gen->n_old_blocks;
 632             ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
 633             ASSERT(countOccupied(gen->blocks) == gen->n_words);
 634         }
 635         else // not copacted
 636         {
 637             freeChain(gen->old_blocks);
 638         }
 639
 640         gen->old_blocks = NULL;
 641         gen->n_old_blocks = 0;
 642
 643         /* LARGE OBJECTS.  The current live large objects are chained on
 644          * scavenged_large, having been moved during garbage
 645          * collection from large_objects.  Any objects left on the
 646          * large_objects list are therefore dead, so we free them here.
 647          */
 648         freeChain(gen->large_objects);
 649         gen->large_objects  = gen->scavenged_large_objects;
 650         gen->n_large_blocks = gen->n_scavenged_large_blocks;
 651         gen->n_new_large_words = 0;
 652         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 653     }
 654     else // for generations > N
 655     {
 656         /* For older generations, we need to append the
 657          * scavenged_large_object list (i.e. large objects that have been
 658          * promoted during this GC) to the large_object list for that step.
 659          */
 660         for (bd = gen->scavenged_large_objects; bd; bd = next) {
 661             next = bd->link;
 662             dbl_link_onto(bd, &gen->large_objects);
 663         }
 664
 665         // add the new blocks we promoted during this GC
 666         gen->n_large_blocks += gen->n_scavenged_large_blocks;
 667         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 668     }
 669   } // for all generations
 670
 671   // update the max size of older generations after a major GC
 672   resize_generations();
 673
 674   // Calculate the amount of live data for stats.
 675   live = calcLiveWords();
 676
 677   // Start a new pinned_object_block
 678   for (n = 0; n < n_capabilities; n++) {
 679       capabilities[n].pinned_object_block = NULL;
 680   }
 681
 682   // Free the mark stack.
 683   if (mark_stack_top_bd != NULL) {
 684       debugTrace(DEBUG_gc, "mark stack: %d blocks",
 685                  countBlocks(mark_stack_top_bd));
 686       freeChain(mark_stack_top_bd);
 687   }
 688
 689   // Free any bitmaps.
 690   for (g = 0; g <= N; g++) {
 691       gen = &generations[g];
 692       if (gen->bitmap != NULL) {
 693           freeGroup(gen->bitmap);
 694           gen->bitmap = NULL;
 695       }
 696   }
 697
 698   // Reset the nursery: make the blocks empty
 699   allocated += clearNurseries();
 700
 701   resize_nursery();
 702
 703   resetNurseries();
 704
 705  // mark the garbage collected CAFs as dead
 706 #if 0 && defined(DEBUG) // doesn't work at the moment
 707   if (major_gc) { gcCAFs(); }
 708 #endif
 709
 710 #ifdef PROFILING
 711   // resetStaticObjectForRetainerProfiling() must be called before
 712   // zeroing below.
 713   if (n_gc_threads > 1) {
 714       barf("profiling is currently broken with multi-threaded GC");
 715       // ToDo: fix the gct->scavenged_static_objects below
 716   }
 717   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 718 #endif
 719
 720   // zero the scavenged static object list
 721   if (major_gc) {
 722       nat i;
 723       for (i = 0; i < n_gc_threads; i++) {
 724           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 725       }
 726   }
 727
 728   // send exceptions to any threads which were about to die
 729   RELEASE_SM_LOCK;
 730   resurrectThreads(resurrected_threads);
 731   ACQUIRE_SM_LOCK;
 732
 733   // Update the stable pointer hash table.
 734   updateStablePtrTable(major_gc);
 735
 736   // unlock the StablePtr table.  Must be before scheduleFinalizers(),
 737   // because a finalizer may call hs_free_fun_ptr() or
 738   // hs_free_stable_ptr(), both of which access the StablePtr table.
 739   stablePtrPostGC();
 740
 741   // Start any pending finalizers.  Must be after
 742   // updateStablePtrTable() and stablePtrPostGC() (see #4221).
 743   RELEASE_SM_LOCK;
 744   scheduleFinalizers(cap, old_weak_ptr_list);
 745   ACQUIRE_SM_LOCK;
 746
 747   if (major_gc) {
 748       nat need, got;
 749       need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
 750       got = mblocks_allocated;
 751       /* If the amount of data remains constant, next major GC we'll
 752          require (F+1)*need. We leave (F+2)*need in order to reduce
 753          repeated deallocation and reallocation. */
 754       need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
 755       if (got > need) {
 756           returnMemoryToOS(got - need);
 757       }
 758   }
 759
 760   // check sanity after GC
 761   IF_DEBUG(sanity, checkSanity(rtsTrue));
 762
 763   // extra GC trace info
 764   IF_DEBUG(gc, statDescribeGens());
 765
 766 #ifdef DEBUG
 767   // symbol-table based profiling
 768   /*  heapCensus(to_blocks); */ /* ToDo */
 769 #endif
 770
 771   // restore enclosing cost centre
 772 #ifdef PROFILING
 773   CCCS = prev_CCS;
 774 #endif
 775
 776 #ifdef DEBUG
 777   // check for memory leaks if DEBUG is on
 778   memInventory(DEBUG_gc);
 779 #endif
 780
 781 #ifdef RTS_GTK_FRONTPANEL
 782   if (RtsFlags.GcFlags.frontpanel) {
 783       updateFrontPanelAfterGC( N, live );
 784   }
 785 #endif
 786
 787   // ok, GC over: tell the stats department what happened.
 788   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 789   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 790
 791   // Guess which generation we'll collect *next* time
 792   initialise_N(force_major_gc);
 793
 794 #if defined(RTS_USER_SIGNALS)
 795   if (RtsFlags.MiscFlags.install_signal_handlers) {
 796     // unblock signals again
 797     unblockUserSignals();
 798   }
 799 #endif
 800
 801   RELEASE_SM_LOCK;
 802
 803   SET_GCT(saved_gct);
 804 }
 805
 806 /* -----------------------------------------------------------------------------
 807    Figure out which generation to collect, initialise N and major_gc.
 808
 809    Also returns the total number of blocks in generations that will be
 810    collected.
 811    -------------------------------------------------------------------------- */
 812
 813 static nat
 814 initialise_N (rtsBool force_major_gc)
 815 {
 816     int g;
 817     nat blocks, blocks_total;
 818
 819     blocks = 0;
 820     blocks_total = 0;
 821
 822     if (force_major_gc) {
 823         N = RtsFlags.GcFlags.generations - 1;
 824     } else {
 825         N = 0;
 826     }
 827
 828     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 829
 830         blocks = generations[g].n_words / BLOCK_SIZE_W
 831                + generations[g].n_large_blocks;
 832
 833         if (blocks >= generations[g].max_blocks) {
 834             N = stg_max(N,g);
 835         }
 836         if ((nat)g <= N) {
 837             blocks_total += blocks;
 838         }
 839     }
 840
 841     blocks_total += countNurseryBlocks();
 842
 843     major_gc = (N == RtsFlags.GcFlags.generations-1);
 844     return blocks_total;
 845 }
 846
 847 /* -----------------------------------------------------------------------------
 848    Initialise the gc_thread structures.
 849    -------------------------------------------------------------------------- */
 850
 851 #define GC_THREAD_INACTIVE             0
 852 #define GC_THREAD_STANDING_BY          1
 853 #define GC_THREAD_RUNNING              2
 854 #define GC_THREAD_WAITING_TO_CONTINUE  3
 855
 856 static void
 857 new_gc_thread (nat n, gc_thread *t)
 858 {
 859     nat g;
 860     gen_workspace *ws;
 861
 862 #ifdef THREADED_RTS
 863     t->id = 0;
 864     initSpinLock(&t->gc_spin);
 865     initSpinLock(&t->mut_spin);
 866     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 867     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 868                           // thread to start up, see wakeup_gc_threads
 869 #endif
 870
 871     t->thread_index = n;
 872     t->free_blocks = NULL;
 873     t->gc_count = 0;
 874
 875     init_gc_thread(t);
 876
 877 #ifdef USE_PAPI
 878     t->papi_events = -1;
 879 #endif
 880
 881     for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 882     {
 883         ws = &t->gens[g];
 884         ws->gen = &generations[g];
 885         ASSERT(g == ws->gen->no);
 886         ws->my_gct = t;
 887
 888         ws->todo_bd = NULL;
 889         ws->todo_q = newWSDeque(128);
 890         ws->todo_overflow = NULL;
 891         ws->n_todo_overflow = 0;
 892
 893         ws->part_list = NULL;
 894         ws->n_part_blocks = 0;
 895
 896         ws->scavd_list = NULL;
 897         ws->n_scavd_blocks = 0;
 898     }
 899 }
 900
 901
 902 void
 903 initGcThreads (void)
 904 {
 905     if (gc_threads == NULL) {
 906 #if defined(THREADED_RTS)
 907         nat i;
 908         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 909                                      sizeof(gc_thread*),
 910                                      "alloc_gc_threads");
 911
 912         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 913             gc_threads[i] =
 914                 stgMallocBytes(sizeof(gc_thread) +
 915                                RtsFlags.GcFlags.generations * sizeof(gen_workspace),
 916                                "alloc_gc_threads");
 917
 918             new_gc_thread(i, gc_threads[i]);
 919         }
 920 #else
 921         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 922         gc_threads[0] = gct;
 923         new_gc_thread(0,gc_threads[0]);
 924 #endif
 925     }
 926 }
 927
 928 void
 929 freeGcThreads (void)
 930 {
 931     nat g;
 932     if (gc_threads != NULL) {
 933 #if defined(THREADED_RTS)
 934         nat i;
 935         for (i = 0; i < n_capabilities; i++) {
 936             for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 937             {
 938                 freeWSDeque(gc_threads[i]->gens[g].todo_q);
 939             }
 940             stgFree (gc_threads[i]);
 941         }
 942         stgFree (gc_threads);
 943 #else
 944         for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 945         {
 946             freeWSDeque(gc_threads[0]->gens[g].todo_q);
 947         }
 948         stgFree (gc_threads);
 949 #endif
 950         gc_threads = NULL;
 951     }
 952 }
 953
 954 /* ----------------------------------------------------------------------------
 955    Start GC threads
 956    ------------------------------------------------------------------------- */
 957
 958 static volatile StgWord gc_running_threads;
 959
 960 static StgWord
 961 inc_running (void)
 962 {
 963     StgWord new;
 964     new = atomic_inc(&gc_running_threads);
 965     ASSERT(new <= n_gc_threads);
 966     return new;
 967 }
 968
 969 static StgWord
 970 dec_running (void)
 971 {
 972     ASSERT(gc_running_threads != 0);
 973     return atomic_dec(&gc_running_threads);
 974 }
 975
 976 static rtsBool
 977 any_work (void)
 978 {
 979     int g;
 980     gen_workspace *ws;
 981
 982     gct->any_work++;
 983
 984     write_barrier();
 985
 986     // scavenge objects in compacted generation
 987     if (mark_stack_bd != NULL && !mark_stack_empty()) {
 988         return rtsTrue;
 989     }
 990
 991     // Check for global work in any step.  We don't need to check for
 992     // local work, because we have already exited scavenge_loop(),
 993     // which means there is no local work for this thread.
 994     for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
 995         ws = &gct->gens[g];
 996         if (ws->todo_large_objects) return rtsTrue;
 997         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 998         if (ws->todo_overflow) return rtsTrue;
 999     }
1000
1001 #if defined(THREADED_RTS)
1002     if (work_stealing) {
1003         nat n;
1004         // look for work to steal
1005         for (n = 0; n < n_gc_threads; n++) {
1006             if (n == gct->thread_index) continue;
1007             for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
1008                 ws = &gc_threads[n]->gens[g];
1009                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1010             }
1011         }
1012     }
1013 #endif
1014
1015     gct->no_work++;
1016 #if defined(THREADED_RTS)
1017     yieldThread();
1018 #endif
1019
1020     return rtsFalse;
1021 }
1022
1023 static void
1024 scavenge_until_all_done (void)
1025 {
1026     nat r;
1027
1028
1029 loop:
1030     traceEventGcWork(&capabilities[gct->thread_index]);
1031
1032 #if defined(THREADED_RTS)
1033     if (n_gc_threads > 1) {
1034         scavenge_loop();
1035     } else {
1036         scavenge_loop1();
1037     }
1038 #else
1039     scavenge_loop();
1040 #endif
1041
1042     // scavenge_loop() only exits when there's no work to do
1043     r = dec_running();
1044
1045     traceEventGcIdle(&capabilities[gct->thread_index]);
1046
1047     debugTrace(DEBUG_gc, "%d GC threads still running", r);
1048
1049     while (gc_running_threads != 0) {
1050         // usleep(1);
1051         if (any_work()) {
1052             inc_running();
1053             goto loop;
1054         }
1055         // any_work() does not remove the work from the queue, it
1056         // just checks for the presence of work.  If we find any,
1057         // then we increment gc_running_threads and go back to
1058         // scavenge_loop() to perform any pending work.
1059     }
1060
1061     traceEventGcDone(&capabilities[gct->thread_index]);
1062 }
1063
1064 #if defined(THREADED_RTS)
1065
1066 void
1067 gcWorkerThread (Capability *cap)
1068 {
1069     gc_thread *saved_gct;
1070
1071     // necessary if we stole a callee-saves register for gct:
1072     saved_gct = gct;
1073
1074     gct = gc_threads[cap->no];
1075     gct->id = osThreadId();
1076
1077     // Wait until we're told to wake up
1078     RELEASE_SPIN_LOCK(&gct->mut_spin);
1079     gct->wakeup = GC_THREAD_STANDING_BY;
1080     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1081     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1082
1083 #ifdef USE_PAPI
1084     // start performance counters in this thread...
1085     if (gct->papi_events == -1) {
1086         papi_init_eventset(&gct->papi_events);
1087     }
1088     papi_thread_start_gc1_count(gct->papi_events);
1089 #endif
1090
1091     // Every thread evacuates some roots.
1092     gct->evac_gen = 0;
1093     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1094                          rtsTrue/*prune sparks*/);
1095     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1096
1097     scavenge_until_all_done();
1098
1099 #ifdef THREADED_RTS
1100     // Now that the whole heap is marked, we discard any sparks that
1101     // were found to be unreachable.  The main GC thread is currently
1102     // marking heap reachable via weak pointers, so it is
1103     // non-deterministic whether a spark will be retained if it is
1104     // only reachable via weak pointers.  To fix this problem would
1105     // require another GC barrier, which is too high a price.
1106     pruneSparkQueue(cap);
1107 #endif
1108
1109 #ifdef USE_PAPI
1110     // count events in this thread towards the GC totals
1111     papi_thread_stop_gc1_count(gct->papi_events);
1112 #endif
1113
1114     // Wait until we're told to continue
1115     RELEASE_SPIN_LOCK(&gct->gc_spin);
1116     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1117     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1118                gct->thread_index);
1119     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1120     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1121
1122     SET_GCT(saved_gct);
1123 }
1124
1125 #endif
1126
1127 #if defined(THREADED_RTS)
1128
1129 void
1130 waitForGcThreads (Capability *cap USED_IF_THREADS)
1131 {
1132     const nat n_threads = RtsFlags.ParFlags.nNodes;
1133     const nat me = cap->no;
1134     nat i, j;
1135     rtsBool retry = rtsTrue;
1136
1137     while(retry) {
1138         for (i=0; i < n_threads; i++) {
1139             if (i == me) continue;
1140             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1141                 prodCapability(&capabilities[i], cap->running_task);
1142             }
1143         }
1144         for (j=0; j < 10; j++) {
1145             retry = rtsFalse;
1146             for (i=0; i < n_threads; i++) {
1147                 if (i == me) continue;
1148                 write_barrier();
1149                 setContextSwitches();
1150                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1151                     retry = rtsTrue;
1152                 }
1153             }
1154             if (!retry) break;
1155             yieldThread();
1156         }
1157     }
1158 }
1159
1160 #endif // THREADED_RTS
1161
1162 static void
1163 start_gc_threads (void)
1164 {
1165 #if defined(THREADED_RTS)
1166     gc_running_threads = 0;
1167 #endif
1168 }
1169
1170 static void
1171 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1172 {
1173 #if defined(THREADED_RTS)
1174     nat i;
1175     for (i=0; i < n_threads; i++) {
1176         if (i == me) continue;
1177         inc_running();
1178         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1179         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1180
1181         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1182         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1183         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1184     }
1185 #endif
1186 }
1187
1188 // After GC is complete, we must wait for all GC threads to enter the
1189 // standby state, otherwise they may still be executing inside
1190 // any_work(), and may even remain awake until the next GC starts.
1191 static void
1192 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1193 {
1194 #if defined(THREADED_RTS)
1195     nat i;
1196     for (i=0; i < n_threads; i++) {
1197         if (i == me) continue;
1198         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1199     }
1200 #endif
1201 }
1202
1203 #if defined(THREADED_RTS)
1204 void
1205 releaseGCThreads (Capability *cap USED_IF_THREADS)
1206 {
1207     const nat n_threads = RtsFlags.ParFlags.nNodes;
1208     const nat me = cap->no;
1209     nat i;
1210     for (i=0; i < n_threads; i++) {
1211         if (i == me) continue;
1212         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1213             barf("releaseGCThreads");
1214
1215         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1216         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1217         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1218     }
1219 }
1220 #endif
1221
1222 /* ----------------------------------------------------------------------------
1223    Initialise a generation that is to be collected
1224    ------------------------------------------------------------------------- */
1225
1226 static void
1227 init_collected_gen (nat g, nat n_threads)
1228 {
1229     nat t, i;
1230     gen_workspace *ws;
1231     generation *gen;
1232     bdescr *bd;
1233
1234     // Throw away the current mutable list.  Invariant: the mutable
1235     // list always has at least one block; this means we can avoid a
1236     // check for NULL in recordMutable().
1237     if (g != 0) {
1238         freeChain(generations[g].mut_list);
1239         generations[g].mut_list = allocBlock();
1240         for (i = 0; i < n_capabilities; i++) {
1241             freeChain(capabilities[i].mut_lists[g]);
1242             capabilities[i].mut_lists[g] = allocBlock();
1243         }
1244     }
1245
1246     gen = &generations[g];
1247     ASSERT(gen->no == g);
1248
1249     // we'll construct a new list of threads in this step
1250     // during GC, throw away the current list.
1251     gen->old_threads = gen->threads;
1252     gen->threads = END_TSO_QUEUE;
1253
1254     // deprecate the existing blocks
1255     gen->old_blocks   = gen->blocks;
1256     gen->n_old_blocks = gen->n_blocks;
1257     gen->blocks       = NULL;
1258     gen->n_blocks     = 0;
1259     gen->n_words      = 0;
1260     gen->live_estimate = 0;
1261
1262     // initialise the large object queues.
1263     gen->scavenged_large_objects = NULL;
1264     gen->n_scavenged_large_blocks = 0;
1265
1266     // mark the small objects as from-space
1267     for (bd = gen->old_blocks; bd; bd = bd->link) {
1268         bd->flags &= ~BF_EVACUATED;
1269     }
1270
1271     // mark the large objects as from-space
1272     for (bd = gen->large_objects; bd; bd = bd->link) {
1273         bd->flags &= ~BF_EVACUATED;
1274     }
1275
1276     // for a compacted generation, we need to allocate the bitmap
1277     if (gen->mark) {
1278         nat bitmap_size; // in bytes
1279         bdescr *bitmap_bdescr;
1280         StgWord *bitmap;
1281
1282         bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1283
1284         if (bitmap_size > 0) {
1285             bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1286                                        / BLOCK_SIZE);
1287             gen->bitmap = bitmap_bdescr;
1288             bitmap = bitmap_bdescr->start;
1289
1290             debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1291                        bitmap_size, bitmap);
1292
1293             // don't forget to fill it with zeros!
1294             memset(bitmap, 0, bitmap_size);
1295
1296             // For each block in this step, point to its bitmap from the
1297             // block descriptor.
1298             for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1299                 bd->u.bitmap = bitmap;
1300                 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1301
1302                 // Also at this point we set the BF_MARKED flag
1303                 // for this block.  The invariant is that
1304                 // BF_MARKED is always unset, except during GC
1305                 // when it is set on those blocks which will be
1306                 // compacted.
1307                 if (!(bd->flags & BF_FRAGMENTED)) {
1308                     bd->flags |= BF_MARKED;
1309                 }
1310
1311                 // BF_SWEPT should be marked only for blocks that are being
1312                 // collected in sweep()
1313                 bd->flags &= ~BF_SWEPT;
1314             }
1315         }
1316     }
1317
1318     // For each GC thread, for each step, allocate a "todo" block to
1319     // store evacuated objects to be scavenged, and a block to store
1320     // evacuated objects that do not need to be scavenged.
1321     for (t = 0; t < n_threads; t++) {
1322         ws = &gc_threads[t]->gens[g];
1323
1324         ws->todo_large_objects = NULL;
1325
1326         ws->part_list = NULL;
1327         ws->n_part_blocks = 0;
1328
1329         // allocate the first to-space block; extra blocks will be
1330         // chained on as necessary.
1331         ws->todo_bd = NULL;
1332         ASSERT(looksEmptyWSDeque(ws->todo_q));
1333         alloc_todo_block(ws,0);
1334
1335         ws->todo_overflow = NULL;
1336         ws->n_todo_overflow = 0;
1337
1338         ws->scavd_list = NULL;
1339         ws->n_scavd_blocks = 0;
1340     }
1341 }
1342
1343
1344 /* ----------------------------------------------------------------------------
1345    Initialise a generation that is *not* to be collected
1346    ------------------------------------------------------------------------- */
1347
1348 static void
1349 init_uncollected_gen (nat g, nat threads)
1350 {
1351     nat t, n;
1352     gen_workspace *ws;
1353     generation *gen;
1354     bdescr *bd;
1355
1356     // save the current mutable lists for this generation, and
1357     // allocate a fresh block for each one.  We'll traverse these
1358     // mutable lists as roots early on in the GC.
1359     generations[g].saved_mut_list = generations[g].mut_list;
1360     generations[g].mut_list = allocBlock();
1361     for (n = 0; n < n_capabilities; n++) {
1362         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
1363         capabilities[n].mut_lists[g] = allocBlock();
1364     }
1365
1366     gen = &generations[g];
1367
1368     gen->scavenged_large_objects = NULL;
1369     gen->n_scavenged_large_blocks = 0;
1370
1371     for (t = 0; t < threads; t++) {
1372         ws = &gc_threads[t]->gens[g];
1373
1374         ASSERT(looksEmptyWSDeque(ws->todo_q));
1375         ws->todo_large_objects = NULL;
1376
1377         ws->part_list = NULL;
1378         ws->n_part_blocks = 0;
1379
1380         ws->scavd_list = NULL;
1381         ws->n_scavd_blocks = 0;
1382
1383         // If the block at the head of the list in this generation
1384         // is less than 3/4 full, then use it as a todo block.
1385         if (gen->blocks && isPartiallyFull(gen->blocks))
1386         {
1387             ws->todo_bd = gen->blocks;
1388             ws->todo_free = ws->todo_bd->free;
1389             ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1390             gen->blocks = gen->blocks->link;
1391             gen->n_blocks -= 1;
1392             gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1393             ws->todo_bd->link = NULL;
1394             // we must scan from the current end point.
1395             ws->todo_bd->u.scan = ws->todo_bd->free;
1396         }
1397         else
1398         {
1399             ws->todo_bd = NULL;
1400             alloc_todo_block(ws,0);
1401         }
1402     }
1403
1404     // deal out any more partial blocks to the threads' part_lists
1405     t = 0;
1406     while (gen->blocks && isPartiallyFull(gen->blocks))
1407     {
1408         bd = gen->blocks;
1409         gen->blocks = bd->link;
1410         ws = &gc_threads[t]->gens[g];
1411         bd->link = ws->part_list;
1412         ws->part_list = bd;
1413         ws->n_part_blocks += 1;
1414         bd->u.scan = bd->free;
1415         gen->n_blocks -= 1;
1416         gen->n_words -= bd->free - bd->start;
1417         t++;
1418         if (t == n_gc_threads) t = 0;
1419     }
1420 }
1421
1422 /* -----------------------------------------------------------------------------
1423    Initialise a gc_thread before GC
1424    -------------------------------------------------------------------------- */
1425
1426 static void
1427 init_gc_thread (gc_thread *t)
1428 {
1429     t->static_objects = END_OF_STATIC_LIST;
1430     t->scavenged_static_objects = END_OF_STATIC_LIST;
1431     t->scan_bd = NULL;
1432     t->mut_lists = capabilities[t->thread_index].mut_lists;
1433     t->evac_gen = 0;
1434     t->failed_to_evac = rtsFalse;
1435     t->eager_promotion = rtsTrue;
1436     t->thunk_selector_depth = 0;
1437     t->copied = 0;
1438     t->scanned = 0;
1439     t->any_work = 0;
1440     t->no_work = 0;
1441     t->scav_find_work = 0;
1442 }
1443
1444 /* -----------------------------------------------------------------------------
1445    Function we pass to evacuate roots.
1446    -------------------------------------------------------------------------- */
1447
1448 static void
1449 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1450 {
1451     // we stole a register for gct, but this function is called from
1452     // *outside* the GC where the register variable is not in effect,
1453     // so we need to save and restore it here.  NB. only call
1454     // mark_root() from the main GC thread, otherwise gct will be
1455     // incorrect.
1456     gc_thread *saved_gct;
1457     saved_gct = gct;
1458     SET_GCT(user);
1459
1460     evacuate(root);
1461
1462     SET_GCT(saved_gct);
1463 }
1464
1465 /* -----------------------------------------------------------------------------
1466    Initialising the static object & mutable lists
1467    -------------------------------------------------------------------------- */
1468
1469 static void
1470 zero_static_object_list(StgClosure* first_static)
1471 {
1472   StgClosure* p;
1473   StgClosure* link;
1474   const StgInfoTable *info;
1475
1476   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1477     info = get_itbl(p);
1478     link = *STATIC_LINK(info, p);
1479     *STATIC_LINK(info,p) = NULL;
1480   }
1481 }
1482
1483 /* ----------------------------------------------------------------------------
1484    Reset the sizes of the older generations when we do a major
1485    collection.
1486
1487    CURRENT STRATEGY: make all generations except zero the same size.
1488    We have to stay within the maximum heap size, and leave a certain
1489    percentage of the maximum heap size available to allocate into.
1490    ------------------------------------------------------------------------- */
1491
1492 static void
1493 resize_generations (void)
1494 {
1495     nat g;
1496
1497     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1498         nat live, size, min_alloc, words;
1499         const nat max  = RtsFlags.GcFlags.maxHeapSize;
1500         const nat gens = RtsFlags.GcFlags.generations;
1501
1502         // live in the oldest generations
1503         if (oldest_gen->live_estimate != 0) {
1504             words = oldest_gen->live_estimate;
1505         } else {
1506             words = oldest_gen->n_words;
1507         }
1508         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1509             oldest_gen->n_large_blocks;
1510
1511         // default max size for all generations except zero
1512         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1513                        RtsFlags.GcFlags.minOldGenSize);
1514
1515         if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1516             RtsFlags.GcFlags.heapSizeSuggestion = size;
1517         }
1518
1519         // minimum size for generation zero
1520         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1521                             RtsFlags.GcFlags.minAllocAreaSize);
1522
1523         // Auto-enable compaction when the residency reaches a
1524         // certain percentage of the maximum heap size (default: 30%).
1525         if (RtsFlags.GcFlags.compact ||
1526             (max > 0 &&
1527              oldest_gen->n_blocks >
1528              (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1529             oldest_gen->mark = 1;
1530             oldest_gen->compact = 1;
1531 //        debugBelch("compaction: on\n", live);
1532         } else {
1533             oldest_gen->mark = 0;
1534             oldest_gen->compact = 0;
1535 //        debugBelch("compaction: off\n", live);
1536         }
1537
1538         if (RtsFlags.GcFlags.sweep) {
1539             oldest_gen->mark = 1;
1540         }
1541
1542         // if we're going to go over the maximum heap size, reduce the
1543         // size of the generations accordingly.  The calculation is
1544         // different if compaction is turned on, because we don't need
1545         // to double the space required to collect the old generation.
1546         if (max != 0) {
1547
1548             // this test is necessary to ensure that the calculations
1549             // below don't have any negative results - we're working
1550             // with unsigned values here.
1551             if (max < min_alloc) {
1552                 heapOverflow();
1553             }
1554
1555             if (oldest_gen->compact) {
1556                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1557                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1558                 }
1559             } else {
1560                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1561                     size = (max - min_alloc) / ((gens - 1) * 2);
1562                 }
1563             }
1564
1565             if (size < live) {
1566                 heapOverflow();
1567             }
1568         }
1569
1570 #if 0
1571         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1572                    min_alloc, size, max);
1573 #endif
1574
1575         for (g = 0; g < gens; g++) {
1576             generations[g].max_blocks = size;
1577         }
1578     }
1579 }
1580
1581 /* -----------------------------------------------------------------------------
1582    Calculate the new size of the nursery, and resize it.
1583    -------------------------------------------------------------------------- */
1584
1585 static void
1586 resize_nursery (void)
1587 {
1588     const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1589
1590     if (RtsFlags.GcFlags.generations == 1)
1591     {   // Two-space collector:
1592         nat blocks;
1593
1594         /* set up a new nursery.  Allocate a nursery size based on a
1595          * function of the amount of live data (by default a factor of 2)
1596          * Use the blocks from the old nursery if possible, freeing up any
1597          * left over blocks.
1598          *
1599          * If we get near the maximum heap size, then adjust our nursery
1600          * size accordingly.  If the nursery is the same size as the live
1601          * data (L), then we need 3L bytes.  We can reduce the size of the
1602          * nursery to bring the required memory down near 2L bytes.
1603          *
1604          * A normal 2-space collector would need 4L bytes to give the same
1605          * performance we get from 3L bytes, reducing to the same
1606          * performance at 2L bytes.
1607          */
1608         blocks = generations[0].n_blocks;
1609
1610         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1611              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1612              RtsFlags.GcFlags.maxHeapSize )
1613         {
1614             long adjusted_blocks;  // signed on purpose
1615             int pc_free;
1616
1617             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1618
1619             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1620                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1621
1622             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1623             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1624             {
1625                 heapOverflow();
1626             }
1627             blocks = adjusted_blocks;
1628         }
1629         else
1630         {
1631             blocks *= RtsFlags.GcFlags.oldGenFactor;
1632             if (blocks < min_nursery)
1633             {
1634                 blocks = min_nursery;
1635             }
1636         }
1637         resizeNurseries(blocks);
1638     }
1639     else  // Generational collector
1640     {
1641         /*
1642          * If the user has given us a suggested heap size, adjust our
1643          * allocation area to make best use of the memory available.
1644          */
1645         if (RtsFlags.GcFlags.heapSizeSuggestion)
1646         {
1647             long blocks;
1648             const nat needed = calcNeeded();    // approx blocks needed at next GC
1649
1650             /* Guess how much will be live in generation 0 step 0 next time.
1651              * A good approximation is obtained by finding the
1652              * percentage of g0 that was live at the last minor GC.
1653              *
1654              * We have an accurate figure for the amount of copied data in
1655              * 'copied', but we must convert this to a number of blocks, with
1656              * a small adjustment for estimated slop at the end of a block
1657              * (- 10 words).
1658              */
1659             if (N == 0)
1660             {
1661                 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1662                     / countNurseryBlocks();
1663             }
1664
1665             /* Estimate a size for the allocation area based on the
1666              * information available.  We might end up going slightly under
1667              * or over the suggested heap size, but we should be pretty
1668              * close on average.
1669              *
1670              * Formula:            suggested - needed
1671              *                ----------------------------
1672              *                    1 + g0_pcnt_kept/100
1673              *
1674              * where 'needed' is the amount of memory needed at the next
1675              * collection for collecting all gens except g0.
1676              */
1677             blocks =
1678                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1679                 (100 + (long)g0_pcnt_kept);
1680
1681             if (blocks < (long)min_nursery) {
1682                 blocks = min_nursery;
1683             }
1684
1685             resizeNurseries((nat)blocks);
1686         }
1687         else
1688         {
1689             // we might have added extra large blocks to the nursery, so
1690             // resize back to minAllocAreaSize again.
1691             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1692         }
1693     }
1694 }
1695
1696 /* -----------------------------------------------------------------------------
1697    Sanity code for CAF garbage collection.
1698
1699    With DEBUG turned on, we manage a CAF list in addition to the SRT
1700    mechanism.  After GC, we run down the CAF list and blackhole any
1701    CAFs which have been garbage collected.  This means we get an error
1702    whenever the program tries to enter a garbage collected CAF.
1703
1704    Any garbage collected CAFs are taken off the CAF list at the same
1705    time.
1706    -------------------------------------------------------------------------- */
1707
1708 #if 0 && defined(DEBUG)
1709
1710 static void
1711 gcCAFs(void)
1712 {
1713   StgClosure*  p;
1714   StgClosure** pp;
1715   const StgInfoTable *info;
1716   nat i;
1717
1718   i = 0;
1719   p = caf_list;
1720   pp = &caf_list;
1721
1722   while (p != NULL) {
1723
1724     info = get_itbl(p);
1725
1726     ASSERT(info->type == IND_STATIC);
1727
1728     if (STATIC_LINK(info,p) == NULL) {
1729         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1730         // black hole it
1731         SET_INFO(p,&stg_BLACKHOLE_info);
1732         p = STATIC_LINK2(info,p);
1733         *pp = p;
1734     }
1735     else {
1736       pp = &STATIC_LINK2(info,p);
1737       p = *pp;
1738       i++;
1739     }
1740
1741   }
1742
1743   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1744 }
1745 #endif