rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "Compact.h"
  44 #include "Evac.h"
  45 #include "Scav.h"
  46 #include "GCUtils.h"
  47 #include "MarkStack.h"
  48 #include "MarkWeak.h"
  49 #include "Sparks.h"
  50 #include "Sweep.h"
  51
  52 #include <string.h> // for memset()
  53 #include <unistd.h>
  54
  55 /* -----------------------------------------------------------------------------
  56    Global variables
  57    -------------------------------------------------------------------------- */
  58
  59 /* STATIC OBJECT LIST.
  60  *
  61  * During GC:
  62  * We maintain a linked list of static objects that are still live.
  63  * The requirements for this list are:
  64  *
  65  *  - we need to scan the list while adding to it, in order to
  66  *    scavenge all the static objects (in the same way that
  67  *    breadth-first scavenging works for dynamic objects).
  68  *
  69  *  - we need to be able to tell whether an object is already on
  70  *    the list, to break loops.
  71  *
  72  * Each static object has a "static link field", which we use for
  73  * linking objects on to the list.  We use a stack-type list, consing
  74  * objects on the front as they are added (this means that the
  75  * scavenge phase is depth-first, not breadth-first, but that
  76  * shouldn't matter).
  77  *
  78  * A separate list is kept for objects that have been scavenged
  79  * already - this is so that we can zero all the marks afterwards.
  80  *
  81  * An object is on the list if its static link field is non-zero; this
  82  * means that we have to mark the end of the list with '1', not NULL.
  83  *
  84  * Extra notes for generational GC:
  85  *
  86  * Each generation has a static object list associated with it.  When
  87  * collecting generations up to N, we treat the static object lists
  88  * from generations > N as roots.
  89  *
  90  * We build up a static object list while collecting generations 0..N,
  91  * which is then appended to the static object list of generation N+1.
  92  */
  93
  94 /* N is the oldest generation being collected, where the generations
  95  * are numbered starting at 0.  A major GC (indicated by the major_gc
  96  * flag) is when we're collecting all generations.  We only attempt to
  97  * deal with static objects and GC CAFs when doing a major GC.
  98  */
  99 nat N;
 100 rtsBool major_gc;
 101
 102 /* Data used for allocation area sizing.
 103  */
 104 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
 105
 106 /* Mut-list stats */
 107 #ifdef DEBUG
 108 nat mutlist_MUTVARS,
 109     mutlist_MUTARRS,
 110     mutlist_MVARS,
 111     mutlist_OTHERS;
 112 #endif
 113
 114 /* Thread-local data for each GC thread
 115  */
 116 gc_thread **gc_threads = NULL;
 117
 118 #if !defined(THREADED_RTS)
 119 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
 120 #endif
 121
 122 // Number of threads running in *this* GC.  Affects how many
 123 // step->todos[] lists we have to look in to find work.
 124 nat n_gc_threads;
 125
 126 // For stats:
 127 long copied;        // *words* copied & scavenged during this GC
 128
 129 rtsBool work_stealing;
 130
 131 DECLARE_GCT
 132
 133 /* -----------------------------------------------------------------------------
 134    Static function declarations
 135    -------------------------------------------------------------------------- */
 136
 137 static void mark_root               (void *user, StgClosure **root);
 138 static void zero_static_object_list (StgClosure* first_static);
 139 static nat  initialise_N            (rtsBool force_major_gc);
 140 static void init_collected_gen      (nat g, nat threads);
 141 static void init_uncollected_gen    (nat g, nat threads);
 142 static void init_gc_thread          (gc_thread *t);
 143 static void update_task_list        (void);
 144 static void resize_generations      (void);
 145 static void resize_nursery          (void);
 146 static void start_gc_threads        (void);
 147 static void scavenge_until_all_done (void);
 148 static StgWord inc_running          (void);
 149 static StgWord dec_running          (void);
 150 static void wakeup_gc_threads       (nat n_threads, nat me);
 151 static void shutdown_gc_threads     (nat n_threads, nat me);
 152
 153 #if 0 && defined(DEBUG)
 154 static void gcCAFs                  (void);
 155 #endif
 156
 157 /* -----------------------------------------------------------------------------
 158    The mark stack.
 159    -------------------------------------------------------------------------- */
 160
 161 bdescr *mark_stack_top_bd; // topmost block in the mark stack
 162 bdescr *mark_stack_bd;     // current block in the mark stack
 163 StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
 164
 165 /* -----------------------------------------------------------------------------
 166    GarbageCollect: the main entry point to the garbage collector.
 167
 168    Locks held: all capabilities are held throughout GarbageCollect().
 169    -------------------------------------------------------------------------- */
 170
 171 void
 172 GarbageCollect (rtsBool force_major_gc,
 173                 nat gc_type USED_IF_THREADS,
 174                 Capability *cap)
 175 {
 176   bdescr *bd;
 177   generation *gen;
 178   lnat live, allocated, max_copied, avg_copied, slop;
 179   gc_thread *saved_gct;
 180   nat g, t, n;
 181
 182   // necessary if we stole a callee-saves register for gct:
 183   saved_gct = gct;
 184
 185 #ifdef PROFILING
 186   CostCentreStack *prev_CCS;
 187 #endif
 188
 189   ACQUIRE_SM_LOCK;
 190
 191 #if defined(RTS_USER_SIGNALS)
 192   if (RtsFlags.MiscFlags.install_signal_handlers) {
 193     // block signals
 194     blockUserSignals();
 195   }
 196 #endif
 197
 198   ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
 199   // otherwise adjust the padding in gen_workspace.
 200
 201   // tell the stats department that we've started a GC
 202   stat_startGC();
 203
 204   // tell the STM to discard any cached closures it's hoping to re-use
 205   stmPreGCHook();
 206
 207   // lock the StablePtr table
 208   stablePtrPreGC();
 209
 210 #ifdef DEBUG
 211   mutlist_MUTVARS = 0;
 212   mutlist_MUTARRS = 0;
 213   mutlist_OTHERS = 0;
 214 #endif
 215
 216   // attribute any costs to CCS_GC
 217 #ifdef PROFILING
 218   prev_CCS = CCCS;
 219   CCCS = CCS_GC;
 220 #endif
 221
 222   /* Approximate how much we allocated.
 223    * Todo: only when generating stats?
 224    */
 225   allocated = calcAllocated();
 226
 227   /* Figure out which generation to collect
 228    */
 229   n = initialise_N(force_major_gc);
 230
 231 #if defined(THREADED_RTS)
 232   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
 233                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
 234       // It's not always a good idea to do load balancing in parallel
 235       // GC.  In particular, for a parallel program we don't want to
 236       // lose locality by moving cached data into another CPU's cache
 237       // (this effect can be quite significant).
 238       //
 239       // We could have a more complex way to deterimine whether to do
 240       // work stealing or not, e.g. it might be a good idea to do it
 241       // if the heap is big.  For now, we just turn it on or off with
 242       // a flag.
 243 #endif
 244
 245   /* Start threads, so they can be spinning up while we finish initialisation.
 246    */
 247   start_gc_threads();
 248
 249 #if defined(THREADED_RTS)
 250   /* How many threads will be participating in this GC?
 251    * We don't try to parallelise minor GCs (unless the user asks for
 252    * it with +RTS -gn0), or mark/compact/sweep GC.
 253    */
 254   if (gc_type == PENDING_GC_PAR) {
 255       n_gc_threads = RtsFlags.ParFlags.nNodes;
 256   } else {
 257       n_gc_threads = 1;
 258   }
 259 #else
 260   n_gc_threads = 1;
 261 #endif
 262
 263   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 264         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 265
 266 #ifdef RTS_GTK_FRONTPANEL
 267   if (RtsFlags.GcFlags.frontpanel) {
 268       updateFrontPanelBeforeGC(N);
 269   }
 270 #endif
 271
 272 #ifdef DEBUG
 273   // check for memory leaks if DEBUG is on
 274   memInventory(DEBUG_gc);
 275 #endif
 276
 277   // check sanity *before* GC
 278   IF_DEBUG(sanity, checkSanity(rtsTrue));
 279
 280   // Initialise all our gc_thread structures
 281   for (t = 0; t < n_gc_threads; t++) {
 282       init_gc_thread(gc_threads[t]);
 283   }
 284
 285   // Initialise all the generations/steps that we're collecting.
 286   for (g = 0; g <= N; g++) {
 287       init_collected_gen(g,n_gc_threads);
 288   }
 289
 290   // Initialise all the generations/steps that we're *not* collecting.
 291   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 292       init_uncollected_gen(g,n_gc_threads);
 293   }
 294
 295   /* Allocate a mark stack if we're doing a major collection.
 296    */
 297   if (major_gc && oldest_gen->mark) {
 298       mark_stack_bd     = allocBlock();
 299       mark_stack_top_bd = mark_stack_bd;
 300       mark_stack_bd->link = NULL;
 301       mark_stack_bd->u.back = NULL;
 302       mark_sp           = mark_stack_bd->start;
 303   } else {
 304       mark_stack_bd     = NULL;
 305       mark_stack_top_bd = NULL;
 306       mark_sp           = NULL;
 307   }
 308
 309   // this is the main thread
 310 #ifdef THREADED_RTS
 311   if (n_gc_threads == 1) {
 312       SET_GCT(gc_threads[0]);
 313   } else {
 314       SET_GCT(gc_threads[cap->no]);
 315   }
 316 #else
 317 SET_GCT(gc_threads[0]);
 318 #endif
 319
 320   /* -----------------------------------------------------------------------
 321    * follow all the roots that we know about:
 322    */
 323
 324   // the main thread is running: this prevents any other threads from
 325   // exiting prematurely, so we can start them now.
 326   // NB. do this after the mutable lists have been saved above, otherwise
 327   // the other GC threads will be writing into the old mutable lists.
 328   inc_running();
 329   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 330
 331   // Mutable lists from each generation > N
 332   // we want to *scavenge* these roots, not evacuate them: they're not
 333   // going to move in this GC.
 334   // Also do them in reverse generation order, for the usual reason:
 335   // namely to reduce the likelihood of spurious old->new pointers.
 336   //
 337   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 338 #if defined(THREADED_RTS)
 339       if (n_gc_threads > 1) {
 340           scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 341       } else {
 342           scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]);
 343       }
 344 #else
 345       scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 346 #endif
 347       freeChain_sync(generations[g].saved_mut_list);
 348       generations[g].saved_mut_list = NULL;
 349
 350   }
 351
 352   // scavenge the capability-private mutable lists.  This isn't part
 353   // of markSomeCapabilities() because markSomeCapabilities() can only
 354   // call back into the GC via mark_root() (due to the gct register
 355   // variable).
 356   if (n_gc_threads == 1) {
 357       for (n = 0; n < n_capabilities; n++) {
 358 #if defined(THREADED_RTS)
 359           scavenge_capability_mut_Lists1(&capabilities[n]);
 360 #else
 361           scavenge_capability_mut_lists(&capabilities[n]);
 362 #endif
 363       }
 364   } else {
 365       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 366   }
 367
 368   // follow roots from the CAF list (used by GHCi)
 369   gct->evac_gen = 0;
 370   markCAFs(mark_root, gct);
 371
 372   // follow all the roots that the application knows about.
 373   gct->evac_gen = 0;
 374   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 375                        rtsTrue/*prune sparks*/);
 376
 377 #if defined(RTS_USER_SIGNALS)
 378   // mark the signal handlers (signals should be already blocked)
 379   markSignalHandlers(mark_root, gct);
 380 #endif
 381
 382   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 383   markWeakPtrList();
 384   initWeakForGC();
 385
 386   // Mark the stable pointer table.
 387   markStablePtrTable(mark_root, gct);
 388
 389   /* -------------------------------------------------------------------------
 390    * Repeatedly scavenge all the areas we know about until there's no
 391    * more scavenging to be done.
 392    */
 393   for (;;)
 394   {
 395       scavenge_until_all_done();
 396       // The other threads are now stopped.  We might recurse back to
 397       // here, but from now on this is the only thread.
 398
 399       // if any blackholes are alive, make the threads that wait on
 400       // them alive too.
 401       if (traverseBlackholeQueue()) {
 402           inc_running();
 403           continue;
 404       }
 405
 406       // must be last...  invariant is that everything is fully
 407       // scavenged at this point.
 408       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 409           inc_running();
 410           continue;
 411       }
 412
 413       // If we get to here, there's really nothing left to do.
 414       break;
 415   }
 416
 417   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 418
 419   // Update pointers from the Task list
 420   update_task_list();
 421
 422   // Now see which stable names are still alive.
 423   gcStablePtrTable();
 424
 425 #ifdef PROFILING
 426   // We call processHeapClosureForDead() on every closure destroyed during
 427   // the current garbage collection, so we invoke LdvCensusForDead().
 428   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 429       || RtsFlags.ProfFlags.bioSelector != NULL)
 430     LdvCensusForDead(N);
 431 #endif
 432
 433   // NO MORE EVACUATION AFTER THIS POINT!
 434
 435   // Two-space collector: free the old to-space.
 436   // g0->old_blocks is the old nursery
 437   // g0->blocks is to-space from the previous GC
 438   if (RtsFlags.GcFlags.generations == 1) {
 439       if (g0->blocks != NULL) {
 440           freeChain(g0->blocks);
 441           g0->blocks = NULL;
 442       }
 443   }
 444
 445   // For each workspace, in each thread, move the copied blocks to the step
 446   {
 447       gc_thread *thr;
 448       gen_workspace *ws;
 449       bdescr *prev, *next;
 450
 451       for (t = 0; t < n_gc_threads; t++) {
 452           thr = gc_threads[t];
 453
 454           for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 455               ws = &thr->gens[g];
 456
 457               // Push the final block
 458               if (ws->todo_bd) {
 459                   push_scanned_block(ws->todo_bd, ws);
 460               }
 461
 462               ASSERT(gct->scan_bd == NULL);
 463               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 464
 465               prev = NULL;
 466               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 467                   ws->gen->n_words += bd->free - bd->start;
 468                   prev = bd;
 469               }
 470               if (prev != NULL) {
 471                   prev->link = ws->gen->blocks;
 472                   ws->gen->blocks = ws->scavd_list;
 473               }
 474               ws->gen->n_blocks += ws->n_scavd_blocks;
 475           }
 476       }
 477
 478       // Add all the partial blocks *after* we've added all the full
 479       // blocks.  This is so that we can grab the partial blocks back
 480       // again and try to fill them up in the next GC.
 481       for (t = 0; t < n_gc_threads; t++) {
 482           thr = gc_threads[t];
 483
 484           for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 485               ws = &thr->gens[g];
 486
 487               prev = NULL;
 488               for (bd = ws->part_list; bd != NULL; bd = next) {
 489                   next = bd->link;
 490                   if (bd->free == bd->start) {
 491                       if (prev == NULL) {
 492                           ws->part_list = next;
 493                       } else {
 494                           prev->link = next;
 495                       }
 496                       freeGroup(bd);
 497                       ws->n_part_blocks--;
 498                   } else {
 499                       ws->gen->n_words += bd->free - bd->start;
 500                       prev = bd;
 501                   }
 502               }
 503               if (prev != NULL) {
 504                   prev->link = ws->gen->blocks;
 505                   ws->gen->blocks = ws->part_list;
 506               }
 507               ws->gen->n_blocks += ws->n_part_blocks;
 508
 509               ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks);
 510               ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words);
 511           }
 512       }
 513   }
 514
 515   // Finally: compact or sweep the oldest generation.
 516   if (major_gc && oldest_gen->mark) {
 517       if (oldest_gen->compact)
 518           compact(gct->scavenged_static_objects);
 519       else
 520           sweep(oldest_gen);
 521   }
 522
 523   /* run through all the generations/steps and tidy up
 524    */
 525   copied = 0;
 526   max_copied = 0;
 527   avg_copied = 0;
 528   {
 529       nat i;
 530       for (i=0; i < n_gc_threads; i++) {
 531           if (n_gc_threads > 1) {
 532               debugTrace(DEBUG_gc,"thread %d:", i);
 533               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 534               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 535               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 536               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 537               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 538           }
 539           copied += gc_threads[i]->copied;
 540           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 541       }
 542       if (n_gc_threads == 1) {
 543           max_copied = 0;
 544           avg_copied = 0;
 545       } else {
 546           avg_copied = copied;
 547       }
 548   }
 549
 550   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 551
 552     if (g == N) {
 553       generations[g].collections++; // for stats
 554       if (n_gc_threads > 1) generations[g].par_collections++;
 555     }
 556
 557     // Count the mutable list as bytes "copied" for the purposes of
 558     // stats.  Every mutable list is copied during every GC.
 559     if (g > 0) {
 560         nat mut_list_size = 0;
 561         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 562             mut_list_size += bd->free - bd->start;
 563         }
 564         for (n = 0; n < n_capabilities; n++) {
 565             for (bd = capabilities[n].mut_lists[g];
 566                  bd != NULL; bd = bd->link) {
 567                 mut_list_size += bd->free - bd->start;
 568             }
 569         }
 570         copied +=  mut_list_size;
 571
 572         debugTrace(DEBUG_gc,
 573                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 574                    (unsigned long)(mut_list_size * sizeof(W_)),
 575                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 576     }
 577
 578     bdescr *next, *prev;
 579     gen = &generations[g];
 580
 581     // for generations we collected...
 582     if (g <= N) {
 583
 584         /* free old memory and shift to-space into from-space for all
 585          * the collected steps (except the allocation area).  These
 586          * freed blocks will probaby be quickly recycled.
 587          */
 588         if (gen->mark)
 589         {
 590             // tack the new blocks on the end of the existing blocks
 591             if (gen->old_blocks != NULL) {
 592
 593                 prev = NULL;
 594                 for (bd = gen->old_blocks; bd != NULL; bd = next) {
 595
 596                     next = bd->link;
 597
 598                     if (!(bd->flags & BF_MARKED))
 599                     {
 600                         if (prev == NULL) {
 601                             gen->old_blocks = next;
 602                         } else {
 603                             prev->link = next;
 604                         }
 605                         freeGroup(bd);
 606                         gen->n_old_blocks--;
 607                     }
 608                     else
 609                     {
 610                         gen->n_words += bd->free - bd->start;
 611
 612                         // NB. this step might not be compacted next
 613                         // time, so reset the BF_MARKED flags.
 614                         // They are set before GC if we're going to
 615                         // compact.  (search for BF_MARKED above).
 616                         bd->flags &= ~BF_MARKED;
 617
 618                         // between GCs, all blocks in the heap except
 619                         // for the nursery have the BF_EVACUATED flag set.
 620                         bd->flags |= BF_EVACUATED;
 621
 622                         prev = bd;
 623                     }
 624                 }
 625
 626                 if (prev != NULL) {
 627                     prev->link = gen->blocks;
 628                     gen->blocks = gen->old_blocks;
 629                 }
 630             }
 631             // add the new blocks to the block tally
 632             gen->n_blocks += gen->n_old_blocks;
 633             ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
 634             ASSERT(countOccupied(gen->blocks) == gen->n_words);
 635         }
 636         else // not copacted
 637         {
 638             freeChain(gen->old_blocks);
 639         }
 640
 641         gen->old_blocks = NULL;
 642         gen->n_old_blocks = 0;
 643
 644         /* LARGE OBJECTS.  The current live large objects are chained on
 645          * scavenged_large, having been moved during garbage
 646          * collection from large_objects.  Any objects left on the
 647          * large_objects list are therefore dead, so we free them here.
 648          */
 649         freeChain(gen->large_objects);
 650         gen->large_objects  = gen->scavenged_large_objects;
 651         gen->n_large_blocks = gen->n_scavenged_large_blocks;
 652         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 653     }
 654     else // for generations > N
 655     {
 656         /* For older generations, we need to append the
 657          * scavenged_large_object list (i.e. large objects that have been
 658          * promoted during this GC) to the large_object list for that step.
 659          */
 660         for (bd = gen->scavenged_large_objects; bd; bd = next) {
 661             next = bd->link;
 662             dbl_link_onto(bd, &gen->large_objects);
 663         }
 664
 665         // add the new blocks we promoted during this GC
 666         gen->n_large_blocks += gen->n_scavenged_large_blocks;
 667         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 668     }
 669   } // for all generations
 670
 671   // update the max size of older generations after a major GC
 672   resize_generations();
 673
 674   // Calculate the amount of live data for stats.
 675   live = calcLiveWords();
 676
 677   // Free the small objects allocated via allocate(), since this will
 678   // all have been copied into G0S1 now.
 679   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 680
 681   // Start a new pinned_object_block
 682   for (n = 0; n < n_capabilities; n++) {
 683       capabilities[n].pinned_object_block = NULL;
 684   }
 685
 686   // Free the mark stack.
 687   if (mark_stack_top_bd != NULL) {
 688       debugTrace(DEBUG_gc, "mark stack: %d blocks",
 689                  countBlocks(mark_stack_top_bd));
 690       freeChain(mark_stack_top_bd);
 691   }
 692
 693   // Free any bitmaps.
 694   for (g = 0; g <= N; g++) {
 695       gen = &generations[g];
 696       if (gen->bitmap != NULL) {
 697           freeGroup(gen->bitmap);
 698           gen->bitmap = NULL;
 699       }
 700   }
 701
 702   resize_nursery();
 703
 704  // mark the garbage collected CAFs as dead
 705 #if 0 && defined(DEBUG) // doesn't work at the moment
 706   if (major_gc) { gcCAFs(); }
 707 #endif
 708
 709 #ifdef PROFILING
 710   // resetStaticObjectForRetainerProfiling() must be called before
 711   // zeroing below.
 712   if (n_gc_threads > 1) {
 713       barf("profiling is currently broken with multi-threaded GC");
 714       // ToDo: fix the gct->scavenged_static_objects below
 715   }
 716   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 717 #endif
 718
 719   // zero the scavenged static object list
 720   if (major_gc) {
 721       nat i;
 722       for (i = 0; i < n_gc_threads; i++) {
 723           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 724       }
 725   }
 726
 727   // Reset the nursery
 728   resetNurseries();
 729
 730   // start any pending finalizers
 731   RELEASE_SM_LOCK;
 732   scheduleFinalizers(cap, old_weak_ptr_list);
 733   ACQUIRE_SM_LOCK;
 734
 735   // send exceptions to any threads which were about to die
 736   RELEASE_SM_LOCK;
 737   resurrectThreads(resurrected_threads);
 738   performPendingThrowTos(exception_threads);
 739   ACQUIRE_SM_LOCK;
 740
 741   // Update the stable pointer hash table.
 742   updateStablePtrTable(major_gc);
 743
 744   // check sanity after GC
 745   IF_DEBUG(sanity, checkSanity(rtsTrue));
 746
 747   // extra GC trace info
 748   IF_DEBUG(gc, statDescribeGens());
 749
 750 #ifdef DEBUG
 751   // symbol-table based profiling
 752   /*  heapCensus(to_blocks); */ /* ToDo */
 753 #endif
 754
 755   // restore enclosing cost centre
 756 #ifdef PROFILING
 757   CCCS = prev_CCS;
 758 #endif
 759
 760 #ifdef DEBUG
 761   // check for memory leaks if DEBUG is on
 762   memInventory(DEBUG_gc);
 763 #endif
 764
 765 #ifdef RTS_GTK_FRONTPANEL
 766   if (RtsFlags.GcFlags.frontpanel) {
 767       updateFrontPanelAfterGC( N, live );
 768   }
 769 #endif
 770
 771   // ok, GC over: tell the stats department what happened.
 772   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 773   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 774
 775   // unlock the StablePtr table
 776   stablePtrPostGC();
 777
 778   // Guess which generation we'll collect *next* time
 779   initialise_N(force_major_gc);
 780
 781 #if defined(RTS_USER_SIGNALS)
 782   if (RtsFlags.MiscFlags.install_signal_handlers) {
 783     // unblock signals again
 784     unblockUserSignals();
 785   }
 786 #endif
 787
 788   RELEASE_SM_LOCK;
 789
 790   SET_GCT(saved_gct);
 791 }
 792
 793 /* -----------------------------------------------------------------------------
 794    Figure out which generation to collect, initialise N and major_gc.
 795
 796    Also returns the total number of blocks in generations that will be
 797    collected.
 798    -------------------------------------------------------------------------- */
 799
 800 static nat
 801 initialise_N (rtsBool force_major_gc)
 802 {
 803     int g;
 804     nat blocks, blocks_total;
 805
 806     blocks = 0;
 807     blocks_total = 0;
 808
 809     if (force_major_gc) {
 810         N = RtsFlags.GcFlags.generations - 1;
 811     } else {
 812         N = 0;
 813     }
 814
 815     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 816
 817         blocks = generations[g].n_words / BLOCK_SIZE_W
 818                + generations[g].n_large_blocks;
 819
 820         if (blocks >= generations[g].max_blocks) {
 821             N = stg_max(N,g);
 822         }
 823         if ((nat)g <= N) {
 824             blocks_total += blocks;
 825         }
 826     }
 827
 828     blocks_total += countNurseryBlocks();
 829
 830     major_gc = (N == RtsFlags.GcFlags.generations-1);
 831     return blocks_total;
 832 }
 833
 834 /* -----------------------------------------------------------------------------
 835    Initialise the gc_thread structures.
 836    -------------------------------------------------------------------------- */
 837
 838 #define GC_THREAD_INACTIVE             0
 839 #define GC_THREAD_STANDING_BY          1
 840 #define GC_THREAD_RUNNING              2
 841 #define GC_THREAD_WAITING_TO_CONTINUE  3
 842
 843 static void
 844 new_gc_thread (nat n, gc_thread *t)
 845 {
 846     nat g;
 847     gen_workspace *ws;
 848
 849 #ifdef THREADED_RTS
 850     t->id = 0;
 851     initSpinLock(&t->gc_spin);
 852     initSpinLock(&t->mut_spin);
 853     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 854     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 855                           // thread to start up, see wakeup_gc_threads
 856 #endif
 857
 858     t->thread_index = n;
 859     t->free_blocks = NULL;
 860     t->gc_count = 0;
 861
 862     init_gc_thread(t);
 863
 864 #ifdef USE_PAPI
 865     t->papi_events = -1;
 866 #endif
 867
 868     for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 869     {
 870         ws = &t->gens[g];
 871         ws->gen = &generations[g];
 872         ASSERT(g == ws->gen->no);
 873         ws->my_gct = t;
 874
 875         ws->todo_bd = NULL;
 876         ws->todo_q = newWSDeque(128);
 877         ws->todo_overflow = NULL;
 878         ws->n_todo_overflow = 0;
 879
 880         ws->part_list = NULL;
 881         ws->n_part_blocks = 0;
 882
 883         ws->scavd_list = NULL;
 884         ws->n_scavd_blocks = 0;
 885     }
 886 }
 887
 888
 889 void
 890 initGcThreads (void)
 891 {
 892     if (gc_threads == NULL) {
 893 #if defined(THREADED_RTS)
 894         nat i;
 895         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 896                                      sizeof(gc_thread*),
 897                                      "alloc_gc_threads");
 898
 899         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 900             gc_threads[i] =
 901                 stgMallocBytes(sizeof(gc_thread) +
 902                                RtsFlags.GcFlags.generations * sizeof(gen_workspace),
 903                                "alloc_gc_threads");
 904
 905             new_gc_thread(i, gc_threads[i]);
 906         }
 907 #else
 908         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 909         gc_threads[0] = gct;
 910         new_gc_thread(0,gc_threads[0]);
 911 #endif
 912     }
 913 }
 914
 915 void
 916 freeGcThreads (void)
 917 {
 918     nat g;
 919     if (gc_threads != NULL) {
 920 #if defined(THREADED_RTS)
 921         nat i;
 922         for (i = 0; i < n_capabilities; i++) {
 923             for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 924             {
 925                 freeWSDeque(gc_threads[i]->gens[g].todo_q);
 926             }
 927             stgFree (gc_threads[i]);
 928         }
 929         stgFree (gc_threads);
 930 #else
 931         for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 932         {
 933             freeWSDeque(gc_threads[0]->gens[g].todo_q);
 934         }
 935         stgFree (gc_threads);
 936 #endif
 937         gc_threads = NULL;
 938     }
 939 }
 940
 941 /* ----------------------------------------------------------------------------
 942    Start GC threads
 943    ------------------------------------------------------------------------- */
 944
 945 static volatile StgWord gc_running_threads;
 946
 947 static StgWord
 948 inc_running (void)
 949 {
 950     StgWord new;
 951     new = atomic_inc(&gc_running_threads);
 952     ASSERT(new <= n_gc_threads);
 953     return new;
 954 }
 955
 956 static StgWord
 957 dec_running (void)
 958 {
 959     ASSERT(gc_running_threads != 0);
 960     return atomic_dec(&gc_running_threads);
 961 }
 962
 963 static rtsBool
 964 any_work (void)
 965 {
 966     int g;
 967     gen_workspace *ws;
 968
 969     gct->any_work++;
 970
 971     write_barrier();
 972
 973     // scavenge objects in compacted generation
 974     if (mark_stack_bd != NULL && !mark_stack_empty()) {
 975         return rtsTrue;
 976     }
 977
 978     // Check for global work in any step.  We don't need to check for
 979     // local work, because we have already exited scavenge_loop(),
 980     // which means there is no local work for this thread.
 981     for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
 982         ws = &gct->gens[g];
 983         if (ws->todo_large_objects) return rtsTrue;
 984         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 985         if (ws->todo_overflow) return rtsTrue;
 986     }
 987
 988 #if defined(THREADED_RTS)
 989     if (work_stealing) {
 990         nat n;
 991         // look for work to steal
 992         for (n = 0; n < n_gc_threads; n++) {
 993             if (n == gct->thread_index) continue;
 994             for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
 995                 ws = &gc_threads[n]->gens[g];
 996                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 997             }
 998         }
 999     }
1000 #endif
1001
1002     gct->no_work++;
1003
1004     return rtsFalse;
1005 }
1006
1007 static void
1008 scavenge_until_all_done (void)
1009 {
1010     nat r;
1011
1012
1013 loop:
1014     traceEvent(&capabilities[gct->thread_index], EVENT_GC_WORK);
1015
1016 #if defined(THREADED_RTS)
1017     if (n_gc_threads > 1) {
1018         scavenge_loop();
1019     } else {
1020         scavenge_loop1();
1021     }
1022 #else
1023     scavenge_loop();
1024 #endif
1025
1026     // scavenge_loop() only exits when there's no work to do
1027     r = dec_running();
1028
1029     traceEvent(&capabilities[gct->thread_index], EVENT_GC_IDLE);
1030
1031     debugTrace(DEBUG_gc, "%d GC threads still running", r);
1032
1033     while (gc_running_threads != 0) {
1034         // usleep(1);
1035         if (any_work()) {
1036             inc_running();
1037             goto loop;
1038         }
1039         // any_work() does not remove the work from the queue, it
1040         // just checks for the presence of work.  If we find any,
1041         // then we increment gc_running_threads and go back to
1042         // scavenge_loop() to perform any pending work.
1043     }
1044
1045     traceEvent(&capabilities[gct->thread_index], EVENT_GC_DONE);
1046 }
1047
1048 #if defined(THREADED_RTS)
1049
1050 void
1051 gcWorkerThread (Capability *cap)
1052 {
1053     gc_thread *saved_gct;
1054
1055     // necessary if we stole a callee-saves register for gct:
1056     saved_gct = gct;
1057
1058     gct = gc_threads[cap->no];
1059     gct->id = osThreadId();
1060
1061     // Wait until we're told to wake up
1062     RELEASE_SPIN_LOCK(&gct->mut_spin);
1063     gct->wakeup = GC_THREAD_STANDING_BY;
1064     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1065     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1066
1067 #ifdef USE_PAPI
1068     // start performance counters in this thread...
1069     if (gct->papi_events == -1) {
1070         papi_init_eventset(&gct->papi_events);
1071     }
1072     papi_thread_start_gc1_count(gct->papi_events);
1073 #endif
1074
1075     // Every thread evacuates some roots.
1076     gct->evac_gen = 0;
1077     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1078                          rtsTrue/*prune sparks*/);
1079     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1080
1081     scavenge_until_all_done();
1082
1083 #ifdef USE_PAPI
1084     // count events in this thread towards the GC totals
1085     papi_thread_stop_gc1_count(gct->papi_events);
1086 #endif
1087
1088     // Wait until we're told to continue
1089     RELEASE_SPIN_LOCK(&gct->gc_spin);
1090     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1091     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1092                gct->thread_index);
1093     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1094     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1095
1096     SET_GCT(saved_gct);
1097 }
1098
1099 #endif
1100
1101 #if defined(THREADED_RTS)
1102
1103 void
1104 waitForGcThreads (Capability *cap USED_IF_THREADS)
1105 {
1106     nat n_threads = RtsFlags.ParFlags.nNodes;
1107     nat me = cap->no;
1108     nat i, j;
1109     rtsBool retry = rtsTrue;
1110
1111     while(retry) {
1112         for (i=0; i < n_threads; i++) {
1113             if (i == me) continue;
1114             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1115                 prodCapability(&capabilities[i], cap->running_task);
1116             }
1117         }
1118         for (j=0; j < 10; j++) {
1119             retry = rtsFalse;
1120             for (i=0; i < n_threads; i++) {
1121                 if (i == me) continue;
1122                 write_barrier();
1123                 setContextSwitches();
1124                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1125                     retry = rtsTrue;
1126                 }
1127             }
1128             if (!retry) break;
1129             yieldThread();
1130         }
1131     }
1132 }
1133
1134 #endif // THREADED_RTS
1135
1136 static void
1137 start_gc_threads (void)
1138 {
1139 #if defined(THREADED_RTS)
1140     gc_running_threads = 0;
1141 #endif
1142 }
1143
1144 static void
1145 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1146 {
1147 #if defined(THREADED_RTS)
1148     nat i;
1149     for (i=0; i < n_threads; i++) {
1150         if (i == me) continue;
1151         inc_running();
1152         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1153         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1154
1155         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1156         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1157         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1158     }
1159 #endif
1160 }
1161
1162 // After GC is complete, we must wait for all GC threads to enter the
1163 // standby state, otherwise they may still be executing inside
1164 // any_work(), and may even remain awake until the next GC starts.
1165 static void
1166 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1167 {
1168 #if defined(THREADED_RTS)
1169     nat i;
1170     for (i=0; i < n_threads; i++) {
1171         if (i == me) continue;
1172         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1173     }
1174 #endif
1175 }
1176
1177 #if defined(THREADED_RTS)
1178 void
1179 releaseGCThreads (Capability *cap USED_IF_THREADS)
1180 {
1181     nat n_threads = RtsFlags.ParFlags.nNodes;
1182     nat me = cap->no;
1183     nat i;
1184     for (i=0; i < n_threads; i++) {
1185         if (i == me) continue;
1186         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1187             barf("releaseGCThreads");
1188
1189         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1190         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1191         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1192     }
1193 }
1194 #endif
1195
1196 /* ----------------------------------------------------------------------------
1197    Initialise a generation that is to be collected
1198    ------------------------------------------------------------------------- */
1199
1200 static void
1201 init_collected_gen (nat g, nat n_threads)
1202 {
1203     nat t, i;
1204     gen_workspace *ws;
1205     generation *gen;
1206     bdescr *bd;
1207
1208     // Throw away the current mutable list.  Invariant: the mutable
1209     // list always has at least one block; this means we can avoid a
1210     // check for NULL in recordMutable().
1211     if (g != 0) {
1212         freeChain(generations[g].mut_list);
1213         generations[g].mut_list = allocBlock();
1214         for (i = 0; i < n_capabilities; i++) {
1215             freeChain(capabilities[i].mut_lists[g]);
1216             capabilities[i].mut_lists[g] = allocBlock();
1217         }
1218     }
1219
1220     gen = &generations[g];
1221     ASSERT(gen->no == g);
1222
1223     // we'll construct a new list of threads in this step
1224     // during GC, throw away the current list.
1225     gen->old_threads = gen->threads;
1226     gen->threads = END_TSO_QUEUE;
1227
1228     // deprecate the existing blocks
1229     gen->old_blocks   = gen->blocks;
1230     gen->n_old_blocks = gen->n_blocks;
1231     gen->blocks       = NULL;
1232     gen->n_blocks     = 0;
1233     gen->n_words      = 0;
1234     gen->live_estimate = 0;
1235
1236     // initialise the large object queues.
1237     gen->scavenged_large_objects = NULL;
1238     gen->n_scavenged_large_blocks = 0;
1239
1240     // mark the small objects as from-space
1241     for (bd = gen->old_blocks; bd; bd = bd->link) {
1242         bd->flags &= ~BF_EVACUATED;
1243     }
1244
1245     // mark the large objects as from-space
1246     for (bd = gen->large_objects; bd; bd = bd->link) {
1247         bd->flags &= ~BF_EVACUATED;
1248     }
1249
1250     // for a compacted generation, we need to allocate the bitmap
1251     if (gen->mark) {
1252         nat bitmap_size; // in bytes
1253         bdescr *bitmap_bdescr;
1254         StgWord *bitmap;
1255
1256         bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1257
1258         if (bitmap_size > 0) {
1259             bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1260                                        / BLOCK_SIZE);
1261             gen->bitmap = bitmap_bdescr;
1262             bitmap = bitmap_bdescr->start;
1263
1264             debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1265                        bitmap_size, bitmap);
1266
1267             // don't forget to fill it with zeros!
1268             memset(bitmap, 0, bitmap_size);
1269
1270             // For each block in this step, point to its bitmap from the
1271             // block descriptor.
1272             for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1273                 bd->u.bitmap = bitmap;
1274                 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1275
1276                 // Also at this point we set the BF_MARKED flag
1277                 // for this block.  The invariant is that
1278                 // BF_MARKED is always unset, except during GC
1279                 // when it is set on those blocks which will be
1280                 // compacted.
1281                 if (!(bd->flags & BF_FRAGMENTED)) {
1282                     bd->flags |= BF_MARKED;
1283                 }
1284             }
1285         }
1286     }
1287
1288     // For each GC thread, for each step, allocate a "todo" block to
1289     // store evacuated objects to be scavenged, and a block to store
1290     // evacuated objects that do not need to be scavenged.
1291     for (t = 0; t < n_threads; t++) {
1292         ws = &gc_threads[t]->gens[g];
1293
1294         ws->todo_large_objects = NULL;
1295
1296         ws->part_list = NULL;
1297         ws->n_part_blocks = 0;
1298
1299         // allocate the first to-space block; extra blocks will be
1300         // chained on as necessary.
1301         ws->todo_bd = NULL;
1302         ASSERT(looksEmptyWSDeque(ws->todo_q));
1303         alloc_todo_block(ws,0);
1304
1305         ws->todo_overflow = NULL;
1306         ws->n_todo_overflow = 0;
1307
1308         ws->scavd_list = NULL;
1309         ws->n_scavd_blocks = 0;
1310     }
1311 }
1312
1313
1314 /* ----------------------------------------------------------------------------
1315    Initialise a generation that is *not* to be collected
1316    ------------------------------------------------------------------------- */
1317
1318 static void
1319 init_uncollected_gen (nat g, nat threads)
1320 {
1321     nat t, n;
1322     gen_workspace *ws;
1323     generation *gen;
1324     bdescr *bd;
1325
1326     // save the current mutable lists for this generation, and
1327     // allocate a fresh block for each one.  We'll traverse these
1328     // mutable lists as roots early on in the GC.
1329     generations[g].saved_mut_list = generations[g].mut_list;
1330     generations[g].mut_list = allocBlock();
1331     for (n = 0; n < n_capabilities; n++) {
1332         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
1333         capabilities[n].mut_lists[g] = allocBlock();
1334     }
1335
1336     gen = &generations[g];
1337
1338     gen->scavenged_large_objects = NULL;
1339     gen->n_scavenged_large_blocks = 0;
1340
1341     for (t = 0; t < threads; t++) {
1342         ws = &gc_threads[t]->gens[g];
1343
1344         ASSERT(looksEmptyWSDeque(ws->todo_q));
1345         ws->todo_large_objects = NULL;
1346
1347         ws->part_list = NULL;
1348         ws->n_part_blocks = 0;
1349
1350         ws->scavd_list = NULL;
1351         ws->n_scavd_blocks = 0;
1352
1353         // If the block at the head of the list in this generation
1354         // is less than 3/4 full, then use it as a todo block.
1355         if (gen->blocks && isPartiallyFull(gen->blocks))
1356         {
1357             ws->todo_bd = gen->blocks;
1358             ws->todo_free = ws->todo_bd->free;
1359             ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1360             gen->blocks = gen->blocks->link;
1361             gen->n_blocks -= 1;
1362             gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1363             ws->todo_bd->link = NULL;
1364             // we must scan from the current end point.
1365             ws->todo_bd->u.scan = ws->todo_bd->free;
1366         }
1367         else
1368         {
1369             ws->todo_bd = NULL;
1370             alloc_todo_block(ws,0);
1371         }
1372     }
1373
1374     // deal out any more partial blocks to the threads' part_lists
1375     t = 0;
1376     while (gen->blocks && isPartiallyFull(gen->blocks))
1377     {
1378         bd = gen->blocks;
1379         gen->blocks = bd->link;
1380         ws = &gc_threads[t]->gens[g];
1381         bd->link = ws->part_list;
1382         ws->part_list = bd;
1383         ws->n_part_blocks += 1;
1384         bd->u.scan = bd->free;
1385         gen->n_blocks -= 1;
1386         gen->n_words -= bd->free - bd->start;
1387         t++;
1388         if (t == n_gc_threads) t = 0;
1389     }
1390 }
1391
1392 /* -----------------------------------------------------------------------------
1393    Initialise a gc_thread before GC
1394    -------------------------------------------------------------------------- */
1395
1396 static void
1397 init_gc_thread (gc_thread *t)
1398 {
1399     t->static_objects = END_OF_STATIC_LIST;
1400     t->scavenged_static_objects = END_OF_STATIC_LIST;
1401     t->scan_bd = NULL;
1402     t->mut_lists = capabilities[t->thread_index].mut_lists;
1403     t->evac_gen = 0;
1404     t->failed_to_evac = rtsFalse;
1405     t->eager_promotion = rtsTrue;
1406     t->thunk_selector_depth = 0;
1407     t->copied = 0;
1408     t->scanned = 0;
1409     t->any_work = 0;
1410     t->no_work = 0;
1411     t->scav_find_work = 0;
1412 }
1413
1414 /* -----------------------------------------------------------------------------
1415    Function we pass to evacuate roots.
1416    -------------------------------------------------------------------------- */
1417
1418 static void
1419 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1420 {
1421     // we stole a register for gct, but this function is called from
1422     // *outside* the GC where the register variable is not in effect,
1423     // so we need to save and restore it here.  NB. only call
1424     // mark_root() from the main GC thread, otherwise gct will be
1425     // incorrect.
1426     gc_thread *saved_gct;
1427     saved_gct = gct;
1428     SET_GCT(user);
1429
1430     evacuate(root);
1431
1432     SET_GCT(saved_gct);
1433 }
1434
1435 /* -----------------------------------------------------------------------------
1436    Initialising the static object & mutable lists
1437    -------------------------------------------------------------------------- */
1438
1439 static void
1440 zero_static_object_list(StgClosure* first_static)
1441 {
1442   StgClosure* p;
1443   StgClosure* link;
1444   const StgInfoTable *info;
1445
1446   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1447     info = get_itbl(p);
1448     link = *STATIC_LINK(info, p);
1449     *STATIC_LINK(info,p) = NULL;
1450   }
1451 }
1452
1453 /* ----------------------------------------------------------------------------
1454    Update the pointers from the task list
1455
1456    These are treated as weak pointers because we want to allow a main
1457    thread to get a BlockedOnDeadMVar exception in the same way as any
1458    other thread.  Note that the threads should all have been retained
1459    by GC by virtue of being on the all_threads list, we're just
1460    updating pointers here.
1461    ------------------------------------------------------------------------- */
1462
1463 static void
1464 update_task_list (void)
1465 {
1466     Task *task;
1467     StgTSO *tso;
1468     for (task = all_tasks; task != NULL; task = task->all_link) {
1469         if (!task->stopped && task->tso) {
1470             ASSERT(task->tso->bound == task);
1471             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1472             if (tso == NULL) {
1473                 barf("task %p: main thread %d has been GC'd",
1474 #ifdef THREADED_RTS
1475                      (void *)task->id,
1476 #else
1477                      (void *)task,
1478 #endif
1479                      task->tso->id);
1480             }
1481             task->tso = tso;
1482         }
1483     }
1484 }
1485
1486 /* ----------------------------------------------------------------------------
1487    Reset the sizes of the older generations when we do a major
1488    collection.
1489
1490    CURRENT STRATEGY: make all generations except zero the same size.
1491    We have to stay within the maximum heap size, and leave a certain
1492    percentage of the maximum heap size available to allocate into.
1493    ------------------------------------------------------------------------- */
1494
1495 static void
1496 resize_generations (void)
1497 {
1498     nat g;
1499
1500     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1501         nat live, size, min_alloc, words;
1502         nat max  = RtsFlags.GcFlags.maxHeapSize;
1503         nat gens = RtsFlags.GcFlags.generations;
1504
1505         // live in the oldest generations
1506         if (oldest_gen->live_estimate != 0) {
1507             words = oldest_gen->live_estimate;
1508         } else {
1509             words = oldest_gen->n_words;
1510         }
1511         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1512             oldest_gen->n_large_blocks;
1513
1514         // default max size for all generations except zero
1515         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1516                        RtsFlags.GcFlags.minOldGenSize);
1517
1518         if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1519             RtsFlags.GcFlags.heapSizeSuggestion = size;
1520         }
1521
1522         // minimum size for generation zero
1523         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1524                             RtsFlags.GcFlags.minAllocAreaSize);
1525
1526         // Auto-enable compaction when the residency reaches a
1527         // certain percentage of the maximum heap size (default: 30%).
1528         if (RtsFlags.GcFlags.generations > 1 &&
1529             (RtsFlags.GcFlags.compact ||
1530              (max > 0 &&
1531               oldest_gen->n_blocks >
1532               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1533             oldest_gen->mark = 1;
1534             oldest_gen->compact = 1;
1535 //        debugBelch("compaction: on\n", live);
1536         } else {
1537             oldest_gen->mark = 0;
1538             oldest_gen->compact = 0;
1539 //        debugBelch("compaction: off\n", live);
1540         }
1541
1542         if (RtsFlags.GcFlags.sweep) {
1543             oldest_gen->mark = 1;
1544         }
1545
1546         // if we're going to go over the maximum heap size, reduce the
1547         // size of the generations accordingly.  The calculation is
1548         // different if compaction is turned on, because we don't need
1549         // to double the space required to collect the old generation.
1550         if (max != 0) {
1551
1552             // this test is necessary to ensure that the calculations
1553             // below don't have any negative results - we're working
1554             // with unsigned values here.
1555             if (max < min_alloc) {
1556                 heapOverflow();
1557             }
1558
1559             if (oldest_gen->compact) {
1560                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1561                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1562                 }
1563             } else {
1564                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1565                     size = (max - min_alloc) / ((gens - 1) * 2);
1566                 }
1567             }
1568
1569             if (size < live) {
1570                 heapOverflow();
1571             }
1572         }
1573
1574 #if 0
1575         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1576                    min_alloc, size, max);
1577 #endif
1578
1579         for (g = 0; g < gens; g++) {
1580             generations[g].max_blocks = size;
1581         }
1582     }
1583 }
1584
1585 /* -----------------------------------------------------------------------------
1586    Calculate the new size of the nursery, and resize it.
1587    -------------------------------------------------------------------------- */
1588
1589 static void
1590 resize_nursery (void)
1591 {
1592     lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1593
1594     if (RtsFlags.GcFlags.generations == 1)
1595     {   // Two-space collector:
1596         nat blocks;
1597
1598         /* set up a new nursery.  Allocate a nursery size based on a
1599          * function of the amount of live data (by default a factor of 2)
1600          * Use the blocks from the old nursery if possible, freeing up any
1601          * left over blocks.
1602          *
1603          * If we get near the maximum heap size, then adjust our nursery
1604          * size accordingly.  If the nursery is the same size as the live
1605          * data (L), then we need 3L bytes.  We can reduce the size of the
1606          * nursery to bring the required memory down near 2L bytes.
1607          *
1608          * A normal 2-space collector would need 4L bytes to give the same
1609          * performance we get from 3L bytes, reducing to the same
1610          * performance at 2L bytes.
1611          */
1612         blocks = generations[0].n_blocks;
1613
1614         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1615              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1616              RtsFlags.GcFlags.maxHeapSize )
1617         {
1618             long adjusted_blocks;  // signed on purpose
1619             int pc_free;
1620
1621             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1622
1623             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1624                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1625
1626             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1627             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1628             {
1629                 heapOverflow();
1630             }
1631             blocks = adjusted_blocks;
1632         }
1633         else
1634         {
1635             blocks *= RtsFlags.GcFlags.oldGenFactor;
1636             if (blocks < min_nursery)
1637             {
1638                 blocks = min_nursery;
1639             }
1640         }
1641         resizeNurseries(blocks);
1642     }
1643     else  // Generational collector
1644     {
1645         /*
1646          * If the user has given us a suggested heap size, adjust our
1647          * allocation area to make best use of the memory available.
1648          */
1649         if (RtsFlags.GcFlags.heapSizeSuggestion)
1650         {
1651             long blocks;
1652             nat needed = calcNeeded();  // approx blocks needed at next GC
1653
1654             /* Guess how much will be live in generation 0 step 0 next time.
1655              * A good approximation is obtained by finding the
1656              * percentage of g0 that was live at the last minor GC.
1657              *
1658              * We have an accurate figure for the amount of copied data in
1659              * 'copied', but we must convert this to a number of blocks, with
1660              * a small adjustment for estimated slop at the end of a block
1661              * (- 10 words).
1662              */
1663             if (N == 0)
1664             {
1665                 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1666                     / countNurseryBlocks();
1667             }
1668
1669             /* Estimate a size for the allocation area based on the
1670              * information available.  We might end up going slightly under
1671              * or over the suggested heap size, but we should be pretty
1672              * close on average.
1673              *
1674              * Formula:            suggested - needed
1675              *                ----------------------------
1676              *                    1 + g0_pcnt_kept/100
1677              *
1678              * where 'needed' is the amount of memory needed at the next
1679              * collection for collecting all gens except g0.
1680              */
1681             blocks =
1682                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1683                 (100 + (long)g0_pcnt_kept);
1684
1685             if (blocks < (long)min_nursery) {
1686                 blocks = min_nursery;
1687             }
1688
1689             resizeNurseries((nat)blocks);
1690         }
1691         else
1692         {
1693             // we might have added extra large blocks to the nursery, so
1694             // resize back to minAllocAreaSize again.
1695             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1696         }
1697     }
1698 }
1699
1700 /* -----------------------------------------------------------------------------
1701    Sanity code for CAF garbage collection.
1702
1703    With DEBUG turned on, we manage a CAF list in addition to the SRT
1704    mechanism.  After GC, we run down the CAF list and blackhole any
1705    CAFs which have been garbage collected.  This means we get an error
1706    whenever the program tries to enter a garbage collected CAF.
1707
1708    Any garbage collected CAFs are taken off the CAF list at the same
1709    time.
1710    -------------------------------------------------------------------------- */
1711
1712 #if 0 && defined(DEBUG)
1713
1714 static void
1715 gcCAFs(void)
1716 {
1717   StgClosure*  p;
1718   StgClosure** pp;
1719   const StgInfoTable *info;
1720   nat i;
1721
1722   i = 0;
1723   p = caf_list;
1724   pp = &caf_list;
1725
1726   while (p != NULL) {
1727
1728     info = get_itbl(p);
1729
1730     ASSERT(info->type == IND_STATIC);
1731
1732     if (STATIC_LINK(info,p) == NULL) {
1733         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1734         // black hole it
1735         SET_INFO(p,&stg_BLACKHOLE_info);
1736         p = STATIC_LINK2(info,p);
1737         *pp = p;
1738     }
1739     else {
1740       pp = &STATIC_LINK2(info,p);
1741       p = *pp;
1742       i++;
1743     }
1744
1745   }
1746
1747   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1748 }
1749 #endif