rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "Compact.h"
  44 #include "Evac.h"
  45 #include "Scav.h"
  46 #include "GCUtils.h"
  47 #include "MarkStack.h"
  48 #include "MarkWeak.h"
  49 #include "Sparks.h"
  50 #include "Sweep.h"
  51
  52 #include <string.h> // for memset()
  53 #include <unistd.h>
  54
  55 /* -----------------------------------------------------------------------------
  56    Global variables
  57    -------------------------------------------------------------------------- */
  58
  59 /* STATIC OBJECT LIST.
  60  *
  61  * During GC:
  62  * We maintain a linked list of static objects that are still live.
  63  * The requirements for this list are:
  64  *
  65  *  - we need to scan the list while adding to it, in order to
  66  *    scavenge all the static objects (in the same way that
  67  *    breadth-first scavenging works for dynamic objects).
  68  *
  69  *  - we need to be able to tell whether an object is already on
  70  *    the list, to break loops.
  71  *
  72  * Each static object has a "static link field", which we use for
  73  * linking objects on to the list.  We use a stack-type list, consing
  74  * objects on the front as they are added (this means that the
  75  * scavenge phase is depth-first, not breadth-first, but that
  76  * shouldn't matter).
  77  *
  78  * A separate list is kept for objects that have been scavenged
  79  * already - this is so that we can zero all the marks afterwards.
  80  *
  81  * An object is on the list if its static link field is non-zero; this
  82  * means that we have to mark the end of the list with '1', not NULL.
  83  *
  84  * Extra notes for generational GC:
  85  *
  86  * Each generation has a static object list associated with it.  When
  87  * collecting generations up to N, we treat the static object lists
  88  * from generations > N as roots.
  89  *
  90  * We build up a static object list while collecting generations 0..N,
  91  * which is then appended to the static object list of generation N+1.
  92  */
  93
  94 /* N is the oldest generation being collected, where the generations
  95  * are numbered starting at 0.  A major GC (indicated by the major_gc
  96  * flag) is when we're collecting all generations.  We only attempt to
  97  * deal with static objects and GC CAFs when doing a major GC.
  98  */
  99 nat N;
 100 rtsBool major_gc;
 101
 102 /* Data used for allocation area sizing.
 103  */
 104 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 105
 106 /* Mut-list stats */
 107 #ifdef DEBUG
 108 nat mutlist_MUTVARS,
 109     mutlist_MUTARRS,
 110     mutlist_MVARS,
 111     mutlist_OTHERS;
 112 #endif
 113
 114 /* Thread-local data for each GC thread
 115  */
 116 gc_thread **gc_threads = NULL;
 117
 118 #if !defined(THREADED_RTS)
 119 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)];
 120 #endif
 121
 122 // Number of threads running in *this* GC.  Affects how many
 123 // step->todos[] lists we have to look in to find work.
 124 nat n_gc_threads;
 125
 126 // For stats:
 127 long copied;        // *words* copied & scavenged during this GC
 128
 129 rtsBool work_stealing;
 130
 131 DECLARE_GCT
 132
 133 /* -----------------------------------------------------------------------------
 134    Static function declarations
 135    -------------------------------------------------------------------------- */
 136
 137 static void mark_root               (void *user, StgClosure **root);
 138 static void zero_static_object_list (StgClosure* first_static);
 139 static nat  initialise_N            (rtsBool force_major_gc);
 140 static void init_collected_gen      (nat g, nat threads);
 141 static void init_uncollected_gen    (nat g, nat threads);
 142 static void init_gc_thread          (gc_thread *t);
 143 static void update_task_list        (void);
 144 static void resize_generations      (void);
 145 static void resize_nursery          (void);
 146 static void start_gc_threads        (void);
 147 static void scavenge_until_all_done (void);
 148 static StgWord inc_running          (void);
 149 static StgWord dec_running          (void);
 150 static void wakeup_gc_threads       (nat n_threads, nat me);
 151 static void shutdown_gc_threads     (nat n_threads, nat me);
 152
 153 #if 0 && defined(DEBUG)
 154 static void gcCAFs                  (void);
 155 #endif
 156
 157 /* -----------------------------------------------------------------------------
 158    The mark stack.
 159    -------------------------------------------------------------------------- */
 160
 161 bdescr *mark_stack_top_bd; // topmost block in the mark stack
 162 bdescr *mark_stack_bd;     // current block in the mark stack
 163 StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
 164
 165 /* -----------------------------------------------------------------------------
 166    GarbageCollect: the main entry point to the garbage collector.
 167
 168    Locks held: all capabilities are held throughout GarbageCollect().
 169    -------------------------------------------------------------------------- */
 170
 171 void
 172 GarbageCollect (rtsBool force_major_gc,
 173                 nat gc_type USED_IF_THREADS,
 174                 Capability *cap)
 175 {
 176   bdescr *bd;
 177   step *stp;
 178   lnat live, allocated, max_copied, avg_copied, slop;
 179   gc_thread *saved_gct;
 180   nat g, s, t, n;
 181
 182   // necessary if we stole a callee-saves register for gct:
 183   saved_gct = gct;
 184
 185 #ifdef PROFILING
 186   CostCentreStack *prev_CCS;
 187 #endif
 188
 189   ACQUIRE_SM_LOCK;
 190
 191 #if defined(RTS_USER_SIGNALS)
 192   if (RtsFlags.MiscFlags.install_signal_handlers) {
 193     // block signals
 194     blockUserSignals();
 195   }
 196 #endif
 197
 198   ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
 199   // otherwise adjust the padding in step_workspace.
 200
 201   // tell the stats department that we've started a GC
 202   stat_startGC();
 203
 204   // tell the STM to discard any cached closures it's hoping to re-use
 205   stmPreGCHook();
 206
 207   // lock the StablePtr table
 208   stablePtrPreGC();
 209
 210 #ifdef DEBUG
 211   mutlist_MUTVARS = 0;
 212   mutlist_MUTARRS = 0;
 213   mutlist_OTHERS = 0;
 214 #endif
 215
 216   // attribute any costs to CCS_GC
 217 #ifdef PROFILING
 218   prev_CCS = CCCS;
 219   CCCS = CCS_GC;
 220 #endif
 221
 222   /* Approximate how much we allocated.
 223    * Todo: only when generating stats?
 224    */
 225   allocated = calcAllocated();
 226
 227   /* Figure out which generation to collect
 228    */
 229   n = initialise_N(force_major_gc);
 230
 231 #if defined(THREADED_RTS)
 232   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
 233                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
 234       // It's not always a good idea to do load balancing in parallel
 235       // GC.  In particular, for a parallel program we don't want to
 236       // lose locality by moving cached data into another CPU's cache
 237       // (this effect can be quite significant).
 238       //
 239       // We could have a more complex way to deterimine whether to do
 240       // work stealing or not, e.g. it might be a good idea to do it
 241       // if the heap is big.  For now, we just turn it on or off with
 242       // a flag.
 243 #endif
 244
 245   /* Start threads, so they can be spinning up while we finish initialisation.
 246    */
 247   start_gc_threads();
 248
 249 #if defined(THREADED_RTS)
 250   /* How many threads will be participating in this GC?
 251    * We don't try to parallelise minor GCs (unless the user asks for
 252    * it with +RTS -gn0), or mark/compact/sweep GC.
 253    */
 254   if (gc_type == PENDING_GC_PAR) {
 255       n_gc_threads = RtsFlags.ParFlags.nNodes;
 256   } else {
 257       n_gc_threads = 1;
 258   }
 259 #else
 260   n_gc_threads = 1;
 261 #endif
 262
 263   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 264         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 265
 266 #ifdef RTS_GTK_FRONTPANEL
 267   if (RtsFlags.GcFlags.frontpanel) {
 268       updateFrontPanelBeforeGC(N);
 269   }
 270 #endif
 271
 272 #ifdef DEBUG
 273   // check for memory leaks if DEBUG is on
 274   memInventory(DEBUG_gc);
 275 #endif
 276
 277   // check stack sanity *before* GC
 278   IF_DEBUG(sanity, checkFreeListSanity());
 279   IF_DEBUG(sanity, checkMutableLists(rtsTrue));
 280
 281   // Initialise all our gc_thread structures
 282   for (t = 0; t < n_gc_threads; t++) {
 283       init_gc_thread(gc_threads[t]);
 284   }
 285
 286   // Initialise all the generations/steps that we're collecting.
 287   for (g = 0; g <= N; g++) {
 288       init_collected_gen(g,n_gc_threads);
 289   }
 290
 291   // Initialise all the generations/steps that we're *not* collecting.
 292   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 293       init_uncollected_gen(g,n_gc_threads);
 294   }
 295
 296   /* Allocate a mark stack if we're doing a major collection.
 297    */
 298   if (major_gc && oldest_gen->steps[0].mark) {
 299       mark_stack_bd     = allocBlock();
 300       mark_stack_top_bd = mark_stack_bd;
 301       mark_stack_bd->link = NULL;
 302       mark_stack_bd->u.back = NULL;
 303       mark_sp           = mark_stack_bd->start;
 304   } else {
 305       mark_stack_bd     = NULL;
 306       mark_stack_top_bd = NULL;
 307       mark_sp           = NULL;
 308   }
 309
 310   // this is the main thread
 311 #ifdef THREADED_RTS
 312   if (n_gc_threads == 1) {
 313       SET_GCT(gc_threads[0]);
 314   } else {
 315       SET_GCT(gc_threads[cap->no]);
 316   }
 317 #else
 318 SET_GCT(gc_threads[0]);
 319 #endif
 320
 321   /* -----------------------------------------------------------------------
 322    * follow all the roots that we know about:
 323    */
 324
 325   // the main thread is running: this prevents any other threads from
 326   // exiting prematurely, so we can start them now.
 327   // NB. do this after the mutable lists have been saved above, otherwise
 328   // the other GC threads will be writing into the old mutable lists.
 329   inc_running();
 330   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 331
 332   // Mutable lists from each generation > N
 333   // we want to *scavenge* these roots, not evacuate them: they're not
 334   // going to move in this GC.
 335   // Also do them in reverse generation order, for the usual reason:
 336   // namely to reduce the likelihood of spurious old->new pointers.
 337   //
 338   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 339       scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 340       freeChain_sync(generations[g].saved_mut_list);
 341       generations[g].saved_mut_list = NULL;
 342
 343   }
 344
 345   // scavenge the capability-private mutable lists.  This isn't part
 346   // of markSomeCapabilities() because markSomeCapabilities() can only
 347   // call back into the GC via mark_root() (due to the gct register
 348   // variable).
 349   if (n_gc_threads == 1) {
 350       for (n = 0; n < n_capabilities; n++) {
 351           scavenge_capability_mut_lists(&capabilities[n]);
 352       }
 353   } else {
 354       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 355   }
 356
 357   // follow roots from the CAF list (used by GHCi)
 358   gct->evac_step = 0;
 359   markCAFs(mark_root, gct);
 360
 361   // follow all the roots that the application knows about.
 362   gct->evac_step = 0;
 363   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 364                        rtsTrue/*prune sparks*/);
 365
 366 #if defined(RTS_USER_SIGNALS)
 367   // mark the signal handlers (signals should be already blocked)
 368   markSignalHandlers(mark_root, gct);
 369 #endif
 370
 371   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 372   markWeakPtrList();
 373   initWeakForGC();
 374
 375   // Mark the stable pointer table.
 376   markStablePtrTable(mark_root, gct);
 377
 378   /* -------------------------------------------------------------------------
 379    * Repeatedly scavenge all the areas we know about until there's no
 380    * more scavenging to be done.
 381    */
 382   for (;;)
 383   {
 384       scavenge_until_all_done();
 385       // The other threads are now stopped.  We might recurse back to
 386       // here, but from now on this is the only thread.
 387
 388       // if any blackholes are alive, make the threads that wait on
 389       // them alive too.
 390       if (traverseBlackholeQueue()) {
 391           inc_running();
 392           continue;
 393       }
 394
 395       // must be last...  invariant is that everything is fully
 396       // scavenged at this point.
 397       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 398           inc_running();
 399           continue;
 400       }
 401
 402       // If we get to here, there's really nothing left to do.
 403       break;
 404   }
 405
 406   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 407
 408   // Update pointers from the Task list
 409   update_task_list();
 410
 411   // Now see which stable names are still alive.
 412   gcStablePtrTable();
 413
 414 #ifdef PROFILING
 415   // We call processHeapClosureForDead() on every closure destroyed during
 416   // the current garbage collection, so we invoke LdvCensusForDead().
 417   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 418       || RtsFlags.ProfFlags.bioSelector != NULL)
 419     LdvCensusForDead(N);
 420 #endif
 421
 422   // NO MORE EVACUATION AFTER THIS POINT!
 423
 424   // Two-space collector: free the old to-space.
 425   // g0s0->old_blocks is the old nursery
 426   // g0s0->blocks is to-space from the previous GC
 427   if (RtsFlags.GcFlags.generations == 1) {
 428       if (g0->steps[0].blocks != NULL) {
 429           freeChain(g0->steps[0].blocks);
 430           g0->steps[0].blocks = NULL;
 431       }
 432   }
 433
 434   // For each workspace, in each thread, move the copied blocks to the step
 435   {
 436       gc_thread *thr;
 437       step_workspace *ws;
 438       bdescr *prev, *next;
 439
 440       for (t = 0; t < n_gc_threads; t++) {
 441           thr = gc_threads[t];
 442
 443           // not step 0
 444           if (RtsFlags.GcFlags.generations == 1) {
 445               s = 0;
 446           } else {
 447               s = 1;
 448           }
 449           for (; s < total_steps; s++) {
 450               ws = &thr->steps[s];
 451
 452               // Push the final block
 453               if (ws->todo_bd) {
 454                   push_scanned_block(ws->todo_bd, ws);
 455               }
 456
 457               ASSERT(gct->scan_bd == NULL);
 458               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 459
 460               prev = NULL;
 461               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 462                   ws->step->n_words += bd->free - bd->start;
 463                   prev = bd;
 464               }
 465               if (prev != NULL) {
 466                   prev->link = ws->step->blocks;
 467                   ws->step->blocks = ws->scavd_list;
 468               }
 469               ws->step->n_blocks += ws->n_scavd_blocks;
 470           }
 471       }
 472
 473       // Add all the partial blocks *after* we've added all the full
 474       // blocks.  This is so that we can grab the partial blocks back
 475       // again and try to fill them up in the next GC.
 476       for (t = 0; t < n_gc_threads; t++) {
 477           thr = gc_threads[t];
 478
 479           // not step 0
 480           if (RtsFlags.GcFlags.generations == 1) {
 481               s = 0;
 482           } else {
 483               s = 1;
 484           }
 485           for (; s < total_steps; s++) {
 486               ws = &thr->steps[s];
 487
 488               prev = NULL;
 489               for (bd = ws->part_list; bd != NULL; bd = next) {
 490                   next = bd->link;
 491                   if (bd->free == bd->start) {
 492                       if (prev == NULL) {
 493                           ws->part_list = next;
 494                       } else {
 495                           prev->link = next;
 496                       }
 497                       freeGroup(bd);
 498                       ws->n_part_blocks--;
 499                   } else {
 500                       ws->step->n_words += bd->free - bd->start;
 501                       prev = bd;
 502                   }
 503               }
 504               if (prev != NULL) {
 505                   prev->link = ws->step->blocks;
 506                   ws->step->blocks = ws->part_list;
 507               }
 508               ws->step->n_blocks += ws->n_part_blocks;
 509
 510               ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
 511               ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
 512           }
 513       }
 514   }
 515
 516   // Finally: compact or sweep the oldest generation.
 517   if (major_gc && oldest_gen->steps[0].mark) {
 518       if (oldest_gen->steps[0].compact)
 519           compact(gct->scavenged_static_objects);
 520       else
 521           sweep(&oldest_gen->steps[0]);
 522   }
 523
 524   /* run through all the generations/steps and tidy up
 525    */
 526   copied = 0;
 527   max_copied = 0;
 528   avg_copied = 0;
 529   {
 530       nat i;
 531       for (i=0; i < n_gc_threads; i++) {
 532           if (n_gc_threads > 1) {
 533               debugTrace(DEBUG_gc,"thread %d:", i);
 534               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 535               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 536               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 537               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 538               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 539           }
 540           copied += gc_threads[i]->copied;
 541           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 542       }
 543       if (n_gc_threads == 1) {
 544           max_copied = 0;
 545           avg_copied = 0;
 546       } else {
 547           avg_copied = copied;
 548       }
 549   }
 550
 551   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 552
 553     if (g == N) {
 554       generations[g].collections++; // for stats
 555       if (n_gc_threads > 1) generations[g].par_collections++;
 556     }
 557
 558     // Count the mutable list as bytes "copied" for the purposes of
 559     // stats.  Every mutable list is copied during every GC.
 560     if (g > 0) {
 561         nat mut_list_size = 0;
 562         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 563             mut_list_size += bd->free - bd->start;
 564         }
 565         for (n = 0; n < n_capabilities; n++) {
 566             for (bd = capabilities[n].mut_lists[g];
 567                  bd != NULL; bd = bd->link) {
 568                 mut_list_size += bd->free - bd->start;
 569             }
 570         }
 571         copied +=  mut_list_size;
 572
 573         debugTrace(DEBUG_gc,
 574                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 575                    (unsigned long)(mut_list_size * sizeof(W_)),
 576                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 577     }
 578
 579     for (s = 0; s < generations[g].n_steps; s++) {
 580       bdescr *next, *prev;
 581       stp = &generations[g].steps[s];
 582
 583       // for generations we collected...
 584       if (g <= N) {
 585
 586         /* free old memory and shift to-space into from-space for all
 587          * the collected steps (except the allocation area).  These
 588          * freed blocks will probaby be quickly recycled.
 589          */
 590         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 591             if (stp->mark)
 592             {
 593                 // tack the new blocks on the end of the existing blocks
 594                 if (stp->old_blocks != NULL) {
 595
 596                     prev = NULL;
 597                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 598
 599                         next = bd->link;
 600
 601                         if (!(bd->flags & BF_MARKED))
 602                         {
 603                             if (prev == NULL) {
 604                                 stp->old_blocks = next;
 605                             } else {
 606                                 prev->link = next;
 607                             }
 608                             freeGroup(bd);
 609                             stp->n_old_blocks--;
 610                         }
 611                         else
 612                         {
 613                             stp->n_words += bd->free - bd->start;
 614
 615                             // NB. this step might not be compacted next
 616                             // time, so reset the BF_MARKED flags.
 617                             // They are set before GC if we're going to
 618                             // compact.  (search for BF_MARKED above).
 619                             bd->flags &= ~BF_MARKED;
 620
 621                             // between GCs, all blocks in the heap except
 622                             // for the nursery have the BF_EVACUATED flag set.
 623                             bd->flags |= BF_EVACUATED;
 624
 625                             prev = bd;
 626                         }
 627                     }
 628
 629                     if (prev != NULL) {
 630                         prev->link = stp->blocks;
 631                         stp->blocks = stp->old_blocks;
 632                     }
 633                 }
 634                 // add the new blocks to the block tally
 635                 stp->n_blocks += stp->n_old_blocks;
 636                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 637                 ASSERT(countOccupied(stp->blocks) == stp->n_words);
 638             }
 639             else // not copacted
 640             {
 641                 freeChain(stp->old_blocks);
 642             }
 643             stp->old_blocks = NULL;
 644             stp->n_old_blocks = 0;
 645         }
 646
 647         /* LARGE OBJECTS.  The current live large objects are chained on
 648          * scavenged_large, having been moved during garbage
 649          * collection from large_objects.  Any objects left on the
 650          * large_objects list are therefore dead, so we free them here.
 651          */
 652         freeChain(stp->large_objects);
 653         stp->large_objects  = stp->scavenged_large_objects;
 654         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 655         ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
 656       }
 657       else // for older generations...
 658       {
 659         /* For older generations, we need to append the
 660          * scavenged_large_object list (i.e. large objects that have been
 661          * promoted during this GC) to the large_object list for that step.
 662          */
 663         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 664           next = bd->link;
 665           dbl_link_onto(bd, &stp->large_objects);
 666         }
 667
 668         // add the new blocks we promoted during this GC
 669         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 670         ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
 671       }
 672     }
 673   }
 674
 675   // update the max size of older generations after a major GC
 676   resize_generations();
 677
 678   // Calculate the amount of live data for stats.
 679   live = calcLiveWords();
 680
 681   // Free the small objects allocated via allocate(), since this will
 682   // all have been copied into G0S1 now.
 683   if (RtsFlags.GcFlags.generations > 1) {
 684       if (g0->steps[0].blocks != NULL) {
 685           freeChain(g0->steps[0].blocks);
 686           g0->steps[0].blocks = NULL;
 687       }
 688       g0->steps[0].n_blocks = 0;
 689       g0->steps[0].n_words = 0;
 690   }
 691   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 692
 693   // Start a new pinned_object_block
 694   for (n = 0; n < n_capabilities; n++) {
 695       capabilities[n].pinned_object_block = NULL;
 696   }
 697
 698   // Free the mark stack.
 699   if (mark_stack_top_bd != NULL) {
 700       debugTrace(DEBUG_gc, "mark stack: %d blocks",
 701                  countBlocks(mark_stack_top_bd));
 702       freeChain(mark_stack_top_bd);
 703   }
 704
 705   // Free any bitmaps.
 706   for (g = 0; g <= N; g++) {
 707       for (s = 0; s < generations[g].n_steps; s++) {
 708           stp = &generations[g].steps[s];
 709           if (stp->bitmap != NULL) {
 710               freeGroup(stp->bitmap);
 711               stp->bitmap = NULL;
 712           }
 713       }
 714   }
 715
 716   resize_nursery();
 717
 718  // mark the garbage collected CAFs as dead
 719 #if 0 && defined(DEBUG) // doesn't work at the moment
 720   if (major_gc) { gcCAFs(); }
 721 #endif
 722
 723 #ifdef PROFILING
 724   // resetStaticObjectForRetainerProfiling() must be called before
 725   // zeroing below.
 726   if (n_gc_threads > 1) {
 727       barf("profiling is currently broken with multi-threaded GC");
 728       // ToDo: fix the gct->scavenged_static_objects below
 729   }
 730   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 731 #endif
 732
 733   // zero the scavenged static object list
 734   if (major_gc) {
 735       nat i;
 736       for (i = 0; i < n_gc_threads; i++) {
 737           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 738       }
 739   }
 740
 741   // Reset the nursery
 742   resetNurseries();
 743
 744   // start any pending finalizers
 745   RELEASE_SM_LOCK;
 746   scheduleFinalizers(cap, old_weak_ptr_list);
 747   ACQUIRE_SM_LOCK;
 748
 749   // send exceptions to any threads which were about to die
 750   RELEASE_SM_LOCK;
 751   resurrectThreads(resurrected_threads);
 752   performPendingThrowTos(exception_threads);
 753   ACQUIRE_SM_LOCK;
 754
 755   // Update the stable pointer hash table.
 756   updateStablePtrTable(major_gc);
 757
 758   // check sanity after GC
 759   IF_DEBUG(sanity, checkSanity());
 760
 761   // extra GC trace info
 762   IF_DEBUG(gc, statDescribeGens());
 763
 764 #ifdef DEBUG
 765   // symbol-table based profiling
 766   /*  heapCensus(to_blocks); */ /* ToDo */
 767 #endif
 768
 769   // restore enclosing cost centre
 770 #ifdef PROFILING
 771   CCCS = prev_CCS;
 772 #endif
 773
 774 #ifdef DEBUG
 775   // check for memory leaks if DEBUG is on
 776   memInventory(DEBUG_gc);
 777 #endif
 778
 779 #ifdef RTS_GTK_FRONTPANEL
 780   if (RtsFlags.GcFlags.frontpanel) {
 781       updateFrontPanelAfterGC( N, live );
 782   }
 783 #endif
 784
 785   // ok, GC over: tell the stats department what happened.
 786   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 787   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 788
 789   // unlock the StablePtr table
 790   stablePtrPostGC();
 791
 792   // Guess which generation we'll collect *next* time
 793   initialise_N(force_major_gc);
 794
 795 #if defined(RTS_USER_SIGNALS)
 796   if (RtsFlags.MiscFlags.install_signal_handlers) {
 797     // unblock signals again
 798     unblockUserSignals();
 799   }
 800 #endif
 801
 802   RELEASE_SM_LOCK;
 803
 804   SET_GCT(saved_gct);
 805 }
 806
 807 /* -----------------------------------------------------------------------------
 808    Figure out which generation to collect, initialise N and major_gc.
 809
 810    Also returns the total number of blocks in generations that will be
 811    collected.
 812    -------------------------------------------------------------------------- */
 813
 814 static nat
 815 initialise_N (rtsBool force_major_gc)
 816 {
 817     int g;
 818     nat s, blocks, blocks_total;
 819
 820     blocks = 0;
 821     blocks_total = 0;
 822
 823     if (force_major_gc) {
 824         N = RtsFlags.GcFlags.generations - 1;
 825     } else {
 826         N = 0;
 827     }
 828
 829     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 830         blocks = 0;
 831         for (s = 0; s < generations[g].n_steps; s++) {
 832             blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
 833             blocks += generations[g].steps[s].n_large_blocks;
 834         }
 835         if (blocks >= generations[g].max_blocks) {
 836             N = stg_max(N,g);
 837         }
 838         if ((nat)g <= N) {
 839             blocks_total += blocks;
 840         }
 841     }
 842
 843     blocks_total += countNurseryBlocks();
 844
 845     major_gc = (N == RtsFlags.GcFlags.generations-1);
 846     return blocks_total;
 847 }
 848
 849 /* -----------------------------------------------------------------------------
 850    Initialise the gc_thread structures.
 851    -------------------------------------------------------------------------- */
 852
 853 #define GC_THREAD_INACTIVE             0
 854 #define GC_THREAD_STANDING_BY          1
 855 #define GC_THREAD_RUNNING              2
 856 #define GC_THREAD_WAITING_TO_CONTINUE  3
 857
 858 static void
 859 new_gc_thread (nat n, gc_thread *t)
 860 {
 861     nat s;
 862     step_workspace *ws;
 863
 864 #ifdef THREADED_RTS
 865     t->id = 0;
 866     initSpinLock(&t->gc_spin);
 867     initSpinLock(&t->mut_spin);
 868     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 869     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 870                           // thread to start up, see wakeup_gc_threads
 871 #endif
 872
 873     t->thread_index = n;
 874     t->free_blocks = NULL;
 875     t->gc_count = 0;
 876
 877     init_gc_thread(t);
 878
 879 #ifdef USE_PAPI
 880     t->papi_events = -1;
 881 #endif
 882
 883     for (s = 0; s < total_steps; s++)
 884     {
 885         ws = &t->steps[s];
 886         ws->step = &all_steps[s];
 887         ASSERT(s == ws->step->abs_no);
 888         ws->my_gct = t;
 889
 890         ws->todo_bd = NULL;
 891         ws->todo_q = newWSDeque(128);
 892         ws->todo_overflow = NULL;
 893         ws->n_todo_overflow = 0;
 894
 895         ws->part_list = NULL;
 896         ws->n_part_blocks = 0;
 897
 898         ws->scavd_list = NULL;
 899         ws->n_scavd_blocks = 0;
 900     }
 901 }
 902
 903
 904 void
 905 initGcThreads (void)
 906 {
 907     if (gc_threads == NULL) {
 908 #if defined(THREADED_RTS)
 909         nat i;
 910         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 911                                      sizeof(gc_thread*),
 912                                      "alloc_gc_threads");
 913
 914         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 915             gc_threads[i] =
 916                 stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 917                                "alloc_gc_threads");
 918
 919             new_gc_thread(i, gc_threads[i]);
 920         }
 921 #else
 922         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 923         gc_threads[0] = gct;
 924         new_gc_thread(0,gc_threads[0]);
 925 #endif
 926     }
 927 }
 928
 929 void
 930 freeGcThreads (void)
 931 {
 932     nat s;
 933     if (gc_threads != NULL) {
 934 #if defined(THREADED_RTS)
 935         nat i;
 936         for (i = 0; i < n_capabilities; i++) {
 937             for (s = 0; s < total_steps; s++)
 938             {
 939                 freeWSDeque(gc_threads[i]->steps[s].todo_q);
 940             }
 941             stgFree (gc_threads[i]);
 942         }
 943         stgFree (gc_threads);
 944 #else
 945         for (s = 0; s < total_steps; s++)
 946         {
 947             freeWSDeque(gc_threads[0]->steps[s].todo_q);
 948         }
 949         stgFree (gc_threads);
 950 #endif
 951         gc_threads = NULL;
 952     }
 953 }
 954
 955 /* ----------------------------------------------------------------------------
 956    Start GC threads
 957    ------------------------------------------------------------------------- */
 958
 959 static volatile StgWord gc_running_threads;
 960
 961 static StgWord
 962 inc_running (void)
 963 {
 964     StgWord new;
 965     new = atomic_inc(&gc_running_threads);
 966     ASSERT(new <= n_gc_threads);
 967     return new;
 968 }
 969
 970 static StgWord
 971 dec_running (void)
 972 {
 973     ASSERT(gc_running_threads != 0);
 974     return atomic_dec(&gc_running_threads);
 975 }
 976
 977 static rtsBool
 978 any_work (void)
 979 {
 980     int s;
 981     step_workspace *ws;
 982
 983     gct->any_work++;
 984
 985     write_barrier();
 986
 987     // scavenge objects in compacted generation
 988     if (mark_stack_bd != NULL && !mark_stack_empty()) {
 989         return rtsTrue;
 990     }
 991
 992     // Check for global work in any step.  We don't need to check for
 993     // local work, because we have already exited scavenge_loop(),
 994     // which means there is no local work for this thread.
 995     for (s = total_steps-1; s >= 0; s--) {
 996         if (s == 0 && RtsFlags.GcFlags.generations > 1) {
 997             continue;
 998         }
 999         ws = &gct->steps[s];
1000         if (ws->todo_large_objects) return rtsTrue;
1001         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1002         if (ws->todo_overflow) return rtsTrue;
1003     }
1004
1005 #if defined(THREADED_RTS)
1006     if (work_stealing) {
1007         nat n;
1008         // look for work to steal
1009         for (n = 0; n < n_gc_threads; n++) {
1010             if (n == gct->thread_index) continue;
1011             for (s = total_steps-1; s >= 0; s--) {
1012                 ws = &gc_threads[n]->steps[s];
1013                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1014             }
1015         }
1016     }
1017 #endif
1018
1019     gct->no_work++;
1020
1021     return rtsFalse;
1022 }
1023
1024 static void
1025 scavenge_until_all_done (void)
1026 {
1027     nat r;
1028
1029
1030 loop:
1031     traceEvent(&capabilities[gct->thread_index], EVENT_GC_WORK);
1032
1033 #if defined(THREADED_RTS)
1034     if (n_gc_threads > 1) {
1035         scavenge_loop();
1036     } else {
1037         scavenge_loop1();
1038     }
1039 #else
1040     scavenge_loop();
1041 #endif
1042
1043     // scavenge_loop() only exits when there's no work to do
1044     r = dec_running();
1045
1046     traceEvent(&capabilities[gct->thread_index], EVENT_GC_IDLE);
1047
1048     debugTrace(DEBUG_gc, "%d GC threads still running", r);
1049
1050     while (gc_running_threads != 0) {
1051         // usleep(1);
1052         if (any_work()) {
1053             inc_running();
1054             goto loop;
1055         }
1056         // any_work() does not remove the work from the queue, it
1057         // just checks for the presence of work.  If we find any,
1058         // then we increment gc_running_threads and go back to
1059         // scavenge_loop() to perform any pending work.
1060     }
1061
1062     traceEvent(&capabilities[gct->thread_index], EVENT_GC_DONE);
1063 }
1064
1065 #if defined(THREADED_RTS)
1066
1067 void
1068 gcWorkerThread (Capability *cap)
1069 {
1070     gc_thread *saved_gct;
1071
1072     // necessary if we stole a callee-saves register for gct:
1073     saved_gct = gct;
1074
1075     cap->in_gc = rtsTrue;
1076
1077     gct = gc_threads[cap->no];
1078     gct->id = osThreadId();
1079
1080     // Wait until we're told to wake up
1081     RELEASE_SPIN_LOCK(&gct->mut_spin);
1082     gct->wakeup = GC_THREAD_STANDING_BY;
1083     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1084     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1085
1086 #ifdef USE_PAPI
1087     // start performance counters in this thread...
1088     if (gct->papi_events == -1) {
1089         papi_init_eventset(&gct->papi_events);
1090     }
1091     papi_thread_start_gc1_count(gct->papi_events);
1092 #endif
1093
1094     // Every thread evacuates some roots.
1095     gct->evac_step = 0;
1096     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1097                          rtsTrue/*prune sparks*/);
1098     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1099
1100     scavenge_until_all_done();
1101
1102 #ifdef USE_PAPI
1103     // count events in this thread towards the GC totals
1104     papi_thread_stop_gc1_count(gct->papi_events);
1105 #endif
1106
1107     // Wait until we're told to continue
1108     RELEASE_SPIN_LOCK(&gct->gc_spin);
1109     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1110     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1111                gct->thread_index);
1112     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1113     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1114
1115     SET_GCT(saved_gct);
1116 }
1117
1118 #endif
1119
1120 #if defined(THREADED_RTS)
1121
1122 void
1123 waitForGcThreads (Capability *cap USED_IF_THREADS)
1124 {
1125     nat n_threads = RtsFlags.ParFlags.nNodes;
1126     nat me = cap->no;
1127     nat i, j;
1128     rtsBool retry = rtsTrue;
1129
1130     while(retry) {
1131         for (i=0; i < n_threads; i++) {
1132             if (i == me) continue;
1133             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1134                 prodCapability(&capabilities[i], cap->running_task);
1135             }
1136         }
1137         for (j=0; j < 10; j++) {
1138             retry = rtsFalse;
1139             for (i=0; i < n_threads; i++) {
1140                 if (i == me) continue;
1141                 write_barrier();
1142                 setContextSwitches();
1143                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1144                     retry = rtsTrue;
1145                 }
1146             }
1147             if (!retry) break;
1148             yieldThread();
1149         }
1150     }
1151 }
1152
1153 #endif // THREADED_RTS
1154
1155 static void
1156 start_gc_threads (void)
1157 {
1158 #if defined(THREADED_RTS)
1159     gc_running_threads = 0;
1160 #endif
1161 }
1162
1163 static void
1164 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1165 {
1166 #if defined(THREADED_RTS)
1167     nat i;
1168     for (i=0; i < n_threads; i++) {
1169         if (i == me) continue;
1170         inc_running();
1171         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1172         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1173
1174         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1175         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1176         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1177     }
1178 #endif
1179 }
1180
1181 // After GC is complete, we must wait for all GC threads to enter the
1182 // standby state, otherwise they may still be executing inside
1183 // any_work(), and may even remain awake until the next GC starts.
1184 static void
1185 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1186 {
1187 #if defined(THREADED_RTS)
1188     nat i;
1189     for (i=0; i < n_threads; i++) {
1190         if (i == me) continue;
1191         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1192     }
1193 #endif
1194 }
1195
1196 #if defined(THREADED_RTS)
1197 void
1198 releaseGCThreads (Capability *cap USED_IF_THREADS)
1199 {
1200     nat n_threads = RtsFlags.ParFlags.nNodes;
1201     nat me = cap->no;
1202     nat i;
1203     for (i=0; i < n_threads; i++) {
1204         if (i == me) continue;
1205         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1206             barf("releaseGCThreads");
1207
1208         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1209         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1210         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1211     }
1212 }
1213 #endif
1214
1215 /* ----------------------------------------------------------------------------
1216    Initialise a generation that is to be collected
1217    ------------------------------------------------------------------------- */
1218
1219 static void
1220 init_collected_gen (nat g, nat n_threads)
1221 {
1222     nat s, t, i;
1223     step_workspace *ws;
1224     step *stp;
1225     bdescr *bd;
1226
1227     // Throw away the current mutable list.  Invariant: the mutable
1228     // list always has at least one block; this means we can avoid a
1229     // check for NULL in recordMutable().
1230     if (g != 0) {
1231         freeChain(generations[g].mut_list);
1232         generations[g].mut_list = allocBlock();
1233         for (i = 0; i < n_capabilities; i++) {
1234             freeChain(capabilities[i].mut_lists[g]);
1235             capabilities[i].mut_lists[g] = allocBlock();
1236         }
1237     }
1238
1239     if (g == 0) {
1240         for (i = 0; i < n_capabilities; i++) {
1241             stp = &nurseries[i];
1242             stp->old_threads = stp->threads;
1243             stp->threads = END_TSO_QUEUE;
1244         }
1245     }
1246
1247     for (s = 0; s < generations[g].n_steps; s++) {
1248
1249         // generation 0, step 0 doesn't need to-space, unless -G1
1250         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1251             continue;
1252         }
1253
1254         stp = &generations[g].steps[s];
1255         ASSERT(stp->gen_no == g);
1256
1257         // we'll construct a new list of threads in this step
1258         // during GC, throw away the current list.
1259         stp->old_threads = stp->threads;
1260         stp->threads = END_TSO_QUEUE;
1261
1262         // deprecate the existing blocks
1263         stp->old_blocks   = stp->blocks;
1264         stp->n_old_blocks = stp->n_blocks;
1265         stp->blocks       = NULL;
1266         stp->n_blocks     = 0;
1267         stp->n_words      = 0;
1268         stp->live_estimate = 0;
1269
1270         // initialise the large object queues.
1271         stp->scavenged_large_objects = NULL;
1272         stp->n_scavenged_large_blocks = 0;
1273
1274         // mark the small objects as from-space
1275         for (bd = stp->old_blocks; bd; bd = bd->link) {
1276             bd->flags &= ~BF_EVACUATED;
1277         }
1278
1279         // mark the large objects as from-space
1280         for (bd = stp->large_objects; bd; bd = bd->link) {
1281             bd->flags &= ~BF_EVACUATED;
1282         }
1283
1284         // for a compacted step, we need to allocate the bitmap
1285         if (stp->mark) {
1286             nat bitmap_size; // in bytes
1287             bdescr *bitmap_bdescr;
1288             StgWord *bitmap;
1289
1290             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1291
1292             if (bitmap_size > 0) {
1293                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1294                                            / BLOCK_SIZE);
1295                 stp->bitmap = bitmap_bdescr;
1296                 bitmap = bitmap_bdescr->start;
1297
1298                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1299                            bitmap_size, bitmap);
1300
1301                 // don't forget to fill it with zeros!
1302                 memset(bitmap, 0, bitmap_size);
1303
1304                 // For each block in this step, point to its bitmap from the
1305                 // block descriptor.
1306                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1307                     bd->u.bitmap = bitmap;
1308                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1309
1310                     // Also at this point we set the BF_MARKED flag
1311                     // for this block.  The invariant is that
1312                     // BF_MARKED is always unset, except during GC
1313                     // when it is set on those blocks which will be
1314                     // compacted.
1315                     if (!(bd->flags & BF_FRAGMENTED)) {
1316                         bd->flags |= BF_MARKED;
1317                     }
1318                 }
1319             }
1320         }
1321     }
1322
1323     // For each GC thread, for each step, allocate a "todo" block to
1324     // store evacuated objects to be scavenged, and a block to store
1325     // evacuated objects that do not need to be scavenged.
1326     for (t = 0; t < n_threads; t++) {
1327         for (s = 0; s < generations[g].n_steps; s++) {
1328
1329             // we don't copy objects into g0s0, unless -G0
1330             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1331
1332             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1333
1334             ws->todo_large_objects = NULL;
1335
1336             ws->part_list = NULL;
1337             ws->n_part_blocks = 0;
1338
1339             // allocate the first to-space block; extra blocks will be
1340             // chained on as necessary.
1341             ws->todo_bd = NULL;
1342             ASSERT(looksEmptyWSDeque(ws->todo_q));
1343             alloc_todo_block(ws,0);
1344
1345             ws->todo_overflow = NULL;
1346             ws->n_todo_overflow = 0;
1347
1348             ws->scavd_list = NULL;
1349             ws->n_scavd_blocks = 0;
1350         }
1351     }
1352 }
1353
1354
1355 /* ----------------------------------------------------------------------------
1356    Initialise a generation that is *not* to be collected
1357    ------------------------------------------------------------------------- */
1358
1359 static void
1360 init_uncollected_gen (nat g, nat threads)
1361 {
1362     nat s, t, n;
1363     step_workspace *ws;
1364     step *stp;
1365     bdescr *bd;
1366
1367     // save the current mutable lists for this generation, and
1368     // allocate a fresh block for each one.  We'll traverse these
1369     // mutable lists as roots early on in the GC.
1370     generations[g].saved_mut_list = generations[g].mut_list;
1371     generations[g].mut_list = allocBlock();
1372     for (n = 0; n < n_capabilities; n++) {
1373         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
1374         capabilities[n].mut_lists[g] = allocBlock();
1375     }
1376
1377     for (s = 0; s < generations[g].n_steps; s++) {
1378         stp = &generations[g].steps[s];
1379         stp->scavenged_large_objects = NULL;
1380         stp->n_scavenged_large_blocks = 0;
1381     }
1382
1383     for (s = 0; s < generations[g].n_steps; s++) {
1384
1385         stp = &generations[g].steps[s];
1386
1387         for (t = 0; t < threads; t++) {
1388             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1389
1390             ASSERT(looksEmptyWSDeque(ws->todo_q));
1391             ws->todo_large_objects = NULL;
1392
1393             ws->part_list = NULL;
1394             ws->n_part_blocks = 0;
1395
1396             ws->scavd_list = NULL;
1397             ws->n_scavd_blocks = 0;
1398
1399             // If the block at the head of the list in this generation
1400             // is less than 3/4 full, then use it as a todo block.
1401             if (stp->blocks && isPartiallyFull(stp->blocks))
1402             {
1403                 ws->todo_bd = stp->blocks;
1404                 ws->todo_free = ws->todo_bd->free;
1405                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1406                 stp->blocks = stp->blocks->link;
1407                 stp->n_blocks -= 1;
1408                 stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1409                 ws->todo_bd->link = NULL;
1410                 // we must scan from the current end point.
1411                 ws->todo_bd->u.scan = ws->todo_bd->free;
1412             }
1413             else
1414             {
1415                 ws->todo_bd = NULL;
1416                 alloc_todo_block(ws,0);
1417             }
1418         }
1419
1420         // deal out any more partial blocks to the threads' part_lists
1421         t = 0;
1422         while (stp->blocks && isPartiallyFull(stp->blocks))
1423         {
1424             bd = stp->blocks;
1425             stp->blocks = bd->link;
1426             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1427             bd->link = ws->part_list;
1428             ws->part_list = bd;
1429             ws->n_part_blocks += 1;
1430             bd->u.scan = bd->free;
1431             stp->n_blocks -= 1;
1432             stp->n_words -= bd->free - bd->start;
1433             t++;
1434             if (t == n_gc_threads) t = 0;
1435         }
1436     }
1437 }
1438
1439 /* -----------------------------------------------------------------------------
1440    Initialise a gc_thread before GC
1441    -------------------------------------------------------------------------- */
1442
1443 static void
1444 init_gc_thread (gc_thread *t)
1445 {
1446     t->static_objects = END_OF_STATIC_LIST;
1447     t->scavenged_static_objects = END_OF_STATIC_LIST;
1448     t->scan_bd = NULL;
1449     t->mut_lists = capabilities[t->thread_index].mut_lists;
1450     t->evac_step = 0;
1451     t->failed_to_evac = rtsFalse;
1452     t->eager_promotion = rtsTrue;
1453     t->thunk_selector_depth = 0;
1454     t->copied = 0;
1455     t->scanned = 0;
1456     t->any_work = 0;
1457     t->no_work = 0;
1458     t->scav_find_work = 0;
1459 }
1460
1461 /* -----------------------------------------------------------------------------
1462    Function we pass to evacuate roots.
1463    -------------------------------------------------------------------------- */
1464
1465 static void
1466 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1467 {
1468     // we stole a register for gct, but this function is called from
1469     // *outside* the GC where the register variable is not in effect,
1470     // so we need to save and restore it here.  NB. only call
1471     // mark_root() from the main GC thread, otherwise gct will be
1472     // incorrect.
1473     gc_thread *saved_gct;
1474     saved_gct = gct;
1475     SET_GCT(user);
1476
1477     evacuate(root);
1478
1479     SET_GCT(saved_gct);
1480 }
1481
1482 /* -----------------------------------------------------------------------------
1483    Initialising the static object & mutable lists
1484    -------------------------------------------------------------------------- */
1485
1486 static void
1487 zero_static_object_list(StgClosure* first_static)
1488 {
1489   StgClosure* p;
1490   StgClosure* link;
1491   const StgInfoTable *info;
1492
1493   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1494     info = get_itbl(p);
1495     link = *STATIC_LINK(info, p);
1496     *STATIC_LINK(info,p) = NULL;
1497   }
1498 }
1499
1500 /* ----------------------------------------------------------------------------
1501    Update the pointers from the task list
1502
1503    These are treated as weak pointers because we want to allow a main
1504    thread to get a BlockedOnDeadMVar exception in the same way as any
1505    other thread.  Note that the threads should all have been retained
1506    by GC by virtue of being on the all_threads list, we're just
1507    updating pointers here.
1508    ------------------------------------------------------------------------- */
1509
1510 static void
1511 update_task_list (void)
1512 {
1513     Task *task;
1514     StgTSO *tso;
1515     for (task = all_tasks; task != NULL; task = task->all_link) {
1516         if (!task->stopped && task->tso) {
1517             ASSERT(task->tso->bound == task);
1518             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1519             if (tso == NULL) {
1520                 barf("task %p: main thread %d has been GC'd",
1521 #ifdef THREADED_RTS
1522                      (void *)task->id,
1523 #else
1524                      (void *)task,
1525 #endif
1526                      task->tso->id);
1527             }
1528             task->tso = tso;
1529         }
1530     }
1531 }
1532
1533 /* ----------------------------------------------------------------------------
1534    Reset the sizes of the older generations when we do a major
1535    collection.
1536
1537    CURRENT STRATEGY: make all generations except zero the same size.
1538    We have to stay within the maximum heap size, and leave a certain
1539    percentage of the maximum heap size available to allocate into.
1540    ------------------------------------------------------------------------- */
1541
1542 static void
1543 resize_generations (void)
1544 {
1545     nat g;
1546
1547     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1548         nat live, size, min_alloc, words;
1549         nat max  = RtsFlags.GcFlags.maxHeapSize;
1550         nat gens = RtsFlags.GcFlags.generations;
1551
1552         // live in the oldest generations
1553         if (oldest_gen->steps[0].live_estimate != 0) {
1554             words = oldest_gen->steps[0].live_estimate;
1555         } else {
1556             words = oldest_gen->steps[0].n_words;
1557         }
1558         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1559             oldest_gen->steps[0].n_large_blocks;
1560
1561         // default max size for all generations except zero
1562         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1563                        RtsFlags.GcFlags.minOldGenSize);
1564
1565         if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1566             RtsFlags.GcFlags.heapSizeSuggestion = size;
1567         }
1568
1569         // minimum size for generation zero
1570         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1571                             RtsFlags.GcFlags.minAllocAreaSize);
1572
1573         // Auto-enable compaction when the residency reaches a
1574         // certain percentage of the maximum heap size (default: 30%).
1575         if (RtsFlags.GcFlags.generations > 1 &&
1576             (RtsFlags.GcFlags.compact ||
1577              (max > 0 &&
1578               oldest_gen->steps[0].n_blocks >
1579               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1580             oldest_gen->steps[0].mark = 1;
1581             oldest_gen->steps[0].compact = 1;
1582 //        debugBelch("compaction: on\n", live);
1583         } else {
1584             oldest_gen->steps[0].mark = 0;
1585             oldest_gen->steps[0].compact = 0;
1586 //        debugBelch("compaction: off\n", live);
1587         }
1588
1589         if (RtsFlags.GcFlags.sweep) {
1590             oldest_gen->steps[0].mark = 1;
1591         }
1592
1593         // if we're going to go over the maximum heap size, reduce the
1594         // size of the generations accordingly.  The calculation is
1595         // different if compaction is turned on, because we don't need
1596         // to double the space required to collect the old generation.
1597         if (max != 0) {
1598
1599             // this test is necessary to ensure that the calculations
1600             // below don't have any negative results - we're working
1601             // with unsigned values here.
1602             if (max < min_alloc) {
1603                 heapOverflow();
1604             }
1605
1606             if (oldest_gen->steps[0].compact) {
1607                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1608                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1609                 }
1610             } else {
1611                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1612                     size = (max - min_alloc) / ((gens - 1) * 2);
1613                 }
1614             }
1615
1616             if (size < live) {
1617                 heapOverflow();
1618             }
1619         }
1620
1621 #if 0
1622         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1623                    min_alloc, size, max);
1624 #endif
1625
1626         for (g = 0; g < gens; g++) {
1627             generations[g].max_blocks = size;
1628         }
1629     }
1630 }
1631
1632 /* -----------------------------------------------------------------------------
1633    Calculate the new size of the nursery, and resize it.
1634    -------------------------------------------------------------------------- */
1635
1636 static void
1637 resize_nursery (void)
1638 {
1639     lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1640
1641     if (RtsFlags.GcFlags.generations == 1)
1642     {   // Two-space collector:
1643         nat blocks;
1644
1645         /* set up a new nursery.  Allocate a nursery size based on a
1646          * function of the amount of live data (by default a factor of 2)
1647          * Use the blocks from the old nursery if possible, freeing up any
1648          * left over blocks.
1649          *
1650          * If we get near the maximum heap size, then adjust our nursery
1651          * size accordingly.  If the nursery is the same size as the live
1652          * data (L), then we need 3L bytes.  We can reduce the size of the
1653          * nursery to bring the required memory down near 2L bytes.
1654          *
1655          * A normal 2-space collector would need 4L bytes to give the same
1656          * performance we get from 3L bytes, reducing to the same
1657          * performance at 2L bytes.
1658          */
1659         blocks = generations[0].steps[0].n_blocks;
1660
1661         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1662              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1663              RtsFlags.GcFlags.maxHeapSize )
1664         {
1665             long adjusted_blocks;  // signed on purpose
1666             int pc_free;
1667
1668             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1669
1670             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1671                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1672
1673             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1674             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1675             {
1676                 heapOverflow();
1677             }
1678             blocks = adjusted_blocks;
1679         }
1680         else
1681         {
1682             blocks *= RtsFlags.GcFlags.oldGenFactor;
1683             if (blocks < min_nursery)
1684             {
1685                 blocks = min_nursery;
1686             }
1687         }
1688         resizeNurseries(blocks);
1689     }
1690     else  // Generational collector
1691     {
1692         /*
1693          * If the user has given us a suggested heap size, adjust our
1694          * allocation area to make best use of the memory available.
1695          */
1696         if (RtsFlags.GcFlags.heapSizeSuggestion)
1697         {
1698             long blocks;
1699             nat needed = calcNeeded();  // approx blocks needed at next GC
1700
1701             /* Guess how much will be live in generation 0 step 0 next time.
1702              * A good approximation is obtained by finding the
1703              * percentage of g0s0 that was live at the last minor GC.
1704              *
1705              * We have an accurate figure for the amount of copied data in
1706              * 'copied', but we must convert this to a number of blocks, with
1707              * a small adjustment for estimated slop at the end of a block
1708              * (- 10 words).
1709              */
1710             if (N == 0)
1711             {
1712                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1713                     / countNurseryBlocks();
1714             }
1715
1716             /* Estimate a size for the allocation area based on the
1717              * information available.  We might end up going slightly under
1718              * or over the suggested heap size, but we should be pretty
1719              * close on average.
1720              *
1721              * Formula:            suggested - needed
1722              *                ----------------------------
1723              *                    1 + g0s0_pcnt_kept/100
1724              *
1725              * where 'needed' is the amount of memory needed at the next
1726              * collection for collecting all steps except g0s0.
1727              */
1728             blocks =
1729                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1730                 (100 + (long)g0s0_pcnt_kept);
1731
1732             if (blocks < (long)min_nursery) {
1733                 blocks = min_nursery;
1734             }
1735
1736             resizeNurseries((nat)blocks);
1737         }
1738         else
1739         {
1740             // we might have added extra large blocks to the nursery, so
1741             // resize back to minAllocAreaSize again.
1742             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1743         }
1744     }
1745 }
1746
1747 /* -----------------------------------------------------------------------------
1748    Sanity code for CAF garbage collection.
1749
1750    With DEBUG turned on, we manage a CAF list in addition to the SRT
1751    mechanism.  After GC, we run down the CAF list and blackhole any
1752    CAFs which have been garbage collected.  This means we get an error
1753    whenever the program tries to enter a garbage collected CAF.
1754
1755    Any garbage collected CAFs are taken off the CAF list at the same
1756    time.
1757    -------------------------------------------------------------------------- */
1758
1759 #if 0 && defined(DEBUG)
1760
1761 static void
1762 gcCAFs(void)
1763 {
1764   StgClosure*  p;
1765   StgClosure** pp;
1766   const StgInfoTable *info;
1767   nat i;
1768
1769   i = 0;
1770   p = caf_list;
1771   pp = &caf_list;
1772
1773   while (p != NULL) {
1774
1775     info = get_itbl(p);
1776
1777     ASSERT(info->type == IND_STATIC);
1778
1779     if (STATIC_LINK(info,p) == NULL) {
1780         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1781         // black hole it
1782         SET_INFO(p,&stg_BLACKHOLE_info);
1783         p = STATIC_LINK2(info,p);
1784         *pp = p;
1785     }
1786     else {
1787       pp = &STATIC_LINK2(info,p);
1788       p = *pp;
1789       i++;
1790     }
1791
1792   }
1793
1794   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1795 }
1796 #endif