rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2006
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 // #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsFlags.h"
  17 #include "RtsUtils.h"
  18 #include "Apply.h"
  19 #include "OSThreads.h"
  20 #include "LdvProfile.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "MBlock.h"
  27 #include "ProfHeap.h"
  28 #include "SchedAPI.h"
  29 #include "Weak.h"
  30 #include "Prelude.h"
  31 #include "ParTicky.h"           // ToDo: move into Rts.h
  32 #include "RtsSignals.h"
  33 #include "STM.h"
  34 #include "HsFFI.h"
  35 #include "Linker.h"
  36 #if defined(RTS_GTK_FRONTPANEL)
  37 #include "FrontPanel.h"
  38 #endif
  39 #include "Trace.h"
  40 #include "RetainerProfile.h"
  41 #include "RaiseAsync.h"
  42 #include "Sparks.h"
  43 #include "Papi.h"
  44
  45 #include "GC.h"
  46 #include "Compact.h"
  47 #include "Evac.h"
  48 #include "Scav.h"
  49 #include "GCUtils.h"
  50 #include "MarkWeak.h"
  51 #include "Sparks.h"
  52
  53 #include <string.h> // for memset()
  54 #include <unistd.h>
  55
  56 /* -----------------------------------------------------------------------------
  57    Global variables
  58    -------------------------------------------------------------------------- */
  59
  60 /* STATIC OBJECT LIST.
  61  *
  62  * During GC:
  63  * We maintain a linked list of static objects that are still live.
  64  * The requirements for this list are:
  65  *
  66  *  - we need to scan the list while adding to it, in order to
  67  *    scavenge all the static objects (in the same way that
  68  *    breadth-first scavenging works for dynamic objects).
  69  *
  70  *  - we need to be able to tell whether an object is already on
  71  *    the list, to break loops.
  72  *
  73  * Each static object has a "static link field", which we use for
  74  * linking objects on to the list.  We use a stack-type list, consing
  75  * objects on the front as they are added (this means that the
  76  * scavenge phase is depth-first, not breadth-first, but that
  77  * shouldn't matter).
  78  *
  79  * A separate list is kept for objects that have been scavenged
  80  * already - this is so that we can zero all the marks afterwards.
  81  *
  82  * An object is on the list if its static link field is non-zero; this
  83  * means that we have to mark the end of the list with '1', not NULL.
  84  *
  85  * Extra notes for generational GC:
  86  *
  87  * Each generation has a static object list associated with it.  When
  88  * collecting generations up to N, we treat the static object lists
  89  * from generations > N as roots.
  90  *
  91  * We build up a static object list while collecting generations 0..N,
  92  * which is then appended to the static object list of generation N+1.
  93  */
  94
  95 /* N is the oldest generation being collected, where the generations
  96  * are numbered starting at 0.  A major GC (indicated by the major_gc
  97  * flag) is when we're collecting all generations.  We only attempt to
  98  * deal with static objects and GC CAFs when doing a major GC.
  99  */
 100 nat N;
 101 rtsBool major_gc;
 102
 103 /* Data used for allocation area sizing.
 104  */
 105 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 106
 107 /* Mut-list stats */
 108 #ifdef DEBUG
 109 nat mutlist_MUTVARS,
 110     mutlist_MUTARRS,
 111     mutlist_MVARS,
 112     mutlist_OTHERS;
 113 #endif
 114
 115 /* Thread-local data for each GC thread
 116  */
 117 gc_thread **gc_threads = NULL;
 118 // gc_thread *gct = NULL;  // this thread's gct TODO: make thread-local
 119
 120 // Number of threads running in *this* GC.  Affects how many
 121 // step->todos[] lists we have to look in to find work.
 122 nat n_gc_threads;
 123
 124 // For stats:
 125 long copied;        // *words* copied & scavenged during this GC
 126
 127 #ifdef THREADED_RTS
 128 SpinLock recordMutableGen_sync;
 129 #endif
 130
 131 /* -----------------------------------------------------------------------------
 132    Static function declarations
 133    -------------------------------------------------------------------------- */
 134
 135 static void mark_root               (StgClosure **root);
 136 static void zero_static_object_list (StgClosure* first_static);
 137 static nat  initialise_N            (rtsBool force_major_gc);
 138 static void alloc_gc_threads        (void);
 139 static void init_collected_gen      (nat g, nat threads);
 140 static void init_uncollected_gen    (nat g, nat threads);
 141 static void init_gc_thread          (gc_thread *t);
 142 static void update_task_list        (void);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void gc_thread_work          (void);
 147 static nat  inc_running             (void);
 148 static nat  dec_running             (void);
 149 static void wakeup_gc_threads       (nat n_threads);
 150 static void shutdown_gc_threads     (nat n_threads);
 151
 152 #if 0 && defined(DEBUG)
 153 static void gcCAFs                  (void);
 154 #endif
 155
 156 /* -----------------------------------------------------------------------------
 157    The mark bitmap & stack.
 158    -------------------------------------------------------------------------- */
 159
 160 #define MARK_STACK_BLOCKS 4
 161
 162 bdescr *mark_stack_bdescr;
 163 StgPtr *mark_stack;
 164 StgPtr *mark_sp;
 165 StgPtr *mark_splim;
 166
 167 // Flag and pointers used for falling back to a linear scan when the
 168 // mark stack overflows.
 169 rtsBool mark_stack_overflowed;
 170 bdescr *oldgen_scan_bd;
 171 StgPtr  oldgen_scan;
 172
 173 /* -----------------------------------------------------------------------------
 174    GarbageCollect: the main entry point to the garbage collector.
 175
 176    Locks held: all capabilities are held throughout GarbageCollect().
 177    -------------------------------------------------------------------------- */
 178
 179 void
 180 GarbageCollect ( rtsBool force_major_gc )
 181 {
 182   bdescr *bd;
 183   step *stp;
 184   lnat live, allocated, max_copied, avg_copied, slop;
 185   lnat oldgen_saved_blocks = 0;
 186   gc_thread *saved_gct;
 187   nat g, s, t, n;
 188
 189   // necessary if we stole a callee-saves register for gct:
 190   saved_gct = gct;
 191
 192 #ifdef PROFILING
 193   CostCentreStack *prev_CCS;
 194 #endif
 195
 196   ACQUIRE_SM_LOCK;
 197
 198 #if defined(RTS_USER_SIGNALS)
 199   if (RtsFlags.MiscFlags.install_signal_handlers) {
 200     // block signals
 201     blockUserSignals();
 202   }
 203 #endif
 204
 205   // tell the stats department that we've started a GC
 206   stat_startGC();
 207
 208   // tell the STM to discard any cached closures it's hoping to re-use
 209   stmPreGCHook();
 210
 211 #ifdef DEBUG
 212   mutlist_MUTVARS = 0;
 213   mutlist_MUTARRS = 0;
 214   mutlist_OTHERS = 0;
 215 #endif
 216
 217   // attribute any costs to CCS_GC
 218 #ifdef PROFILING
 219   prev_CCS = CCCS;
 220   CCCS = CCS_GC;
 221 #endif
 222
 223   /* Approximate how much we allocated.
 224    * Todo: only when generating stats?
 225    */
 226   allocated = calcAllocated();
 227
 228   /* Figure out which generation to collect
 229    */
 230   n = initialise_N(force_major_gc);
 231
 232   /* Allocate + initialise the gc_thread structures.
 233    */
 234   alloc_gc_threads();
 235
 236   /* Start threads, so they can be spinning up while we finish initialisation.
 237    */
 238   start_gc_threads();
 239
 240   /* How many threads will be participating in this GC?
 241    * We don't try to parallelise minor GC.
 242    */
 243 #if defined(THREADED_RTS)
 244   if (n < (4*1024*1024 / BLOCK_SIZE)) {
 245       n_gc_threads = 1;
 246   } else {
 247       n_gc_threads = RtsFlags.ParFlags.gcThreads;
 248   }
 249 #else
 250   n_gc_threads = 1;
 251 #endif
 252   trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %dKB to collect, using %d thread(s)",
 253         N, n * (BLOCK_SIZE / 1024), n_gc_threads);
 254
 255 #ifdef RTS_GTK_FRONTPANEL
 256   if (RtsFlags.GcFlags.frontpanel) {
 257       updateFrontPanelBeforeGC(N);
 258   }
 259 #endif
 260
 261 #ifdef DEBUG
 262   // check for memory leaks if DEBUG is on
 263   memInventory(traceClass(DEBUG_gc));
 264 #endif
 265
 266   // check stack sanity *before* GC (ToDo: check all threads)
 267   IF_DEBUG(sanity, checkFreeListSanity());
 268
 269   // Initialise all our gc_thread structures
 270   for (t = 0; t < n_gc_threads; t++) {
 271       init_gc_thread(gc_threads[t]);
 272   }
 273
 274   // Initialise all the generations/steps that we're collecting.
 275   for (g = 0; g <= N; g++) {
 276       init_collected_gen(g,n_gc_threads);
 277   }
 278
 279   // Initialise all the generations/steps that we're *not* collecting.
 280   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 281       init_uncollected_gen(g,n_gc_threads);
 282   }
 283
 284   /* Allocate a mark stack if we're doing a major collection.
 285    */
 286   if (major_gc) {
 287       mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
 288       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 289       mark_sp    = mark_stack;
 290       mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
 291   } else {
 292       mark_stack_bdescr = NULL;
 293   }
 294
 295   // this is the main thread
 296   gct = gc_threads[0];
 297
 298   /* -----------------------------------------------------------------------
 299    * follow all the roots that we know about:
 300    *   - mutable lists from each generation > N
 301    * we want to *scavenge* these roots, not evacuate them: they're not
 302    * going to move in this GC.
 303    * Also do them in reverse generation order, for the usual reason:
 304    * namely to reduce the likelihood of spurious old->new pointers.
 305    */
 306   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 307       generations[g].saved_mut_list = generations[g].mut_list;
 308       generations[g].mut_list = allocBlock();
 309       // mut_list always has at least one block.
 310   }
 311
 312   // the main thread is running: this prevents any other threads from
 313   // exiting prematurely, so we can start them now.
 314   // NB. do this after the mutable lists have been saved above, otherwise
 315   // the other GC threads will be writing into the old mutable lists.
 316   inc_running();
 317   wakeup_gc_threads(n_gc_threads);
 318
 319   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 320       scavenge_mutable_list(&generations[g]);
 321   }
 322
 323   // follow roots from the CAF list (used by GHCi)
 324   gct->evac_step = 0;
 325   markCAFs(mark_root);
 326
 327   // follow all the roots that the application knows about.
 328   gct->evac_step = 0;
 329   GetRoots(mark_root);
 330
 331 #if defined(RTS_USER_SIGNALS)
 332   // mark the signal handlers (signals should be already blocked)
 333   markSignalHandlers(mark_root);
 334 #endif
 335
 336   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 337   markWeakPtrList();
 338   initWeakForGC();
 339
 340   // Mark the stable pointer table.
 341   markStablePtrTable(mark_root);
 342
 343   /* -------------------------------------------------------------------------
 344    * Repeatedly scavenge all the areas we know about until there's no
 345    * more scavenging to be done.
 346    */
 347   for (;;)
 348   {
 349       gc_thread_work();
 350       // The other threads are now stopped.  We might recurse back to
 351       // here, but from now on this is the only thread.
 352
 353       // if any blackholes are alive, make the threads that wait on
 354       // them alive too.
 355       if (traverseBlackholeQueue()) {
 356           inc_running();
 357           continue;
 358       }
 359
 360       // must be last...  invariant is that everything is fully
 361       // scavenged at this point.
 362       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 363           inc_running();
 364           continue;
 365       }
 366
 367       // If we get to here, there's really nothing left to do.
 368       break;
 369   }
 370
 371   shutdown_gc_threads(n_gc_threads);
 372
 373   // Update pointers from the Task list
 374   update_task_list();
 375
 376   // Now see which stable names are still alive.
 377   gcStablePtrTable();
 378
 379 #ifdef PROFILING
 380   // We call processHeapClosureForDead() on every closure destroyed during
 381   // the current garbage collection, so we invoke LdvCensusForDead().
 382   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 383       || RtsFlags.ProfFlags.bioSelector != NULL)
 384     LdvCensusForDead(N);
 385 #endif
 386
 387   // NO MORE EVACUATION AFTER THIS POINT!
 388   // Finally: compaction of the oldest generation.
 389   if (major_gc && oldest_gen->steps[0].is_compacted) {
 390       // save number of blocks for stats
 391       oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks;
 392       compact();
 393   }
 394
 395   IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
 396
 397   // Two-space collector: free the old to-space.
 398   // g0s0->old_blocks is the old nursery
 399   // g0s0->blocks is to-space from the previous GC
 400   if (RtsFlags.GcFlags.generations == 1) {
 401       if (g0s0->blocks != NULL) {
 402           freeChain(g0s0->blocks);
 403           g0s0->blocks = NULL;
 404       }
 405   }
 406
 407   // For each workspace, in each thread:
 408   //    * clear the BF_EVACUATED flag from each copied block
 409   //    * move the copied blocks to the step
 410   {
 411       gc_thread *thr;
 412       step_workspace *ws;
 413       bdescr *prev, *next;
 414
 415       for (t = 0; t < n_gc_threads; t++) {
 416           thr = gc_threads[t];
 417
 418           // not step 0
 419           for (s = 1; s < total_steps; s++) {
 420               ws = &thr->steps[s];
 421
 422               // Push the final block
 423               if (ws->todo_bd) {
 424                   push_scanned_block(ws->todo_bd, ws);
 425               }
 426
 427               ASSERT(gct->scan_bd == NULL);
 428               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 429
 430               prev = NULL;
 431               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 432                   bd->flags &= ~BF_EVACUATED;    // now from-space
 433                   ws->step->n_words += bd->free - bd->start;
 434                   prev = bd;
 435               }
 436               if (prev != NULL) {
 437                   prev->link = ws->step->blocks;
 438                   ws->step->blocks = ws->scavd_list;
 439               }
 440               ws->step->n_blocks += ws->n_scavd_blocks;
 441
 442               prev = NULL;
 443               for (bd = ws->part_list; bd != NULL; bd = next) {
 444                   next = bd->link;
 445                   if (bd->free == bd->start) {
 446                       if (prev == NULL) {
 447                           ws->part_list = next;
 448                       } else {
 449                           prev->link = next;
 450                       }
 451                       freeGroup(bd);
 452                       ws->n_part_blocks--;
 453                   } else {
 454                       bd->flags &= ~BF_EVACUATED;        // now from-space
 455                       ws->step->n_words += bd->free - bd->start;
 456                       prev = bd;
 457                   }
 458               }
 459               if (prev != NULL) {
 460                   prev->link = ws->step->blocks;
 461                   ws->step->blocks = ws->part_list;
 462               }
 463               ws->step->n_blocks += ws->n_part_blocks;
 464
 465               ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
 466               ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
 467           }
 468       }
 469   }
 470
 471   // Two-space collector: swap the semi-spaces around.
 472   // Currently: g0s0->old_blocks is the old nursery
 473   //            g0s0->blocks is to-space from this GC
 474   // We want these the other way around.
 475   if (RtsFlags.GcFlags.generations == 1) {
 476       bdescr *nursery_blocks = g0s0->old_blocks;
 477       nat n_nursery_blocks = g0s0->n_old_blocks;
 478       g0s0->old_blocks = g0s0->blocks;
 479       g0s0->n_old_blocks = g0s0->n_blocks;
 480       g0s0->blocks = nursery_blocks;
 481       g0s0->n_blocks = n_nursery_blocks;
 482   }
 483
 484   /* run through all the generations/steps and tidy up
 485    */
 486   copied = 0;
 487   max_copied = 0;
 488   avg_copied = 0;
 489   {
 490       nat i;
 491       for (i=0; i < n_gc_threads; i++) {
 492           if (n_gc_threads > 1) {
 493               trace(TRACE_gc,"thread %d:", i);
 494               trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 495               trace(TRACE_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 496               trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
 497               trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
 498               trace(TRACE_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 499           }
 500           copied += gc_threads[i]->copied;
 501           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 502       }
 503       if (n_gc_threads == 1) {
 504           max_copied = 0;
 505           avg_copied = 0;
 506       } else {
 507           avg_copied = copied;
 508       }
 509   }
 510
 511   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 512
 513     if (g == N) {
 514       generations[g].collections++; // for stats
 515       if (n_gc_threads > 1) generations[g].par_collections++;
 516     }
 517
 518     // Count the mutable list as bytes "copied" for the purposes of
 519     // stats.  Every mutable list is copied during every GC.
 520     if (g > 0) {
 521         nat mut_list_size = 0;
 522         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 523             mut_list_size += bd->free - bd->start;
 524         }
 525         copied +=  mut_list_size;
 526
 527         debugTrace(DEBUG_gc,
 528                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 529                    (unsigned long)(mut_list_size * sizeof(W_)),
 530                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 531     }
 532
 533     for (s = 0; s < generations[g].n_steps; s++) {
 534       bdescr *next;
 535       stp = &generations[g].steps[s];
 536
 537       // for generations we collected...
 538       if (g <= N) {
 539
 540         /* free old memory and shift to-space into from-space for all
 541          * the collected steps (except the allocation area).  These
 542          * freed blocks will probaby be quickly recycled.
 543          */
 544         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 545             if (stp->is_compacted)
 546             {
 547                 // for a compacted step, just shift the new to-space
 548                 // onto the front of the now-compacted existing blocks.
 549                 for (bd = stp->blocks; bd != NULL; bd = bd->link) {
 550                     bd->flags &= ~BF_EVACUATED;  // now from-space
 551                     stp->n_words += bd->free - bd->start;
 552                 }
 553                 // tack the new blocks on the end of the existing blocks
 554                 if (stp->old_blocks != NULL) {
 555                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 556                         // NB. this step might not be compacted next
 557                         // time, so reset the BF_COMPACTED flags.
 558                         // They are set before GC if we're going to
 559                         // compact.  (search for BF_COMPACTED above).
 560                         bd->flags &= ~BF_COMPACTED;
 561                         next = bd->link;
 562                         if (next == NULL) {
 563                             bd->link = stp->blocks;
 564                         }
 565                     }
 566                     stp->blocks = stp->old_blocks;
 567                 }
 568                 // add the new blocks to the block tally
 569                 stp->n_blocks += stp->n_old_blocks;
 570                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 571                 ASSERT(countOccupied(stp->blocks) == stp->n_words);
 572             }
 573             else // not copacted
 574             {
 575                 freeChain(stp->old_blocks);
 576             }
 577             stp->old_blocks = NULL;
 578             stp->n_old_blocks = 0;
 579         }
 580
 581         /* LARGE OBJECTS.  The current live large objects are chained on
 582          * scavenged_large, having been moved during garbage
 583          * collection from large_objects.  Any objects left on
 584          * large_objects list are therefore dead, so we free them here.
 585          */
 586         for (bd = stp->large_objects; bd != NULL; bd = next) {
 587           next = bd->link;
 588           freeGroup(bd);
 589           bd = next;
 590         }
 591
 592         // update the count of blocks used by large objects
 593         for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) {
 594           bd->flags &= ~BF_EVACUATED;
 595         }
 596         stp->large_objects  = stp->scavenged_large_objects;
 597         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 598
 599       }
 600       else // for older generations...
 601       {
 602         /* For older generations, we need to append the
 603          * scavenged_large_object list (i.e. large objects that have been
 604          * promoted during this GC) to the large_object list for that step.
 605          */
 606         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 607           next = bd->link;
 608           bd->flags &= ~BF_EVACUATED;
 609           dbl_link_onto(bd, &stp->large_objects);
 610         }
 611
 612         // add the new blocks we promoted during this GC
 613         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 614       }
 615     }
 616   }
 617
 618   // update the max size of older generations after a major GC
 619   resize_generations();
 620
 621   // Calculate the amount of live data for stats.
 622   live = calcLiveWords();
 623
 624   // Free the small objects allocated via allocate(), since this will
 625   // all have been copied into G0S1 now.
 626   if (RtsFlags.GcFlags.generations > 1) {
 627       if (g0s0->blocks != NULL) {
 628           freeChain(g0s0->blocks);
 629           g0s0->blocks = NULL;
 630       }
 631       g0s0->n_blocks = 0;
 632       g0s0->n_words = 0;
 633   }
 634   alloc_blocks = 0;
 635   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 636
 637   // Start a new pinned_object_block
 638   pinned_object_block = NULL;
 639
 640   // Free the mark stack.
 641   if (mark_stack_bdescr != NULL) {
 642       freeGroup(mark_stack_bdescr);
 643   }
 644
 645   // Free any bitmaps.
 646   for (g = 0; g <= N; g++) {
 647       for (s = 0; s < generations[g].n_steps; s++) {
 648           stp = &generations[g].steps[s];
 649           if (stp->bitmap != NULL) {
 650               freeGroup(stp->bitmap);
 651               stp->bitmap = NULL;
 652           }
 653       }
 654   }
 655
 656   resize_nursery();
 657
 658  // mark the garbage collected CAFs as dead
 659 #if 0 && defined(DEBUG) // doesn't work at the moment
 660   if (major_gc) { gcCAFs(); }
 661 #endif
 662
 663 #ifdef PROFILING
 664   // resetStaticObjectForRetainerProfiling() must be called before
 665   // zeroing below.
 666   if (n_gc_threads > 1) {
 667       barf("profiling is currently broken with multi-threaded GC");
 668       // ToDo: fix the gct->scavenged_static_objects below
 669   }
 670   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 671 #endif
 672
 673   // zero the scavenged static object list
 674   if (major_gc) {
 675       nat i;
 676       for (i = 0; i < n_gc_threads; i++) {
 677           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 678       }
 679   }
 680
 681   // Reset the nursery
 682   resetNurseries();
 683
 684   // start any pending finalizers
 685   RELEASE_SM_LOCK;
 686   scheduleFinalizers(last_free_capability, old_weak_ptr_list);
 687   ACQUIRE_SM_LOCK;
 688
 689   // send exceptions to any threads which were about to die
 690   RELEASE_SM_LOCK;
 691   resurrectThreads(resurrected_threads);
 692   ACQUIRE_SM_LOCK;
 693
 694   // Update the stable pointer hash table.
 695   updateStablePtrTable(major_gc);
 696
 697   // check sanity after GC
 698   IF_DEBUG(sanity, checkSanity());
 699
 700   // extra GC trace info
 701   if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
 702
 703 #ifdef DEBUG
 704   // symbol-table based profiling
 705   /*  heapCensus(to_blocks); */ /* ToDo */
 706 #endif
 707
 708   // restore enclosing cost centre
 709 #ifdef PROFILING
 710   CCCS = prev_CCS;
 711 #endif
 712
 713 #ifdef DEBUG
 714   // check for memory leaks if DEBUG is on
 715   memInventory(traceClass(DEBUG_gc));
 716 #endif
 717
 718 #ifdef RTS_GTK_FRONTPANEL
 719   if (RtsFlags.GcFlags.frontpanel) {
 720       updateFrontPanelAfterGC( N, live );
 721   }
 722 #endif
 723
 724   // ok, GC over: tell the stats department what happened.
 725   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 726   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 727
 728 #if defined(RTS_USER_SIGNALS)
 729   if (RtsFlags.MiscFlags.install_signal_handlers) {
 730     // unblock signals again
 731     unblockUserSignals();
 732   }
 733 #endif
 734
 735   RELEASE_SM_LOCK;
 736
 737   gct = saved_gct;
 738 }
 739
 740 /* -----------------------------------------------------------------------------
 741  * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an
 742  * implicit slide i.e. after marking all sparks are at the beginning of the
 743  * spark pool and the spark pool only contains sparkable closures
 744  * -------------------------------------------------------------------------- */
 745
 746 #ifdef THREADED_RTS
 747 static void
 748 markSparkQueue (evac_fn evac, Capability *cap)
 749 {
 750     StgClosure **sparkp, **to_sparkp;
 751     nat n, pruned_sparks; // stats only
 752     StgSparkPool *pool;
 753
 754     PAR_TICKY_MARK_SPARK_QUEUE_START();
 755
 756     n = 0;
 757     pruned_sparks = 0;
 758
 759     pool = &(cap->r.rSparks);
 760
 761     ASSERT_SPARK_POOL_INVARIANTS(pool);
 762
 763 #if defined(PARALLEL_HASKELL)
 764     // stats only
 765     n = 0;
 766     pruned_sparks = 0;
 767 #endif
 768
 769     sparkp = pool->hd;
 770     to_sparkp = pool->hd;
 771     while (sparkp != pool->tl) {
 772         ASSERT(*sparkp!=NULL);
 773         ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp)));
 774         // ToDo?: statistics gathering here (also for GUM!)
 775         if (closure_SHOULD_SPARK(*sparkp)) {
 776             evac(sparkp);
 777             *to_sparkp++ = *sparkp;
 778             if (to_sparkp == pool->lim) {
 779                 to_sparkp = pool->base;
 780             }
 781             n++;
 782         } else {
 783             pruned_sparks++;
 784         }
 785         sparkp++;
 786         if (sparkp == pool->lim) {
 787             sparkp = pool->base;
 788         }
 789     }
 790     pool->tl = to_sparkp;
 791
 792     PAR_TICKY_MARK_SPARK_QUEUE_END(n);
 793
 794 #if defined(PARALLEL_HASKELL)
 795     debugTrace(DEBUG_sched,
 796                "marked %d sparks and pruned %d sparks on [%x]",
 797                n, pruned_sparks, mytid);
 798 #else
 799     debugTrace(DEBUG_sched,
 800                "marked %d sparks and pruned %d sparks",
 801                n, pruned_sparks);
 802 #endif
 803
 804     debugTrace(DEBUG_sched,
 805                "new spark queue len=%d; (hd=%p; tl=%p)\n",
 806                sparkPoolSize(pool), pool->hd, pool->tl);
 807 }
 808 #endif
 809
 810 /* ---------------------------------------------------------------------------
 811    Where are the roots that we know about?
 812
 813         - all the threads on the runnable queue
 814         - all the threads on the blocked queue
 815         - all the threads on the sleeping queue
 816         - all the thread currently executing a _ccall_GC
 817         - all the "main threads"
 818
 819    ------------------------------------------------------------------------ */
 820
 821 void
 822 GetRoots( evac_fn evac )
 823 {
 824     nat i;
 825     Capability *cap;
 826     Task *task;
 827
 828     // Each GC thread is responsible for following roots from the
 829     // Capability of the same number.  There will usually be the same
 830     // or fewer Capabilities as GC threads, but just in case there
 831     // are more, we mark every Capability whose number is the GC
 832     // thread's index plus a multiple of the number of GC threads.
 833     for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) {
 834         cap = &capabilities[i];
 835         evac((StgClosure **)(void *)&cap->run_queue_hd);
 836         evac((StgClosure **)(void *)&cap->run_queue_tl);
 837 #if defined(THREADED_RTS)
 838         evac((StgClosure **)(void *)&cap->wakeup_queue_hd);
 839         evac((StgClosure **)(void *)&cap->wakeup_queue_tl);
 840 #endif
 841         for (task = cap->suspended_ccalling_tasks; task != NULL;
 842              task=task->next) {
 843             debugTrace(DEBUG_sched,
 844                        "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id);
 845             evac((StgClosure **)(void *)&task->suspended_tso);
 846         }
 847
 848 #if defined(THREADED_RTS)
 849         markSparkQueue(evac,cap);
 850 #endif
 851     }
 852
 853 #if !defined(THREADED_RTS)
 854     evac((StgClosure **)(void *)&blocked_queue_hd);
 855     evac((StgClosure **)(void *)&blocked_queue_tl);
 856     evac((StgClosure **)(void *)&sleeping_queue);
 857 #endif
 858 }
 859
 860 /* -----------------------------------------------------------------------------
 861    isAlive determines whether the given closure is still alive (after
 862    a garbage collection) or not.  It returns the new address of the
 863    closure if it is alive, or NULL otherwise.
 864
 865    NOTE: Use it before compaction only!
 866          It untags and (if needed) retags pointers to closures.
 867    -------------------------------------------------------------------------- */
 868
 869
 870 StgClosure *
 871 isAlive(StgClosure *p)
 872 {
 873   const StgInfoTable *info;
 874   bdescr *bd;
 875   StgWord tag;
 876   StgClosure *q;
 877
 878   while (1) {
 879     /* The tag and the pointer are split, to be merged later when needed. */
 880     tag = GET_CLOSURE_TAG(p);
 881     q = UNTAG_CLOSURE(p);
 882
 883     ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
 884     info = get_itbl(q);
 885
 886     // ignore static closures
 887     //
 888     // ToDo: for static closures, check the static link field.
 889     // Problem here is that we sometimes don't set the link field, eg.
 890     // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.
 891     //
 892     if (!HEAP_ALLOCED(q)) {
 893         return p;
 894     }
 895
 896     // ignore closures in generations that we're not collecting.
 897     bd = Bdescr((P_)q);
 898     if (bd->gen_no > N) {
 899         return p;
 900     }
 901
 902     // if it's a pointer into to-space, then we're done
 903     if (bd->flags & BF_EVACUATED) {
 904         return p;
 905     }
 906
 907     // large objects use the evacuated flag
 908     if (bd->flags & BF_LARGE) {
 909         return NULL;
 910     }
 911
 912     // check the mark bit for compacted steps
 913     if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) {
 914         return p;
 915     }
 916
 917     switch (info->type) {
 918
 919     case IND:
 920     case IND_STATIC:
 921     case IND_PERM:
 922     case IND_OLDGEN:            // rely on compatible layout with StgInd
 923     case IND_OLDGEN_PERM:
 924       // follow indirections
 925       p = ((StgInd *)q)->indirectee;
 926       continue;
 927
 928     case EVACUATED:
 929       // alive!
 930       return ((StgEvacuated *)q)->evacuee;
 931
 932     case TSO:
 933       if (((StgTSO *)q)->what_next == ThreadRelocated) {
 934         p = (StgClosure *)((StgTSO *)q)->link;
 935         continue;
 936       }
 937       return NULL;
 938
 939     default:
 940       // dead.
 941       return NULL;
 942     }
 943   }
 944 }
 945
 946 /* -----------------------------------------------------------------------------
 947    Figure out which generation to collect, initialise N and major_gc.
 948
 949    Also returns the total number of blocks in generations that will be
 950    collected.
 951    -------------------------------------------------------------------------- */
 952
 953 static nat
 954 initialise_N (rtsBool force_major_gc)
 955 {
 956     int g;
 957     nat s, blocks, blocks_total;
 958
 959     blocks = 0;
 960     blocks_total = 0;
 961
 962     if (force_major_gc) {
 963         N = RtsFlags.GcFlags.generations - 1;
 964     } else {
 965         N = 0;
 966     }
 967
 968     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 969         blocks = 0;
 970         for (s = 0; s < generations[g].n_steps; s++) {
 971             blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
 972             blocks += generations[g].steps[s].n_large_blocks;
 973         }
 974         if (blocks >= generations[g].max_blocks) {
 975             N = stg_max(N,g);
 976         }
 977         if ((nat)g <= N) {
 978             blocks_total += blocks;
 979         }
 980     }
 981
 982     blocks_total += countNurseryBlocks();
 983
 984     major_gc = (N == RtsFlags.GcFlags.generations-1);
 985     return blocks_total;
 986 }
 987
 988 /* -----------------------------------------------------------------------------
 989    Initialise the gc_thread structures.
 990    -------------------------------------------------------------------------- */
 991
 992 static gc_thread *
 993 alloc_gc_thread (int n)
 994 {
 995     nat s;
 996     step_workspace *ws;
 997     gc_thread *t;
 998
 999     t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
1000                        "alloc_gc_thread");
1001
1002 #ifdef THREADED_RTS
1003     t->id = 0;
1004     initCondition(&t->wake_cond);
1005     initMutex(&t->wake_mutex);
1006     t->wakeup = rtsTrue;  // starts true, so we can wait for the
1007                           // thread to start up, see wakeup_gc_threads
1008     t->exit   = rtsFalse;
1009 #endif
1010
1011     t->thread_index = n;
1012     t->free_blocks = NULL;
1013     t->gc_count = 0;
1014
1015     init_gc_thread(t);
1016
1017 #ifdef USE_PAPI
1018     t->papi_events = -1;
1019 #endif
1020
1021     for (s = 0; s < total_steps; s++)
1022     {
1023         ws = &t->steps[s];
1024         ws->step = &all_steps[s];
1025         ASSERT(s == ws->step->abs_no);
1026         ws->gct = t;
1027
1028         ws->todo_bd = NULL;
1029         ws->buffer_todo_bd = NULL;
1030
1031         ws->part_list = NULL;
1032         ws->n_part_blocks = 0;
1033
1034         ws->scavd_list = NULL;
1035         ws->n_scavd_blocks = 0;
1036     }
1037
1038     return t;
1039 }
1040
1041
1042 static void
1043 alloc_gc_threads (void)
1044 {
1045     if (gc_threads == NULL) {
1046 #if defined(THREADED_RTS)
1047         nat i;
1048         gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
1049                                      sizeof(gc_thread*),
1050                                      "alloc_gc_threads");
1051
1052         for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
1053             gc_threads[i] = alloc_gc_thread(i);
1054         }
1055 #else
1056         gc_threads = stgMallocBytes (sizeof(gc_thread*),
1057                                      "alloc_gc_threads");
1058
1059         gc_threads[0] = alloc_gc_thread(0);
1060 #endif
1061     }
1062 }
1063
1064 /* ----------------------------------------------------------------------------
1065    Start GC threads
1066    ------------------------------------------------------------------------- */
1067
1068 static nat gc_running_threads;
1069
1070 #if defined(THREADED_RTS)
1071 static Mutex gc_running_mutex;
1072 #endif
1073
1074 static nat
1075 inc_running (void)
1076 {
1077     nat n_running;
1078     ACQUIRE_LOCK(&gc_running_mutex);
1079     n_running = ++gc_running_threads;
1080     RELEASE_LOCK(&gc_running_mutex);
1081     ASSERT(n_running <= n_gc_threads);
1082     return n_running;
1083 }
1084
1085 static nat
1086 dec_running (void)
1087 {
1088     nat n_running;
1089     ACQUIRE_LOCK(&gc_running_mutex);
1090     ASSERT(n_gc_threads != 0);
1091     n_running = --gc_running_threads;
1092     RELEASE_LOCK(&gc_running_mutex);
1093     return n_running;
1094 }
1095
1096 //
1097 // gc_thread_work(): Scavenge until there's no work left to do and all
1098 // the running threads are idle.
1099 //
1100 static void
1101 gc_thread_work (void)
1102 {
1103     nat r;
1104
1105     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
1106
1107     // gc_running_threads has already been incremented for us; either
1108     // this is the main thread and we incremented it inside
1109     // GarbageCollect(), or this is a worker thread and the main
1110     // thread bumped gc_running_threads before waking us up.
1111
1112     // Every thread evacuates some roots.
1113     gct->evac_step = 0;
1114     GetRoots(mark_root);
1115
1116 loop:
1117     scavenge_loop();
1118     // scavenge_loop() only exits when there's no work to do
1119     r = dec_running();
1120
1121     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
1122                gct->thread_index, r);
1123
1124     while (gc_running_threads != 0) {
1125         usleep(1);
1126         if (any_work()) {
1127             inc_running();
1128             goto loop;
1129         }
1130         // any_work() does not remove the work from the queue, it
1131         // just checks for the presence of work.  If we find any,
1132         // then we increment gc_running_threads and go back to
1133         // scavenge_loop() to perform any pending work.
1134     }
1135
1136     // All threads are now stopped
1137     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
1138 }
1139
1140
1141 #if defined(THREADED_RTS)
1142 static void
1143 gc_thread_mainloop (void)
1144 {
1145     while (!gct->exit) {
1146
1147         // Wait until we're told to wake up
1148         ACQUIRE_LOCK(&gct->wake_mutex);
1149         gct->wakeup = rtsFalse;
1150         while (!gct->wakeup) {
1151             debugTrace(DEBUG_gc, "GC thread %d standing by...",
1152                        gct->thread_index);
1153             waitCondition(&gct->wake_cond, &gct->wake_mutex);
1154         }
1155         RELEASE_LOCK(&gct->wake_mutex);
1156         if (gct->exit) break;
1157
1158 #ifdef USE_PAPI
1159         // start performance counters in this thread...
1160         if (gct->papi_events == -1) {
1161             papi_init_eventset(&gct->papi_events);
1162         }
1163         papi_thread_start_gc1_count(gct->papi_events);
1164 #endif
1165
1166         gc_thread_work();
1167
1168 #ifdef USE_PAPI
1169         // count events in this thread towards the GC totals
1170         papi_thread_stop_gc1_count(gct->papi_events);
1171 #endif
1172     }
1173 }
1174 #endif
1175
1176 #if defined(THREADED_RTS)
1177 static void
1178 gc_thread_entry (gc_thread *my_gct)
1179 {
1180     gct = my_gct;
1181     debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
1182     gct->id = osThreadId();
1183     gc_thread_mainloop();
1184 }
1185 #endif
1186
1187 static void
1188 start_gc_threads (void)
1189 {
1190 #if defined(THREADED_RTS)
1191     nat i;
1192     OSThreadId id;
1193     static rtsBool done = rtsFalse;
1194
1195     gc_running_threads = 0;
1196     initMutex(&gc_running_mutex);
1197
1198     if (!done) {
1199         // Start from 1: the main thread is 0
1200         for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
1201             createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
1202                            gc_threads[i]);
1203         }
1204         done = rtsTrue;
1205     }
1206 #endif
1207 }
1208
1209 static void
1210 wakeup_gc_threads (nat n_threads USED_IF_THREADS)
1211 {
1212 #if defined(THREADED_RTS)
1213     nat i;
1214     for (i=1; i < n_threads; i++) {
1215         inc_running();
1216         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1217         do {
1218             ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1219             if (gc_threads[i]->wakeup) {
1220                 RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1221                 continue;
1222             } else {
1223                 break;
1224             }
1225         } while (1);
1226         gc_threads[i]->wakeup = rtsTrue;
1227         signalCondition(&gc_threads[i]->wake_cond);
1228         RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1229     }
1230 #endif
1231 }
1232
1233 // After GC is complete, we must wait for all GC threads to enter the
1234 // standby state, otherwise they may still be executing inside
1235 // any_work(), and may even remain awake until the next GC starts.
1236 static void
1237 shutdown_gc_threads (nat n_threads USED_IF_THREADS)
1238 {
1239 #if defined(THREADED_RTS)
1240     nat i;
1241     rtsBool wakeup;
1242     for (i=1; i < n_threads; i++) {
1243         do {
1244             ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1245             wakeup = gc_threads[i]->wakeup;
1246             // wakeup is false while the thread is waiting
1247             RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1248         } while (wakeup);
1249     }
1250 #endif
1251 }
1252
1253 /* ----------------------------------------------------------------------------
1254    Initialise a generation that is to be collected
1255    ------------------------------------------------------------------------- */
1256
1257 static void
1258 init_collected_gen (nat g, nat n_threads)
1259 {
1260     nat s, t, i;
1261     step_workspace *ws;
1262     step *stp;
1263     bdescr *bd;
1264
1265     // Throw away the current mutable list.  Invariant: the mutable
1266     // list always has at least one block; this means we can avoid a
1267     // check for NULL in recordMutable().
1268     if (g != 0) {
1269         freeChain(generations[g].mut_list);
1270         generations[g].mut_list = allocBlock();
1271         for (i = 0; i < n_capabilities; i++) {
1272             freeChain(capabilities[i].mut_lists[g]);
1273             capabilities[i].mut_lists[g] = allocBlock();
1274         }
1275     }
1276
1277     for (s = 0; s < generations[g].n_steps; s++) {
1278
1279         // generation 0, step 0 doesn't need to-space
1280         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1281             continue;
1282         }
1283
1284         stp = &generations[g].steps[s];
1285         ASSERT(stp->gen_no == g);
1286
1287         // deprecate the existing blocks
1288         stp->old_blocks   = stp->blocks;
1289         stp->n_old_blocks = stp->n_blocks;
1290         stp->blocks       = NULL;
1291         stp->n_blocks     = 0;
1292         stp->n_words      = 0;
1293
1294         // we don't have any to-be-scavenged blocks yet
1295         stp->todos = NULL;
1296         stp->todos_last = NULL;
1297         stp->n_todos = 0;
1298
1299         // initialise the large object queues.
1300         stp->scavenged_large_objects = NULL;
1301         stp->n_scavenged_large_blocks = 0;
1302
1303         // mark the large objects as not evacuated yet
1304         for (bd = stp->large_objects; bd; bd = bd->link) {
1305             bd->flags &= ~BF_EVACUATED;
1306         }
1307
1308         // for a compacted step, we need to allocate the bitmap
1309         if (stp->is_compacted) {
1310             nat bitmap_size; // in bytes
1311             bdescr *bitmap_bdescr;
1312             StgWord *bitmap;
1313
1314             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1315
1316             if (bitmap_size > 0) {
1317                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1318                                            / BLOCK_SIZE);
1319                 stp->bitmap = bitmap_bdescr;
1320                 bitmap = bitmap_bdescr->start;
1321
1322                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1323                            bitmap_size, bitmap);
1324
1325                 // don't forget to fill it with zeros!
1326                 memset(bitmap, 0, bitmap_size);
1327
1328                 // For each block in this step, point to its bitmap from the
1329                 // block descriptor.
1330                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1331                     bd->u.bitmap = bitmap;
1332                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1333
1334                     // Also at this point we set the BF_COMPACTED flag
1335                     // for this block.  The invariant is that
1336                     // BF_COMPACTED is always unset, except during GC
1337                     // when it is set on those blocks which will be
1338                     // compacted.
1339                     bd->flags |= BF_COMPACTED;
1340                 }
1341             }
1342         }
1343     }
1344
1345     // For each GC thread, for each step, allocate a "todo" block to
1346     // store evacuated objects to be scavenged, and a block to store
1347     // evacuated objects that do not need to be scavenged.
1348     for (t = 0; t < n_threads; t++) {
1349         for (s = 0; s < generations[g].n_steps; s++) {
1350
1351             // we don't copy objects into g0s0, unless -G0
1352             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1353
1354             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1355
1356             ws->todo_large_objects = NULL;
1357
1358             ws->part_list = NULL;
1359             ws->n_part_blocks = 0;
1360
1361             // allocate the first to-space block; extra blocks will be
1362             // chained on as necessary.
1363             ws->todo_bd = NULL;
1364             ws->buffer_todo_bd = NULL;
1365             alloc_todo_block(ws,0);
1366
1367             ws->scavd_list = NULL;
1368             ws->n_scavd_blocks = 0;
1369         }
1370     }
1371 }
1372
1373
1374 /* ----------------------------------------------------------------------------
1375    Initialise a generation that is *not* to be collected
1376    ------------------------------------------------------------------------- */
1377
1378 static void
1379 init_uncollected_gen (nat g, nat threads)
1380 {
1381     nat s, t, i;
1382     step_workspace *ws;
1383     step *stp;
1384     bdescr *bd;
1385
1386     for (s = 0; s < generations[g].n_steps; s++) {
1387         stp = &generations[g].steps[s];
1388         stp->scavenged_large_objects = NULL;
1389         stp->n_scavenged_large_blocks = 0;
1390     }
1391
1392     for (t = 0; t < threads; t++) {
1393         for (s = 0; s < generations[g].n_steps; s++) {
1394
1395             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1396             stp = ws->step;
1397
1398             ws->buffer_todo_bd = NULL;
1399             ws->todo_large_objects = NULL;
1400
1401             ws->part_list = NULL;
1402             ws->n_part_blocks = 0;
1403
1404             ws->scavd_list = NULL;
1405             ws->n_scavd_blocks = 0;
1406
1407             // If the block at the head of the list in this generation
1408             // is less than 3/4 full, then use it as a todo block.
1409             if (stp->blocks && isPartiallyFull(stp->blocks))
1410             {
1411                 ws->todo_bd = stp->blocks;
1412                 ws->todo_free = ws->todo_bd->free;
1413                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1414                 stp->blocks = stp->blocks->link;
1415                 stp->n_blocks -= 1;
1416                 stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1417                 ws->todo_bd->link = NULL;
1418                 // we must scan from the current end point.
1419                 ws->todo_bd->u.scan = ws->todo_bd->free;
1420             }
1421             else
1422             {
1423                 ws->todo_bd = NULL;
1424                 alloc_todo_block(ws,0);
1425             }
1426         }
1427     }
1428
1429     // Move the private mutable lists from each capability onto the
1430     // main mutable list for the generation.
1431     for (i = 0; i < n_capabilities; i++) {
1432         for (bd = capabilities[i].mut_lists[g];
1433              bd->link != NULL; bd = bd->link) {
1434             /* nothing */
1435         }
1436         bd->link = generations[g].mut_list;
1437         generations[g].mut_list = capabilities[i].mut_lists[g];
1438         capabilities[i].mut_lists[g] = allocBlock();
1439     }
1440 }
1441
1442 /* -----------------------------------------------------------------------------
1443    Initialise a gc_thread before GC
1444    -------------------------------------------------------------------------- */
1445
1446 static void
1447 init_gc_thread (gc_thread *t)
1448 {
1449     t->static_objects = END_OF_STATIC_LIST;
1450     t->scavenged_static_objects = END_OF_STATIC_LIST;
1451     t->scan_bd = NULL;
1452     t->evac_step = 0;
1453     t->failed_to_evac = rtsFalse;
1454     t->eager_promotion = rtsTrue;
1455     t->thunk_selector_depth = 0;
1456     t->copied = 0;
1457     t->scanned = 0;
1458     t->any_work = 0;
1459     t->no_work = 0;
1460     t->scav_find_work = 0;
1461 }
1462
1463 /* -----------------------------------------------------------------------------
1464    Function we pass to GetRoots to evacuate roots.
1465    -------------------------------------------------------------------------- */
1466
1467 static void
1468 mark_root(StgClosure **root)
1469 {
1470   evacuate(root);
1471 }
1472
1473 /* -----------------------------------------------------------------------------
1474    Initialising the static object & mutable lists
1475    -------------------------------------------------------------------------- */
1476
1477 static void
1478 zero_static_object_list(StgClosure* first_static)
1479 {
1480   StgClosure* p;
1481   StgClosure* link;
1482   const StgInfoTable *info;
1483
1484   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1485     info = get_itbl(p);
1486     link = *STATIC_LINK(info, p);
1487     *STATIC_LINK(info,p) = NULL;
1488   }
1489 }
1490
1491 /* -----------------------------------------------------------------------------
1492    Reverting CAFs
1493    -------------------------------------------------------------------------- */
1494
1495 void
1496 revertCAFs( void )
1497 {
1498     StgIndStatic *c;
1499
1500     for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
1501          c = (StgIndStatic *)c->static_link)
1502     {
1503         SET_INFO(c, c->saved_info);
1504         c->saved_info = NULL;
1505         // could, but not necessary: c->static_link = NULL;
1506     }
1507     revertible_caf_list = NULL;
1508 }
1509
1510 void
1511 markCAFs( evac_fn evac )
1512 {
1513     StgIndStatic *c;
1514
1515     for (c = (StgIndStatic *)caf_list; c != NULL;
1516          c = (StgIndStatic *)c->static_link)
1517     {
1518         evac(&c->indirectee);
1519     }
1520     for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
1521          c = (StgIndStatic *)c->static_link)
1522     {
1523         evac(&c->indirectee);
1524     }
1525 }
1526
1527 /* ----------------------------------------------------------------------------
1528    Update the pointers from the task list
1529
1530    These are treated as weak pointers because we want to allow a main
1531    thread to get a BlockedOnDeadMVar exception in the same way as any
1532    other thread.  Note that the threads should all have been retained
1533    by GC by virtue of being on the all_threads list, we're just
1534    updating pointers here.
1535    ------------------------------------------------------------------------- */
1536
1537 static void
1538 update_task_list (void)
1539 {
1540     Task *task;
1541     StgTSO *tso;
1542     for (task = all_tasks; task != NULL; task = task->all_link) {
1543         if (!task->stopped && task->tso) {
1544             ASSERT(task->tso->bound == task);
1545             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1546             if (tso == NULL) {
1547                 barf("task %p: main thread %d has been GC'd",
1548 #ifdef THREADED_RTS
1549                      (void *)task->id,
1550 #else
1551                      (void *)task,
1552 #endif
1553                      task->tso->id);
1554             }
1555             task->tso = tso;
1556         }
1557     }
1558 }
1559
1560 /* ----------------------------------------------------------------------------
1561    Reset the sizes of the older generations when we do a major
1562    collection.
1563
1564    CURRENT STRATEGY: make all generations except zero the same size.
1565    We have to stay within the maximum heap size, and leave a certain
1566    percentage of the maximum heap size available to allocate into.
1567    ------------------------------------------------------------------------- */
1568
1569 static void
1570 resize_generations (void)
1571 {
1572     nat g;
1573
1574     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1575         nat live, size, min_alloc;
1576         nat max  = RtsFlags.GcFlags.maxHeapSize;
1577         nat gens = RtsFlags.GcFlags.generations;
1578
1579         // live in the oldest generations
1580         live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+
1581             oldest_gen->steps[0].n_large_blocks;
1582
1583         // default max size for all generations except zero
1584         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1585                        RtsFlags.GcFlags.minOldGenSize);
1586
1587         // minimum size for generation zero
1588         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1589                             RtsFlags.GcFlags.minAllocAreaSize);
1590
1591         // Auto-enable compaction when the residency reaches a
1592         // certain percentage of the maximum heap size (default: 30%).
1593         if (RtsFlags.GcFlags.generations > 1 &&
1594             (RtsFlags.GcFlags.compact ||
1595              (max > 0 &&
1596               oldest_gen->steps[0].n_blocks >
1597               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1598             oldest_gen->steps[0].is_compacted = 1;
1599 //        debugBelch("compaction: on\n", live);
1600         } else {
1601             oldest_gen->steps[0].is_compacted = 0;
1602 //        debugBelch("compaction: off\n", live);
1603         }
1604
1605         // if we're going to go over the maximum heap size, reduce the
1606         // size of the generations accordingly.  The calculation is
1607         // different if compaction is turned on, because we don't need
1608         // to double the space required to collect the old generation.
1609         if (max != 0) {
1610
1611             // this test is necessary to ensure that the calculations
1612             // below don't have any negative results - we're working
1613             // with unsigned values here.
1614             if (max < min_alloc) {
1615                 heapOverflow();
1616             }
1617
1618             if (oldest_gen->steps[0].is_compacted) {
1619                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1620                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1621                 }
1622             } else {
1623                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1624                     size = (max - min_alloc) / ((gens - 1) * 2);
1625                 }
1626             }
1627
1628             if (size < live) {
1629                 heapOverflow();
1630             }
1631         }
1632
1633 #if 0
1634         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1635                    min_alloc, size, max);
1636 #endif
1637
1638         for (g = 0; g < gens; g++) {
1639             generations[g].max_blocks = size;
1640         }
1641     }
1642 }
1643
1644 /* -----------------------------------------------------------------------------
1645    Calculate the new size of the nursery, and resize it.
1646    -------------------------------------------------------------------------- */
1647
1648 static void
1649 resize_nursery (void)
1650 {
1651     if (RtsFlags.GcFlags.generations == 1)
1652     {   // Two-space collector:
1653         nat blocks;
1654
1655         /* set up a new nursery.  Allocate a nursery size based on a
1656          * function of the amount of live data (by default a factor of 2)
1657          * Use the blocks from the old nursery if possible, freeing up any
1658          * left over blocks.
1659          *
1660          * If we get near the maximum heap size, then adjust our nursery
1661          * size accordingly.  If the nursery is the same size as the live
1662          * data (L), then we need 3L bytes.  We can reduce the size of the
1663          * nursery to bring the required memory down near 2L bytes.
1664          *
1665          * A normal 2-space collector would need 4L bytes to give the same
1666          * performance we get from 3L bytes, reducing to the same
1667          * performance at 2L bytes.
1668          */
1669         blocks = g0s0->n_old_blocks;
1670
1671         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1672              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1673              RtsFlags.GcFlags.maxHeapSize )
1674         {
1675             long adjusted_blocks;  // signed on purpose
1676             int pc_free;
1677
1678             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1679
1680             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1681                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1682
1683             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1684             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1685             {
1686                 heapOverflow();
1687             }
1688             blocks = adjusted_blocks;
1689         }
1690         else
1691         {
1692             blocks *= RtsFlags.GcFlags.oldGenFactor;
1693             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1694             {
1695                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1696             }
1697         }
1698         resizeNurseries(blocks);
1699     }
1700     else  // Generational collector
1701     {
1702         /*
1703          * If the user has given us a suggested heap size, adjust our
1704          * allocation area to make best use of the memory available.
1705          */
1706         if (RtsFlags.GcFlags.heapSizeSuggestion)
1707         {
1708             long blocks;
1709             nat needed = calcNeeded();  // approx blocks needed at next GC
1710
1711             /* Guess how much will be live in generation 0 step 0 next time.
1712              * A good approximation is obtained by finding the
1713              * percentage of g0s0 that was live at the last minor GC.
1714              *
1715              * We have an accurate figure for the amount of copied data in
1716              * 'copied', but we must convert this to a number of blocks, with
1717              * a small adjustment for estimated slop at the end of a block
1718              * (- 10 words).
1719              */
1720             if (N == 0)
1721             {
1722                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1723                     / countNurseryBlocks();
1724             }
1725
1726             /* Estimate a size for the allocation area based on the
1727              * information available.  We might end up going slightly under
1728              * or over the suggested heap size, but we should be pretty
1729              * close on average.
1730              *
1731              * Formula:            suggested - needed
1732              *                ----------------------------
1733              *                    1 + g0s0_pcnt_kept/100
1734              *
1735              * where 'needed' is the amount of memory needed at the next
1736              * collection for collecting all steps except g0s0.
1737              */
1738             blocks =
1739                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1740                 (100 + (long)g0s0_pcnt_kept);
1741
1742             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1743                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1744             }
1745
1746             resizeNurseries((nat)blocks);
1747         }
1748         else
1749         {
1750             // we might have added extra large blocks to the nursery, so
1751             // resize back to minAllocAreaSize again.
1752             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1753         }
1754     }
1755 }
1756
1757 /* -----------------------------------------------------------------------------
1758    Sanity code for CAF garbage collection.
1759
1760    With DEBUG turned on, we manage a CAF list in addition to the SRT
1761    mechanism.  After GC, we run down the CAF list and blackhole any
1762    CAFs which have been garbage collected.  This means we get an error
1763    whenever the program tries to enter a garbage collected CAF.
1764
1765    Any garbage collected CAFs are taken off the CAF list at the same
1766    time.
1767    -------------------------------------------------------------------------- */
1768
1769 #if 0 && defined(DEBUG)
1770
1771 static void
1772 gcCAFs(void)
1773 {
1774   StgClosure*  p;
1775   StgClosure** pp;
1776   const StgInfoTable *info;
1777   nat i;
1778
1779   i = 0;
1780   p = caf_list;
1781   pp = &caf_list;
1782
1783   while (p != NULL) {
1784
1785     info = get_itbl(p);
1786
1787     ASSERT(info->type == IND_STATIC);
1788
1789     if (STATIC_LINK(info,p) == NULL) {
1790         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1791         // black hole it
1792         SET_INFO(p,&stg_BLACKHOLE_info);
1793         p = STATIC_LINK2(info,p);
1794         *pp = p;
1795     }
1796     else {
1797       pp = &STATIC_LINK2(info,p);
1798       p = *pp;
1799       i++;
1800     }
1801
1802   }
1803
1804   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1805 }
1806 #endif