rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2006
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 // #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsFlags.h"
  17 #include "RtsUtils.h"
  18 #include "Apply.h"
  19 #include "OSThreads.h"
  20 #include "LdvProfile.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "MBlock.h"
  27 #include "ProfHeap.h"
  28 #include "SchedAPI.h"
  29 #include "Weak.h"
  30 #include "Prelude.h"
  31 #include "ParTicky.h"           // ToDo: move into Rts.h
  32 #include "RtsSignals.h"
  33 #include "STM.h"
  34 #include "HsFFI.h"
  35 #include "Linker.h"
  36 #if defined(RTS_GTK_FRONTPANEL)
  37 #include "FrontPanel.h"
  38 #endif
  39 #include "Trace.h"
  40 #include "RetainerProfile.h"
  41 #include "RaiseAsync.h"
  42 #include "Sparks.h"
  43 #include "Papi.h"
  44
  45 #include "GC.h"
  46 #include "Compact.h"
  47 #include "Evac.h"
  48 #include "Scav.h"
  49 #include "GCUtils.h"
  50 #include "MarkWeak.h"
  51 #include "Sparks.h"
  52
  53 #include <string.h> // for memset()
  54 #include <unistd.h>
  55
  56 /* -----------------------------------------------------------------------------
  57    Global variables
  58    -------------------------------------------------------------------------- */
  59
  60 /* STATIC OBJECT LIST.
  61  *
  62  * During GC:
  63  * We maintain a linked list of static objects that are still live.
  64  * The requirements for this list are:
  65  *
  66  *  - we need to scan the list while adding to it, in order to
  67  *    scavenge all the static objects (in the same way that
  68  *    breadth-first scavenging works for dynamic objects).
  69  *
  70  *  - we need to be able to tell whether an object is already on
  71  *    the list, to break loops.
  72  *
  73  * Each static object has a "static link field", which we use for
  74  * linking objects on to the list.  We use a stack-type list, consing
  75  * objects on the front as they are added (this means that the
  76  * scavenge phase is depth-first, not breadth-first, but that
  77  * shouldn't matter).
  78  *
  79  * A separate list is kept for objects that have been scavenged
  80  * already - this is so that we can zero all the marks afterwards.
  81  *
  82  * An object is on the list if its static link field is non-zero; this
  83  * means that we have to mark the end of the list with '1', not NULL.
  84  *
  85  * Extra notes for generational GC:
  86  *
  87  * Each generation has a static object list associated with it.  When
  88  * collecting generations up to N, we treat the static object lists
  89  * from generations > N as roots.
  90  *
  91  * We build up a static object list while collecting generations 0..N,
  92  * which is then appended to the static object list of generation N+1.
  93  */
  94
  95 /* N is the oldest generation being collected, where the generations
  96  * are numbered starting at 0.  A major GC (indicated by the major_gc
  97  * flag) is when we're collecting all generations.  We only attempt to
  98  * deal with static objects and GC CAFs when doing a major GC.
  99  */
 100 nat N;
 101 rtsBool major_gc;
 102
 103 /* Data used for allocation area sizing.
 104  */
 105 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 106
 107 /* Mut-list stats */
 108 #ifdef DEBUG
 109 nat mutlist_MUTVARS,
 110     mutlist_MUTARRS,
 111     mutlist_MVARS,
 112     mutlist_OTHERS;
 113 #endif
 114
 115 /* Thread-local data for each GC thread
 116  */
 117 gc_thread **gc_threads = NULL;
 118 // gc_thread *gct = NULL;  // this thread's gct TODO: make thread-local
 119
 120 // Number of threads running in *this* GC.  Affects how many
 121 // step->todos[] lists we have to look in to find work.
 122 nat n_gc_threads;
 123
 124 // For stats:
 125 long copied;        // *words* copied & scavenged during this GC
 126
 127 #ifdef THREADED_RTS
 128 SpinLock recordMutableGen_sync;
 129 #endif
 130
 131 /* -----------------------------------------------------------------------------
 132    Static function declarations
 133    -------------------------------------------------------------------------- */
 134
 135 static void mark_root               (StgClosure **root);
 136 static void zero_static_object_list (StgClosure* first_static);
 137 static nat  initialise_N            (rtsBool force_major_gc);
 138 static void alloc_gc_threads        (void);
 139 static void init_collected_gen      (nat g, nat threads);
 140 static void init_uncollected_gen    (nat g, nat threads);
 141 static void init_gc_thread          (gc_thread *t);
 142 static void update_task_list        (void);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void gc_thread_work          (void);
 147 static nat  inc_running             (void);
 148 static nat  dec_running             (void);
 149 static void wakeup_gc_threads       (nat n_threads);
 150 static void shutdown_gc_threads     (nat n_threads);
 151
 152 #if 0 && defined(DEBUG)
 153 static void gcCAFs                  (void);
 154 #endif
 155
 156 /* -----------------------------------------------------------------------------
 157    The mark bitmap & stack.
 158    -------------------------------------------------------------------------- */
 159
 160 #define MARK_STACK_BLOCKS 4
 161
 162 bdescr *mark_stack_bdescr;
 163 StgPtr *mark_stack;
 164 StgPtr *mark_sp;
 165 StgPtr *mark_splim;
 166
 167 // Flag and pointers used for falling back to a linear scan when the
 168 // mark stack overflows.
 169 rtsBool mark_stack_overflowed;
 170 bdescr *oldgen_scan_bd;
 171 StgPtr  oldgen_scan;
 172
 173 /* -----------------------------------------------------------------------------
 174    GarbageCollect: the main entry point to the garbage collector.
 175
 176    Locks held: all capabilities are held throughout GarbageCollect().
 177    -------------------------------------------------------------------------- */
 178
 179 void
 180 GarbageCollect ( rtsBool force_major_gc )
 181 {
 182   bdescr *bd;
 183   step *stp;
 184   lnat live, allocated, max_copied, avg_copied;
 185   lnat oldgen_saved_blocks = 0;
 186   gc_thread *saved_gct;
 187   nat g, s, t, n;
 188
 189   // necessary if we stole a callee-saves register for gct:
 190   saved_gct = gct;
 191
 192 #ifdef PROFILING
 193   CostCentreStack *prev_CCS;
 194 #endif
 195
 196   ACQUIRE_SM_LOCK;
 197
 198 #if defined(RTS_USER_SIGNALS)
 199   if (RtsFlags.MiscFlags.install_signal_handlers) {
 200     // block signals
 201     blockUserSignals();
 202   }
 203 #endif
 204
 205   // tell the stats department that we've started a GC
 206   stat_startGC();
 207
 208   // tell the STM to discard any cached closures it's hoping to re-use
 209   stmPreGCHook();
 210
 211 #ifdef DEBUG
 212   mutlist_MUTVARS = 0;
 213   mutlist_MUTARRS = 0;
 214   mutlist_OTHERS = 0;
 215 #endif
 216
 217   // attribute any costs to CCS_GC
 218 #ifdef PROFILING
 219   prev_CCS = CCCS;
 220   CCCS = CCS_GC;
 221 #endif
 222
 223   /* Approximate how much we allocated.
 224    * Todo: only when generating stats?
 225    */
 226   allocated = calcAllocated();
 227
 228   /* Figure out which generation to collect
 229    */
 230   n = initialise_N(force_major_gc);
 231
 232   /* Allocate + initialise the gc_thread structures.
 233    */
 234   alloc_gc_threads();
 235
 236   /* Start threads, so they can be spinning up while we finish initialisation.
 237    */
 238   start_gc_threads();
 239
 240   /* How many threads will be participating in this GC?
 241    * We don't try to parallelise minor GC.
 242    */
 243 #if defined(THREADED_RTS)
 244   if (n < (4*1024*1024 / BLOCK_SIZE)) {
 245       n_gc_threads = 1;
 246   } else {
 247       n_gc_threads = RtsFlags.ParFlags.gcThreads;
 248   }
 249 #else
 250   n_gc_threads = 1;
 251 #endif
 252   trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %dKB to collect, using %d thread(s)",
 253         N, n * (BLOCK_SIZE / 1024), n_gc_threads);
 254
 255 #ifdef RTS_GTK_FRONTPANEL
 256   if (RtsFlags.GcFlags.frontpanel) {
 257       updateFrontPanelBeforeGC(N);
 258   }
 259 #endif
 260
 261 #ifdef DEBUG
 262   // check for memory leaks if DEBUG is on
 263   memInventory(traceClass(DEBUG_gc));
 264 #endif
 265
 266   // check stack sanity *before* GC (ToDo: check all threads)
 267   IF_DEBUG(sanity, checkFreeListSanity());
 268
 269   // Initialise all our gc_thread structures
 270   for (t = 0; t < n_gc_threads; t++) {
 271       init_gc_thread(gc_threads[t]);
 272   }
 273
 274   // Initialise all the generations/steps that we're collecting.
 275   for (g = 0; g <= N; g++) {
 276       init_collected_gen(g,n_gc_threads);
 277   }
 278
 279   // Initialise all the generations/steps that we're *not* collecting.
 280   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 281       init_uncollected_gen(g,n_gc_threads);
 282   }
 283
 284   /* Allocate a mark stack if we're doing a major collection.
 285    */
 286   if (major_gc) {
 287       mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
 288       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 289       mark_sp    = mark_stack;
 290       mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
 291   } else {
 292       mark_stack_bdescr = NULL;
 293   }
 294
 295   // this is the main thread
 296   gct = gc_threads[0];
 297
 298   /* -----------------------------------------------------------------------
 299    * follow all the roots that we know about:
 300    *   - mutable lists from each generation > N
 301    * we want to *scavenge* these roots, not evacuate them: they're not
 302    * going to move in this GC.
 303    * Also do them in reverse generation order, for the usual reason:
 304    * namely to reduce the likelihood of spurious old->new pointers.
 305    */
 306   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 307       generations[g].saved_mut_list = generations[g].mut_list;
 308       generations[g].mut_list = allocBlock();
 309       // mut_list always has at least one block.
 310   }
 311
 312   // the main thread is running: this prevents any other threads from
 313   // exiting prematurely, so we can start them now.
 314   // NB. do this after the mutable lists have been saved above, otherwise
 315   // the other GC threads will be writing into the old mutable lists.
 316   inc_running();
 317   wakeup_gc_threads(n_gc_threads);
 318
 319   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 320       scavenge_mutable_list(&generations[g]);
 321   }
 322
 323   // follow roots from the CAF list (used by GHCi)
 324   gct->evac_step = 0;
 325   markCAFs(mark_root);
 326
 327   // follow all the roots that the application knows about.
 328   gct->evac_step = 0;
 329   GetRoots(mark_root);
 330
 331 #if defined(RTS_USER_SIGNALS)
 332   // mark the signal handlers (signals should be already blocked)
 333   markSignalHandlers(mark_root);
 334 #endif
 335
 336   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 337   markWeakPtrList();
 338   initWeakForGC();
 339
 340   // Mark the stable pointer table.
 341   markStablePtrTable(mark_root);
 342
 343   /* -------------------------------------------------------------------------
 344    * Repeatedly scavenge all the areas we know about until there's no
 345    * more scavenging to be done.
 346    */
 347   for (;;)
 348   {
 349       gc_thread_work();
 350       // The other threads are now stopped.  We might recurse back to
 351       // here, but from now on this is the only thread.
 352
 353       // if any blackholes are alive, make the threads that wait on
 354       // them alive too.
 355       if (traverseBlackholeQueue()) {
 356           inc_running();
 357           continue;
 358       }
 359
 360       // must be last...  invariant is that everything is fully
 361       // scavenged at this point.
 362       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 363           inc_running();
 364           continue;
 365       }
 366
 367       // If we get to here, there's really nothing left to do.
 368       break;
 369   }
 370
 371   shutdown_gc_threads(n_gc_threads);
 372
 373   // Update pointers from the Task list
 374   update_task_list();
 375
 376   // Now see which stable names are still alive.
 377   gcStablePtrTable();
 378
 379 #ifdef PROFILING
 380   // We call processHeapClosureForDead() on every closure destroyed during
 381   // the current garbage collection, so we invoke LdvCensusForDead().
 382   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 383       || RtsFlags.ProfFlags.bioSelector != NULL)
 384     LdvCensusForDead(N);
 385 #endif
 386
 387   // NO MORE EVACUATION AFTER THIS POINT!
 388   // Finally: compaction of the oldest generation.
 389   if (major_gc && oldest_gen->steps[0].is_compacted) {
 390       // save number of blocks for stats
 391       oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks;
 392       compact();
 393   }
 394
 395   IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
 396
 397   // Two-space collector: free the old to-space.
 398   // g0s0->old_blocks is the old nursery
 399   // g0s0->blocks is to-space from the previous GC
 400   if (RtsFlags.GcFlags.generations == 1) {
 401       if (g0s0->blocks != NULL) {
 402           freeChain(g0s0->blocks);
 403           g0s0->blocks = NULL;
 404       }
 405   }
 406
 407   // For each workspace, in each thread:
 408   //    * clear the BF_EVACUATED flag from each copied block
 409   //    * move the copied blocks to the step
 410   {
 411       gc_thread *thr;
 412       step_workspace *ws;
 413       bdescr *prev;
 414
 415       for (t = 0; t < n_gc_threads; t++) {
 416           thr = gc_threads[t];
 417
 418           // not step 0
 419           for (s = 1; s < total_steps; s++) {
 420               ws = &thr->steps[s];
 421               // Not true?
 422               // ASSERT( ws->scan_bd == ws->todo_bd );
 423               ASSERT( ws->scan_bd ? ws->scan_bd->u.scan == ws->scan_bd->free : 1 );
 424
 425               // Push the final block
 426               if (ws->scan_bd) { push_scanned_block(ws->scan_bd, ws); }
 427
 428               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 429
 430               prev = ws->part_list;
 431               for (bd = ws->part_list; bd != NULL; bd = bd->link) {
 432                   bd->flags &= ~BF_EVACUATED;    // now from-space
 433                   ws->step->n_words += bd->free - bd->start;
 434                   prev = bd;
 435               }
 436               if (prev != NULL) {
 437                   prev->link = ws->scavd_list;
 438               }
 439               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 440                   bd->flags &= ~BF_EVACUATED;    // now from-space
 441                   ws->step->n_words += bd->free - bd->start;
 442                   prev = bd;
 443               }
 444               prev->link = ws->step->blocks;
 445               if (ws->part_list != NULL) {
 446                   ws->step->blocks = ws->part_list;
 447               } else {
 448                   ws->step->blocks = ws->scavd_list;
 449               }
 450               ws->step->n_blocks += ws->n_part_blocks;
 451               ws->step->n_blocks += ws->n_scavd_blocks;
 452               ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
 453               ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
 454           }
 455       }
 456   }
 457
 458   // Two-space collector: swap the semi-spaces around.
 459   // Currently: g0s0->old_blocks is the old nursery
 460   //            g0s0->blocks is to-space from this GC
 461   // We want these the other way around.
 462   if (RtsFlags.GcFlags.generations == 1) {
 463       bdescr *nursery_blocks = g0s0->old_blocks;
 464       nat n_nursery_blocks = g0s0->n_old_blocks;
 465       g0s0->old_blocks = g0s0->blocks;
 466       g0s0->n_old_blocks = g0s0->n_blocks;
 467       g0s0->blocks = nursery_blocks;
 468       g0s0->n_blocks = n_nursery_blocks;
 469   }
 470
 471   /* run through all the generations/steps and tidy up
 472    */
 473   copied = 0;
 474   max_copied = 0;
 475   avg_copied = 0;
 476   {
 477       nat i;
 478       for (i=0; i < n_gc_threads; i++) {
 479           if (n_gc_threads > 1) {
 480               trace(TRACE_gc,"thread %d:", i);
 481               trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 482               trace(TRACE_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 483               trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
 484               trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
 485               trace(TRACE_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 486           }
 487           copied += gc_threads[i]->copied;
 488           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 489       }
 490       if (n_gc_threads == 1) {
 491           max_copied = 0;
 492           avg_copied = 0;
 493       } else {
 494           avg_copied = copied;
 495       }
 496   }
 497
 498   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 499
 500     if (g == N) {
 501       generations[g].collections++; // for stats
 502       if (n_gc_threads > 1) generations[g].par_collections++;
 503     }
 504
 505     // Count the mutable list as bytes "copied" for the purposes of
 506     // stats.  Every mutable list is copied during every GC.
 507     if (g > 0) {
 508         nat mut_list_size = 0;
 509         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 510             mut_list_size += bd->free - bd->start;
 511         }
 512         copied +=  mut_list_size;
 513
 514         debugTrace(DEBUG_gc,
 515                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 516                    (unsigned long)(mut_list_size * sizeof(W_)),
 517                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 518     }
 519
 520     for (s = 0; s < generations[g].n_steps; s++) {
 521       bdescr *next;
 522       stp = &generations[g].steps[s];
 523
 524       // for generations we collected...
 525       if (g <= N) {
 526
 527         /* free old memory and shift to-space into from-space for all
 528          * the collected steps (except the allocation area).  These
 529          * freed blocks will probaby be quickly recycled.
 530          */
 531         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 532             if (stp->is_compacted)
 533             {
 534                 // for a compacted step, just shift the new to-space
 535                 // onto the front of the now-compacted existing blocks.
 536                 for (bd = stp->blocks; bd != NULL; bd = bd->link) {
 537                     bd->flags &= ~BF_EVACUATED;  // now from-space
 538                     stp->n_words += bd->free - bd->start;
 539                 }
 540                 // tack the new blocks on the end of the existing blocks
 541                 if (stp->old_blocks != NULL) {
 542                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 543                         // NB. this step might not be compacted next
 544                         // time, so reset the BF_COMPACTED flags.
 545                         // They are set before GC if we're going to
 546                         // compact.  (search for BF_COMPACTED above).
 547                         bd->flags &= ~BF_COMPACTED;
 548                         next = bd->link;
 549                         if (next == NULL) {
 550                             bd->link = stp->blocks;
 551                         }
 552                     }
 553                     stp->blocks = stp->old_blocks;
 554                 }
 555                 // add the new blocks to the block tally
 556                 stp->n_blocks += stp->n_old_blocks;
 557                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 558                 ASSERT(countOccupied(stp->blocks) == stp->n_words);
 559             }
 560             else // not copacted
 561             {
 562                 freeChain(stp->old_blocks);
 563             }
 564             stp->old_blocks = NULL;
 565             stp->n_old_blocks = 0;
 566         }
 567
 568         /* LARGE OBJECTS.  The current live large objects are chained on
 569          * scavenged_large, having been moved during garbage
 570          * collection from large_objects.  Any objects left on
 571          * large_objects list are therefore dead, so we free them here.
 572          */
 573         for (bd = stp->large_objects; bd != NULL; bd = next) {
 574           next = bd->link;
 575           freeGroup(bd);
 576           bd = next;
 577         }
 578
 579         // update the count of blocks used by large objects
 580         for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) {
 581           bd->flags &= ~BF_EVACUATED;
 582         }
 583         stp->large_objects  = stp->scavenged_large_objects;
 584         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 585
 586       }
 587       else // for older generations...
 588       {
 589         /* For older generations, we need to append the
 590          * scavenged_large_object list (i.e. large objects that have been
 591          * promoted during this GC) to the large_object list for that step.
 592          */
 593         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 594           next = bd->link;
 595           bd->flags &= ~BF_EVACUATED;
 596           dbl_link_onto(bd, &stp->large_objects);
 597         }
 598
 599         // add the new blocks we promoted during this GC
 600         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 601       }
 602     }
 603   }
 604
 605   // update the max size of older generations after a major GC
 606   resize_generations();
 607
 608   // Calculate the amount of live data for stats.
 609   live = calcLiveWords();
 610
 611   // Free the small objects allocated via allocate(), since this will
 612   // all have been copied into G0S1 now.
 613   if (RtsFlags.GcFlags.generations > 1) {
 614       if (g0s0->blocks != NULL) {
 615           freeChain(g0s0->blocks);
 616           g0s0->blocks = NULL;
 617       }
 618       g0s0->n_blocks = 0;
 619       g0s0->n_words = 0;
 620   }
 621   alloc_blocks = 0;
 622   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 623
 624   // Start a new pinned_object_block
 625   pinned_object_block = NULL;
 626
 627   // Free the mark stack.
 628   if (mark_stack_bdescr != NULL) {
 629       freeGroup(mark_stack_bdescr);
 630   }
 631
 632   // Free any bitmaps.
 633   for (g = 0; g <= N; g++) {
 634       for (s = 0; s < generations[g].n_steps; s++) {
 635           stp = &generations[g].steps[s];
 636           if (stp->bitmap != NULL) {
 637               freeGroup(stp->bitmap);
 638               stp->bitmap = NULL;
 639           }
 640       }
 641   }
 642
 643   resize_nursery();
 644
 645  // mark the garbage collected CAFs as dead
 646 #if 0 && defined(DEBUG) // doesn't work at the moment
 647   if (major_gc) { gcCAFs(); }
 648 #endif
 649
 650 #ifdef PROFILING
 651   // resetStaticObjectForRetainerProfiling() must be called before
 652   // zeroing below.
 653   if (n_gc_threads > 1) {
 654       barf("profiling is currently broken with multi-threaded GC");
 655       // ToDo: fix the gct->scavenged_static_objects below
 656   }
 657   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 658 #endif
 659
 660   // zero the scavenged static object list
 661   if (major_gc) {
 662       nat i;
 663       for (i = 0; i < n_gc_threads; i++) {
 664           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 665       }
 666   }
 667
 668   // Reset the nursery
 669   resetNurseries();
 670
 671   // start any pending finalizers
 672   RELEASE_SM_LOCK;
 673   scheduleFinalizers(last_free_capability, old_weak_ptr_list);
 674   ACQUIRE_SM_LOCK;
 675
 676   // send exceptions to any threads which were about to die
 677   RELEASE_SM_LOCK;
 678   resurrectThreads(resurrected_threads);
 679   ACQUIRE_SM_LOCK;
 680
 681   // Update the stable pointer hash table.
 682   updateStablePtrTable(major_gc);
 683
 684   // check sanity after GC
 685   IF_DEBUG(sanity, checkSanity());
 686
 687   // extra GC trace info
 688   if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
 689
 690 #ifdef DEBUG
 691   // symbol-table based profiling
 692   /*  heapCensus(to_blocks); */ /* ToDo */
 693 #endif
 694
 695   // restore enclosing cost centre
 696 #ifdef PROFILING
 697   CCCS = prev_CCS;
 698 #endif
 699
 700 #ifdef DEBUG
 701   // check for memory leaks if DEBUG is on
 702   memInventory(traceClass(DEBUG_gc));
 703 #endif
 704
 705 #ifdef RTS_GTK_FRONTPANEL
 706   if (RtsFlags.GcFlags.frontpanel) {
 707       updateFrontPanelAfterGC( N, live );
 708   }
 709 #endif
 710
 711   // ok, GC over: tell the stats department what happened.
 712   stat_endGC(allocated, live, copied, N, max_copied, avg_copied);
 713
 714 #if defined(RTS_USER_SIGNALS)
 715   if (RtsFlags.MiscFlags.install_signal_handlers) {
 716     // unblock signals again
 717     unblockUserSignals();
 718   }
 719 #endif
 720
 721   RELEASE_SM_LOCK;
 722
 723   gct = saved_gct;
 724 }
 725
 726 /* -----------------------------------------------------------------------------
 727  * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an
 728  * implicit slide i.e. after marking all sparks are at the beginning of the
 729  * spark pool and the spark pool only contains sparkable closures
 730  * -------------------------------------------------------------------------- */
 731
 732 #ifdef THREADED_RTS
 733 static void
 734 markSparkQueue (evac_fn evac, Capability *cap)
 735 {
 736     StgClosure **sparkp, **to_sparkp;
 737     nat n, pruned_sparks; // stats only
 738     StgSparkPool *pool;
 739
 740     PAR_TICKY_MARK_SPARK_QUEUE_START();
 741
 742     n = 0;
 743     pruned_sparks = 0;
 744
 745     pool = &(cap->r.rSparks);
 746
 747     ASSERT_SPARK_POOL_INVARIANTS(pool);
 748
 749 #if defined(PARALLEL_HASKELL)
 750     // stats only
 751     n = 0;
 752     pruned_sparks = 0;
 753 #endif
 754
 755     sparkp = pool->hd;
 756     to_sparkp = pool->hd;
 757     while (sparkp != pool->tl) {
 758         ASSERT(*sparkp!=NULL);
 759         ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp)));
 760         // ToDo?: statistics gathering here (also for GUM!)
 761         if (closure_SHOULD_SPARK(*sparkp)) {
 762             evac(sparkp);
 763             *to_sparkp++ = *sparkp;
 764             if (to_sparkp == pool->lim) {
 765                 to_sparkp = pool->base;
 766             }
 767             n++;
 768         } else {
 769             pruned_sparks++;
 770         }
 771         sparkp++;
 772         if (sparkp == pool->lim) {
 773             sparkp = pool->base;
 774         }
 775     }
 776     pool->tl = to_sparkp;
 777
 778     PAR_TICKY_MARK_SPARK_QUEUE_END(n);
 779
 780 #if defined(PARALLEL_HASKELL)
 781     debugTrace(DEBUG_sched,
 782                "marked %d sparks and pruned %d sparks on [%x]",
 783                n, pruned_sparks, mytid);
 784 #else
 785     debugTrace(DEBUG_sched,
 786                "marked %d sparks and pruned %d sparks",
 787                n, pruned_sparks);
 788 #endif
 789
 790     debugTrace(DEBUG_sched,
 791                "new spark queue len=%d; (hd=%p; tl=%p)\n",
 792                sparkPoolSize(pool), pool->hd, pool->tl);
 793 }
 794 #endif
 795
 796 /* ---------------------------------------------------------------------------
 797    Where are the roots that we know about?
 798
 799         - all the threads on the runnable queue
 800         - all the threads on the blocked queue
 801         - all the threads on the sleeping queue
 802         - all the thread currently executing a _ccall_GC
 803         - all the "main threads"
 804
 805    ------------------------------------------------------------------------ */
 806
 807 void
 808 GetRoots( evac_fn evac )
 809 {
 810     nat i;
 811     Capability *cap;
 812     Task *task;
 813
 814     // Each GC thread is responsible for following roots from the
 815     // Capability of the same number.  There will usually be the same
 816     // or fewer Capabilities as GC threads, but just in case there
 817     // are more, we mark every Capability whose number is the GC
 818     // thread's index plus a multiple of the number of GC threads.
 819     for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) {
 820         cap = &capabilities[i];
 821         evac((StgClosure **)(void *)&cap->run_queue_hd);
 822         evac((StgClosure **)(void *)&cap->run_queue_tl);
 823 #if defined(THREADED_RTS)
 824         evac((StgClosure **)(void *)&cap->wakeup_queue_hd);
 825         evac((StgClosure **)(void *)&cap->wakeup_queue_tl);
 826 #endif
 827         for (task = cap->suspended_ccalling_tasks; task != NULL;
 828              task=task->next) {
 829             debugTrace(DEBUG_sched,
 830                        "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id);
 831             evac((StgClosure **)(void *)&task->suspended_tso);
 832         }
 833
 834 #if defined(THREADED_RTS)
 835         markSparkQueue(evac,cap);
 836 #endif
 837     }
 838
 839 #if !defined(THREADED_RTS)
 840     evac((StgClosure **)(void *)&blocked_queue_hd);
 841     evac((StgClosure **)(void *)&blocked_queue_tl);
 842     evac((StgClosure **)(void *)&sleeping_queue);
 843 #endif
 844 }
 845
 846 /* -----------------------------------------------------------------------------
 847    isAlive determines whether the given closure is still alive (after
 848    a garbage collection) or not.  It returns the new address of the
 849    closure if it is alive, or NULL otherwise.
 850
 851    NOTE: Use it before compaction only!
 852          It untags and (if needed) retags pointers to closures.
 853    -------------------------------------------------------------------------- */
 854
 855
 856 StgClosure *
 857 isAlive(StgClosure *p)
 858 {
 859   const StgInfoTable *info;
 860   bdescr *bd;
 861   StgWord tag;
 862   StgClosure *q;
 863
 864   while (1) {
 865     /* The tag and the pointer are split, to be merged later when needed. */
 866     tag = GET_CLOSURE_TAG(p);
 867     q = UNTAG_CLOSURE(p);
 868
 869     ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
 870     info = get_itbl(q);
 871
 872     // ignore static closures
 873     //
 874     // ToDo: for static closures, check the static link field.
 875     // Problem here is that we sometimes don't set the link field, eg.
 876     // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.
 877     //
 878     if (!HEAP_ALLOCED(q)) {
 879         return p;
 880     }
 881
 882     // ignore closures in generations that we're not collecting.
 883     bd = Bdescr((P_)q);
 884     if (bd->gen_no > N) {
 885         return p;
 886     }
 887
 888     // if it's a pointer into to-space, then we're done
 889     if (bd->flags & BF_EVACUATED) {
 890         return p;
 891     }
 892
 893     // large objects use the evacuated flag
 894     if (bd->flags & BF_LARGE) {
 895         return NULL;
 896     }
 897
 898     // check the mark bit for compacted steps
 899     if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) {
 900         return p;
 901     }
 902
 903     switch (info->type) {
 904
 905     case IND:
 906     case IND_STATIC:
 907     case IND_PERM:
 908     case IND_OLDGEN:            // rely on compatible layout with StgInd
 909     case IND_OLDGEN_PERM:
 910       // follow indirections
 911       p = ((StgInd *)q)->indirectee;
 912       continue;
 913
 914     case EVACUATED:
 915       // alive!
 916       return ((StgEvacuated *)q)->evacuee;
 917
 918     case TSO:
 919       if (((StgTSO *)q)->what_next == ThreadRelocated) {
 920         p = (StgClosure *)((StgTSO *)q)->link;
 921         continue;
 922       }
 923       return NULL;
 924
 925     default:
 926       // dead.
 927       return NULL;
 928     }
 929   }
 930 }
 931
 932 /* -----------------------------------------------------------------------------
 933    Figure out which generation to collect, initialise N and major_gc.
 934
 935    Also returns the total number of blocks in generations that will be
 936    collected.
 937    -------------------------------------------------------------------------- */
 938
 939 static nat
 940 initialise_N (rtsBool force_major_gc)
 941 {
 942     int g;
 943     nat s, blocks, blocks_total;
 944
 945     blocks = 0;
 946     blocks_total = 0;
 947
 948     if (force_major_gc) {
 949         N = RtsFlags.GcFlags.generations - 1;
 950     } else {
 951         N = 0;
 952     }
 953
 954     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 955         blocks = 0;
 956         for (s = 0; s < generations[g].n_steps; s++) {
 957             blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
 958             blocks += generations[g].steps[s].n_large_blocks;
 959         }
 960         if (blocks >= generations[g].max_blocks) {
 961             N = stg_max(N,g);
 962         }
 963         if ((nat)g <= N) {
 964             blocks_total += blocks;
 965         }
 966     }
 967
 968     blocks_total += countNurseryBlocks();
 969
 970     major_gc = (N == RtsFlags.GcFlags.generations-1);
 971     return blocks_total;
 972 }
 973
 974 /* -----------------------------------------------------------------------------
 975    Initialise the gc_thread structures.
 976    -------------------------------------------------------------------------- */
 977
 978 static gc_thread *
 979 alloc_gc_thread (int n)
 980 {
 981     nat s;
 982     step_workspace *ws;
 983     gc_thread *t;
 984
 985     t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 986                        "alloc_gc_thread");
 987
 988 #ifdef THREADED_RTS
 989     t->id = 0;
 990     initCondition(&t->wake_cond);
 991     initMutex(&t->wake_mutex);
 992     t->wakeup = rtsTrue;  // starts true, so we can wait for the
 993                           // thread to start up, see wakeup_gc_threads
 994     t->exit   = rtsFalse;
 995 #endif
 996
 997     t->thread_index = n;
 998     t->free_blocks = NULL;
 999     t->gc_count = 0;
1000
1001     init_gc_thread(t);
1002
1003 #ifdef USE_PAPI
1004     t->papi_events = -1;
1005 #endif
1006
1007     for (s = 0; s < total_steps; s++)
1008     {
1009         ws = &t->steps[s];
1010         ws->step = &all_steps[s];
1011         ASSERT(s == ws->step->abs_no);
1012         ws->gct = t;
1013
1014         ws->scan_bd = NULL;
1015
1016         ws->todo_bd = NULL;
1017         ws->buffer_todo_bd = NULL;
1018
1019         ws->part_list = NULL;
1020         ws->n_part_blocks = 0;
1021
1022         ws->scavd_list = NULL;
1023         ws->n_scavd_blocks = 0;
1024     }
1025
1026     return t;
1027 }
1028
1029
1030 static void
1031 alloc_gc_threads (void)
1032 {
1033     if (gc_threads == NULL) {
1034 #if defined(THREADED_RTS)
1035         nat i;
1036         gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
1037                                      sizeof(gc_thread*),
1038                                      "alloc_gc_threads");
1039
1040         for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
1041             gc_threads[i] = alloc_gc_thread(i);
1042         }
1043 #else
1044         gc_threads = stgMallocBytes (sizeof(gc_thread*),
1045                                      "alloc_gc_threads");
1046
1047         gc_threads[0] = alloc_gc_thread(0);
1048 #endif
1049     }
1050 }
1051
1052 /* ----------------------------------------------------------------------------
1053    Start GC threads
1054    ------------------------------------------------------------------------- */
1055
1056 static nat gc_running_threads;
1057
1058 #if defined(THREADED_RTS)
1059 static Mutex gc_running_mutex;
1060 #endif
1061
1062 static nat
1063 inc_running (void)
1064 {
1065     nat n_running;
1066     ACQUIRE_LOCK(&gc_running_mutex);
1067     n_running = ++gc_running_threads;
1068     RELEASE_LOCK(&gc_running_mutex);
1069     ASSERT(n_running <= n_gc_threads);
1070     return n_running;
1071 }
1072
1073 static nat
1074 dec_running (void)
1075 {
1076     nat n_running;
1077     ACQUIRE_LOCK(&gc_running_mutex);
1078     ASSERT(n_gc_threads != 0);
1079     n_running = --gc_running_threads;
1080     RELEASE_LOCK(&gc_running_mutex);
1081     return n_running;
1082 }
1083
1084 //
1085 // gc_thread_work(): Scavenge until there's no work left to do and all
1086 // the running threads are idle.
1087 //
1088 static void
1089 gc_thread_work (void)
1090 {
1091     nat r;
1092
1093     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
1094
1095     // gc_running_threads has already been incremented for us; either
1096     // this is the main thread and we incremented it inside
1097     // GarbageCollect(), or this is a worker thread and the main
1098     // thread bumped gc_running_threads before waking us up.
1099
1100     // Every thread evacuates some roots.
1101     gct->evac_step = 0;
1102     GetRoots(mark_root);
1103
1104 loop:
1105     scavenge_loop();
1106     // scavenge_loop() only exits when there's no work to do
1107     r = dec_running();
1108
1109     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
1110                gct->thread_index, r);
1111
1112     while (gc_running_threads != 0) {
1113         usleep(1);
1114         if (any_work()) {
1115             inc_running();
1116             goto loop;
1117         }
1118         // any_work() does not remove the work from the queue, it
1119         // just checks for the presence of work.  If we find any,
1120         // then we increment gc_running_threads and go back to
1121         // scavenge_loop() to perform any pending work.
1122     }
1123
1124     // All threads are now stopped
1125     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
1126 }
1127
1128
1129 #if defined(THREADED_RTS)
1130 static void
1131 gc_thread_mainloop (void)
1132 {
1133     while (!gct->exit) {
1134
1135         // Wait until we're told to wake up
1136         ACQUIRE_LOCK(&gct->wake_mutex);
1137         gct->wakeup = rtsFalse;
1138         while (!gct->wakeup) {
1139             debugTrace(DEBUG_gc, "GC thread %d standing by...",
1140                        gct->thread_index);
1141             waitCondition(&gct->wake_cond, &gct->wake_mutex);
1142         }
1143         RELEASE_LOCK(&gct->wake_mutex);
1144         if (gct->exit) break;
1145
1146 #ifdef USE_PAPI
1147         // start performance counters in this thread...
1148         if (gct->papi_events == -1) {
1149             papi_init_eventset(&gct->papi_events);
1150         }
1151         papi_thread_start_gc1_count(gct->papi_events);
1152 #endif
1153
1154         gc_thread_work();
1155
1156 #ifdef USE_PAPI
1157         // count events in this thread towards the GC totals
1158         papi_thread_stop_gc1_count(gct->papi_events);
1159 #endif
1160     }
1161 }
1162 #endif
1163
1164 #if defined(THREADED_RTS)
1165 static void
1166 gc_thread_entry (gc_thread *my_gct)
1167 {
1168     gct = my_gct;
1169     debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
1170     gct->id = osThreadId();
1171     gc_thread_mainloop();
1172 }
1173 #endif
1174
1175 static void
1176 start_gc_threads (void)
1177 {
1178 #if defined(THREADED_RTS)
1179     nat i;
1180     OSThreadId id;
1181     static rtsBool done = rtsFalse;
1182
1183     gc_running_threads = 0;
1184     initMutex(&gc_running_mutex);
1185
1186     if (!done) {
1187         // Start from 1: the main thread is 0
1188         for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
1189             createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
1190                            gc_threads[i]);
1191         }
1192         done = rtsTrue;
1193     }
1194 #endif
1195 }
1196
1197 static void
1198 wakeup_gc_threads (nat n_threads USED_IF_THREADS)
1199 {
1200 #if defined(THREADED_RTS)
1201     nat i;
1202     for (i=1; i < n_threads; i++) {
1203         inc_running();
1204         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1205         do {
1206             ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1207             if (gc_threads[i]->wakeup) {
1208                 RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1209                 continue;
1210             } else {
1211                 break;
1212             }
1213         } while (1);
1214         gc_threads[i]->wakeup = rtsTrue;
1215         signalCondition(&gc_threads[i]->wake_cond);
1216         RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1217     }
1218 #endif
1219 }
1220
1221 // After GC is complete, we must wait for all GC threads to enter the
1222 // standby state, otherwise they may still be executing inside
1223 // any_work(), and may even remain awake until the next GC starts.
1224 static void
1225 shutdown_gc_threads (nat n_threads USED_IF_THREADS)
1226 {
1227 #if defined(THREADED_RTS)
1228     nat i;
1229     rtsBool wakeup;
1230     for (i=1; i < n_threads; i++) {
1231         do {
1232             ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1233             wakeup = gc_threads[i]->wakeup;
1234             // wakeup is false while the thread is waiting
1235             RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1236         } while (wakeup);
1237     }
1238 #endif
1239 }
1240
1241 /* ----------------------------------------------------------------------------
1242    Initialise a generation that is to be collected
1243    ------------------------------------------------------------------------- */
1244
1245 static void
1246 init_collected_gen (nat g, nat n_threads)
1247 {
1248     nat s, t, i;
1249     step_workspace *ws;
1250     step *stp;
1251     bdescr *bd;
1252
1253     // Throw away the current mutable list.  Invariant: the mutable
1254     // list always has at least one block; this means we can avoid a
1255     // check for NULL in recordMutable().
1256     if (g != 0) {
1257         freeChain(generations[g].mut_list);
1258         generations[g].mut_list = allocBlock();
1259         for (i = 0; i < n_capabilities; i++) {
1260             freeChain(capabilities[i].mut_lists[g]);
1261             capabilities[i].mut_lists[g] = allocBlock();
1262         }
1263     }
1264
1265     for (s = 0; s < generations[g].n_steps; s++) {
1266
1267         // generation 0, step 0 doesn't need to-space
1268         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1269             continue;
1270         }
1271
1272         stp = &generations[g].steps[s];
1273         ASSERT(stp->gen_no == g);
1274
1275         // deprecate the existing blocks
1276         stp->old_blocks   = stp->blocks;
1277         stp->n_old_blocks = stp->n_blocks;
1278         stp->blocks       = NULL;
1279         stp->n_blocks     = 0;
1280         stp->n_words      = 0;
1281
1282         // we don't have any to-be-scavenged blocks yet
1283         stp->todos = NULL;
1284         stp->todos_last = NULL;
1285         stp->n_todos = 0;
1286
1287         // initialise the large object queues.
1288         stp->scavenged_large_objects = NULL;
1289         stp->n_scavenged_large_blocks = 0;
1290
1291         // mark the large objects as not evacuated yet
1292         for (bd = stp->large_objects; bd; bd = bd->link) {
1293             bd->flags &= ~BF_EVACUATED;
1294         }
1295
1296         // for a compacted step, we need to allocate the bitmap
1297         if (stp->is_compacted) {
1298             nat bitmap_size; // in bytes
1299             bdescr *bitmap_bdescr;
1300             StgWord *bitmap;
1301
1302             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1303
1304             if (bitmap_size > 0) {
1305                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1306                                            / BLOCK_SIZE);
1307                 stp->bitmap = bitmap_bdescr;
1308                 bitmap = bitmap_bdescr->start;
1309
1310                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1311                            bitmap_size, bitmap);
1312
1313                 // don't forget to fill it with zeros!
1314                 memset(bitmap, 0, bitmap_size);
1315
1316                 // For each block in this step, point to its bitmap from the
1317                 // block descriptor.
1318                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1319                     bd->u.bitmap = bitmap;
1320                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1321
1322                     // Also at this point we set the BF_COMPACTED flag
1323                     // for this block.  The invariant is that
1324                     // BF_COMPACTED is always unset, except during GC
1325                     // when it is set on those blocks which will be
1326                     // compacted.
1327                     bd->flags |= BF_COMPACTED;
1328                 }
1329             }
1330         }
1331     }
1332
1333     // For each GC thread, for each step, allocate a "todo" block to
1334     // store evacuated objects to be scavenged, and a block to store
1335     // evacuated objects that do not need to be scavenged.
1336     for (t = 0; t < n_threads; t++) {
1337         for (s = 0; s < generations[g].n_steps; s++) {
1338
1339             // we don't copy objects into g0s0, unless -G0
1340             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1341
1342             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1343
1344             ws->scan_bd = NULL;
1345
1346             ws->todo_large_objects = NULL;
1347
1348             ws->part_list = NULL;
1349             ws->n_part_blocks = 0;
1350
1351             // allocate the first to-space block; extra blocks will be
1352             // chained on as necessary.
1353             ws->todo_bd = NULL;
1354             ws->buffer_todo_bd = NULL;
1355             alloc_todo_block(ws,0);
1356
1357             ws->scavd_list = NULL;
1358             ws->n_scavd_blocks = 0;
1359         }
1360     }
1361 }
1362
1363
1364 /* ----------------------------------------------------------------------------
1365    Initialise a generation that is *not* to be collected
1366    ------------------------------------------------------------------------- */
1367
1368 static void
1369 init_uncollected_gen (nat g, nat threads)
1370 {
1371     nat s, t, i;
1372     step_workspace *ws;
1373     step *stp;
1374     bdescr *bd;
1375
1376     for (s = 0; s < generations[g].n_steps; s++) {
1377         stp = &generations[g].steps[s];
1378         stp->scavenged_large_objects = NULL;
1379         stp->n_scavenged_large_blocks = 0;
1380     }
1381
1382     for (t = 0; t < threads; t++) {
1383         for (s = 0; s < generations[g].n_steps; s++) {
1384
1385             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1386             stp = ws->step;
1387
1388             ws->buffer_todo_bd = NULL;
1389             ws->todo_large_objects = NULL;
1390
1391             ws->part_list = NULL;
1392             ws->n_part_blocks = 0;
1393
1394             ws->scavd_list = NULL;
1395             ws->n_scavd_blocks = 0;
1396
1397             // If the block at the head of the list in this generation
1398             // is less than 3/4 full, then use it as a todo block.
1399             if (stp->blocks && isPartiallyFull(stp->blocks))
1400             {
1401                 ws->todo_bd = stp->blocks;
1402                 ws->todo_free = ws->todo_bd->free;
1403                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1404                 stp->blocks = stp->blocks->link;
1405                 stp->n_blocks -= 1;
1406                 stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1407                 ws->todo_bd->link = NULL;
1408
1409                 // this block is also the scan block; we must scan
1410                 // from the current end point.
1411                 ws->scan_bd = ws->todo_bd;
1412                 ws->scan_bd->u.scan = ws->scan_bd->free;
1413             }
1414             else
1415             {
1416                 ws->scan_bd = NULL;
1417                 ws->todo_bd = NULL;
1418                 alloc_todo_block(ws,0);
1419             }
1420         }
1421     }
1422
1423     // Move the private mutable lists from each capability onto the
1424     // main mutable list for the generation.
1425     for (i = 0; i < n_capabilities; i++) {
1426         for (bd = capabilities[i].mut_lists[g];
1427              bd->link != NULL; bd = bd->link) {
1428             /* nothing */
1429         }
1430         bd->link = generations[g].mut_list;
1431         generations[g].mut_list = capabilities[i].mut_lists[g];
1432         capabilities[i].mut_lists[g] = allocBlock();
1433     }
1434 }
1435
1436 /* -----------------------------------------------------------------------------
1437    Initialise a gc_thread before GC
1438    -------------------------------------------------------------------------- */
1439
1440 static void
1441 init_gc_thread (gc_thread *t)
1442 {
1443     t->static_objects = END_OF_STATIC_LIST;
1444     t->scavenged_static_objects = END_OF_STATIC_LIST;
1445     t->evac_step = 0;
1446     t->failed_to_evac = rtsFalse;
1447     t->eager_promotion = rtsTrue;
1448     t->thunk_selector_depth = 0;
1449     t->copied = 0;
1450     t->scanned = 0;
1451     t->any_work = 0;
1452     t->no_work = 0;
1453     t->scav_find_work = 0;
1454 }
1455
1456 /* -----------------------------------------------------------------------------
1457    Function we pass to GetRoots to evacuate roots.
1458    -------------------------------------------------------------------------- */
1459
1460 static void
1461 mark_root(StgClosure **root)
1462 {
1463   evacuate(root);
1464 }
1465
1466 /* -----------------------------------------------------------------------------
1467    Initialising the static object & mutable lists
1468    -------------------------------------------------------------------------- */
1469
1470 static void
1471 zero_static_object_list(StgClosure* first_static)
1472 {
1473   StgClosure* p;
1474   StgClosure* link;
1475   const StgInfoTable *info;
1476
1477   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1478     info = get_itbl(p);
1479     link = *STATIC_LINK(info, p);
1480     *STATIC_LINK(info,p) = NULL;
1481   }
1482 }
1483
1484 /* -----------------------------------------------------------------------------
1485    Reverting CAFs
1486    -------------------------------------------------------------------------- */
1487
1488 void
1489 revertCAFs( void )
1490 {
1491     StgIndStatic *c;
1492
1493     for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
1494          c = (StgIndStatic *)c->static_link)
1495     {
1496         SET_INFO(c, c->saved_info);
1497         c->saved_info = NULL;
1498         // could, but not necessary: c->static_link = NULL;
1499     }
1500     revertible_caf_list = NULL;
1501 }
1502
1503 void
1504 markCAFs( evac_fn evac )
1505 {
1506     StgIndStatic *c;
1507
1508     for (c = (StgIndStatic *)caf_list; c != NULL;
1509          c = (StgIndStatic *)c->static_link)
1510     {
1511         evac(&c->indirectee);
1512     }
1513     for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
1514          c = (StgIndStatic *)c->static_link)
1515     {
1516         evac(&c->indirectee);
1517     }
1518 }
1519
1520 /* ----------------------------------------------------------------------------
1521    Update the pointers from the task list
1522
1523    These are treated as weak pointers because we want to allow a main
1524    thread to get a BlockedOnDeadMVar exception in the same way as any
1525    other thread.  Note that the threads should all have been retained
1526    by GC by virtue of being on the all_threads list, we're just
1527    updating pointers here.
1528    ------------------------------------------------------------------------- */
1529
1530 static void
1531 update_task_list (void)
1532 {
1533     Task *task;
1534     StgTSO *tso;
1535     for (task = all_tasks; task != NULL; task = task->all_link) {
1536         if (!task->stopped && task->tso) {
1537             ASSERT(task->tso->bound == task);
1538             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1539             if (tso == NULL) {
1540                 barf("task %p: main thread %d has been GC'd",
1541 #ifdef THREADED_RTS
1542                      (void *)task->id,
1543 #else
1544                      (void *)task,
1545 #endif
1546                      task->tso->id);
1547             }
1548             task->tso = tso;
1549         }
1550     }
1551 }
1552
1553 /* ----------------------------------------------------------------------------
1554    Reset the sizes of the older generations when we do a major
1555    collection.
1556
1557    CURRENT STRATEGY: make all generations except zero the same size.
1558    We have to stay within the maximum heap size, and leave a certain
1559    percentage of the maximum heap size available to allocate into.
1560    ------------------------------------------------------------------------- */
1561
1562 static void
1563 resize_generations (void)
1564 {
1565     nat g;
1566
1567     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1568         nat live, size, min_alloc;
1569         nat max  = RtsFlags.GcFlags.maxHeapSize;
1570         nat gens = RtsFlags.GcFlags.generations;
1571
1572         // live in the oldest generations
1573         live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+
1574             oldest_gen->steps[0].n_large_blocks;
1575
1576         // default max size for all generations except zero
1577         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1578                        RtsFlags.GcFlags.minOldGenSize);
1579
1580         // minimum size for generation zero
1581         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1582                             RtsFlags.GcFlags.minAllocAreaSize);
1583
1584         // Auto-enable compaction when the residency reaches a
1585         // certain percentage of the maximum heap size (default: 30%).
1586         if (RtsFlags.GcFlags.generations > 1 &&
1587             (RtsFlags.GcFlags.compact ||
1588              (max > 0 &&
1589               oldest_gen->steps[0].n_blocks >
1590               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1591             oldest_gen->steps[0].is_compacted = 1;
1592 //        debugBelch("compaction: on\n", live);
1593         } else {
1594             oldest_gen->steps[0].is_compacted = 0;
1595 //        debugBelch("compaction: off\n", live);
1596         }
1597
1598         // if we're going to go over the maximum heap size, reduce the
1599         // size of the generations accordingly.  The calculation is
1600         // different if compaction is turned on, because we don't need
1601         // to double the space required to collect the old generation.
1602         if (max != 0) {
1603
1604             // this test is necessary to ensure that the calculations
1605             // below don't have any negative results - we're working
1606             // with unsigned values here.
1607             if (max < min_alloc) {
1608                 heapOverflow();
1609             }
1610
1611             if (oldest_gen->steps[0].is_compacted) {
1612                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1613                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1614                 }
1615             } else {
1616                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1617                     size = (max - min_alloc) / ((gens - 1) * 2);
1618                 }
1619             }
1620
1621             if (size < live) {
1622                 heapOverflow();
1623             }
1624         }
1625
1626 #if 0
1627         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1628                    min_alloc, size, max);
1629 #endif
1630
1631         for (g = 0; g < gens; g++) {
1632             generations[g].max_blocks = size;
1633         }
1634     }
1635 }
1636
1637 /* -----------------------------------------------------------------------------
1638    Calculate the new size of the nursery, and resize it.
1639    -------------------------------------------------------------------------- */
1640
1641 static void
1642 resize_nursery (void)
1643 {
1644     if (RtsFlags.GcFlags.generations == 1)
1645     {   // Two-space collector:
1646         nat blocks;
1647
1648         /* set up a new nursery.  Allocate a nursery size based on a
1649          * function of the amount of live data (by default a factor of 2)
1650          * Use the blocks from the old nursery if possible, freeing up any
1651          * left over blocks.
1652          *
1653          * If we get near the maximum heap size, then adjust our nursery
1654          * size accordingly.  If the nursery is the same size as the live
1655          * data (L), then we need 3L bytes.  We can reduce the size of the
1656          * nursery to bring the required memory down near 2L bytes.
1657          *
1658          * A normal 2-space collector would need 4L bytes to give the same
1659          * performance we get from 3L bytes, reducing to the same
1660          * performance at 2L bytes.
1661          */
1662         blocks = g0s0->n_old_blocks;
1663
1664         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1665              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1666              RtsFlags.GcFlags.maxHeapSize )
1667         {
1668             long adjusted_blocks;  // signed on purpose
1669             int pc_free;
1670
1671             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1672
1673             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1674                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1675
1676             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1677             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1678             {
1679                 heapOverflow();
1680             }
1681             blocks = adjusted_blocks;
1682         }
1683         else
1684         {
1685             blocks *= RtsFlags.GcFlags.oldGenFactor;
1686             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1687             {
1688                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1689             }
1690         }
1691         resizeNurseries(blocks);
1692     }
1693     else  // Generational collector
1694     {
1695         /*
1696          * If the user has given us a suggested heap size, adjust our
1697          * allocation area to make best use of the memory available.
1698          */
1699         if (RtsFlags.GcFlags.heapSizeSuggestion)
1700         {
1701             long blocks;
1702             nat needed = calcNeeded();  // approx blocks needed at next GC
1703
1704             /* Guess how much will be live in generation 0 step 0 next time.
1705              * A good approximation is obtained by finding the
1706              * percentage of g0s0 that was live at the last minor GC.
1707              *
1708              * We have an accurate figure for the amount of copied data in
1709              * 'copied', but we must convert this to a number of blocks, with
1710              * a small adjustment for estimated slop at the end of a block
1711              * (- 10 words).
1712              */
1713             if (N == 0)
1714             {
1715                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1716                     / countNurseryBlocks();
1717             }
1718
1719             /* Estimate a size for the allocation area based on the
1720              * information available.  We might end up going slightly under
1721              * or over the suggested heap size, but we should be pretty
1722              * close on average.
1723              *
1724              * Formula:            suggested - needed
1725              *                ----------------------------
1726              *                    1 + g0s0_pcnt_kept/100
1727              *
1728              * where 'needed' is the amount of memory needed at the next
1729              * collection for collecting all steps except g0s0.
1730              */
1731             blocks =
1732                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1733                 (100 + (long)g0s0_pcnt_kept);
1734
1735             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1736                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1737             }
1738
1739             resizeNurseries((nat)blocks);
1740         }
1741         else
1742         {
1743             // we might have added extra large blocks to the nursery, so
1744             // resize back to minAllocAreaSize again.
1745             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1746         }
1747     }
1748 }
1749
1750 /* -----------------------------------------------------------------------------
1751    Sanity code for CAF garbage collection.
1752
1753    With DEBUG turned on, we manage a CAF list in addition to the SRT
1754    mechanism.  After GC, we run down the CAF list and blackhole any
1755    CAFs which have been garbage collected.  This means we get an error
1756    whenever the program tries to enter a garbage collected CAF.
1757
1758    Any garbage collected CAFs are taken off the CAF list at the same
1759    time.
1760    -------------------------------------------------------------------------- */
1761
1762 #if 0 && defined(DEBUG)
1763
1764 static void
1765 gcCAFs(void)
1766 {
1767   StgClosure*  p;
1768   StgClosure** pp;
1769   const StgInfoTable *info;
1770   nat i;
1771
1772   i = 0;
1773   p = caf_list;
1774   pp = &caf_list;
1775
1776   while (p != NULL) {
1777
1778     info = get_itbl(p);
1779
1780     ASSERT(info->type == IND_STATIC);
1781
1782     if (STATIC_LINK(info,p) == NULL) {
1783         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1784         // black hole it
1785         SET_INFO(p,&stg_BLACKHOLE_info);
1786         p = STATIC_LINK2(info,p);
1787         *pp = p;
1788     }
1789     else {
1790       pp = &STATIC_LINK2(info,p);
1791       p = *pp;
1792       i++;
1793     }
1794
1795   }
1796
1797   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1798 }
1799 #endif