rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2006
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 // #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsFlags.h"
  17 #include "RtsUtils.h"
  18 #include "Apply.h"
  19 #include "OSThreads.h"
  20 #include "LdvProfile.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "MBlock.h"
  27 #include "ProfHeap.h"
  28 #include "SchedAPI.h"
  29 #include "Weak.h"
  30 #include "Prelude.h"
  31 #include "ParTicky.h"           // ToDo: move into Rts.h
  32 #include "RtsSignals.h"
  33 #include "STM.h"
  34 #include "HsFFI.h"
  35 #include "Linker.h"
  36 #if defined(RTS_GTK_FRONTPANEL)
  37 #include "FrontPanel.h"
  38 #endif
  39 #include "Trace.h"
  40 #include "RetainerProfile.h"
  41 #include "RaiseAsync.h"
  42 #include "Sparks.h"
  43 #include "Papi.h"
  44
  45 #include "GC.h"
  46 #include "Compact.h"
  47 #include "Evac.h"
  48 #include "Scav.h"
  49 #include "GCUtils.h"
  50 #include "MarkWeak.h"
  51 #include "Sparks.h"
  52
  53 #include <string.h> // for memset()
  54 #include <unistd.h>
  55
  56 /* -----------------------------------------------------------------------------
  57    Global variables
  58    -------------------------------------------------------------------------- */
  59
  60 /* STATIC OBJECT LIST.
  61  *
  62  * During GC:
  63  * We maintain a linked list of static objects that are still live.
  64  * The requirements for this list are:
  65  *
  66  *  - we need to scan the list while adding to it, in order to
  67  *    scavenge all the static objects (in the same way that
  68  *    breadth-first scavenging works for dynamic objects).
  69  *
  70  *  - we need to be able to tell whether an object is already on
  71  *    the list, to break loops.
  72  *
  73  * Each static object has a "static link field", which we use for
  74  * linking objects on to the list.  We use a stack-type list, consing
  75  * objects on the front as they are added (this means that the
  76  * scavenge phase is depth-first, not breadth-first, but that
  77  * shouldn't matter).
  78  *
  79  * A separate list is kept for objects that have been scavenged
  80  * already - this is so that we can zero all the marks afterwards.
  81  *
  82  * An object is on the list if its static link field is non-zero; this
  83  * means that we have to mark the end of the list with '1', not NULL.
  84  *
  85  * Extra notes for generational GC:
  86  *
  87  * Each generation has a static object list associated with it.  When
  88  * collecting generations up to N, we treat the static object lists
  89  * from generations > N as roots.
  90  *
  91  * We build up a static object list while collecting generations 0..N,
  92  * which is then appended to the static object list of generation N+1.
  93  */
  94
  95 /* N is the oldest generation being collected, where the generations
  96  * are numbered starting at 0.  A major GC (indicated by the major_gc
  97  * flag) is when we're collecting all generations.  We only attempt to
  98  * deal with static objects and GC CAFs when doing a major GC.
  99  */
 100 nat N;
 101 rtsBool major_gc;
 102
 103 /* Data used for allocation area sizing.
 104  */
 105 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 106
 107 /* Mut-list stats */
 108 #ifdef DEBUG
 109 nat mutlist_MUTVARS,
 110     mutlist_MUTARRS,
 111     mutlist_MVARS,
 112     mutlist_OTHERS;
 113 #endif
 114
 115 /* Thread-local data for each GC thread
 116  */
 117 gc_thread **gc_threads = NULL;
 118 // gc_thread *gct = NULL;  // this thread's gct TODO: make thread-local
 119
 120 // Number of threads running in *this* GC.  Affects how many
 121 // step->todos[] lists we have to look in to find work.
 122 nat n_gc_threads;
 123
 124 // For stats:
 125 long copied;        // *words* copied & scavenged during this GC
 126
 127 #ifdef THREADED_RTS
 128 SpinLock recordMutableGen_sync;
 129 #endif
 130
 131 /* -----------------------------------------------------------------------------
 132    Static function declarations
 133    -------------------------------------------------------------------------- */
 134
 135 static void mark_root               (StgClosure **root);
 136 static void zero_static_object_list (StgClosure* first_static);
 137 static nat  initialise_N            (rtsBool force_major_gc);
 138 static void alloc_gc_threads        (void);
 139 static void init_collected_gen      (nat g, nat threads);
 140 static void init_uncollected_gen    (nat g, nat threads);
 141 static void init_gc_thread          (gc_thread *t);
 142 static void update_task_list        (void);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void gc_thread_work          (void);
 147 static nat  inc_running             (void);
 148 static nat  dec_running             (void);
 149 static void wakeup_gc_threads       (nat n_threads);
 150 static void shutdown_gc_threads     (nat n_threads);
 151
 152 #if 0 && defined(DEBUG)
 153 static void gcCAFs                  (void);
 154 #endif
 155
 156 /* -----------------------------------------------------------------------------
 157    The mark bitmap & stack.
 158    -------------------------------------------------------------------------- */
 159
 160 #define MARK_STACK_BLOCKS 4
 161
 162 bdescr *mark_stack_bdescr;
 163 StgPtr *mark_stack;
 164 StgPtr *mark_sp;
 165 StgPtr *mark_splim;
 166
 167 // Flag and pointers used for falling back to a linear scan when the
 168 // mark stack overflows.
 169 rtsBool mark_stack_overflowed;
 170 bdescr *oldgen_scan_bd;
 171 StgPtr  oldgen_scan;
 172
 173 /* -----------------------------------------------------------------------------
 174    GarbageCollect: the main entry point to the garbage collector.
 175
 176    Locks held: all capabilities are held throughout GarbageCollect().
 177    -------------------------------------------------------------------------- */
 178
 179 void
 180 GarbageCollect ( rtsBool force_major_gc )
 181 {
 182   bdescr *bd;
 183   step *stp;
 184   lnat live, allocated;
 185   lnat oldgen_saved_blocks = 0;
 186   gc_thread *saved_gct;
 187   nat g, s, t, n;
 188
 189   // necessary if we stole a callee-saves register for gct:
 190   saved_gct = gct;
 191
 192 #ifdef PROFILING
 193   CostCentreStack *prev_CCS;
 194 #endif
 195
 196   ACQUIRE_SM_LOCK;
 197
 198   debugTrace(DEBUG_gc, "starting GC");
 199
 200 #if defined(RTS_USER_SIGNALS)
 201   if (RtsFlags.MiscFlags.install_signal_handlers) {
 202     // block signals
 203     blockUserSignals();
 204   }
 205 #endif
 206
 207   // tell the stats department that we've started a GC
 208   stat_startGC();
 209
 210   // tell the STM to discard any cached closures it's hoping to re-use
 211   stmPreGCHook();
 212
 213 #ifdef DEBUG
 214   mutlist_MUTVARS = 0;
 215   mutlist_MUTARRS = 0;
 216   mutlist_OTHERS = 0;
 217 #endif
 218
 219   // attribute any costs to CCS_GC
 220 #ifdef PROFILING
 221   prev_CCS = CCCS;
 222   CCCS = CCS_GC;
 223 #endif
 224
 225   /* Approximate how much we allocated.
 226    * Todo: only when generating stats?
 227    */
 228   allocated = calcAllocated();
 229
 230   /* Figure out which generation to collect
 231    */
 232   n = initialise_N(force_major_gc);
 233
 234   /* Allocate + initialise the gc_thread structures.
 235    */
 236   alloc_gc_threads();
 237
 238   /* Start threads, so they can be spinning up while we finish initialisation.
 239    */
 240   start_gc_threads();
 241
 242   /* How many threads will be participating in this GC?
 243    * We don't try to parallelise minor GC.
 244    */
 245 #if defined(THREADED_RTS)
 246   if (n < (4*1024*1024 / BLOCK_SIZE)) {
 247       n_gc_threads = 1;
 248   } else {
 249       n_gc_threads = RtsFlags.ParFlags.gcThreads;
 250   }
 251   trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %dKB to collect, using %d thread(s)",
 252         N, n * (BLOCK_SIZE / 1024), n_gc_threads);
 253 #else
 254   n_gc_threads = 1;
 255 #endif
 256
 257 #ifdef RTS_GTK_FRONTPANEL
 258   if (RtsFlags.GcFlags.frontpanel) {
 259       updateFrontPanelBeforeGC(N);
 260   }
 261 #endif
 262
 263 #ifdef DEBUG
 264   // check for memory leaks if DEBUG is on
 265   memInventory(traceClass(DEBUG_gc));
 266 #endif
 267
 268   // check stack sanity *before* GC (ToDo: check all threads)
 269   IF_DEBUG(sanity, checkFreeListSanity());
 270
 271   // Initialise all our gc_thread structures
 272   for (t = 0; t < n_gc_threads; t++) {
 273       init_gc_thread(gc_threads[t]);
 274   }
 275
 276   // Initialise all the generations/steps that we're collecting.
 277   for (g = 0; g <= N; g++) {
 278       init_collected_gen(g,n_gc_threads);
 279   }
 280
 281   // Initialise all the generations/steps that we're *not* collecting.
 282   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 283       init_uncollected_gen(g,n_gc_threads);
 284   }
 285
 286   /* Allocate a mark stack if we're doing a major collection.
 287    */
 288   if (major_gc) {
 289       mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
 290       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 291       mark_sp    = mark_stack;
 292       mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
 293   } else {
 294       mark_stack_bdescr = NULL;
 295   }
 296
 297   // this is the main thread
 298   gct = gc_threads[0];
 299
 300   /* -----------------------------------------------------------------------
 301    * follow all the roots that we know about:
 302    *   - mutable lists from each generation > N
 303    * we want to *scavenge* these roots, not evacuate them: they're not
 304    * going to move in this GC.
 305    * Also do them in reverse generation order, for the usual reason:
 306    * namely to reduce the likelihood of spurious old->new pointers.
 307    */
 308   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 309       generations[g].saved_mut_list = generations[g].mut_list;
 310       generations[g].mut_list = allocBlock();
 311       // mut_list always has at least one block.
 312   }
 313
 314   // the main thread is running: this prevents any other threads from
 315   // exiting prematurely, so we can start them now.
 316   // NB. do this after the mutable lists have been saved above, otherwise
 317   // the other GC threads will be writing into the old mutable lists.
 318   inc_running();
 319   wakeup_gc_threads(n_gc_threads);
 320
 321   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 322       scavenge_mutable_list(&generations[g]);
 323   }
 324
 325   // follow roots from the CAF list (used by GHCi)
 326   gct->evac_step = 0;
 327   markCAFs(mark_root);
 328
 329   // follow all the roots that the application knows about.
 330   gct->evac_step = 0;
 331   GetRoots(mark_root);
 332
 333 #if defined(RTS_USER_SIGNALS)
 334   // mark the signal handlers (signals should be already blocked)
 335   markSignalHandlers(mark_root);
 336 #endif
 337
 338   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 339   markWeakPtrList();
 340   initWeakForGC();
 341
 342   // Mark the stable pointer table.
 343   markStablePtrTable(mark_root);
 344
 345   /* -------------------------------------------------------------------------
 346    * Repeatedly scavenge all the areas we know about until there's no
 347    * more scavenging to be done.
 348    */
 349   for (;;)
 350   {
 351       gc_thread_work();
 352       // The other threads are now stopped.  We might recurse back to
 353       // here, but from now on this is the only thread.
 354
 355       // if any blackholes are alive, make the threads that wait on
 356       // them alive too.
 357       if (traverseBlackholeQueue()) {
 358           inc_running();
 359           continue;
 360       }
 361
 362       // must be last...  invariant is that everything is fully
 363       // scavenged at this point.
 364       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 365           inc_running();
 366           continue;
 367       }
 368
 369       // If we get to here, there's really nothing left to do.
 370       break;
 371   }
 372
 373   shutdown_gc_threads(n_gc_threads);
 374
 375   // Update pointers from the Task list
 376   update_task_list();
 377
 378   // Now see which stable names are still alive.
 379   gcStablePtrTable();
 380
 381 #ifdef PROFILING
 382   // We call processHeapClosureForDead() on every closure destroyed during
 383   // the current garbage collection, so we invoke LdvCensusForDead().
 384   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 385       || RtsFlags.ProfFlags.bioSelector != NULL)
 386     LdvCensusForDead(N);
 387 #endif
 388
 389   // NO MORE EVACUATION AFTER THIS POINT!
 390   // Finally: compaction of the oldest generation.
 391   if (major_gc && oldest_gen->steps[0].is_compacted) {
 392       // save number of blocks for stats
 393       oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks;
 394       compact();
 395   }
 396
 397   IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
 398
 399   // Two-space collector: free the old to-space.
 400   // g0s0->old_blocks is the old nursery
 401   // g0s0->blocks is to-space from the previous GC
 402   if (RtsFlags.GcFlags.generations == 1) {
 403       if (g0s0->blocks != NULL) {
 404           freeChain(g0s0->blocks);
 405           g0s0->blocks = NULL;
 406       }
 407   }
 408
 409   // For each workspace, in each thread:
 410   //    * clear the BF_EVACUATED flag from each copied block
 411   //    * move the copied blocks to the step
 412   {
 413       gc_thread *thr;
 414       step_workspace *ws;
 415       bdescr *prev;
 416
 417       for (t = 0; t < n_gc_threads; t++) {
 418           thr = gc_threads[t];
 419
 420           // not step 0
 421           for (s = 1; s < total_steps; s++) {
 422               ws = &thr->steps[s];
 423               // Not true?
 424               // ASSERT( ws->scan_bd == ws->todo_bd );
 425               ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
 426
 427               // Push the final block
 428               if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
 429
 430               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 431
 432               prev = ws->scavd_list;
 433               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 434                   bd->flags &= ~BF_EVACUATED;    // now from-space
 435                   prev = bd;
 436               }
 437               prev->link = ws->stp->blocks;
 438               ws->stp->blocks = ws->scavd_list;
 439               ws->stp->n_blocks += ws->n_scavd_blocks;
 440               ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks);
 441           }
 442       }
 443   }
 444
 445   // Two-space collector: swap the semi-spaces around.
 446   // Currently: g0s0->old_blocks is the old nursery
 447   //            g0s0->blocks is to-space from this GC
 448   // We want these the other way around.
 449   if (RtsFlags.GcFlags.generations == 1) {
 450       bdescr *nursery_blocks = g0s0->old_blocks;
 451       nat n_nursery_blocks = g0s0->n_old_blocks;
 452       g0s0->old_blocks = g0s0->blocks;
 453       g0s0->n_old_blocks = g0s0->n_blocks;
 454       g0s0->blocks = nursery_blocks;
 455       g0s0->n_blocks = n_nursery_blocks;
 456   }
 457
 458   /* run through all the generations/steps and tidy up
 459    */
 460   copied = 0;
 461   {
 462       nat i;
 463       for (i=0; i < n_gc_threads; i++) {
 464           if (n_gc_threads > 1) {
 465               trace(TRACE_gc,"thread %d:", i);
 466               trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 467               trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
 468               trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
 469               trace(TRACE_gc,"   scav_global_work %ld", gc_threads[i]->scav_global_work);
 470               trace(TRACE_gc,"   scav_local_work  %ld", gc_threads[i]->scav_local_work);
 471           }
 472           copied += gc_threads[i]->copied;
 473       }
 474   }
 475
 476   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 477
 478     if (g <= N) {
 479       generations[g].collections++; // for stats
 480     }
 481
 482     // Count the mutable list as bytes "copied" for the purposes of
 483     // stats.  Every mutable list is copied during every GC.
 484     if (g > 0) {
 485         nat mut_list_size = 0;
 486         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 487             mut_list_size += bd->free - bd->start;
 488         }
 489         copied +=  mut_list_size;
 490
 491         debugTrace(DEBUG_gc,
 492                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 493                    (unsigned long)(mut_list_size * sizeof(W_)),
 494                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 495     }
 496
 497     for (s = 0; s < generations[g].n_steps; s++) {
 498       bdescr *next;
 499       stp = &generations[g].steps[s];
 500
 501       // for generations we collected...
 502       if (g <= N) {
 503
 504         /* free old memory and shift to-space into from-space for all
 505          * the collected steps (except the allocation area).  These
 506          * freed blocks will probaby be quickly recycled.
 507          */
 508         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 509             if (stp->is_compacted)
 510             {
 511                 // for a compacted step, just shift the new to-space
 512                 // onto the front of the now-compacted existing blocks.
 513                 for (bd = stp->blocks; bd != NULL; bd = bd->link) {
 514                     bd->flags &= ~BF_EVACUATED;  // now from-space
 515                 }
 516                 // tack the new blocks on the end of the existing blocks
 517                 if (stp->old_blocks != NULL) {
 518                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 519                         // NB. this step might not be compacted next
 520                         // time, so reset the BF_COMPACTED flags.
 521                         // They are set before GC if we're going to
 522                         // compact.  (search for BF_COMPACTED above).
 523                         bd->flags &= ~BF_COMPACTED;
 524                         next = bd->link;
 525                         if (next == NULL) {
 526                             bd->link = stp->blocks;
 527                         }
 528                     }
 529                     stp->blocks = stp->old_blocks;
 530                 }
 531                 // add the new blocks to the block tally
 532                 stp->n_blocks += stp->n_old_blocks;
 533                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 534             }
 535             else // not copacted
 536             {
 537                 freeChain(stp->old_blocks);
 538             }
 539             stp->old_blocks = NULL;
 540             stp->n_old_blocks = 0;
 541         }
 542
 543         /* LARGE OBJECTS.  The current live large objects are chained on
 544          * scavenged_large, having been moved during garbage
 545          * collection from large_objects.  Any objects left on
 546          * large_objects list are therefore dead, so we free them here.
 547          */
 548         for (bd = stp->large_objects; bd != NULL; bd = next) {
 549           next = bd->link;
 550           freeGroup(bd);
 551           bd = next;
 552         }
 553
 554         // update the count of blocks used by large objects
 555         for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) {
 556           bd->flags &= ~BF_EVACUATED;
 557         }
 558         stp->large_objects  = stp->scavenged_large_objects;
 559         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 560
 561       }
 562       else // for older generations...
 563       {
 564         /* For older generations, we need to append the
 565          * scavenged_large_object list (i.e. large objects that have been
 566          * promoted during this GC) to the large_object list for that step.
 567          */
 568         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 569           next = bd->link;
 570           bd->flags &= ~BF_EVACUATED;
 571           dbl_link_onto(bd, &stp->large_objects);
 572         }
 573
 574         // add the new blocks we promoted during this GC
 575         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 576       }
 577     }
 578   }
 579
 580   // update the max size of older generations after a major GC
 581   resize_generations();
 582
 583   // Guess the amount of live data for stats.
 584   live = calcLiveBlocks() * BLOCK_SIZE_W;
 585   debugTrace(DEBUG_gc, "Slop: %ldKB",
 586              (live - calcLiveWords()) / (1024/sizeof(W_)));
 587
 588   // Free the small objects allocated via allocate(), since this will
 589   // all have been copied into G0S1 now.
 590   if (RtsFlags.GcFlags.generations > 1) {
 591       if (g0s0->blocks != NULL) {
 592           freeChain(g0s0->blocks);
 593           g0s0->blocks = NULL;
 594       }
 595       g0s0->n_blocks = 0;
 596   }
 597   alloc_blocks = 0;
 598   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 599
 600   // Start a new pinned_object_block
 601   pinned_object_block = NULL;
 602
 603   // Free the mark stack.
 604   if (mark_stack_bdescr != NULL) {
 605       freeGroup(mark_stack_bdescr);
 606   }
 607
 608   // Free any bitmaps.
 609   for (g = 0; g <= N; g++) {
 610       for (s = 0; s < generations[g].n_steps; s++) {
 611           stp = &generations[g].steps[s];
 612           if (stp->bitmap != NULL) {
 613               freeGroup(stp->bitmap);
 614               stp->bitmap = NULL;
 615           }
 616       }
 617   }
 618
 619   resize_nursery();
 620
 621  // mark the garbage collected CAFs as dead
 622 #if 0 && defined(DEBUG) // doesn't work at the moment
 623   if (major_gc) { gcCAFs(); }
 624 #endif
 625
 626 #ifdef PROFILING
 627   // resetStaticObjectForRetainerProfiling() must be called before
 628   // zeroing below.
 629   if (n_gc_threads > 1) {
 630       barf("profiling is currently broken with multi-threaded GC");
 631       // ToDo: fix the gct->scavenged_static_objects below
 632   }
 633   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 634 #endif
 635
 636   // zero the scavenged static object list
 637   if (major_gc) {
 638       nat i;
 639       for (i = 0; i < n_gc_threads; i++) {
 640           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 641       }
 642   }
 643
 644   // Reset the nursery
 645   resetNurseries();
 646
 647   // start any pending finalizers
 648   RELEASE_SM_LOCK;
 649   scheduleFinalizers(last_free_capability, old_weak_ptr_list);
 650   ACQUIRE_SM_LOCK;
 651
 652   // send exceptions to any threads which were about to die
 653   RELEASE_SM_LOCK;
 654   resurrectThreads(resurrected_threads);
 655   ACQUIRE_SM_LOCK;
 656
 657   // Update the stable pointer hash table.
 658   updateStablePtrTable(major_gc);
 659
 660   // check sanity after GC
 661   IF_DEBUG(sanity, checkSanity());
 662
 663   // extra GC trace info
 664   if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
 665
 666 #ifdef DEBUG
 667   // symbol-table based profiling
 668   /*  heapCensus(to_blocks); */ /* ToDo */
 669 #endif
 670
 671   // restore enclosing cost centre
 672 #ifdef PROFILING
 673   CCCS = prev_CCS;
 674 #endif
 675
 676 #ifdef DEBUG
 677   // check for memory leaks if DEBUG is on
 678   memInventory(traceClass(DEBUG_gc));
 679 #endif
 680
 681 #ifdef RTS_GTK_FRONTPANEL
 682   if (RtsFlags.GcFlags.frontpanel) {
 683       updateFrontPanelAfterGC( N, live );
 684   }
 685 #endif
 686
 687   // ok, GC over: tell the stats department what happened.
 688   stat_endGC(allocated, live, copied, N);
 689
 690 #if defined(RTS_USER_SIGNALS)
 691   if (RtsFlags.MiscFlags.install_signal_handlers) {
 692     // unblock signals again
 693     unblockUserSignals();
 694   }
 695 #endif
 696
 697   RELEASE_SM_LOCK;
 698
 699   gct = saved_gct;
 700 }
 701
 702 /* -----------------------------------------------------------------------------
 703  * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an
 704  * implicit slide i.e. after marking all sparks are at the beginning of the
 705  * spark pool and the spark pool only contains sparkable closures
 706  * -------------------------------------------------------------------------- */
 707
 708 #ifdef THREADED_RTS
 709 static void
 710 markSparkQueue (evac_fn evac, Capability *cap)
 711 {
 712     StgClosure **sparkp, **to_sparkp;
 713     nat n, pruned_sparks; // stats only
 714     StgSparkPool *pool;
 715
 716     PAR_TICKY_MARK_SPARK_QUEUE_START();
 717
 718     n = 0;
 719     pruned_sparks = 0;
 720
 721     pool = &(cap->r.rSparks);
 722
 723     ASSERT_SPARK_POOL_INVARIANTS(pool);
 724
 725 #if defined(PARALLEL_HASKELL)
 726     // stats only
 727     n = 0;
 728     pruned_sparks = 0;
 729 #endif
 730
 731     sparkp = pool->hd;
 732     to_sparkp = pool->hd;
 733     while (sparkp != pool->tl) {
 734         ASSERT(*sparkp!=NULL);
 735         ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp)));
 736         // ToDo?: statistics gathering here (also for GUM!)
 737         if (closure_SHOULD_SPARK(*sparkp)) {
 738             evac(sparkp);
 739             *to_sparkp++ = *sparkp;
 740             if (to_sparkp == pool->lim) {
 741                 to_sparkp = pool->base;
 742             }
 743             n++;
 744         } else {
 745             pruned_sparks++;
 746         }
 747         sparkp++;
 748         if (sparkp == pool->lim) {
 749             sparkp = pool->base;
 750         }
 751     }
 752     pool->tl = to_sparkp;
 753
 754     PAR_TICKY_MARK_SPARK_QUEUE_END(n);
 755
 756 #if defined(PARALLEL_HASKELL)
 757     debugTrace(DEBUG_sched,
 758                "marked %d sparks and pruned %d sparks on [%x]",
 759                n, pruned_sparks, mytid);
 760 #else
 761     debugTrace(DEBUG_sched,
 762                "marked %d sparks and pruned %d sparks",
 763                n, pruned_sparks);
 764 #endif
 765
 766     debugTrace(DEBUG_sched,
 767                "new spark queue len=%d; (hd=%p; tl=%p)\n",
 768                sparkPoolSize(pool), pool->hd, pool->tl);
 769 }
 770 #endif
 771
 772 /* ---------------------------------------------------------------------------
 773    Where are the roots that we know about?
 774
 775         - all the threads on the runnable queue
 776         - all the threads on the blocked queue
 777         - all the threads on the sleeping queue
 778         - all the thread currently executing a _ccall_GC
 779         - all the "main threads"
 780
 781    ------------------------------------------------------------------------ */
 782
 783 void
 784 GetRoots( evac_fn evac )
 785 {
 786     nat i;
 787     Capability *cap;
 788     Task *task;
 789
 790     // Each GC thread is responsible for following roots from the
 791     // Capability of the same number.  There will usually be the same
 792     // or fewer Capabilities as GC threads, but just in case there
 793     // are more, we mark every Capability whose number is the GC
 794     // thread's index plus a multiple of the number of GC threads.
 795     for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) {
 796         cap = &capabilities[i];
 797         evac((StgClosure **)(void *)&cap->run_queue_hd);
 798         evac((StgClosure **)(void *)&cap->run_queue_tl);
 799 #if defined(THREADED_RTS)
 800         evac((StgClosure **)(void *)&cap->wakeup_queue_hd);
 801         evac((StgClosure **)(void *)&cap->wakeup_queue_tl);
 802 #endif
 803         for (task = cap->suspended_ccalling_tasks; task != NULL;
 804              task=task->next) {
 805             debugTrace(DEBUG_sched,
 806                        "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id);
 807             evac((StgClosure **)(void *)&task->suspended_tso);
 808         }
 809
 810 #if defined(THREADED_RTS)
 811         markSparkQueue(evac,cap);
 812 #endif
 813     }
 814
 815 #if !defined(THREADED_RTS)
 816     evac((StgClosure **)(void *)&blocked_queue_hd);
 817     evac((StgClosure **)(void *)&blocked_queue_tl);
 818     evac((StgClosure **)(void *)&sleeping_queue);
 819 #endif
 820 }
 821
 822 /* -----------------------------------------------------------------------------
 823    isAlive determines whether the given closure is still alive (after
 824    a garbage collection) or not.  It returns the new address of the
 825    closure if it is alive, or NULL otherwise.
 826
 827    NOTE: Use it before compaction only!
 828          It untags and (if needed) retags pointers to closures.
 829    -------------------------------------------------------------------------- */
 830
 831
 832 StgClosure *
 833 isAlive(StgClosure *p)
 834 {
 835   const StgInfoTable *info;
 836   bdescr *bd;
 837   StgWord tag;
 838   StgClosure *q;
 839
 840   while (1) {
 841     /* The tag and the pointer are split, to be merged later when needed. */
 842     tag = GET_CLOSURE_TAG(p);
 843     q = UNTAG_CLOSURE(p);
 844
 845     ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
 846     info = get_itbl(q);
 847
 848     // ignore static closures
 849     //
 850     // ToDo: for static closures, check the static link field.
 851     // Problem here is that we sometimes don't set the link field, eg.
 852     // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.
 853     //
 854     if (!HEAP_ALLOCED(q)) {
 855         return p;
 856     }
 857
 858     // ignore closures in generations that we're not collecting.
 859     bd = Bdescr((P_)q);
 860     if (bd->gen_no > N) {
 861         return p;
 862     }
 863
 864     // if it's a pointer into to-space, then we're done
 865     if (bd->flags & BF_EVACUATED) {
 866         return p;
 867     }
 868
 869     // large objects use the evacuated flag
 870     if (bd->flags & BF_LARGE) {
 871         return NULL;
 872     }
 873
 874     // check the mark bit for compacted steps
 875     if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) {
 876         return p;
 877     }
 878
 879     switch (info->type) {
 880
 881     case IND:
 882     case IND_STATIC:
 883     case IND_PERM:
 884     case IND_OLDGEN:            // rely on compatible layout with StgInd
 885     case IND_OLDGEN_PERM:
 886       // follow indirections
 887       p = ((StgInd *)q)->indirectee;
 888       continue;
 889
 890     case EVACUATED:
 891       // alive!
 892       return ((StgEvacuated *)q)->evacuee;
 893
 894     case TSO:
 895       if (((StgTSO *)q)->what_next == ThreadRelocated) {
 896         p = (StgClosure *)((StgTSO *)q)->link;
 897         continue;
 898       }
 899       return NULL;
 900
 901     default:
 902       // dead.
 903       return NULL;
 904     }
 905   }
 906 }
 907
 908 /* -----------------------------------------------------------------------------
 909    Figure out which generation to collect, initialise N and major_gc.
 910
 911    Also returns the total number of blocks in generations that will be
 912    collected.
 913    -------------------------------------------------------------------------- */
 914
 915 static nat
 916 initialise_N (rtsBool force_major_gc)
 917 {
 918     int g;
 919     nat s, blocks, blocks_total;
 920
 921     blocks = 0;
 922     blocks_total = 0;
 923
 924     if (force_major_gc) {
 925         N = RtsFlags.GcFlags.generations - 1;
 926     } else {
 927         N = 0;
 928     }
 929
 930     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 931         blocks = 0;
 932         for (s = 0; s < generations[g].n_steps; s++) {
 933             blocks += generations[g].steps[s].n_blocks;
 934             blocks += generations[g].steps[s].n_large_blocks;
 935         }
 936         if (blocks >= generations[g].max_blocks) {
 937             N = stg_max(N,g);
 938         }
 939         if ((nat)g <= N) {
 940             blocks_total += blocks;
 941         }
 942     }
 943
 944     blocks_total += countNurseryBlocks();
 945
 946     major_gc = (N == RtsFlags.GcFlags.generations-1);
 947     return blocks_total;
 948 }
 949
 950 /* -----------------------------------------------------------------------------
 951    Initialise the gc_thread structures.
 952    -------------------------------------------------------------------------- */
 953
 954 static gc_thread *
 955 alloc_gc_thread (int n)
 956 {
 957     nat s;
 958     step_workspace *ws;
 959     gc_thread *t;
 960
 961     t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 962                        "alloc_gc_thread");
 963
 964 #ifdef THREADED_RTS
 965     t->id = 0;
 966     initCondition(&t->wake_cond);
 967     initMutex(&t->wake_mutex);
 968     t->wakeup = rtsFalse;
 969     t->exit   = rtsFalse;
 970 #endif
 971
 972     t->thread_index = n;
 973     t->free_blocks = NULL;
 974     t->gc_count = 0;
 975
 976     init_gc_thread(t);
 977
 978 #ifdef USE_PAPI
 979     t->papi_events = -1;
 980 #endif
 981
 982     for (s = 0; s < total_steps; s++)
 983     {
 984         ws = &t->steps[s];
 985         ws->stp = &all_steps[s];
 986         ASSERT(s == ws->stp->abs_no);
 987         ws->gct = t;
 988
 989         ws->scan_bd = NULL;
 990         ws->scan = NULL;
 991
 992         ws->todo_bd = NULL;
 993         ws->buffer_todo_bd = NULL;
 994
 995         ws->scavd_list = NULL;
 996         ws->n_scavd_blocks = 0;
 997     }
 998
 999     return t;
1000 }
1001
1002
1003 static void
1004 alloc_gc_threads (void)
1005 {
1006     if (gc_threads == NULL) {
1007 #if defined(THREADED_RTS)
1008         nat i;
1009         gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
1010                                      sizeof(gc_thread*),
1011                                      "alloc_gc_threads");
1012
1013         for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
1014             gc_threads[i] = alloc_gc_thread(i);
1015         }
1016 #else
1017         gc_threads = stgMallocBytes (sizeof(gc_thread*),
1018                                      "alloc_gc_threads");
1019
1020         gc_threads[0] = alloc_gc_thread(0);
1021 #endif
1022     }
1023 }
1024
1025 /* ----------------------------------------------------------------------------
1026    Start GC threads
1027    ------------------------------------------------------------------------- */
1028
1029 static nat gc_running_threads;
1030
1031 #if defined(THREADED_RTS)
1032 static Mutex gc_running_mutex;
1033 #endif
1034
1035 static nat
1036 inc_running (void)
1037 {
1038     nat n_running;
1039     ACQUIRE_LOCK(&gc_running_mutex);
1040     n_running = ++gc_running_threads;
1041     RELEASE_LOCK(&gc_running_mutex);
1042     ASSERT(n_running <= n_gc_threads);
1043     return n_running;
1044 }
1045
1046 static nat
1047 dec_running (void)
1048 {
1049     nat n_running;
1050     ACQUIRE_LOCK(&gc_running_mutex);
1051     ASSERT(n_gc_threads != 0);
1052     n_running = --gc_running_threads;
1053     RELEASE_LOCK(&gc_running_mutex);
1054     return n_running;
1055 }
1056
1057 //
1058 // gc_thread_work(): Scavenge until there's no work left to do and all
1059 // the running threads are idle.
1060 //
1061 static void
1062 gc_thread_work (void)
1063 {
1064     nat r;
1065
1066     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
1067
1068     // gc_running_threads has already been incremented for us; either
1069     // this is the main thread and we incremented it inside
1070     // GarbageCollect(), or this is a worker thread and the main
1071     // thread bumped gc_running_threads before waking us up.
1072
1073     // Every thread evacuates some roots.
1074     gct->evac_step = 0;
1075     GetRoots(mark_root);
1076
1077 loop:
1078     scavenge_loop();
1079     // scavenge_loop() only exits when there's no work to do
1080     r = dec_running();
1081
1082     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
1083                gct->thread_index, r);
1084
1085     while (gc_running_threads != 0) {
1086         usleep(1);
1087         if (any_work()) {
1088             inc_running();
1089             goto loop;
1090         }
1091         // any_work() does not remove the work from the queue, it
1092         // just checks for the presence of work.  If we find any,
1093         // then we increment gc_running_threads and go back to
1094         // scavenge_loop() to perform any pending work.
1095     }
1096
1097     // All threads are now stopped
1098     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
1099 }
1100
1101
1102 #if defined(THREADED_RTS)
1103 static void
1104 gc_thread_mainloop (void)
1105 {
1106     while (!gct->exit) {
1107
1108         // Wait until we're told to wake up
1109         ACQUIRE_LOCK(&gct->wake_mutex);
1110         gct->wakeup = rtsFalse;
1111         while (!gct->wakeup) {
1112             debugTrace(DEBUG_gc, "GC thread %d standing by...",
1113                        gct->thread_index);
1114             waitCondition(&gct->wake_cond, &gct->wake_mutex);
1115         }
1116         RELEASE_LOCK(&gct->wake_mutex);
1117         if (gct->exit) break;
1118
1119 #ifdef USE_PAPI
1120         // start performance counters in this thread...
1121         if (gct->papi_events == -1) {
1122             papi_init_eventset(&gct->papi_events);
1123         }
1124         papi_thread_start_gc1_count(gct->papi_events);
1125 #endif
1126
1127         gc_thread_work();
1128
1129 #ifdef USE_PAPI
1130         // count events in this thread towards the GC totals
1131         papi_thread_stop_gc1_count(gct->papi_events);
1132 #endif
1133     }
1134 }
1135 #endif
1136
1137 #if defined(THREADED_RTS)
1138 static void
1139 gc_thread_entry (gc_thread *my_gct)
1140 {
1141     gct = my_gct;
1142     debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
1143     gct->id = osThreadId();
1144     gc_thread_mainloop();
1145 }
1146 #endif
1147
1148 static void
1149 start_gc_threads (void)
1150 {
1151 #if defined(THREADED_RTS)
1152     nat i;
1153     OSThreadId id;
1154     static rtsBool done = rtsFalse;
1155
1156     gc_running_threads = 0;
1157     initMutex(&gc_running_mutex);
1158
1159     if (!done) {
1160         // Start from 1: the main thread is 0
1161         for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
1162             createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
1163                            gc_threads[i]);
1164         }
1165         done = rtsTrue;
1166     }
1167 #endif
1168 }
1169
1170 static void
1171 wakeup_gc_threads (nat n_threads USED_IF_THREADS)
1172 {
1173 #if defined(THREADED_RTS)
1174     nat i;
1175     for (i=1; i < n_threads; i++) {
1176         inc_running();
1177         ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1178         gc_threads[i]->wakeup = rtsTrue;
1179         signalCondition(&gc_threads[i]->wake_cond);
1180         RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1181     }
1182 #endif
1183 }
1184
1185 // After GC is complete, we must wait for all GC threads to enter the
1186 // standby state, otherwise they may still be executing inside
1187 // any_work(), and may even remain awake until the next GC starts.
1188 static void
1189 shutdown_gc_threads (nat n_threads USED_IF_THREADS)
1190 {
1191 #if defined(THREADED_RTS)
1192     nat i;
1193     rtsBool wakeup;
1194     for (i=1; i < n_threads; i++) {
1195         do {
1196             ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1197             wakeup = gc_threads[i]->wakeup;
1198             // wakeup is false while the thread is waiting
1199             RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1200         } while (wakeup);
1201     }
1202 #endif
1203 }
1204
1205 /* ----------------------------------------------------------------------------
1206    Initialise a generation that is to be collected
1207    ------------------------------------------------------------------------- */
1208
1209 static void
1210 init_collected_gen (nat g, nat n_threads)
1211 {
1212     nat s, t, i;
1213     step_workspace *ws;
1214     step *stp;
1215     bdescr *bd;
1216
1217     // Throw away the current mutable list.  Invariant: the mutable
1218     // list always has at least one block; this means we can avoid a
1219     // check for NULL in recordMutable().
1220     if (g != 0) {
1221         freeChain(generations[g].mut_list);
1222         generations[g].mut_list = allocBlock();
1223         for (i = 0; i < n_capabilities; i++) {
1224             freeChain(capabilities[i].mut_lists[g]);
1225             capabilities[i].mut_lists[g] = allocBlock();
1226         }
1227     }
1228
1229     for (s = 0; s < generations[g].n_steps; s++) {
1230
1231         // generation 0, step 0 doesn't need to-space
1232         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1233             continue;
1234         }
1235
1236         stp = &generations[g].steps[s];
1237         ASSERT(stp->gen_no == g);
1238
1239         // deprecate the existing blocks
1240         stp->old_blocks   = stp->blocks;
1241         stp->n_old_blocks = stp->n_blocks;
1242         stp->blocks       = NULL;
1243         stp->n_blocks     = 0;
1244
1245         // we don't have any to-be-scavenged blocks yet
1246         stp->todos = NULL;
1247         stp->todos_last = NULL;
1248         stp->n_todos = 0;
1249
1250         // initialise the large object queues.
1251         stp->scavenged_large_objects = NULL;
1252         stp->n_scavenged_large_blocks = 0;
1253
1254         // mark the large objects as not evacuated yet
1255         for (bd = stp->large_objects; bd; bd = bd->link) {
1256             bd->flags &= ~BF_EVACUATED;
1257         }
1258
1259         // for a compacted step, we need to allocate the bitmap
1260         if (stp->is_compacted) {
1261             nat bitmap_size; // in bytes
1262             bdescr *bitmap_bdescr;
1263             StgWord *bitmap;
1264
1265             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1266
1267             if (bitmap_size > 0) {
1268                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1269                                            / BLOCK_SIZE);
1270                 stp->bitmap = bitmap_bdescr;
1271                 bitmap = bitmap_bdescr->start;
1272
1273                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1274                            bitmap_size, bitmap);
1275
1276                 // don't forget to fill it with zeros!
1277                 memset(bitmap, 0, bitmap_size);
1278
1279                 // For each block in this step, point to its bitmap from the
1280                 // block descriptor.
1281                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1282                     bd->u.bitmap = bitmap;
1283                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1284
1285                     // Also at this point we set the BF_COMPACTED flag
1286                     // for this block.  The invariant is that
1287                     // BF_COMPACTED is always unset, except during GC
1288                     // when it is set on those blocks which will be
1289                     // compacted.
1290                     bd->flags |= BF_COMPACTED;
1291                 }
1292             }
1293         }
1294     }
1295
1296     // For each GC thread, for each step, allocate a "todo" block to
1297     // store evacuated objects to be scavenged, and a block to store
1298     // evacuated objects that do not need to be scavenged.
1299     for (t = 0; t < n_threads; t++) {
1300         for (s = 0; s < generations[g].n_steps; s++) {
1301
1302             // we don't copy objects into g0s0, unless -G0
1303             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1304
1305             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1306
1307             ws->scan_bd = NULL;
1308             ws->scan = NULL;
1309
1310             ws->todo_large_objects = NULL;
1311
1312             // allocate the first to-space block; extra blocks will be
1313             // chained on as necessary.
1314             ws->todo_bd = NULL;
1315             ws->buffer_todo_bd = NULL;
1316             gc_alloc_todo_block(ws);
1317
1318             ws->scavd_list = NULL;
1319             ws->n_scavd_blocks = 0;
1320         }
1321     }
1322 }
1323
1324
1325 /* ----------------------------------------------------------------------------
1326    Initialise a generation that is *not* to be collected
1327    ------------------------------------------------------------------------- */
1328
1329 static void
1330 init_uncollected_gen (nat g, nat threads)
1331 {
1332     nat s, t, i;
1333     step_workspace *ws;
1334     step *stp;
1335     bdescr *bd;
1336
1337     for (s = 0; s < generations[g].n_steps; s++) {
1338         stp = &generations[g].steps[s];
1339         stp->scavenged_large_objects = NULL;
1340         stp->n_scavenged_large_blocks = 0;
1341     }
1342
1343     for (t = 0; t < threads; t++) {
1344         for (s = 0; s < generations[g].n_steps; s++) {
1345
1346             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1347             stp = ws->stp;
1348
1349             ws->buffer_todo_bd = NULL;
1350             ws->todo_large_objects = NULL;
1351
1352             ws->scavd_list = NULL;
1353             ws->n_scavd_blocks = 0;
1354
1355             // If the block at the head of the list in this generation
1356             // is less than 3/4 full, then use it as a todo block.
1357             if (stp->blocks && isPartiallyFull(stp->blocks))
1358             {
1359                 ws->todo_bd = stp->blocks;
1360                 ws->todo_free = ws->todo_bd->free;
1361                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1362                 stp->blocks = stp->blocks->link;
1363                 stp->n_blocks -= 1;
1364                 ws->todo_bd->link = NULL;
1365
1366                 // this block is also the scan block; we must scan
1367                 // from the current end point.
1368                 ws->scan_bd = ws->todo_bd;
1369                 ws->scan = ws->scan_bd->free;
1370
1371                 // subtract the contents of this block from the stats,
1372                 // because we'll count the whole block later.
1373                 copied -= ws->scan_bd->free - ws->scan_bd->start;
1374             }
1375             else
1376             {
1377                 ws->scan_bd = NULL;
1378                 ws->scan = NULL;
1379                 ws->todo_bd = NULL;
1380                 gc_alloc_todo_block(ws);
1381             }
1382         }
1383     }
1384
1385     // Move the private mutable lists from each capability onto the
1386     // main mutable list for the generation.
1387     for (i = 0; i < n_capabilities; i++) {
1388         for (bd = capabilities[i].mut_lists[g];
1389              bd->link != NULL; bd = bd->link) {
1390             /* nothing */
1391         }
1392         bd->link = generations[g].mut_list;
1393         generations[g].mut_list = capabilities[i].mut_lists[g];
1394         capabilities[i].mut_lists[g] = allocBlock();
1395     }
1396 }
1397
1398 /* -----------------------------------------------------------------------------
1399    Initialise a gc_thread before GC
1400    -------------------------------------------------------------------------- */
1401
1402 static void
1403 init_gc_thread (gc_thread *t)
1404 {
1405     t->static_objects = END_OF_STATIC_LIST;
1406     t->scavenged_static_objects = END_OF_STATIC_LIST;
1407     t->evac_step = 0;
1408     t->failed_to_evac = rtsFalse;
1409     t->eager_promotion = rtsTrue;
1410     t->thunk_selector_depth = 0;
1411     t->copied = 0;
1412     t->any_work = 0;
1413     t->no_work = 0;
1414     t->scav_global_work = 0;
1415     t->scav_local_work = 0;
1416
1417 }
1418
1419 /* -----------------------------------------------------------------------------
1420    Function we pass to GetRoots to evacuate roots.
1421    -------------------------------------------------------------------------- */
1422
1423 static void
1424 mark_root(StgClosure **root)
1425 {
1426   evacuate(root);
1427 }
1428
1429 /* -----------------------------------------------------------------------------
1430    Initialising the static object & mutable lists
1431    -------------------------------------------------------------------------- */
1432
1433 static void
1434 zero_static_object_list(StgClosure* first_static)
1435 {
1436   StgClosure* p;
1437   StgClosure* link;
1438   const StgInfoTable *info;
1439
1440   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1441     info = get_itbl(p);
1442     link = *STATIC_LINK(info, p);
1443     *STATIC_LINK(info,p) = NULL;
1444   }
1445 }
1446
1447 /* -----------------------------------------------------------------------------
1448    Reverting CAFs
1449    -------------------------------------------------------------------------- */
1450
1451 void
1452 revertCAFs( void )
1453 {
1454     StgIndStatic *c;
1455
1456     for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
1457          c = (StgIndStatic *)c->static_link)
1458     {
1459         SET_INFO(c, c->saved_info);
1460         c->saved_info = NULL;
1461         // could, but not necessary: c->static_link = NULL;
1462     }
1463     revertible_caf_list = NULL;
1464 }
1465
1466 void
1467 markCAFs( evac_fn evac )
1468 {
1469     StgIndStatic *c;
1470
1471     for (c = (StgIndStatic *)caf_list; c != NULL;
1472          c = (StgIndStatic *)c->static_link)
1473     {
1474         evac(&c->indirectee);
1475     }
1476     for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
1477          c = (StgIndStatic *)c->static_link)
1478     {
1479         evac(&c->indirectee);
1480     }
1481 }
1482
1483 /* ----------------------------------------------------------------------------
1484    Update the pointers from the task list
1485
1486    These are treated as weak pointers because we want to allow a main
1487    thread to get a BlockedOnDeadMVar exception in the same way as any
1488    other thread.  Note that the threads should all have been retained
1489    by GC by virtue of being on the all_threads list, we're just
1490    updating pointers here.
1491    ------------------------------------------------------------------------- */
1492
1493 static void
1494 update_task_list (void)
1495 {
1496     Task *task;
1497     StgTSO *tso;
1498     for (task = all_tasks; task != NULL; task = task->all_link) {
1499         if (!task->stopped && task->tso) {
1500             ASSERT(task->tso->bound == task);
1501             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1502             if (tso == NULL) {
1503                 barf("task %p: main thread %d has been GC'd",
1504 #ifdef THREADED_RTS
1505                      (void *)task->id,
1506 #else
1507                      (void *)task,
1508 #endif
1509                      task->tso->id);
1510             }
1511             task->tso = tso;
1512         }
1513     }
1514 }
1515
1516 /* ----------------------------------------------------------------------------
1517    Reset the sizes of the older generations when we do a major
1518    collection.
1519
1520    CURRENT STRATEGY: make all generations except zero the same size.
1521    We have to stay within the maximum heap size, and leave a certain
1522    percentage of the maximum heap size available to allocate into.
1523    ------------------------------------------------------------------------- */
1524
1525 static void
1526 resize_generations (void)
1527 {
1528     nat g;
1529
1530     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1531         nat live, size, min_alloc;
1532         nat max  = RtsFlags.GcFlags.maxHeapSize;
1533         nat gens = RtsFlags.GcFlags.generations;
1534
1535         // live in the oldest generations
1536         live = oldest_gen->steps[0].n_blocks +
1537             oldest_gen->steps[0].n_large_blocks;
1538
1539         // default max size for all generations except zero
1540         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1541                        RtsFlags.GcFlags.minOldGenSize);
1542
1543         // minimum size for generation zero
1544         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1545                             RtsFlags.GcFlags.minAllocAreaSize);
1546
1547         // Auto-enable compaction when the residency reaches a
1548         // certain percentage of the maximum heap size (default: 30%).
1549         if (RtsFlags.GcFlags.generations > 1 &&
1550             (RtsFlags.GcFlags.compact ||
1551              (max > 0 &&
1552               oldest_gen->steps[0].n_blocks >
1553               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1554             oldest_gen->steps[0].is_compacted = 1;
1555 //        debugBelch("compaction: on\n", live);
1556         } else {
1557             oldest_gen->steps[0].is_compacted = 0;
1558 //        debugBelch("compaction: off\n", live);
1559         }
1560
1561         // if we're going to go over the maximum heap size, reduce the
1562         // size of the generations accordingly.  The calculation is
1563         // different if compaction is turned on, because we don't need
1564         // to double the space required to collect the old generation.
1565         if (max != 0) {
1566
1567             // this test is necessary to ensure that the calculations
1568             // below don't have any negative results - we're working
1569             // with unsigned values here.
1570             if (max < min_alloc) {
1571                 heapOverflow();
1572             }
1573
1574             if (oldest_gen->steps[0].is_compacted) {
1575                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1576                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1577                 }
1578             } else {
1579                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1580                     size = (max - min_alloc) / ((gens - 1) * 2);
1581                 }
1582             }
1583
1584             if (size < live) {
1585                 heapOverflow();
1586             }
1587         }
1588
1589 #if 0
1590         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1591                    min_alloc, size, max);
1592 #endif
1593
1594         for (g = 0; g < gens; g++) {
1595             generations[g].max_blocks = size;
1596         }
1597     }
1598 }
1599
1600 /* -----------------------------------------------------------------------------
1601    Calculate the new size of the nursery, and resize it.
1602    -------------------------------------------------------------------------- */
1603
1604 static void
1605 resize_nursery (void)
1606 {
1607     if (RtsFlags.GcFlags.generations == 1)
1608     {   // Two-space collector:
1609         nat blocks;
1610
1611         /* set up a new nursery.  Allocate a nursery size based on a
1612          * function of the amount of live data (by default a factor of 2)
1613          * Use the blocks from the old nursery if possible, freeing up any
1614          * left over blocks.
1615          *
1616          * If we get near the maximum heap size, then adjust our nursery
1617          * size accordingly.  If the nursery is the same size as the live
1618          * data (L), then we need 3L bytes.  We can reduce the size of the
1619          * nursery to bring the required memory down near 2L bytes.
1620          *
1621          * A normal 2-space collector would need 4L bytes to give the same
1622          * performance we get from 3L bytes, reducing to the same
1623          * performance at 2L bytes.
1624          */
1625         blocks = g0s0->n_old_blocks;
1626
1627         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1628              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1629              RtsFlags.GcFlags.maxHeapSize )
1630         {
1631             long adjusted_blocks;  // signed on purpose
1632             int pc_free;
1633
1634             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1635
1636             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1637                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1638
1639             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1640             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1641             {
1642                 heapOverflow();
1643             }
1644             blocks = adjusted_blocks;
1645         }
1646         else
1647         {
1648             blocks *= RtsFlags.GcFlags.oldGenFactor;
1649             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1650             {
1651                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1652             }
1653         }
1654         resizeNurseries(blocks);
1655     }
1656     else  // Generational collector
1657     {
1658         /*
1659          * If the user has given us a suggested heap size, adjust our
1660          * allocation area to make best use of the memory available.
1661          */
1662         if (RtsFlags.GcFlags.heapSizeSuggestion)
1663         {
1664             long blocks;
1665             nat needed = calcNeeded();  // approx blocks needed at next GC
1666
1667             /* Guess how much will be live in generation 0 step 0 next time.
1668              * A good approximation is obtained by finding the
1669              * percentage of g0s0 that was live at the last minor GC.
1670              *
1671              * We have an accurate figure for the amount of copied data in
1672              * 'copied', but we must convert this to a number of blocks, with
1673              * a small adjustment for estimated slop at the end of a block
1674              * (- 10 words).
1675              */
1676             if (N == 0)
1677             {
1678                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1679                     / countNurseryBlocks();
1680             }
1681
1682             /* Estimate a size for the allocation area based on the
1683              * information available.  We might end up going slightly under
1684              * or over the suggested heap size, but we should be pretty
1685              * close on average.
1686              *
1687              * Formula:            suggested - needed
1688              *                ----------------------------
1689              *                    1 + g0s0_pcnt_kept/100
1690              *
1691              * where 'needed' is the amount of memory needed at the next
1692              * collection for collecting all steps except g0s0.
1693              */
1694             blocks =
1695                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1696                 (100 + (long)g0s0_pcnt_kept);
1697
1698             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1699                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1700             }
1701
1702             resizeNurseries((nat)blocks);
1703         }
1704         else
1705         {
1706             // we might have added extra large blocks to the nursery, so
1707             // resize back to minAllocAreaSize again.
1708             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1709         }
1710     }
1711 }
1712
1713 /* -----------------------------------------------------------------------------
1714    Sanity code for CAF garbage collection.
1715
1716    With DEBUG turned on, we manage a CAF list in addition to the SRT
1717    mechanism.  After GC, we run down the CAF list and blackhole any
1718    CAFs which have been garbage collected.  This means we get an error
1719    whenever the program tries to enter a garbage collected CAF.
1720
1721    Any garbage collected CAFs are taken off the CAF list at the same
1722    time.
1723    -------------------------------------------------------------------------- */
1724
1725 #if 0 && defined(DEBUG)
1726
1727 static void
1728 gcCAFs(void)
1729 {
1730   StgClosure*  p;
1731   StgClosure** pp;
1732   const StgInfoTable *info;
1733   nat i;
1734
1735   i = 0;
1736   p = caf_list;
1737   pp = &caf_list;
1738
1739   while (p != NULL) {
1740
1741     info = get_itbl(p);
1742
1743     ASSERT(info->type == IND_STATIC);
1744
1745     if (STATIC_LINK(info,p) == NULL) {
1746         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1747         // black hole it
1748         SET_INFO(p,&stg_BLACKHOLE_info);
1749         p = STATIC_LINK2(info,p);
1750         *pp = p;
1751     }
1752     else {
1753       pp = &STATIC_LINK2(info,p);
1754       p = *pp;
1755       i++;
1756     }
1757
1758   }
1759
1760   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1761 }
1762 #endif