rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 // #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsFlags.h"
  17 #include "RtsUtils.h"
  18 #include "Apply.h"
  19 #include "OSThreads.h"
  20 #include "LdvProfile.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "MBlock.h"
  27 #include "ProfHeap.h"
  28 #include "SchedAPI.h"
  29 #include "Weak.h"
  30 #include "Prelude.h"
  31 #include "ParTicky.h"           // ToDo: move into Rts.h
  32 #include "RtsSignals.h"
  33 #include "STM.h"
  34 #include "HsFFI.h"
  35 #include "Linker.h"
  36 #if defined(RTS_GTK_FRONTPANEL)
  37 #include "FrontPanel.h"
  38 #endif
  39 #include "Trace.h"
  40 #include "RetainerProfile.h"
  41 #include "RaiseAsync.h"
  42 #include "Papi.h"
  43
  44 #include "GC.h"
  45 #include "GCThread.h"
  46 #include "Compact.h"
  47 #include "Evac.h"
  48 #include "Scav.h"
  49 #include "GCUtils.h"
  50 #include "MarkWeak.h"
  51 #include "Sparks.h"
  52 #include "Sweep.h"
  53
  54 #include <string.h> // for memset()
  55 #include <unistd.h>
  56
  57 /* -----------------------------------------------------------------------------
  58    Global variables
  59    -------------------------------------------------------------------------- */
  60
  61 /* STATIC OBJECT LIST.
  62  *
  63  * During GC:
  64  * We maintain a linked list of static objects that are still live.
  65  * The requirements for this list are:
  66  *
  67  *  - we need to scan the list while adding to it, in order to
  68  *    scavenge all the static objects (in the same way that
  69  *    breadth-first scavenging works for dynamic objects).
  70  *
  71  *  - we need to be able to tell whether an object is already on
  72  *    the list, to break loops.
  73  *
  74  * Each static object has a "static link field", which we use for
  75  * linking objects on to the list.  We use a stack-type list, consing
  76  * objects on the front as they are added (this means that the
  77  * scavenge phase is depth-first, not breadth-first, but that
  78  * shouldn't matter).
  79  *
  80  * A separate list is kept for objects that have been scavenged
  81  * already - this is so that we can zero all the marks afterwards.
  82  *
  83  * An object is on the list if its static link field is non-zero; this
  84  * means that we have to mark the end of the list with '1', not NULL.
  85  *
  86  * Extra notes for generational GC:
  87  *
  88  * Each generation has a static object list associated with it.  When
  89  * collecting generations up to N, we treat the static object lists
  90  * from generations > N as roots.
  91  *
  92  * We build up a static object list while collecting generations 0..N,
  93  * which is then appended to the static object list of generation N+1.
  94  */
  95
  96 /* N is the oldest generation being collected, where the generations
  97  * are numbered starting at 0.  A major GC (indicated by the major_gc
  98  * flag) is when we're collecting all generations.  We only attempt to
  99  * deal with static objects and GC CAFs when doing a major GC.
 100  */
 101 nat N;
 102 rtsBool major_gc;
 103
 104 /* Data used for allocation area sizing.
 105  */
 106 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 107
 108 /* Mut-list stats */
 109 #ifdef DEBUG
 110 nat mutlist_MUTVARS,
 111     mutlist_MUTARRS,
 112     mutlist_MVARS,
 113     mutlist_OTHERS;
 114 #endif
 115
 116 /* Thread-local data for each GC thread
 117  */
 118 gc_thread **gc_threads = NULL;
 119 // gc_thread *gct = NULL;  // this thread's gct TODO: make thread-local
 120
 121 // Number of threads running in *this* GC.  Affects how many
 122 // step->todos[] lists we have to look in to find work.
 123 nat n_gc_threads;
 124
 125 // For stats:
 126 long copied;        // *words* copied & scavenged during this GC
 127
 128 DECLARE_GCT
 129
 130 /* -----------------------------------------------------------------------------
 131    Static function declarations
 132    -------------------------------------------------------------------------- */
 133
 134 static void mark_root               (void *user, StgClosure **root);
 135 static void zero_static_object_list (StgClosure* first_static);
 136 static nat  initialise_N            (rtsBool force_major_gc);
 137 static void init_collected_gen      (nat g, nat threads);
 138 static void init_uncollected_gen    (nat g, nat threads);
 139 static void init_gc_thread          (gc_thread *t);
 140 static void update_task_list        (void);
 141 static void resize_generations      (void);
 142 static void resize_nursery          (void);
 143 static void start_gc_threads        (void);
 144 static void scavenge_until_all_done (void);
 145 static nat  inc_running             (void);
 146 static nat  dec_running             (void);
 147 static void wakeup_gc_threads       (nat n_threads, nat me);
 148 static void shutdown_gc_threads     (nat n_threads, nat me);
 149
 150 #if 0 && defined(DEBUG)
 151 static void gcCAFs                  (void);
 152 #endif
 153
 154 /* -----------------------------------------------------------------------------
 155    The mark bitmap & stack.
 156    -------------------------------------------------------------------------- */
 157
 158 #define MARK_STACK_BLOCKS 4
 159
 160 bdescr *mark_stack_bdescr;
 161 StgPtr *mark_stack;
 162 StgPtr *mark_sp;
 163 StgPtr *mark_splim;
 164
 165 // Flag and pointers used for falling back to a linear scan when the
 166 // mark stack overflows.
 167 rtsBool mark_stack_overflowed;
 168 bdescr *oldgen_scan_bd;
 169 StgPtr  oldgen_scan;
 170
 171 /* -----------------------------------------------------------------------------
 172    GarbageCollect: the main entry point to the garbage collector.
 173
 174    Locks held: all capabilities are held throughout GarbageCollect().
 175    -------------------------------------------------------------------------- */
 176
 177 void
 178 GarbageCollect (rtsBool force_major_gc,
 179                 nat gc_type USED_IF_THREADS,
 180                 Capability *cap USED_IF_THREADS)
 181 {
 182   bdescr *bd;
 183   step *stp;
 184   lnat live, allocated, max_copied, avg_copied, slop;
 185   gc_thread *saved_gct;
 186   nat g, s, t, n;
 187
 188   // necessary if we stole a callee-saves register for gct:
 189   saved_gct = gct;
 190
 191 #ifdef PROFILING
 192   CostCentreStack *prev_CCS;
 193 #endif
 194
 195   ACQUIRE_SM_LOCK;
 196
 197 #if defined(RTS_USER_SIGNALS)
 198   if (RtsFlags.MiscFlags.install_signal_handlers) {
 199     // block signals
 200     blockUserSignals();
 201   }
 202 #endif
 203
 204   ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
 205   // otherwise adjust the padding in step_workspace.
 206
 207   // tell the stats department that we've started a GC
 208   stat_startGC();
 209
 210   // tell the STM to discard any cached closures it's hoping to re-use
 211   stmPreGCHook();
 212
 213 #ifdef DEBUG
 214   mutlist_MUTVARS = 0;
 215   mutlist_MUTARRS = 0;
 216   mutlist_OTHERS = 0;
 217 #endif
 218
 219   // attribute any costs to CCS_GC
 220 #ifdef PROFILING
 221   prev_CCS = CCCS;
 222   CCCS = CCS_GC;
 223 #endif
 224
 225   /* Approximate how much we allocated.
 226    * Todo: only when generating stats?
 227    */
 228   allocated = calcAllocated();
 229
 230   /* Figure out which generation to collect
 231    */
 232   n = initialise_N(force_major_gc);
 233
 234   /* Start threads, so they can be spinning up while we finish initialisation.
 235    */
 236   start_gc_threads();
 237
 238 #if defined(THREADED_RTS)
 239   /* How many threads will be participating in this GC?
 240    * We don't try to parallelise minor GCs (unless the user asks for
 241    * it with +RTS -gn0), or mark/compact/sweep GC.
 242    */
 243   if (gc_type == PENDING_GC_PAR) {
 244       n_gc_threads = RtsFlags.ParFlags.nNodes;
 245   } else {
 246       n_gc_threads = 1;
 247   }
 248 #else
 249   n_gc_threads = 1;
 250 #endif
 251
 252   trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 253         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 254
 255 #ifdef RTS_GTK_FRONTPANEL
 256   if (RtsFlags.GcFlags.frontpanel) {
 257       updateFrontPanelBeforeGC(N);
 258   }
 259 #endif
 260
 261 #ifdef DEBUG
 262   // check for memory leaks if DEBUG is on
 263   memInventory(traceClass(DEBUG_gc));
 264 #endif
 265
 266   // check stack sanity *before* GC
 267   IF_DEBUG(sanity, checkFreeListSanity());
 268   IF_DEBUG(sanity, checkMutableLists(rtsTrue));
 269
 270   // Initialise all our gc_thread structures
 271   for (t = 0; t < n_gc_threads; t++) {
 272       init_gc_thread(gc_threads[t]);
 273   }
 274
 275   // Initialise all the generations/steps that we're collecting.
 276   for (g = 0; g <= N; g++) {
 277       init_collected_gen(g,n_gc_threads);
 278   }
 279
 280   // Initialise all the generations/steps that we're *not* collecting.
 281   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 282       init_uncollected_gen(g,n_gc_threads);
 283   }
 284
 285   /* Allocate a mark stack if we're doing a major collection.
 286    */
 287   if (major_gc && oldest_gen->steps[0].mark) {
 288       nat mark_stack_blocks;
 289       mark_stack_blocks = stg_max(MARK_STACK_BLOCKS,
 290                                   oldest_gen->steps[0].n_old_blocks / 100);
 291       mark_stack_bdescr = allocGroup(mark_stack_blocks);
 292       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 293       mark_sp    = mark_stack;
 294       mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W);
 295   } else {
 296       mark_stack_bdescr = NULL;
 297   }
 298
 299   // this is the main thread
 300 #ifdef THREADED_RTS
 301   if (n_gc_threads == 1) {
 302       gct = gc_threads[0];
 303   } else {
 304       gct = gc_threads[cap->no];
 305   }
 306 #else
 307   gct = gc_threads[0];
 308 #endif
 309
 310   /* -----------------------------------------------------------------------
 311    * follow all the roots that we know about:
 312    */
 313
 314   // the main thread is running: this prevents any other threads from
 315   // exiting prematurely, so we can start them now.
 316   // NB. do this after the mutable lists have been saved above, otherwise
 317   // the other GC threads will be writing into the old mutable lists.
 318   inc_running();
 319   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 320
 321   // Mutable lists from each generation > N
 322   // we want to *scavenge* these roots, not evacuate them: they're not
 323   // going to move in this GC.
 324   // Also do them in reverse generation order, for the usual reason:
 325   // namely to reduce the likelihood of spurious old->new pointers.
 326   //
 327   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 328       scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 329       freeChain_sync(generations[g].saved_mut_list);
 330       generations[g].saved_mut_list = NULL;
 331
 332   }
 333
 334   // scavenge the capability-private mutable lists.  This isn't part
 335   // of markSomeCapabilities() because markSomeCapabilities() can only
 336   // call back into the GC via mark_root() (due to the gct register
 337   // variable).
 338   if (n_gc_threads == 1) {
 339       for (n = 0; n < n_capabilities; n++) {
 340           scavenge_capability_mut_lists(&capabilities[n]);
 341       }
 342   } else {
 343       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 344   }
 345
 346   // follow roots from the CAF list (used by GHCi)
 347   gct->evac_step = 0;
 348   markCAFs(mark_root, gct);
 349
 350   // follow all the roots that the application knows about.
 351   gct->evac_step = 0;
 352   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 353                        rtsTrue/*prune sparks*/);
 354
 355 #if defined(RTS_USER_SIGNALS)
 356   // mark the signal handlers (signals should be already blocked)
 357   markSignalHandlers(mark_root, gct);
 358 #endif
 359
 360   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 361   markWeakPtrList();
 362   initWeakForGC();
 363
 364   // Mark the stable pointer table.
 365   markStablePtrTable(mark_root, gct);
 366
 367   /* -------------------------------------------------------------------------
 368    * Repeatedly scavenge all the areas we know about until there's no
 369    * more scavenging to be done.
 370    */
 371   for (;;)
 372   {
 373       scavenge_until_all_done();
 374       // The other threads are now stopped.  We might recurse back to
 375       // here, but from now on this is the only thread.
 376
 377       // if any blackholes are alive, make the threads that wait on
 378       // them alive too.
 379       if (traverseBlackholeQueue()) {
 380           inc_running();
 381           continue;
 382       }
 383
 384       // must be last...  invariant is that everything is fully
 385       // scavenged at this point.
 386       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 387           inc_running();
 388           continue;
 389       }
 390
 391       // If we get to here, there's really nothing left to do.
 392       break;
 393   }
 394
 395   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 396
 397   // Update pointers from the Task list
 398   update_task_list();
 399
 400   // Now see which stable names are still alive.
 401   gcStablePtrTable();
 402
 403 #ifdef PROFILING
 404   // We call processHeapClosureForDead() on every closure destroyed during
 405   // the current garbage collection, so we invoke LdvCensusForDead().
 406   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 407       || RtsFlags.ProfFlags.bioSelector != NULL)
 408     LdvCensusForDead(N);
 409 #endif
 410
 411   // NO MORE EVACUATION AFTER THIS POINT!
 412
 413   // Two-space collector: free the old to-space.
 414   // g0s0->old_blocks is the old nursery
 415   // g0s0->blocks is to-space from the previous GC
 416   if (RtsFlags.GcFlags.generations == 1) {
 417       if (g0s0->blocks != NULL) {
 418           freeChain(g0s0->blocks);
 419           g0s0->blocks = NULL;
 420       }
 421   }
 422
 423   // For each workspace, in each thread, move the copied blocks to the step
 424   {
 425       gc_thread *thr;
 426       step_workspace *ws;
 427       bdescr *prev, *next;
 428
 429       for (t = 0; t < n_gc_threads; t++) {
 430           thr = gc_threads[t];
 431
 432           // not step 0
 433           if (RtsFlags.GcFlags.generations == 1) {
 434               s = 0;
 435           } else {
 436               s = 1;
 437           }
 438           for (; s < total_steps; s++) {
 439               ws = &thr->steps[s];
 440
 441               // Push the final block
 442               if (ws->todo_bd) {
 443                   push_scanned_block(ws->todo_bd, ws);
 444               }
 445
 446               ASSERT(gct->scan_bd == NULL);
 447               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 448
 449               prev = NULL;
 450               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 451                   ws->step->n_words += bd->free - bd->start;
 452                   prev = bd;
 453               }
 454               if (prev != NULL) {
 455                   prev->link = ws->step->blocks;
 456                   ws->step->blocks = ws->scavd_list;
 457               }
 458               ws->step->n_blocks += ws->n_scavd_blocks;
 459           }
 460       }
 461
 462       // Add all the partial blocks *after* we've added all the full
 463       // blocks.  This is so that we can grab the partial blocks back
 464       // again and try to fill them up in the next GC.
 465       for (t = 0; t < n_gc_threads; t++) {
 466           thr = gc_threads[t];
 467
 468           // not step 0
 469           if (RtsFlags.GcFlags.generations == 1) {
 470               s = 0;
 471           } else {
 472               s = 1;
 473           }
 474           for (; s < total_steps; s++) {
 475               ws = &thr->steps[s];
 476
 477               prev = NULL;
 478               for (bd = ws->part_list; bd != NULL; bd = next) {
 479                   next = bd->link;
 480                   if (bd->free == bd->start) {
 481                       if (prev == NULL) {
 482                           ws->part_list = next;
 483                       } else {
 484                           prev->link = next;
 485                       }
 486                       freeGroup(bd);
 487                       ws->n_part_blocks--;
 488                   } else {
 489                       ws->step->n_words += bd->free - bd->start;
 490                       prev = bd;
 491                   }
 492               }
 493               if (prev != NULL) {
 494                   prev->link = ws->step->blocks;
 495                   ws->step->blocks = ws->part_list;
 496               }
 497               ws->step->n_blocks += ws->n_part_blocks;
 498
 499               ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
 500               ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
 501           }
 502       }
 503   }
 504
 505   // Finally: compact or sweep the oldest generation.
 506   if (major_gc && oldest_gen->steps[0].mark) {
 507       if (oldest_gen->steps[0].compact)
 508           compact(gct->scavenged_static_objects);
 509       else
 510           sweep(&oldest_gen->steps[0]);
 511   }
 512
 513   /* run through all the generations/steps and tidy up
 514    */
 515   copied = 0;
 516   max_copied = 0;
 517   avg_copied = 0;
 518   {
 519       nat i;
 520       for (i=0; i < n_gc_threads; i++) {
 521           if (n_gc_threads > 1) {
 522               trace(TRACE_gc,"thread %d:", i);
 523               trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 524               trace(TRACE_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 525               trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
 526               trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
 527               trace(TRACE_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 528           }
 529           copied += gc_threads[i]->copied;
 530           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 531       }
 532       if (n_gc_threads == 1) {
 533           max_copied = 0;
 534           avg_copied = 0;
 535       } else {
 536           avg_copied = copied;
 537       }
 538   }
 539
 540   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 541
 542     if (g == N) {
 543       generations[g].collections++; // for stats
 544       if (n_gc_threads > 1) generations[g].par_collections++;
 545     }
 546
 547     // Count the mutable list as bytes "copied" for the purposes of
 548     // stats.  Every mutable list is copied during every GC.
 549     if (g > 0) {
 550         nat mut_list_size = 0;
 551         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 552             mut_list_size += bd->free - bd->start;
 553         }
 554         for (n = 0; n < n_capabilities; n++) {
 555             for (bd = capabilities[n].mut_lists[g];
 556                  bd != NULL; bd = bd->link) {
 557                 mut_list_size += bd->free - bd->start;
 558             }
 559         }
 560         copied +=  mut_list_size;
 561
 562         debugTrace(DEBUG_gc,
 563                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 564                    (unsigned long)(mut_list_size * sizeof(W_)),
 565                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 566     }
 567
 568     for (s = 0; s < generations[g].n_steps; s++) {
 569       bdescr *next, *prev;
 570       stp = &generations[g].steps[s];
 571
 572       // for generations we collected...
 573       if (g <= N) {
 574
 575         /* free old memory and shift to-space into from-space for all
 576          * the collected steps (except the allocation area).  These
 577          * freed blocks will probaby be quickly recycled.
 578          */
 579         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 580             if (stp->mark)
 581             {
 582                 // tack the new blocks on the end of the existing blocks
 583                 if (stp->old_blocks != NULL) {
 584
 585                     prev = NULL;
 586                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 587
 588                         next = bd->link;
 589
 590                         if (!(bd->flags & BF_MARKED))
 591                         {
 592                             if (prev == NULL) {
 593                                 stp->old_blocks = next;
 594                             } else {
 595                                 prev->link = next;
 596                             }
 597                             freeGroup(bd);
 598                             stp->n_old_blocks--;
 599                         }
 600                         else
 601                         {
 602                             stp->n_words += bd->free - bd->start;
 603
 604                             // NB. this step might not be compacted next
 605                             // time, so reset the BF_MARKED flags.
 606                             // They are set before GC if we're going to
 607                             // compact.  (search for BF_MARKED above).
 608                             bd->flags &= ~BF_MARKED;
 609
 610                             // between GCs, all blocks in the heap except
 611                             // for the nursery have the BF_EVACUATED flag set.
 612                             bd->flags |= BF_EVACUATED;
 613
 614                             prev = bd;
 615                         }
 616                     }
 617
 618                     if (prev != NULL) {
 619                         prev->link = stp->blocks;
 620                         stp->blocks = stp->old_blocks;
 621                     }
 622                 }
 623                 // add the new blocks to the block tally
 624                 stp->n_blocks += stp->n_old_blocks;
 625                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 626                 ASSERT(countOccupied(stp->blocks) == stp->n_words);
 627             }
 628             else // not copacted
 629             {
 630                 freeChain(stp->old_blocks);
 631             }
 632             stp->old_blocks = NULL;
 633             stp->n_old_blocks = 0;
 634         }
 635
 636         /* LARGE OBJECTS.  The current live large objects are chained on
 637          * scavenged_large, having been moved during garbage
 638          * collection from large_objects.  Any objects left on
 639          * large_objects list are therefore dead, so we free them here.
 640          */
 641         for (bd = stp->large_objects; bd != NULL; bd = next) {
 642           next = bd->link;
 643           freeGroup(bd);
 644           bd = next;
 645         }
 646
 647         stp->large_objects  = stp->scavenged_large_objects;
 648         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 649
 650       }
 651       else // for older generations...
 652       {
 653         /* For older generations, we need to append the
 654          * scavenged_large_object list (i.e. large objects that have been
 655          * promoted during this GC) to the large_object list for that step.
 656          */
 657         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 658           next = bd->link;
 659           dbl_link_onto(bd, &stp->large_objects);
 660         }
 661
 662         // add the new blocks we promoted during this GC
 663         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 664       }
 665     }
 666   }
 667
 668   // update the max size of older generations after a major GC
 669   resize_generations();
 670
 671   // Calculate the amount of live data for stats.
 672   live = calcLiveWords();
 673
 674   // Free the small objects allocated via allocate(), since this will
 675   // all have been copied into G0S1 now.
 676   if (RtsFlags.GcFlags.generations > 1) {
 677       if (g0s0->blocks != NULL) {
 678           freeChain(g0s0->blocks);
 679           g0s0->blocks = NULL;
 680       }
 681       g0s0->n_blocks = 0;
 682       g0s0->n_words = 0;
 683   }
 684   alloc_blocks = 0;
 685   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 686
 687   // Start a new pinned_object_block
 688   pinned_object_block = NULL;
 689
 690   // Free the mark stack.
 691   if (mark_stack_bdescr != NULL) {
 692       freeGroup(mark_stack_bdescr);
 693   }
 694
 695   // Free any bitmaps.
 696   for (g = 0; g <= N; g++) {
 697       for (s = 0; s < generations[g].n_steps; s++) {
 698           stp = &generations[g].steps[s];
 699           if (stp->bitmap != NULL) {
 700               freeGroup(stp->bitmap);
 701               stp->bitmap = NULL;
 702           }
 703       }
 704   }
 705
 706   resize_nursery();
 707
 708  // mark the garbage collected CAFs as dead
 709 #if 0 && defined(DEBUG) // doesn't work at the moment
 710   if (major_gc) { gcCAFs(); }
 711 #endif
 712
 713 #ifdef PROFILING
 714   // resetStaticObjectForRetainerProfiling() must be called before
 715   // zeroing below.
 716   if (n_gc_threads > 1) {
 717       barf("profiling is currently broken with multi-threaded GC");
 718       // ToDo: fix the gct->scavenged_static_objects below
 719   }
 720   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 721 #endif
 722
 723   // zero the scavenged static object list
 724   if (major_gc) {
 725       nat i;
 726       for (i = 0; i < n_gc_threads; i++) {
 727           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 728       }
 729   }
 730
 731   // Reset the nursery
 732   resetNurseries();
 733
 734   // start any pending finalizers
 735   RELEASE_SM_LOCK;
 736   scheduleFinalizers(last_free_capability, old_weak_ptr_list);
 737   ACQUIRE_SM_LOCK;
 738
 739   // send exceptions to any threads which were about to die
 740   RELEASE_SM_LOCK;
 741   resurrectThreads(resurrected_threads);
 742   performPendingThrowTos(exception_threads);
 743   ACQUIRE_SM_LOCK;
 744
 745   // Update the stable pointer hash table.
 746   updateStablePtrTable(major_gc);
 747
 748   // check sanity after GC
 749   IF_DEBUG(sanity, checkSanity());
 750
 751   // extra GC trace info
 752   if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
 753
 754 #ifdef DEBUG
 755   // symbol-table based profiling
 756   /*  heapCensus(to_blocks); */ /* ToDo */
 757 #endif
 758
 759   // restore enclosing cost centre
 760 #ifdef PROFILING
 761   CCCS = prev_CCS;
 762 #endif
 763
 764 #ifdef DEBUG
 765   // check for memory leaks if DEBUG is on
 766   memInventory(traceClass(DEBUG_gc));
 767 #endif
 768
 769 #ifdef RTS_GTK_FRONTPANEL
 770   if (RtsFlags.GcFlags.frontpanel) {
 771       updateFrontPanelAfterGC( N, live );
 772   }
 773 #endif
 774
 775   // ok, GC over: tell the stats department what happened.
 776   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 777   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 778
 779   // Guess which generation we'll collect *next* time
 780   initialise_N(force_major_gc);
 781
 782 #if defined(RTS_USER_SIGNALS)
 783   if (RtsFlags.MiscFlags.install_signal_handlers) {
 784     // unblock signals again
 785     unblockUserSignals();
 786   }
 787 #endif
 788
 789   RELEASE_SM_LOCK;
 790
 791   gct = saved_gct;
 792 }
 793
 794 /* -----------------------------------------------------------------------------
 795    Figure out which generation to collect, initialise N and major_gc.
 796
 797    Also returns the total number of blocks in generations that will be
 798    collected.
 799    -------------------------------------------------------------------------- */
 800
 801 static nat
 802 initialise_N (rtsBool force_major_gc)
 803 {
 804     int g;
 805     nat s, blocks, blocks_total;
 806
 807     blocks = 0;
 808     blocks_total = 0;
 809
 810     if (force_major_gc) {
 811         N = RtsFlags.GcFlags.generations - 1;
 812     } else {
 813         N = 0;
 814     }
 815
 816     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 817         blocks = 0;
 818         for (s = 0; s < generations[g].n_steps; s++) {
 819             blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
 820             blocks += generations[g].steps[s].n_large_blocks;
 821         }
 822         if (blocks >= generations[g].max_blocks) {
 823             N = stg_max(N,g);
 824         }
 825         if ((nat)g <= N) {
 826             blocks_total += blocks;
 827         }
 828     }
 829
 830     blocks_total += countNurseryBlocks();
 831
 832     major_gc = (N == RtsFlags.GcFlags.generations-1);
 833     return blocks_total;
 834 }
 835
 836 /* -----------------------------------------------------------------------------
 837    Initialise the gc_thread structures.
 838    -------------------------------------------------------------------------- */
 839
 840 #define GC_THREAD_INACTIVE             0
 841 #define GC_THREAD_STANDING_BY          1
 842 #define GC_THREAD_RUNNING              2
 843 #define GC_THREAD_WAITING_TO_CONTINUE  3
 844
 845 static gc_thread *
 846 alloc_gc_thread (int n)
 847 {
 848     nat s;
 849     step_workspace *ws;
 850     gc_thread *t;
 851
 852     t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 853                        "alloc_gc_thread");
 854
 855 #ifdef THREADED_RTS
 856     t->id = 0;
 857     initSpinLock(&t->gc_spin);
 858     initSpinLock(&t->mut_spin);
 859     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 860     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 861                           // thread to start up, see wakeup_gc_threads
 862 #endif
 863
 864     t->thread_index = n;
 865     t->free_blocks = NULL;
 866     t->gc_count = 0;
 867
 868     init_gc_thread(t);
 869
 870 #ifdef USE_PAPI
 871     t->papi_events = -1;
 872 #endif
 873
 874     for (s = 0; s < total_steps; s++)
 875     {
 876         ws = &t->steps[s];
 877         ws->step = &all_steps[s];
 878         ASSERT(s == ws->step->abs_no);
 879         ws->gct = t;
 880
 881         ws->todo_bd = NULL;
 882         ws->buffer_todo_bd = NULL;
 883
 884         ws->part_list = NULL;
 885         ws->n_part_blocks = 0;
 886
 887         ws->scavd_list = NULL;
 888         ws->n_scavd_blocks = 0;
 889     }
 890
 891     return t;
 892 }
 893
 894
 895 void
 896 initGcThreads (void)
 897 {
 898     if (gc_threads == NULL) {
 899 #if defined(THREADED_RTS)
 900         nat i;
 901         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 902                                      sizeof(gc_thread*),
 903                                      "alloc_gc_threads");
 904
 905         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 906             gc_threads[i] = alloc_gc_thread(i);
 907         }
 908 #else
 909         gc_threads = stgMallocBytes (sizeof(gc_thread*),
 910                                      "alloc_gc_threads");
 911
 912         gc_threads[0] = alloc_gc_thread(0);
 913 #endif
 914     }
 915 }
 916
 917 /* ----------------------------------------------------------------------------
 918    Start GC threads
 919    ------------------------------------------------------------------------- */
 920
 921 static nat gc_running_threads;
 922
 923 #if defined(THREADED_RTS)
 924 static Mutex gc_running_mutex;
 925 #endif
 926
 927 static nat
 928 inc_running (void)
 929 {
 930     nat n_running;
 931     ACQUIRE_LOCK(&gc_running_mutex);
 932     n_running = ++gc_running_threads;
 933     RELEASE_LOCK(&gc_running_mutex);
 934     ASSERT(n_running <= n_gc_threads);
 935     return n_running;
 936 }
 937
 938 static nat
 939 dec_running (void)
 940 {
 941     nat n_running;
 942     ACQUIRE_LOCK(&gc_running_mutex);
 943     ASSERT(n_gc_threads != 0);
 944     n_running = --gc_running_threads;
 945     RELEASE_LOCK(&gc_running_mutex);
 946     return n_running;
 947 }
 948
 949 static rtsBool
 950 any_work (void)
 951 {
 952     int s;
 953     step_workspace *ws;
 954
 955     gct->any_work++;
 956
 957     write_barrier();
 958
 959     // scavenge objects in compacted generation
 960     if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
 961         (mark_stack_bdescr != NULL && !mark_stack_empty())) {
 962         return rtsTrue;
 963     }
 964
 965     // Check for global work in any step.  We don't need to check for
 966     // local work, because we have already exited scavenge_loop(),
 967     // which means there is no local work for this thread.
 968     for (s = total_steps-1; s >= 0; s--) {
 969         if (s == 0 && RtsFlags.GcFlags.generations > 1) {
 970             continue;
 971         }
 972         ws = &gct->steps[s];
 973         if (ws->todo_large_objects) return rtsTrue;
 974         if (ws->step->todos) return rtsTrue;
 975     }
 976
 977     gct->no_work++;
 978
 979     return rtsFalse;
 980 }
 981
 982 static void
 983 scavenge_until_all_done (void)
 984 {
 985     nat r;
 986
 987     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
 988
 989 loop:
 990 #if defined(THREADED_RTS)
 991     if (n_gc_threads > 1) {
 992         scavenge_loop();
 993     } else {
 994         scavenge_loop1();
 995     }
 996 #else
 997     scavenge_loop();
 998 #endif
 999
1000     // scavenge_loop() only exits when there's no work to do
1001     r = dec_running();
1002
1003     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
1004                gct->thread_index, r);
1005
1006     while (gc_running_threads != 0) {
1007         // usleep(1);
1008         if (any_work()) {
1009             inc_running();
1010             goto loop;
1011         }
1012         // any_work() does not remove the work from the queue, it
1013         // just checks for the presence of work.  If we find any,
1014         // then we increment gc_running_threads and go back to
1015         // scavenge_loop() to perform any pending work.
1016     }
1017
1018     // All threads are now stopped
1019     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
1020 }
1021
1022 #if defined(THREADED_RTS)
1023
1024 void
1025 gcWorkerThread (Capability *cap)
1026 {
1027     cap->in_gc = rtsTrue;
1028
1029     gct = gc_threads[cap->no];
1030     gct->id = osThreadId();
1031
1032     // Wait until we're told to wake up
1033     RELEASE_SPIN_LOCK(&gct->mut_spin);
1034     gct->wakeup = GC_THREAD_STANDING_BY;
1035     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1036     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1037
1038 #ifdef USE_PAPI
1039     // start performance counters in this thread...
1040     if (gct->papi_events == -1) {
1041         papi_init_eventset(&gct->papi_events);
1042     }
1043     papi_thread_start_gc1_count(gct->papi_events);
1044 #endif
1045
1046     // Every thread evacuates some roots.
1047     gct->evac_step = 0;
1048     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1049                          rtsTrue/*prune sparks*/);
1050     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1051
1052     scavenge_until_all_done();
1053
1054 #ifdef USE_PAPI
1055     // count events in this thread towards the GC totals
1056     papi_thread_stop_gc1_count(gct->papi_events);
1057 #endif
1058
1059     // Wait until we're told to continue
1060     RELEASE_SPIN_LOCK(&gct->gc_spin);
1061     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1062     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1063                gct->thread_index);
1064     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1065     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1066 }
1067
1068 #endif
1069
1070 void
1071 waitForGcThreads (Capability *cap USED_IF_THREADS)
1072 {
1073 #if defined(THREADED_RTS)
1074     nat n_threads = RtsFlags.ParFlags.nNodes;
1075     nat me = cap->no;
1076     nat i, j;
1077     rtsBool retry = rtsTrue;
1078
1079     while(retry) {
1080         for (i=0; i < n_threads; i++) {
1081             if (i == me) continue;
1082             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1083                 prodCapability(&capabilities[i], cap->running_task);
1084             }
1085         }
1086         for (j=0; j < 10000000; j++) {
1087             retry = rtsFalse;
1088             for (i=0; i < n_threads; i++) {
1089                 if (i == me) continue;
1090                 write_barrier();
1091                 setContextSwitches();
1092                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1093                     retry = rtsTrue;
1094                 }
1095             }
1096             if (!retry) break;
1097         }
1098     }
1099 #endif
1100 }
1101
1102 static void
1103 start_gc_threads (void)
1104 {
1105 #if defined(THREADED_RTS)
1106     gc_running_threads = 0;
1107     initMutex(&gc_running_mutex);
1108 #endif
1109 }
1110
1111 static void
1112 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1113 {
1114 #if defined(THREADED_RTS)
1115     nat i;
1116     for (i=0; i < n_threads; i++) {
1117         if (i == me) continue;
1118         inc_running();
1119         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1120         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1121
1122         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1123         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1124         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1125     }
1126 #endif
1127 }
1128
1129 // After GC is complete, we must wait for all GC threads to enter the
1130 // standby state, otherwise they may still be executing inside
1131 // any_work(), and may even remain awake until the next GC starts.
1132 static void
1133 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1134 {
1135 #if defined(THREADED_RTS)
1136     nat i;
1137     for (i=0; i < n_threads; i++) {
1138         if (i == me) continue;
1139         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1140     }
1141 #endif
1142 }
1143
1144 void
1145 releaseGCThreads (Capability *cap USED_IF_THREADS)
1146 {
1147 #if defined(THREADED_RTS)
1148     nat n_threads = RtsFlags.ParFlags.nNodes;
1149     nat me = cap->no;
1150     nat i;
1151     for (i=0; i < n_threads; i++) {
1152         if (i == me) continue;
1153         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1154             barf("releaseGCThreads");
1155
1156         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1157         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1158         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1159     }
1160 #endif
1161 }
1162
1163 /* ----------------------------------------------------------------------------
1164    Initialise a generation that is to be collected
1165    ------------------------------------------------------------------------- */
1166
1167 static void
1168 init_collected_gen (nat g, nat n_threads)
1169 {
1170     nat s, t, i;
1171     step_workspace *ws;
1172     step *stp;
1173     bdescr *bd;
1174
1175     // Throw away the current mutable list.  Invariant: the mutable
1176     // list always has at least one block; this means we can avoid a
1177     // check for NULL in recordMutable().
1178     if (g != 0) {
1179         freeChain(generations[g].mut_list);
1180         generations[g].mut_list = allocBlock();
1181         for (i = 0; i < n_capabilities; i++) {
1182             freeChain(capabilities[i].mut_lists[g]);
1183             capabilities[i].mut_lists[g] = allocBlock();
1184         }
1185     }
1186
1187     for (s = 0; s < generations[g].n_steps; s++) {
1188
1189         stp = &generations[g].steps[s];
1190         ASSERT(stp->gen_no == g);
1191
1192         // we'll construct a new list of threads in this step
1193         // during GC, throw away the current list.
1194         stp->old_threads = stp->threads;
1195         stp->threads = END_TSO_QUEUE;
1196
1197         // generation 0, step 0 doesn't need to-space
1198         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1199             continue;
1200         }
1201
1202         // deprecate the existing blocks
1203         stp->old_blocks   = stp->blocks;
1204         stp->n_old_blocks = stp->n_blocks;
1205         stp->blocks       = NULL;
1206         stp->n_blocks     = 0;
1207         stp->n_words      = 0;
1208         stp->live_estimate = 0;
1209
1210         // we don't have any to-be-scavenged blocks yet
1211         stp->todos = NULL;
1212         stp->todos_last = NULL;
1213         stp->n_todos = 0;
1214
1215         // initialise the large object queues.
1216         stp->scavenged_large_objects = NULL;
1217         stp->n_scavenged_large_blocks = 0;
1218
1219         // mark the small objects as from-space
1220         for (bd = stp->old_blocks; bd; bd = bd->link) {
1221             bd->flags &= ~BF_EVACUATED;
1222         }
1223
1224         // mark the large objects as from-space
1225         for (bd = stp->large_objects; bd; bd = bd->link) {
1226             bd->flags &= ~BF_EVACUATED;
1227         }
1228
1229         // for a compacted step, we need to allocate the bitmap
1230         if (stp->mark) {
1231             nat bitmap_size; // in bytes
1232             bdescr *bitmap_bdescr;
1233             StgWord *bitmap;
1234
1235             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1236
1237             if (bitmap_size > 0) {
1238                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1239                                            / BLOCK_SIZE);
1240                 stp->bitmap = bitmap_bdescr;
1241                 bitmap = bitmap_bdescr->start;
1242
1243                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1244                            bitmap_size, bitmap);
1245
1246                 // don't forget to fill it with zeros!
1247                 memset(bitmap, 0, bitmap_size);
1248
1249                 // For each block in this step, point to its bitmap from the
1250                 // block descriptor.
1251                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1252                     bd->u.bitmap = bitmap;
1253                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1254
1255                     // Also at this point we set the BF_MARKED flag
1256                     // for this block.  The invariant is that
1257                     // BF_MARKED is always unset, except during GC
1258                     // when it is set on those blocks which will be
1259                     // compacted.
1260                     if (!(bd->flags & BF_FRAGMENTED)) {
1261                         bd->flags |= BF_MARKED;
1262                     }
1263                 }
1264             }
1265         }
1266     }
1267
1268     // For each GC thread, for each step, allocate a "todo" block to
1269     // store evacuated objects to be scavenged, and a block to store
1270     // evacuated objects that do not need to be scavenged.
1271     for (t = 0; t < n_threads; t++) {
1272         for (s = 0; s < generations[g].n_steps; s++) {
1273
1274             // we don't copy objects into g0s0, unless -G0
1275             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1276
1277             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1278
1279             ws->todo_large_objects = NULL;
1280
1281             ws->part_list = NULL;
1282             ws->n_part_blocks = 0;
1283
1284             // allocate the first to-space block; extra blocks will be
1285             // chained on as necessary.
1286             ws->todo_bd = NULL;
1287             ws->buffer_todo_bd = NULL;
1288             alloc_todo_block(ws,0);
1289
1290             ws->scavd_list = NULL;
1291             ws->n_scavd_blocks = 0;
1292         }
1293     }
1294 }
1295
1296
1297 /* ----------------------------------------------------------------------------
1298    Initialise a generation that is *not* to be collected
1299    ------------------------------------------------------------------------- */
1300
1301 static void
1302 init_uncollected_gen (nat g, nat threads)
1303 {
1304     nat s, t, n;
1305     step_workspace *ws;
1306     step *stp;
1307     bdescr *bd;
1308
1309     // save the current mutable lists for this generation, and
1310     // allocate a fresh block for each one.  We'll traverse these
1311     // mutable lists as roots early on in the GC.
1312     generations[g].saved_mut_list = generations[g].mut_list;
1313     generations[g].mut_list = allocBlock();
1314     for (n = 0; n < n_capabilities; n++) {
1315         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
1316         capabilities[n].mut_lists[g] = allocBlock();
1317     }
1318
1319     for (s = 0; s < generations[g].n_steps; s++) {
1320         stp = &generations[g].steps[s];
1321         stp->scavenged_large_objects = NULL;
1322         stp->n_scavenged_large_blocks = 0;
1323     }
1324
1325     for (s = 0; s < generations[g].n_steps; s++) {
1326
1327         stp = &generations[g].steps[s];
1328
1329         for (t = 0; t < threads; t++) {
1330             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1331
1332             ws->buffer_todo_bd = NULL;
1333             ws->todo_large_objects = NULL;
1334
1335             ws->part_list = NULL;
1336             ws->n_part_blocks = 0;
1337
1338             ws->scavd_list = NULL;
1339             ws->n_scavd_blocks = 0;
1340
1341             // If the block at the head of the list in this generation
1342             // is less than 3/4 full, then use it as a todo block.
1343             if (stp->blocks && isPartiallyFull(stp->blocks))
1344             {
1345                 ws->todo_bd = stp->blocks;
1346                 ws->todo_free = ws->todo_bd->free;
1347                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1348                 stp->blocks = stp->blocks->link;
1349                 stp->n_blocks -= 1;
1350                 stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1351                 ws->todo_bd->link = NULL;
1352                 // we must scan from the current end point.
1353                 ws->todo_bd->u.scan = ws->todo_bd->free;
1354             }
1355             else
1356             {
1357                 ws->todo_bd = NULL;
1358                 alloc_todo_block(ws,0);
1359             }
1360         }
1361
1362         // deal out any more partial blocks to the threads' part_lists
1363         t = 0;
1364         while (stp->blocks && isPartiallyFull(stp->blocks))
1365         {
1366             bd = stp->blocks;
1367             stp->blocks = bd->link;
1368             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1369             bd->link = ws->part_list;
1370             ws->part_list = bd;
1371             ws->n_part_blocks += 1;
1372             bd->u.scan = bd->free;
1373             stp->n_blocks -= 1;
1374             stp->n_words -= bd->free - bd->start;
1375             t++;
1376             if (t == n_gc_threads) t = 0;
1377         }
1378     }
1379 }
1380
1381 /* -----------------------------------------------------------------------------
1382    Initialise a gc_thread before GC
1383    -------------------------------------------------------------------------- */
1384
1385 static void
1386 init_gc_thread (gc_thread *t)
1387 {
1388     t->static_objects = END_OF_STATIC_LIST;
1389     t->scavenged_static_objects = END_OF_STATIC_LIST;
1390     t->scan_bd = NULL;
1391     t->mut_lists = capabilities[t->thread_index].mut_lists;
1392     t->evac_step = 0;
1393     t->failed_to_evac = rtsFalse;
1394     t->eager_promotion = rtsTrue;
1395     t->thunk_selector_depth = 0;
1396     t->copied = 0;
1397     t->scanned = 0;
1398     t->any_work = 0;
1399     t->no_work = 0;
1400     t->scav_find_work = 0;
1401 }
1402
1403 /* -----------------------------------------------------------------------------
1404    Function we pass to evacuate roots.
1405    -------------------------------------------------------------------------- */
1406
1407 static void
1408 mark_root(void *user, StgClosure **root)
1409 {
1410     // we stole a register for gct, but this function is called from
1411     // *outside* the GC where the register variable is not in effect,
1412     // so we need to save and restore it here.  NB. only call
1413     // mark_root() from the main GC thread, otherwise gct will be
1414     // incorrect.
1415     gc_thread *saved_gct;
1416     saved_gct = gct;
1417     gct = user;
1418
1419     evacuate(root);
1420
1421     gct = saved_gct;
1422 }
1423
1424 /* -----------------------------------------------------------------------------
1425    Initialising the static object & mutable lists
1426    -------------------------------------------------------------------------- */
1427
1428 static void
1429 zero_static_object_list(StgClosure* first_static)
1430 {
1431   StgClosure* p;
1432   StgClosure* link;
1433   const StgInfoTable *info;
1434
1435   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1436     info = get_itbl(p);
1437     link = *STATIC_LINK(info, p);
1438     *STATIC_LINK(info,p) = NULL;
1439   }
1440 }
1441
1442 /* ----------------------------------------------------------------------------
1443    Update the pointers from the task list
1444
1445    These are treated as weak pointers because we want to allow a main
1446    thread to get a BlockedOnDeadMVar exception in the same way as any
1447    other thread.  Note that the threads should all have been retained
1448    by GC by virtue of being on the all_threads list, we're just
1449    updating pointers here.
1450    ------------------------------------------------------------------------- */
1451
1452 static void
1453 update_task_list (void)
1454 {
1455     Task *task;
1456     StgTSO *tso;
1457     for (task = all_tasks; task != NULL; task = task->all_link) {
1458         if (!task->stopped && task->tso) {
1459             ASSERT(task->tso->bound == task);
1460             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1461             if (tso == NULL) {
1462                 barf("task %p: main thread %d has been GC'd",
1463 #ifdef THREADED_RTS
1464                      (void *)task->id,
1465 #else
1466                      (void *)task,
1467 #endif
1468                      task->tso->id);
1469             }
1470             task->tso = tso;
1471         }
1472     }
1473 }
1474
1475 /* ----------------------------------------------------------------------------
1476    Reset the sizes of the older generations when we do a major
1477    collection.
1478
1479    CURRENT STRATEGY: make all generations except zero the same size.
1480    We have to stay within the maximum heap size, and leave a certain
1481    percentage of the maximum heap size available to allocate into.
1482    ------------------------------------------------------------------------- */
1483
1484 static void
1485 resize_generations (void)
1486 {
1487     nat g;
1488
1489     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1490         nat live, size, min_alloc, words;
1491         nat max  = RtsFlags.GcFlags.maxHeapSize;
1492         nat gens = RtsFlags.GcFlags.generations;
1493
1494         // live in the oldest generations
1495         if (oldest_gen->steps[0].live_estimate != 0) {
1496             words = oldest_gen->steps[0].live_estimate;
1497         } else {
1498             words = oldest_gen->steps[0].n_words;
1499         }
1500         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1501             oldest_gen->steps[0].n_large_blocks;
1502
1503         // default max size for all generations except zero
1504         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1505                        RtsFlags.GcFlags.minOldGenSize);
1506
1507         // minimum size for generation zero
1508         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1509                             RtsFlags.GcFlags.minAllocAreaSize);
1510
1511         // Auto-enable compaction when the residency reaches a
1512         // certain percentage of the maximum heap size (default: 30%).
1513         if (RtsFlags.GcFlags.generations > 1 &&
1514             (RtsFlags.GcFlags.compact ||
1515              (max > 0 &&
1516               oldest_gen->steps[0].n_blocks >
1517               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1518             oldest_gen->steps[0].mark = 1;
1519             oldest_gen->steps[0].compact = 1;
1520 //        debugBelch("compaction: on\n", live);
1521         } else {
1522             oldest_gen->steps[0].mark = 0;
1523             oldest_gen->steps[0].compact = 0;
1524 //        debugBelch("compaction: off\n", live);
1525         }
1526
1527         if (RtsFlags.GcFlags.sweep) {
1528             oldest_gen->steps[0].mark = 1;
1529         }
1530
1531         // if we're going to go over the maximum heap size, reduce the
1532         // size of the generations accordingly.  The calculation is
1533         // different if compaction is turned on, because we don't need
1534         // to double the space required to collect the old generation.
1535         if (max != 0) {
1536
1537             // this test is necessary to ensure that the calculations
1538             // below don't have any negative results - we're working
1539             // with unsigned values here.
1540             if (max < min_alloc) {
1541                 heapOverflow();
1542             }
1543
1544             if (oldest_gen->steps[0].compact) {
1545                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1546                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1547                 }
1548             } else {
1549                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1550                     size = (max - min_alloc) / ((gens - 1) * 2);
1551                 }
1552             }
1553
1554             if (size < live) {
1555                 heapOverflow();
1556             }
1557         }
1558
1559 #if 0
1560         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1561                    min_alloc, size, max);
1562 #endif
1563
1564         for (g = 0; g < gens; g++) {
1565             generations[g].max_blocks = size;
1566         }
1567     }
1568 }
1569
1570 /* -----------------------------------------------------------------------------
1571    Calculate the new size of the nursery, and resize it.
1572    -------------------------------------------------------------------------- */
1573
1574 static void
1575 resize_nursery (void)
1576 {
1577     if (RtsFlags.GcFlags.generations == 1)
1578     {   // Two-space collector:
1579         nat blocks;
1580
1581         /* set up a new nursery.  Allocate a nursery size based on a
1582          * function of the amount of live data (by default a factor of 2)
1583          * Use the blocks from the old nursery if possible, freeing up any
1584          * left over blocks.
1585          *
1586          * If we get near the maximum heap size, then adjust our nursery
1587          * size accordingly.  If the nursery is the same size as the live
1588          * data (L), then we need 3L bytes.  We can reduce the size of the
1589          * nursery to bring the required memory down near 2L bytes.
1590          *
1591          * A normal 2-space collector would need 4L bytes to give the same
1592          * performance we get from 3L bytes, reducing to the same
1593          * performance at 2L bytes.
1594          */
1595         blocks = g0s0->n_blocks;
1596
1597         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1598              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1599              RtsFlags.GcFlags.maxHeapSize )
1600         {
1601             long adjusted_blocks;  // signed on purpose
1602             int pc_free;
1603
1604             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1605
1606             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1607                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1608
1609             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1610             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1611             {
1612                 heapOverflow();
1613             }
1614             blocks = adjusted_blocks;
1615         }
1616         else
1617         {
1618             blocks *= RtsFlags.GcFlags.oldGenFactor;
1619             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1620             {
1621                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1622             }
1623         }
1624         resizeNurseries(blocks);
1625     }
1626     else  // Generational collector
1627     {
1628         /*
1629          * If the user has given us a suggested heap size, adjust our
1630          * allocation area to make best use of the memory available.
1631          */
1632         if (RtsFlags.GcFlags.heapSizeSuggestion)
1633         {
1634             long blocks;
1635             nat needed = calcNeeded();  // approx blocks needed at next GC
1636
1637             /* Guess how much will be live in generation 0 step 0 next time.
1638              * A good approximation is obtained by finding the
1639              * percentage of g0s0 that was live at the last minor GC.
1640              *
1641              * We have an accurate figure for the amount of copied data in
1642              * 'copied', but we must convert this to a number of blocks, with
1643              * a small adjustment for estimated slop at the end of a block
1644              * (- 10 words).
1645              */
1646             if (N == 0)
1647             {
1648                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1649                     / countNurseryBlocks();
1650             }
1651
1652             /* Estimate a size for the allocation area based on the
1653              * information available.  We might end up going slightly under
1654              * or over the suggested heap size, but we should be pretty
1655              * close on average.
1656              *
1657              * Formula:            suggested - needed
1658              *                ----------------------------
1659              *                    1 + g0s0_pcnt_kept/100
1660              *
1661              * where 'needed' is the amount of memory needed at the next
1662              * collection for collecting all steps except g0s0.
1663              */
1664             blocks =
1665                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1666                 (100 + (long)g0s0_pcnt_kept);
1667
1668             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1669                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1670             }
1671
1672             resizeNurseries((nat)blocks);
1673         }
1674         else
1675         {
1676             // we might have added extra large blocks to the nursery, so
1677             // resize back to minAllocAreaSize again.
1678             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1679         }
1680     }
1681 }
1682
1683 /* -----------------------------------------------------------------------------
1684    Sanity code for CAF garbage collection.
1685
1686    With DEBUG turned on, we manage a CAF list in addition to the SRT
1687    mechanism.  After GC, we run down the CAF list and blackhole any
1688    CAFs which have been garbage collected.  This means we get an error
1689    whenever the program tries to enter a garbage collected CAF.
1690
1691    Any garbage collected CAFs are taken off the CAF list at the same
1692    time.
1693    -------------------------------------------------------------------------- */
1694
1695 #if 0 && defined(DEBUG)
1696
1697 static void
1698 gcCAFs(void)
1699 {
1700   StgClosure*  p;
1701   StgClosure** pp;
1702   const StgInfoTable *info;
1703   nat i;
1704
1705   i = 0;
1706   p = caf_list;
1707   pp = &caf_list;
1708
1709   while (p != NULL) {
1710
1711     info = get_itbl(p);
1712
1713     ASSERT(info->type == IND_STATIC);
1714
1715     if (STATIC_LINK(info,p) == NULL) {
1716         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1717         // black hole it
1718         SET_INFO(p,&stg_BLACKHOLE_info);
1719         p = STATIC_LINK2(info,p);
1720         *pp = p;
1721     }
1722     else {
1723       pp = &STATIC_LINK2(info,p);
1724       p = *pp;
1725       i++;
1726     }
1727
1728   }
1729
1730   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1731 }
1732 #endif