rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "GCTDecl.h"
  44 #include "Compact.h"
  45 #include "Evac.h"
  46 #include "Scav.h"
  47 #include "GCUtils.h"
  48 #include "MarkStack.h"
  49 #include "MarkWeak.h"
  50 #include "Sparks.h"
  51 #include "Sweep.h"
  52
  53 #include <string.h> // for memset()
  54 #include <unistd.h>
  55
  56 /* -----------------------------------------------------------------------------
  57    Global variables
  58    -------------------------------------------------------------------------- */
  59
  60 /* STATIC OBJECT LIST.
  61  *
  62  * During GC:
  63  * We maintain a linked list of static objects that are still live.
  64  * The requirements for this list are:
  65  *
  66  *  - we need to scan the list while adding to it, in order to
  67  *    scavenge all the static objects (in the same way that
  68  *    breadth-first scavenging works for dynamic objects).
  69  *
  70  *  - we need to be able to tell whether an object is already on
  71  *    the list, to break loops.
  72  *
  73  * Each static object has a "static link field", which we use for
  74  * linking objects on to the list.  We use a stack-type list, consing
  75  * objects on the front as they are added (this means that the
  76  * scavenge phase is depth-first, not breadth-first, but that
  77  * shouldn't matter).
  78  *
  79  * A separate list is kept for objects that have been scavenged
  80  * already - this is so that we can zero all the marks afterwards.
  81  *
  82  * An object is on the list if its static link field is non-zero; this
  83  * means that we have to mark the end of the list with '1', not NULL.
  84  *
  85  * Extra notes for generational GC:
  86  *
  87  * Each generation has a static object list associated with it.  When
  88  * collecting generations up to N, we treat the static object lists
  89  * from generations > N as roots.
  90  *
  91  * We build up a static object list while collecting generations 0..N,
  92  * which is then appended to the static object list of generation N+1.
  93  */
  94
  95 /* N is the oldest generation being collected, where the generations
  96  * are numbered starting at 0.  A major GC (indicated by the major_gc
  97  * flag) is when we're collecting all generations.  We only attempt to
  98  * deal with static objects and GC CAFs when doing a major GC.
  99  */
 100 nat N;
 101 rtsBool major_gc;
 102
 103 /* Data used for allocation area sizing.
 104  */
 105 static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC
 106
 107 /* Mut-list stats */
 108 #ifdef DEBUG
 109 nat mutlist_MUTVARS,
 110     mutlist_MUTARRS,
 111     mutlist_MVARS,
 112     mutlist_OTHERS;
 113 #endif
 114
 115 /* Thread-local data for each GC thread
 116  */
 117 gc_thread **gc_threads = NULL;
 118
 119 #if !defined(THREADED_RTS)
 120 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)];
 121 #endif
 122
 123 // Number of threads running in *this* GC.  Affects how many
 124 // step->todos[] lists we have to look in to find work.
 125 nat n_gc_threads;
 126
 127 // For stats:
 128 long copied;        // *words* copied & scavenged during this GC
 129
 130 rtsBool work_stealing;
 131
 132 DECLARE_GCT
 133
 134 /* -----------------------------------------------------------------------------
 135    Static function declarations
 136    -------------------------------------------------------------------------- */
 137
 138 static void mark_root               (void *user, StgClosure **root);
 139 static void zero_static_object_list (StgClosure* first_static);
 140 static nat  initialise_N            (rtsBool force_major_gc);
 141 static void prepare_collected_gen   (generation *gen);
 142 static void prepare_uncollected_gen (generation *gen);
 143 static void init_gc_thread          (gc_thread *t);
 144 static void resize_generations      (void);
 145 static void resize_nursery          (void);
 146 static void start_gc_threads        (void);
 147 static void scavenge_until_all_done (void);
 148 static StgWord inc_running          (void);
 149 static StgWord dec_running          (void);
 150 static void wakeup_gc_threads       (nat me);
 151 static void shutdown_gc_threads     (nat me);
 152 static void collect_gct_blocks      (void);
 153
 154 #if 0 && defined(DEBUG)
 155 static void gcCAFs                  (void);
 156 #endif
 157
 158 /* -----------------------------------------------------------------------------
 159    The mark stack.
 160    -------------------------------------------------------------------------- */
 161
 162 bdescr *mark_stack_top_bd; // topmost block in the mark stack
 163 bdescr *mark_stack_bd;     // current block in the mark stack
 164 StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
 165
 166 /* -----------------------------------------------------------------------------
 167    GarbageCollect: the main entry point to the garbage collector.
 168
 169    Locks held: all capabilities are held throughout GarbageCollect().
 170    -------------------------------------------------------------------------- */
 171
 172 void
 173 GarbageCollect (rtsBool force_major_gc,
 174                 nat gc_type USED_IF_THREADS,
 175                 Capability *cap)
 176 {
 177   bdescr *bd;
 178   generation *gen;
 179   lnat live_blocks, live_words, allocated, max_copied, avg_copied;
 180   gc_thread *saved_gct;
 181   nat g, n;
 182
 183   // necessary if we stole a callee-saves register for gct:
 184   saved_gct = gct;
 185
 186 #ifdef PROFILING
 187   CostCentreStack *prev_CCS;
 188 #endif
 189
 190   ACQUIRE_SM_LOCK;
 191
 192 #if defined(RTS_USER_SIGNALS)
 193   if (RtsFlags.MiscFlags.install_signal_handlers) {
 194     // block signals
 195     blockUserSignals();
 196   }
 197 #endif
 198
 199   ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
 200   // otherwise adjust the padding in gen_workspace.
 201
 202   // this is the main thread
 203   SET_GCT(gc_threads[cap->no]);
 204
 205   // tell the stats department that we've started a GC
 206   stat_startGC(gct);
 207
 208   // lock the StablePtr table
 209   stablePtrPreGC();
 210
 211 #ifdef DEBUG
 212   mutlist_MUTVARS = 0;
 213   mutlist_MUTARRS = 0;
 214   mutlist_OTHERS = 0;
 215 #endif
 216
 217   // attribute any costs to CCS_GC
 218 #ifdef PROFILING
 219   prev_CCS = CCCS;
 220   CCCS = CCS_GC;
 221 #endif
 222
 223   /* Approximate how much we allocated.
 224    * Todo: only when generating stats?
 225    */
 226   allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
 227
 228   /* Figure out which generation to collect
 229    */
 230   n = initialise_N(force_major_gc);
 231
 232 #if defined(THREADED_RTS)
 233   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
 234                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
 235       // It's not always a good idea to do load balancing in parallel
 236       // GC.  In particular, for a parallel program we don't want to
 237       // lose locality by moving cached data into another CPU's cache
 238       // (this effect can be quite significant).
 239       //
 240       // We could have a more complex way to deterimine whether to do
 241       // work stealing or not, e.g. it might be a good idea to do it
 242       // if the heap is big.  For now, we just turn it on or off with
 243       // a flag.
 244 #endif
 245
 246   /* Start threads, so they can be spinning up while we finish initialisation.
 247    */
 248   start_gc_threads();
 249
 250 #if defined(THREADED_RTS)
 251   /* How many threads will be participating in this GC?
 252    * We don't try to parallelise minor GCs (unless the user asks for
 253    * it with +RTS -gn0), or mark/compact/sweep GC.
 254    */
 255   if (gc_type == PENDING_GC_PAR) {
 256       n_gc_threads = RtsFlags.ParFlags.nNodes;
 257   } else {
 258       n_gc_threads = 1;
 259   }
 260 #else
 261   n_gc_threads = 1;
 262 #endif
 263
 264   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 265         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 266
 267 #ifdef RTS_GTK_FRONTPANEL
 268   if (RtsFlags.GcFlags.frontpanel) {
 269       updateFrontPanelBeforeGC(N);
 270   }
 271 #endif
 272
 273 #ifdef DEBUG
 274   // check for memory leaks if DEBUG is on
 275   memInventory(DEBUG_gc);
 276 #endif
 277
 278   // check sanity *before* GC
 279   IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
 280
 281   // Initialise all the generations/steps that we're collecting.
 282   for (g = 0; g <= N; g++) {
 283       prepare_collected_gen(&generations[g]);
 284   }
 285   // Initialise all the generations/steps that we're *not* collecting.
 286   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 287       prepare_uncollected_gen(&generations[g]);
 288   }
 289
 290   // Prepare this gc_thread
 291   init_gc_thread(gct);
 292
 293   /* Allocate a mark stack if we're doing a major collection.
 294    */
 295   if (major_gc && oldest_gen->mark) {
 296       mark_stack_bd     = allocBlock();
 297       mark_stack_top_bd = mark_stack_bd;
 298       mark_stack_bd->link = NULL;
 299       mark_stack_bd->u.back = NULL;
 300       mark_sp           = mark_stack_bd->start;
 301   } else {
 302       mark_stack_bd     = NULL;
 303       mark_stack_top_bd = NULL;
 304       mark_sp           = NULL;
 305   }
 306
 307   /* -----------------------------------------------------------------------
 308    * follow all the roots that we know about:
 309    */
 310
 311   // the main thread is running: this prevents any other threads from
 312   // exiting prematurely, so we can start them now.
 313   // NB. do this after the mutable lists have been saved above, otherwise
 314   // the other GC threads will be writing into the old mutable lists.
 315   inc_running();
 316   wakeup_gc_threads(gct->thread_index);
 317
 318   traceEventGcWork(gct->cap);
 319
 320   // scavenge the capability-private mutable lists.  This isn't part
 321   // of markSomeCapabilities() because markSomeCapabilities() can only
 322   // call back into the GC via mark_root() (due to the gct register
 323   // variable).
 324   if (n_gc_threads == 1) {
 325       for (n = 0; n < n_capabilities; n++) {
 326 #if defined(THREADED_RTS)
 327           scavenge_capability_mut_Lists1(&capabilities[n]);
 328 #else
 329           scavenge_capability_mut_lists(&capabilities[n]);
 330 #endif
 331       }
 332   } else {
 333       scavenge_capability_mut_lists(gct->cap);
 334   }
 335
 336   // follow roots from the CAF list (used by GHCi)
 337   gct->evac_gen_no = 0;
 338   markCAFs(mark_root, gct);
 339
 340   // follow all the roots that the application knows about.
 341   gct->evac_gen_no = 0;
 342   if (n_gc_threads == 1) {
 343       for (n = 0; n < n_capabilities; n++) {
 344           markCapability(mark_root, gct, &capabilities[n],
 345                          rtsTrue/*don't mark sparks*/);
 346       }
 347   } else {
 348       markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
 349   }
 350
 351   markScheduler(mark_root, gct);
 352
 353 #if defined(RTS_USER_SIGNALS)
 354   // mark the signal handlers (signals should be already blocked)
 355   markSignalHandlers(mark_root, gct);
 356 #endif
 357
 358   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 359   markWeakPtrList();
 360   initWeakForGC();
 361
 362   // Mark the stable pointer table.
 363   markStablePtrTable(mark_root, gct);
 364
 365   /* -------------------------------------------------------------------------
 366    * Repeatedly scavenge all the areas we know about until there's no
 367    * more scavenging to be done.
 368    */
 369   for (;;)
 370   {
 371       scavenge_until_all_done();
 372       // The other threads are now stopped.  We might recurse back to
 373       // here, but from now on this is the only thread.
 374
 375       // must be last...  invariant is that everything is fully
 376       // scavenged at this point.
 377       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 378           inc_running();
 379           continue;
 380       }
 381
 382       // If we get to here, there's really nothing left to do.
 383       break;
 384   }
 385
 386   shutdown_gc_threads(gct->thread_index);
 387
 388   // Now see which stable names are still alive.
 389   gcStablePtrTable();
 390
 391 #ifdef THREADED_RTS
 392   if (n_gc_threads == 1) {
 393       for (n = 0; n < n_capabilities; n++) {
 394           pruneSparkQueue(&capabilities[n]);
 395       }
 396   } else {
 397       pruneSparkQueue(gct->cap);
 398   }
 399 #endif
 400
 401 #ifdef PROFILING
 402   // We call processHeapClosureForDead() on every closure destroyed during
 403   // the current garbage collection, so we invoke LdvCensusForDead().
 404   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 405       || RtsFlags.ProfFlags.bioSelector != NULL)
 406     LdvCensusForDead(N);
 407 #endif
 408
 409   // NO MORE EVACUATION AFTER THIS POINT!
 410
 411   // Finally: compact or sweep the oldest generation.
 412   if (major_gc && oldest_gen->mark) {
 413       if (oldest_gen->compact)
 414           compact(gct->scavenged_static_objects);
 415       else
 416           sweep(oldest_gen);
 417   }
 418
 419   copied = 0;
 420   max_copied = 0;
 421   avg_copied = 0;
 422   {
 423       nat i;
 424       for (i=0; i < n_gc_threads; i++) {
 425           if (n_gc_threads > 1) {
 426               debugTrace(DEBUG_gc,"thread %d:", i);
 427               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 428               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 429               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 430               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 431               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 432           }
 433           copied += gc_threads[i]->copied;
 434           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 435       }
 436       if (n_gc_threads == 1) {
 437           max_copied = 0;
 438           avg_copied = 0;
 439       } else {
 440           avg_copied = copied;
 441       }
 442   }
 443
 444   // Run through all the generations/steps and tidy up.
 445   // We're going to:
 446   //   - count the amount of "live" data (live_words, live_blocks)
 447   //   - count the amount of "copied" data in this GC (copied)
 448   //   - free from-space
 449   //   - make to-space the new from-space (set BF_EVACUATED on all blocks)
 450   //
 451   live_words = 0;
 452   live_blocks = 0;
 453
 454   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 455
 456     if (g == N) {
 457       generations[g].collections++; // for stats
 458       if (n_gc_threads > 1) generations[g].par_collections++;
 459     }
 460
 461     // Count the mutable list as bytes "copied" for the purposes of
 462     // stats.  Every mutable list is copied during every GC.
 463     if (g > 0) {
 464         nat mut_list_size = 0;
 465         for (n = 0; n < n_capabilities; n++) {
 466             mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
 467         }
 468         copied +=  mut_list_size;
 469
 470         debugTrace(DEBUG_gc,
 471                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 472                    (unsigned long)(mut_list_size * sizeof(W_)),
 473                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 474     }
 475
 476     bdescr *next, *prev;
 477     gen = &generations[g];
 478
 479     // for generations we collected...
 480     if (g <= N) {
 481
 482         /* free old memory and shift to-space into from-space for all
 483          * the collected steps (except the allocation area).  These
 484          * freed blocks will probaby be quickly recycled.
 485          */
 486         if (gen->mark)
 487         {
 488             // tack the new blocks on the end of the existing blocks
 489             if (gen->old_blocks != NULL) {
 490
 491                 prev = NULL;
 492                 for (bd = gen->old_blocks; bd != NULL; bd = next) {
 493
 494                     next = bd->link;
 495
 496                     if (!(bd->flags & BF_MARKED))
 497                     {
 498                         if (prev == NULL) {
 499                             gen->old_blocks = next;
 500                         } else {
 501                             prev->link = next;
 502                         }
 503                         freeGroup(bd);
 504                         gen->n_old_blocks--;
 505                     }
 506                     else
 507                     {
 508                         gen->n_words += bd->free - bd->start;
 509
 510                         // NB. this step might not be compacted next
 511                         // time, so reset the BF_MARKED flags.
 512                         // They are set before GC if we're going to
 513                         // compact.  (search for BF_MARKED above).
 514                         bd->flags &= ~BF_MARKED;
 515
 516                         // between GCs, all blocks in the heap except
 517                         // for the nursery have the BF_EVACUATED flag set.
 518                         bd->flags |= BF_EVACUATED;
 519
 520                         prev = bd;
 521                     }
 522                 }
 523
 524                 if (prev != NULL) {
 525                     prev->link = gen->blocks;
 526                     gen->blocks = gen->old_blocks;
 527                 }
 528             }
 529             // add the new blocks to the block tally
 530             gen->n_blocks += gen->n_old_blocks;
 531             ASSERT(countBlocks(gen->blocks) == gen->n_blocks);
 532             ASSERT(countOccupied(gen->blocks) == gen->n_words);
 533         }
 534         else // not copacted
 535         {
 536             freeChain(gen->old_blocks);
 537         }
 538
 539         gen->old_blocks = NULL;
 540         gen->n_old_blocks = 0;
 541
 542         /* LARGE OBJECTS.  The current live large objects are chained on
 543          * scavenged_large, having been moved during garbage
 544          * collection from large_objects.  Any objects left on the
 545          * large_objects list are therefore dead, so we free them here.
 546          */
 547         freeChain(gen->large_objects);
 548         gen->large_objects  = gen->scavenged_large_objects;
 549         gen->n_large_blocks = gen->n_scavenged_large_blocks;
 550         gen->n_new_large_words = 0;
 551     }
 552     else // for generations > N
 553     {
 554         /* For older generations, we need to append the
 555          * scavenged_large_object list (i.e. large objects that have been
 556          * promoted during this GC) to the large_object list for that step.
 557          */
 558         for (bd = gen->scavenged_large_objects; bd; bd = next) {
 559             next = bd->link;
 560             dbl_link_onto(bd, &gen->large_objects);
 561         }
 562
 563         // add the new blocks we promoted during this GC
 564         gen->n_large_blocks += gen->n_scavenged_large_blocks;
 565     }
 566
 567     ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
 568
 569     gen->scavenged_large_objects = NULL;
 570     gen->n_scavenged_large_blocks = 0;
 571
 572     // Count "live" data
 573     live_words  += genLiveWords(gen);
 574     live_blocks += genLiveBlocks(gen);
 575
 576     // add in the partial blocks in the gen_workspaces, but ignore gen 0
 577     // if this is a local GC (we can't count another capability's part_list)
 578     {
 579         nat i;
 580         for (i = 0; i < n_capabilities; i++) {
 581             live_words  += gcThreadLiveWords(i, gen->no);
 582             live_blocks += gcThreadLiveBlocks(i, gen->no);
 583         }
 584     }
 585   } // for all generations
 586
 587   // update the max size of older generations after a major GC
 588   resize_generations();
 589
 590   // Free the mark stack.
 591   if (mark_stack_top_bd != NULL) {
 592       debugTrace(DEBUG_gc, "mark stack: %d blocks",
 593                  countBlocks(mark_stack_top_bd));
 594       freeChain(mark_stack_top_bd);
 595   }
 596
 597   // Free any bitmaps.
 598   for (g = 0; g <= N; g++) {
 599       gen = &generations[g];
 600       if (gen->bitmap != NULL) {
 601           freeGroup(gen->bitmap);
 602           gen->bitmap = NULL;
 603       }
 604   }
 605
 606   // Reset the nursery: make the blocks empty
 607   allocated += clearNurseries();
 608
 609   resize_nursery();
 610
 611   resetNurseries();
 612
 613  // mark the garbage collected CAFs as dead
 614 #if 0 && defined(DEBUG) // doesn't work at the moment
 615   if (major_gc) { gcCAFs(); }
 616 #endif
 617
 618 #ifdef PROFILING
 619   // resetStaticObjectForRetainerProfiling() must be called before
 620   // zeroing below.
 621   if (n_gc_threads > 1) {
 622       barf("profiling is currently broken with multi-threaded GC");
 623       // ToDo: fix the gct->scavenged_static_objects below
 624   }
 625   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 626 #endif
 627
 628   // zero the scavenged static object list
 629   if (major_gc) {
 630       nat i;
 631       if (n_gc_threads == 1) {
 632           zero_static_object_list(gct->scavenged_static_objects);
 633       } else {
 634           for (i = 0; i < n_gc_threads; i++) {
 635               zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 636           }
 637       }
 638   }
 639
 640   // Update the stable pointer hash table.
 641   updateStablePtrTable(major_gc);
 642
 643   // unlock the StablePtr table.  Must be before scheduleFinalizers(),
 644   // because a finalizer may call hs_free_fun_ptr() or
 645   // hs_free_stable_ptr(), both of which access the StablePtr table.
 646   stablePtrPostGC();
 647
 648   // Start any pending finalizers.  Must be after
 649   // updateStablePtrTable() and stablePtrPostGC() (see #4221).
 650   RELEASE_SM_LOCK;
 651   scheduleFinalizers(cap, old_weak_ptr_list);
 652   ACQUIRE_SM_LOCK;
 653
 654   // check sanity after GC
 655   // before resurrectThreads(), because that might overwrite some
 656   // closures, which will cause problems with THREADED where we don't
 657   // fill slop.
 658   IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
 659
 660   // send exceptions to any threads which were about to die
 661   RELEASE_SM_LOCK;
 662   resurrectThreads(resurrected_threads);
 663   ACQUIRE_SM_LOCK;
 664
 665   if (major_gc) {
 666       nat need, got;
 667       need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
 668       got = mblocks_allocated;
 669       /* If the amount of data remains constant, next major GC we'll
 670          require (F+1)*need. We leave (F+2)*need in order to reduce
 671          repeated deallocation and reallocation. */
 672       need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
 673       if (got > need) {
 674           returnMemoryToOS(got - need);
 675       }
 676   }
 677
 678   // extra GC trace info
 679   IF_DEBUG(gc, statDescribeGens());
 680
 681 #ifdef DEBUG
 682   // symbol-table based profiling
 683   /*  heapCensus(to_blocks); */ /* ToDo */
 684 #endif
 685
 686   // restore enclosing cost centre
 687 #ifdef PROFILING
 688   CCCS = prev_CCS;
 689 #endif
 690
 691 #ifdef DEBUG
 692   // check for memory leaks if DEBUG is on
 693   memInventory(DEBUG_gc);
 694 #endif
 695
 696 #ifdef RTS_GTK_FRONTPANEL
 697   if (RtsFlags.GcFlags.frontpanel) {
 698       updateFrontPanelAfterGC( N, live );
 699   }
 700 #endif
 701
 702   // ok, GC over: tell the stats department what happened.
 703   stat_endGC(gct, allocated, live_words,
 704              copied, N, max_copied, avg_copied,
 705              live_blocks * BLOCK_SIZE_W - live_words /* slop */);
 706
 707   // Guess which generation we'll collect *next* time
 708   initialise_N(force_major_gc);
 709
 710 #if defined(RTS_USER_SIGNALS)
 711   if (RtsFlags.MiscFlags.install_signal_handlers) {
 712     // unblock signals again
 713     unblockUserSignals();
 714   }
 715 #endif
 716
 717   RELEASE_SM_LOCK;
 718
 719   SET_GCT(saved_gct);
 720 }
 721
 722 /* -----------------------------------------------------------------------------
 723    Figure out which generation to collect, initialise N and major_gc.
 724
 725    Also returns the total number of blocks in generations that will be
 726    collected.
 727    -------------------------------------------------------------------------- */
 728
 729 static nat
 730 initialise_N (rtsBool force_major_gc)
 731 {
 732     int g;
 733     nat blocks, blocks_total;
 734
 735     blocks = 0;
 736     blocks_total = 0;
 737
 738     if (force_major_gc) {
 739         N = RtsFlags.GcFlags.generations - 1;
 740     } else {
 741         N = 0;
 742     }
 743
 744     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 745
 746         blocks = generations[g].n_words / BLOCK_SIZE_W
 747                + generations[g].n_large_blocks;
 748
 749         if (blocks >= generations[g].max_blocks) {
 750             N = stg_max(N,g);
 751         }
 752         if ((nat)g <= N) {
 753             blocks_total += blocks;
 754         }
 755     }
 756
 757     blocks_total += countNurseryBlocks();
 758
 759     major_gc = (N == RtsFlags.GcFlags.generations-1);
 760     return blocks_total;
 761 }
 762
 763 /* -----------------------------------------------------------------------------
 764    Initialise the gc_thread structures.
 765    -------------------------------------------------------------------------- */
 766
 767 #define GC_THREAD_INACTIVE             0
 768 #define GC_THREAD_STANDING_BY          1
 769 #define GC_THREAD_RUNNING              2
 770 #define GC_THREAD_WAITING_TO_CONTINUE  3
 771
 772 static void
 773 new_gc_thread (nat n, gc_thread *t)
 774 {
 775     nat g;
 776     gen_workspace *ws;
 777
 778     t->cap = &capabilities[n];
 779
 780 #ifdef THREADED_RTS
 781     t->id = 0;
 782     initSpinLock(&t->gc_spin);
 783     initSpinLock(&t->mut_spin);
 784     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 785     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 786                           // thread to start up, see wakeup_gc_threads
 787 #endif
 788
 789     t->thread_index = n;
 790     t->free_blocks = NULL;
 791     t->gc_count = 0;
 792
 793     init_gc_thread(t);
 794
 795 #ifdef USE_PAPI
 796     t->papi_events = -1;
 797 #endif
 798
 799     for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 800     {
 801         ws = &t->gens[g];
 802         ws->gen = &generations[g];
 803         ASSERT(g == ws->gen->no);
 804         ws->my_gct = t;
 805
 806         // We want to call
 807         //   alloc_todo_block(ws,0);
 808         // but can't, because it uses gct which isn't set up at this point.
 809         // Hence, allocate a block for todo_bd manually:
 810         {
 811             bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
 812             initBdescr(bd, ws->gen, ws->gen->to);
 813             bd->flags = BF_EVACUATED;
 814             bd->u.scan = bd->free = bd->start;
 815
 816             ws->todo_bd = bd;
 817             ws->todo_free = bd->free;
 818             ws->todo_lim = bd->start + BLOCK_SIZE_W;
 819         }
 820
 821         ws->todo_q = newWSDeque(128);
 822         ws->todo_overflow = NULL;
 823         ws->n_todo_overflow = 0;
 824         ws->todo_large_objects = NULL;
 825
 826         ws->part_list = NULL;
 827         ws->n_part_blocks = 0;
 828
 829         ws->scavd_list = NULL;
 830         ws->n_scavd_blocks = 0;
 831     }
 832 }
 833
 834
 835 void
 836 initGcThreads (void)
 837 {
 838     if (gc_threads == NULL) {
 839 #if defined(THREADED_RTS)
 840         nat i;
 841         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 842                                      sizeof(gc_thread*),
 843                                      "alloc_gc_threads");
 844
 845         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 846             gc_threads[i] =
 847                 stgMallocBytes(sizeof(gc_thread) +
 848                                RtsFlags.GcFlags.generations * sizeof(gen_workspace),
 849                                "alloc_gc_threads");
 850
 851             new_gc_thread(i, gc_threads[i]);
 852         }
 853 #else
 854         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 855         gc_threads[0] = gct;
 856         new_gc_thread(0,gc_threads[0]);
 857 #endif
 858     }
 859 }
 860
 861 void
 862 freeGcThreads (void)
 863 {
 864     nat g;
 865     if (gc_threads != NULL) {
 866 #if defined(THREADED_RTS)
 867         nat i;
 868         for (i = 0; i < n_capabilities; i++) {
 869             for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 870             {
 871                 freeWSDeque(gc_threads[i]->gens[g].todo_q);
 872             }
 873             stgFree (gc_threads[i]);
 874         }
 875         stgFree (gc_threads);
 876 #else
 877         for (g = 0; g < RtsFlags.GcFlags.generations; g++)
 878         {
 879             freeWSDeque(gc_threads[0]->gens[g].todo_q);
 880         }
 881         stgFree (gc_threads);
 882 #endif
 883         gc_threads = NULL;
 884     }
 885 }
 886
 887 /* ----------------------------------------------------------------------------
 888    Start GC threads
 889    ------------------------------------------------------------------------- */
 890
 891 static volatile StgWord gc_running_threads;
 892
 893 static StgWord
 894 inc_running (void)
 895 {
 896     StgWord new;
 897     new = atomic_inc(&gc_running_threads);
 898     ASSERT(new <= n_gc_threads);
 899     return new;
 900 }
 901
 902 static StgWord
 903 dec_running (void)
 904 {
 905     ASSERT(gc_running_threads != 0);
 906     return atomic_dec(&gc_running_threads);
 907 }
 908
 909 static rtsBool
 910 any_work (void)
 911 {
 912     int g;
 913     gen_workspace *ws;
 914
 915     gct->any_work++;
 916
 917     write_barrier();
 918
 919     // scavenge objects in compacted generation
 920     if (mark_stack_bd != NULL && !mark_stack_empty()) {
 921         return rtsTrue;
 922     }
 923
 924     // Check for global work in any step.  We don't need to check for
 925     // local work, because we have already exited scavenge_loop(),
 926     // which means there is no local work for this thread.
 927     for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) {
 928         ws = &gct->gens[g];
 929         if (ws->todo_large_objects) return rtsTrue;
 930         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 931         if (ws->todo_overflow) return rtsTrue;
 932     }
 933
 934 #if defined(THREADED_RTS)
 935     if (work_stealing) {
 936         nat n;
 937         // look for work to steal
 938         for (n = 0; n < n_gc_threads; n++) {
 939             if (n == gct->thread_index) continue;
 940             for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
 941                 ws = &gc_threads[n]->gens[g];
 942                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
 943             }
 944         }
 945     }
 946 #endif
 947
 948     gct->no_work++;
 949 #if defined(THREADED_RTS)
 950     yieldThread();
 951 #endif
 952
 953     return rtsFalse;
 954 }
 955
 956 static void
 957 scavenge_until_all_done (void)
 958 {
 959     nat r;
 960
 961
 962 loop:
 963 #if defined(THREADED_RTS)
 964     if (n_gc_threads > 1) {
 965         scavenge_loop();
 966     } else {
 967         scavenge_loop1();
 968     }
 969 #else
 970     scavenge_loop();
 971 #endif
 972
 973     collect_gct_blocks();
 974
 975     // scavenge_loop() only exits when there's no work to do
 976     r = dec_running();
 977
 978     traceEventGcIdle(gct->cap);
 979
 980     debugTrace(DEBUG_gc, "%d GC threads still running", r);
 981
 982     while (gc_running_threads != 0) {
 983         // usleep(1);
 984         if (any_work()) {
 985             inc_running();
 986             traceEventGcWork(gct->cap);
 987             goto loop;
 988         }
 989         // any_work() does not remove the work from the queue, it
 990         // just checks for the presence of work.  If we find any,
 991         // then we increment gc_running_threads and go back to
 992         // scavenge_loop() to perform any pending work.
 993     }
 994
 995     traceEventGcDone(gct->cap);
 996 }
 997
 998 #if defined(THREADED_RTS)
 999
1000 void
1001 gcWorkerThread (Capability *cap)
1002 {
1003     gc_thread *saved_gct;
1004
1005     // necessary if we stole a callee-saves register for gct:
1006     saved_gct = gct;
1007
1008     gct = gc_threads[cap->no];
1009     gct->id = osThreadId();
1010
1011     stat_gcWorkerThreadStart(gct);
1012
1013     // Wait until we're told to wake up
1014     RELEASE_SPIN_LOCK(&gct->mut_spin);
1015     gct->wakeup = GC_THREAD_STANDING_BY;
1016     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1017     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1018
1019 #ifdef USE_PAPI
1020     // start performance counters in this thread...
1021     if (gct->papi_events == -1) {
1022         papi_init_eventset(&gct->papi_events);
1023     }
1024     papi_thread_start_gc1_count(gct->papi_events);
1025 #endif
1026
1027     init_gc_thread(gct);
1028
1029     traceEventGcWork(gct->cap);
1030
1031     // Every thread evacuates some roots.
1032     gct->evac_gen_no = 0;
1033     markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
1034     scavenge_capability_mut_lists(cap);
1035
1036     scavenge_until_all_done();
1037
1038 #ifdef THREADED_RTS
1039     // Now that the whole heap is marked, we discard any sparks that
1040     // were found to be unreachable.  The main GC thread is currently
1041     // marking heap reachable via weak pointers, so it is
1042     // non-deterministic whether a spark will be retained if it is
1043     // only reachable via weak pointers.  To fix this problem would
1044     // require another GC barrier, which is too high a price.
1045     pruneSparkQueue(cap);
1046 #endif
1047
1048 #ifdef USE_PAPI
1049     // count events in this thread towards the GC totals
1050     papi_thread_stop_gc1_count(gct->papi_events);
1051 #endif
1052
1053     // Wait until we're told to continue
1054     RELEASE_SPIN_LOCK(&gct->gc_spin);
1055     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1056     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1057                gct->thread_index);
1058     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1059     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1060
1061     // record the time spent doing GC in the Task structure
1062     stat_gcWorkerThreadDone(gct);
1063
1064     SET_GCT(saved_gct);
1065 }
1066
1067 #endif
1068
1069 #if defined(THREADED_RTS)
1070
1071 void
1072 waitForGcThreads (Capability *cap USED_IF_THREADS)
1073 {
1074     const nat n_threads = RtsFlags.ParFlags.nNodes;
1075     const nat me = cap->no;
1076     nat i, j;
1077     rtsBool retry = rtsTrue;
1078
1079     while(retry) {
1080         for (i=0; i < n_threads; i++) {
1081             if (i == me) continue;
1082             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1083                 prodCapability(&capabilities[i], cap->running_task);
1084             }
1085         }
1086         for (j=0; j < 10; j++) {
1087             retry = rtsFalse;
1088             for (i=0; i < n_threads; i++) {
1089                 if (i == me) continue;
1090                 write_barrier();
1091                 setContextSwitches();
1092                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1093                     retry = rtsTrue;
1094                 }
1095             }
1096             if (!retry) break;
1097             yieldThread();
1098         }
1099     }
1100 }
1101
1102 #endif // THREADED_RTS
1103
1104 static void
1105 start_gc_threads (void)
1106 {
1107 #if defined(THREADED_RTS)
1108     gc_running_threads = 0;
1109 #endif
1110 }
1111
1112 static void
1113 wakeup_gc_threads (nat me USED_IF_THREADS)
1114 {
1115 #if defined(THREADED_RTS)
1116     nat i;
1117
1118     if (n_gc_threads == 1) return;
1119
1120     for (i=0; i < n_gc_threads; i++) {
1121         if (i == me) continue;
1122         inc_running();
1123         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1124         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1125
1126         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1127         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1128         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1129     }
1130 #endif
1131 }
1132
1133 // After GC is complete, we must wait for all GC threads to enter the
1134 // standby state, otherwise they may still be executing inside
1135 // any_work(), and may even remain awake until the next GC starts.
1136 static void
1137 shutdown_gc_threads (nat me USED_IF_THREADS)
1138 {
1139 #if defined(THREADED_RTS)
1140     nat i;
1141
1142     if (n_gc_threads == 1) return;
1143
1144     for (i=0; i < n_gc_threads; i++) {
1145         if (i == me) continue;
1146         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1147     }
1148 #endif
1149 }
1150
1151 #if defined(THREADED_RTS)
1152 void
1153 releaseGCThreads (Capability *cap USED_IF_THREADS)
1154 {
1155     const nat n_threads = RtsFlags.ParFlags.nNodes;
1156     const nat me = cap->no;
1157     nat i;
1158     for (i=0; i < n_threads; i++) {
1159         if (i == me) continue;
1160         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1161             barf("releaseGCThreads");
1162
1163         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1164         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1165         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1166     }
1167 }
1168 #endif
1169
1170 /* ----------------------------------------------------------------------------
1171    Initialise a generation that is to be collected
1172    ------------------------------------------------------------------------- */
1173
1174 static void
1175 prepare_collected_gen (generation *gen)
1176 {
1177     nat i, g, n;
1178     gen_workspace *ws;
1179     bdescr *bd, *next;
1180
1181     // Throw away the current mutable list.  Invariant: the mutable
1182     // list always has at least one block; this means we can avoid a
1183     // check for NULL in recordMutable().
1184     g = gen->no;
1185     if (g != 0) {
1186         for (i = 0; i < n_capabilities; i++) {
1187             freeChain(capabilities[i].mut_lists[g]);
1188             capabilities[i].mut_lists[g] = allocBlock();
1189         }
1190     }
1191
1192     gen = &generations[g];
1193     ASSERT(gen->no == g);
1194
1195     // we'll construct a new list of threads in this step
1196     // during GC, throw away the current list.
1197     gen->old_threads = gen->threads;
1198     gen->threads = END_TSO_QUEUE;
1199
1200     // deprecate the existing blocks
1201     gen->old_blocks   = gen->blocks;
1202     gen->n_old_blocks = gen->n_blocks;
1203     gen->blocks       = NULL;
1204     gen->n_blocks     = 0;
1205     gen->n_words      = 0;
1206     gen->live_estimate = 0;
1207
1208     // initialise the large object queues.
1209     ASSERT(gen->scavenged_large_objects == NULL);
1210     ASSERT(gen->n_scavenged_large_blocks == 0);
1211
1212     // grab all the partial blocks stashed in the gc_thread workspaces and
1213     // move them to the old_blocks list of this gen.
1214     for (n = 0; n < n_capabilities; n++) {
1215         ws = &gc_threads[n]->gens[gen->no];
1216
1217         for (bd = ws->part_list; bd != NULL; bd = next) {
1218             next = bd->link;
1219             bd->link = gen->old_blocks;
1220             gen->old_blocks = bd;
1221             gen->n_old_blocks += bd->blocks;
1222         }
1223         ws->part_list = NULL;
1224         ws->n_part_blocks = 0;
1225
1226         ASSERT(ws->scavd_list == NULL);
1227         ASSERT(ws->n_scavd_blocks == 0);
1228
1229         if (ws->todo_free != ws->todo_bd->start) {
1230             ws->todo_bd->free = ws->todo_free;
1231             ws->todo_bd->link = gen->old_blocks;
1232             gen->old_blocks = ws->todo_bd;
1233             gen->n_old_blocks += ws->todo_bd->blocks;
1234             alloc_todo_block(ws,0); // always has one block.
1235         }
1236     }
1237
1238     // mark the small objects as from-space
1239     for (bd = gen->old_blocks; bd; bd = bd->link) {
1240         bd->flags &= ~BF_EVACUATED;
1241     }
1242
1243     // mark the large objects as from-space
1244     for (bd = gen->large_objects; bd; bd = bd->link) {
1245         bd->flags &= ~BF_EVACUATED;
1246     }
1247
1248     // for a compacted generation, we need to allocate the bitmap
1249     if (gen->mark) {
1250         lnat bitmap_size; // in bytes
1251         bdescr *bitmap_bdescr;
1252         StgWord *bitmap;
1253
1254         bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1255
1256         if (bitmap_size > 0) {
1257             bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1258                                        / BLOCK_SIZE);
1259             gen->bitmap = bitmap_bdescr;
1260             bitmap = bitmap_bdescr->start;
1261
1262             debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1263                        bitmap_size, bitmap);
1264
1265             // don't forget to fill it with zeros!
1266             memset(bitmap, 0, bitmap_size);
1267
1268             // For each block in this step, point to its bitmap from the
1269             // block descriptor.
1270             for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
1271                 bd->u.bitmap = bitmap;
1272                 bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1273
1274                 // Also at this point we set the BF_MARKED flag
1275                 // for this block.  The invariant is that
1276                 // BF_MARKED is always unset, except during GC
1277                 // when it is set on those blocks which will be
1278                 // compacted.
1279                 if (!(bd->flags & BF_FRAGMENTED)) {
1280                     bd->flags |= BF_MARKED;
1281                 }
1282
1283                 // BF_SWEPT should be marked only for blocks that are being
1284                 // collected in sweep()
1285                 bd->flags &= ~BF_SWEPT;
1286             }
1287         }
1288     }
1289 }
1290
1291
1292 /* ----------------------------------------------------------------------------
1293    Save the mutable lists in saved_mut_lists
1294    ------------------------------------------------------------------------- */
1295
1296 static void
1297 stash_mut_list (Capability *cap, nat gen_no)
1298 {
1299     cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
1300     cap->mut_lists[gen_no] = allocBlock_sync();
1301 }
1302
1303 /* ----------------------------------------------------------------------------
1304    Initialise a generation that is *not* to be collected
1305    ------------------------------------------------------------------------- */
1306
1307 static void
1308 prepare_uncollected_gen (generation *gen)
1309 {
1310     nat i;
1311
1312
1313     ASSERT(gen->no > 0);
1314
1315     // save the current mutable lists for this generation, and
1316     // allocate a fresh block for each one.  We'll traverse these
1317     // mutable lists as roots early on in the GC.
1318     for (i = 0; i < n_capabilities; i++) {
1319         stash_mut_list(&capabilities[i], gen->no);
1320     }
1321
1322     ASSERT(gen->scavenged_large_objects == NULL);
1323     ASSERT(gen->n_scavenged_large_blocks == 0);
1324 }
1325
1326 /* -----------------------------------------------------------------------------
1327    Collect the completed blocks from a GC thread and attach them to
1328    the generation.
1329    -------------------------------------------------------------------------- */
1330
1331 static void
1332 collect_gct_blocks (void)
1333 {
1334     nat g;
1335     gen_workspace *ws;
1336     bdescr *bd, *prev;
1337
1338     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1339         ws = &gct->gens[g];
1340
1341         // there may still be a block attached to ws->todo_bd;
1342         // leave it there to use next time.
1343
1344         if (ws->scavd_list != NULL) {
1345             ACQUIRE_SPIN_LOCK(&ws->gen->sync);
1346
1347             ASSERT(gct->scan_bd == NULL);
1348             ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
1349
1350             prev = NULL;
1351             for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
1352                 ws->gen->n_words += bd->free - bd->start;
1353                 prev = bd;
1354             }
1355             if (prev != NULL) {
1356                 prev->link = ws->gen->blocks;
1357                 ws->gen->blocks = ws->scavd_list;
1358             }
1359             ws->gen->n_blocks += ws->n_scavd_blocks;
1360
1361             ws->scavd_list = NULL;
1362             ws->n_scavd_blocks = 0;
1363
1364             RELEASE_SPIN_LOCK(&ws->gen->sync);
1365         }
1366     }
1367 }
1368
1369 /* -----------------------------------------------------------------------------
1370    Initialise a gc_thread before GC
1371    -------------------------------------------------------------------------- */
1372
1373 static void
1374 init_gc_thread (gc_thread *t)
1375 {
1376     t->static_objects = END_OF_STATIC_LIST;
1377     t->scavenged_static_objects = END_OF_STATIC_LIST;
1378     t->scan_bd = NULL;
1379     t->mut_lists = t->cap->mut_lists;
1380     t->evac_gen_no = 0;
1381     t->failed_to_evac = rtsFalse;
1382     t->eager_promotion = rtsTrue;
1383     t->thunk_selector_depth = 0;
1384     t->copied = 0;
1385     t->scanned = 0;
1386     t->any_work = 0;
1387     t->no_work = 0;
1388     t->scav_find_work = 0;
1389 }
1390
1391 /* -----------------------------------------------------------------------------
1392    Function we pass to evacuate roots.
1393    -------------------------------------------------------------------------- */
1394
1395 static void
1396 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1397 {
1398     // we stole a register for gct, but this function is called from
1399     // *outside* the GC where the register variable is not in effect,
1400     // so we need to save and restore it here.  NB. only call
1401     // mark_root() from the main GC thread, otherwise gct will be
1402     // incorrect.
1403     gc_thread *saved_gct;
1404     saved_gct = gct;
1405     SET_GCT(user);
1406
1407     evacuate(root);
1408
1409     SET_GCT(saved_gct);
1410 }
1411
1412 /* -----------------------------------------------------------------------------
1413    Initialising the static object & mutable lists
1414    -------------------------------------------------------------------------- */
1415
1416 static void
1417 zero_static_object_list(StgClosure* first_static)
1418 {
1419   StgClosure* p;
1420   StgClosure* link;
1421   const StgInfoTable *info;
1422
1423   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1424     info = get_itbl(p);
1425     link = *STATIC_LINK(info, p);
1426     *STATIC_LINK(info,p) = NULL;
1427   }
1428 }
1429
1430 /* ----------------------------------------------------------------------------
1431    Reset the sizes of the older generations when we do a major
1432    collection.
1433
1434    CURRENT STRATEGY: make all generations except zero the same size.
1435    We have to stay within the maximum heap size, and leave a certain
1436    percentage of the maximum heap size available to allocate into.
1437    ------------------------------------------------------------------------- */
1438
1439 static void
1440 resize_generations (void)
1441 {
1442     nat g;
1443
1444     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1445         nat live, size, min_alloc, words;
1446         const nat max  = RtsFlags.GcFlags.maxHeapSize;
1447         const nat gens = RtsFlags.GcFlags.generations;
1448
1449         // live in the oldest generations
1450         if (oldest_gen->live_estimate != 0) {
1451             words = oldest_gen->live_estimate;
1452         } else {
1453             words = oldest_gen->n_words;
1454         }
1455         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1456             oldest_gen->n_large_blocks;
1457
1458         // default max size for all generations except zero
1459         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1460                        RtsFlags.GcFlags.minOldGenSize);
1461
1462         if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
1463             RtsFlags.GcFlags.heapSizeSuggestion = size;
1464         }
1465
1466         // minimum size for generation zero
1467         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1468                             RtsFlags.GcFlags.minAllocAreaSize);
1469
1470         // Auto-enable compaction when the residency reaches a
1471         // certain percentage of the maximum heap size (default: 30%).
1472         if (RtsFlags.GcFlags.compact ||
1473             (max > 0 &&
1474              oldest_gen->n_blocks >
1475              (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
1476             oldest_gen->mark = 1;
1477             oldest_gen->compact = 1;
1478 //        debugBelch("compaction: on\n", live);
1479         } else {
1480             oldest_gen->mark = 0;
1481             oldest_gen->compact = 0;
1482 //        debugBelch("compaction: off\n", live);
1483         }
1484
1485         if (RtsFlags.GcFlags.sweep) {
1486             oldest_gen->mark = 1;
1487         }
1488
1489         // if we're going to go over the maximum heap size, reduce the
1490         // size of the generations accordingly.  The calculation is
1491         // different if compaction is turned on, because we don't need
1492         // to double the space required to collect the old generation.
1493         if (max != 0) {
1494
1495             // this test is necessary to ensure that the calculations
1496             // below don't have any negative results - we're working
1497             // with unsigned values here.
1498             if (max < min_alloc) {
1499                 heapOverflow();
1500             }
1501
1502             if (oldest_gen->compact) {
1503                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1504                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1505                 }
1506             } else {
1507                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1508                     size = (max - min_alloc) / ((gens - 1) * 2);
1509                 }
1510             }
1511
1512             if (size < live) {
1513                 heapOverflow();
1514             }
1515         }
1516
1517 #if 0
1518         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1519                    min_alloc, size, max);
1520 #endif
1521
1522         for (g = 0; g < gens; g++) {
1523             generations[g].max_blocks = size;
1524         }
1525     }
1526 }
1527
1528 /* -----------------------------------------------------------------------------
1529    Calculate the new size of the nursery, and resize it.
1530    -------------------------------------------------------------------------- */
1531
1532 static void
1533 resize_nursery (void)
1534 {
1535     const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1536
1537     if (RtsFlags.GcFlags.generations == 1)
1538     {   // Two-space collector:
1539         nat blocks;
1540
1541         /* set up a new nursery.  Allocate a nursery size based on a
1542          * function of the amount of live data (by default a factor of 2)
1543          * Use the blocks from the old nursery if possible, freeing up any
1544          * left over blocks.
1545          *
1546          * If we get near the maximum heap size, then adjust our nursery
1547          * size accordingly.  If the nursery is the same size as the live
1548          * data (L), then we need 3L bytes.  We can reduce the size of the
1549          * nursery to bring the required memory down near 2L bytes.
1550          *
1551          * A normal 2-space collector would need 4L bytes to give the same
1552          * performance we get from 3L bytes, reducing to the same
1553          * performance at 2L bytes.
1554          */
1555         blocks = generations[0].n_blocks;
1556
1557         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1558              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1559              RtsFlags.GcFlags.maxHeapSize )
1560         {
1561             long adjusted_blocks;  // signed on purpose
1562             int pc_free;
1563
1564             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1565
1566             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1567                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1568
1569             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1570             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1571             {
1572                 heapOverflow();
1573             }
1574             blocks = adjusted_blocks;
1575         }
1576         else
1577         {
1578             blocks *= RtsFlags.GcFlags.oldGenFactor;
1579             if (blocks < min_nursery)
1580             {
1581                 blocks = min_nursery;
1582             }
1583         }
1584         resizeNurseries(blocks);
1585     }
1586     else  // Generational collector
1587     {
1588         /*
1589          * If the user has given us a suggested heap size, adjust our
1590          * allocation area to make best use of the memory available.
1591          */
1592         if (RtsFlags.GcFlags.heapSizeSuggestion)
1593         {
1594             long blocks;
1595             const nat needed = calcNeeded();    // approx blocks needed at next GC
1596
1597             /* Guess how much will be live in generation 0 step 0 next time.
1598              * A good approximation is obtained by finding the
1599              * percentage of g0 that was live at the last minor GC.
1600              *
1601              * We have an accurate figure for the amount of copied data in
1602              * 'copied', but we must convert this to a number of blocks, with
1603              * a small adjustment for estimated slop at the end of a block
1604              * (- 10 words).
1605              */
1606             if (N == 0)
1607             {
1608                 g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1609                     / countNurseryBlocks();
1610             }
1611
1612             /* Estimate a size for the allocation area based on the
1613              * information available.  We might end up going slightly under
1614              * or over the suggested heap size, but we should be pretty
1615              * close on average.
1616              *
1617              * Formula:            suggested - needed
1618              *                ----------------------------
1619              *                    1 + g0_pcnt_kept/100
1620              *
1621              * where 'needed' is the amount of memory needed at the next
1622              * collection for collecting all gens except g0.
1623              */
1624             blocks =
1625                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1626                 (100 + (long)g0_pcnt_kept);
1627
1628             if (blocks < (long)min_nursery) {
1629                 blocks = min_nursery;
1630             }
1631
1632             resizeNurseries((nat)blocks);
1633         }
1634         else
1635         {
1636             // we might have added extra large blocks to the nursery, so
1637             // resize back to minAllocAreaSize again.
1638             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1639         }
1640     }
1641 }
1642
1643 /* -----------------------------------------------------------------------------
1644    Sanity code for CAF garbage collection.
1645
1646    With DEBUG turned on, we manage a CAF list in addition to the SRT
1647    mechanism.  After GC, we run down the CAF list and blackhole any
1648    CAFs which have been garbage collected.  This means we get an error
1649    whenever the program tries to enter a garbage collected CAF.
1650
1651    Any garbage collected CAFs are taken off the CAF list at the same
1652    time.
1653    -------------------------------------------------------------------------- */
1654
1655 #if 0 && defined(DEBUG)
1656
1657 static void
1658 gcCAFs(void)
1659 {
1660   StgClosure*  p;
1661   StgClosure** pp;
1662   const StgInfoTable *info;
1663   nat i;
1664
1665   i = 0;
1666   p = caf_list;
1667   pp = &caf_list;
1668
1669   while (p != NULL) {
1670
1671     info = get_itbl(p);
1672
1673     ASSERT(info->type == IND_STATIC);
1674
1675     if (STATIC_LINK(info,p) == NULL) {
1676         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1677         // black hole it
1678         SET_INFO(p,&stg_BLACKHOLE_info);
1679         p = STATIC_LINK2(info,p);
1680         *pp = p;
1681     }
1682     else {
1683       pp = &STATIC_LINK2(info,p);
1684       p = *pp;
1685       i++;
1686     }
1687
1688   }
1689
1690   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1691 }
1692 #endif