rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 // #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsFlags.h"
  17 #include "RtsUtils.h"
  18 #include "Apply.h"
  19 #include "OSThreads.h"
  20 #include "LdvProfile.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "MBlock.h"
  27 #include "ProfHeap.h"
  28 #include "SchedAPI.h"
  29 #include "Weak.h"
  30 #include "Prelude.h"
  31 #include "ParTicky.h"           // ToDo: move into Rts.h
  32 #include "RtsSignals.h"
  33 #include "STM.h"
  34 #include "HsFFI.h"
  35 #include "Linker.h"
  36 #if defined(RTS_GTK_FRONTPANEL)
  37 #include "FrontPanel.h"
  38 #endif
  39 #include "Trace.h"
  40 #include "RetainerProfile.h"
  41 #include "RaiseAsync.h"
  42 #include "Papi.h"
  43
  44 #include "GC.h"
  45 #include "GCThread.h"
  46 #include "Compact.h"
  47 #include "Evac.h"
  48 #include "Scav.h"
  49 #include "GCUtils.h"
  50 #include "MarkWeak.h"
  51 #include "Sparks.h"
  52
  53 #include <string.h> // for memset()
  54 #include <unistd.h>
  55
  56 /* -----------------------------------------------------------------------------
  57    Global variables
  58    -------------------------------------------------------------------------- */
  59
  60 /* STATIC OBJECT LIST.
  61  *
  62  * During GC:
  63  * We maintain a linked list of static objects that are still live.
  64  * The requirements for this list are:
  65  *
  66  *  - we need to scan the list while adding to it, in order to
  67  *    scavenge all the static objects (in the same way that
  68  *    breadth-first scavenging works for dynamic objects).
  69  *
  70  *  - we need to be able to tell whether an object is already on
  71  *    the list, to break loops.
  72  *
  73  * Each static object has a "static link field", which we use for
  74  * linking objects on to the list.  We use a stack-type list, consing
  75  * objects on the front as they are added (this means that the
  76  * scavenge phase is depth-first, not breadth-first, but that
  77  * shouldn't matter).
  78  *
  79  * A separate list is kept for objects that have been scavenged
  80  * already - this is so that we can zero all the marks afterwards.
  81  *
  82  * An object is on the list if its static link field is non-zero; this
  83  * means that we have to mark the end of the list with '1', not NULL.
  84  *
  85  * Extra notes for generational GC:
  86  *
  87  * Each generation has a static object list associated with it.  When
  88  * collecting generations up to N, we treat the static object lists
  89  * from generations > N as roots.
  90  *
  91  * We build up a static object list while collecting generations 0..N,
  92  * which is then appended to the static object list of generation N+1.
  93  */
  94
  95 /* N is the oldest generation being collected, where the generations
  96  * are numbered starting at 0.  A major GC (indicated by the major_gc
  97  * flag) is when we're collecting all generations.  We only attempt to
  98  * deal with static objects and GC CAFs when doing a major GC.
  99  */
 100 nat N;
 101 rtsBool major_gc;
 102
 103 /* Data used for allocation area sizing.
 104  */
 105 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 106
 107 /* Mut-list stats */
 108 #ifdef DEBUG
 109 nat mutlist_MUTVARS,
 110     mutlist_MUTARRS,
 111     mutlist_MVARS,
 112     mutlist_OTHERS;
 113 #endif
 114
 115 /* Thread-local data for each GC thread
 116  */
 117 gc_thread **gc_threads = NULL;
 118 // gc_thread *gct = NULL;  // this thread's gct TODO: make thread-local
 119
 120 // Number of threads running in *this* GC.  Affects how many
 121 // step->todos[] lists we have to look in to find work.
 122 nat n_gc_threads;
 123
 124 // For stats:
 125 long copied;        // *words* copied & scavenged during this GC
 126
 127 #ifdef THREADED_RTS
 128 SpinLock recordMutableGen_sync;
 129 #endif
 130
 131 DECLARE_GCT
 132
 133 /* -----------------------------------------------------------------------------
 134    Static function declarations
 135    -------------------------------------------------------------------------- */
 136
 137 static void mark_root               (void *user, StgClosure **root);
 138 static void zero_static_object_list (StgClosure* first_static);
 139 static nat  initialise_N            (rtsBool force_major_gc);
 140 static void alloc_gc_threads        (void);
 141 static void init_collected_gen      (nat g, nat threads);
 142 static void init_uncollected_gen    (nat g, nat threads);
 143 static void init_gc_thread          (gc_thread *t);
 144 static void update_task_list        (void);
 145 static void resize_generations      (void);
 146 static void resize_nursery          (void);
 147 static void start_gc_threads        (void);
 148 static void scavenge_until_all_done (void);
 149 static nat  inc_running             (void);
 150 static nat  dec_running             (void);
 151 static void wakeup_gc_threads       (nat n_threads);
 152 static void shutdown_gc_threads     (nat n_threads);
 153
 154 #if 0 && defined(DEBUG)
 155 static void gcCAFs                  (void);
 156 #endif
 157
 158 /* -----------------------------------------------------------------------------
 159    The mark bitmap & stack.
 160    -------------------------------------------------------------------------- */
 161
 162 #define MARK_STACK_BLOCKS 4
 163
 164 bdescr *mark_stack_bdescr;
 165 StgPtr *mark_stack;
 166 StgPtr *mark_sp;
 167 StgPtr *mark_splim;
 168
 169 // Flag and pointers used for falling back to a linear scan when the
 170 // mark stack overflows.
 171 rtsBool mark_stack_overflowed;
 172 bdescr *oldgen_scan_bd;
 173 StgPtr  oldgen_scan;
 174
 175 /* -----------------------------------------------------------------------------
 176    GarbageCollect: the main entry point to the garbage collector.
 177
 178    Locks held: all capabilities are held throughout GarbageCollect().
 179    -------------------------------------------------------------------------- */
 180
 181 void
 182 GarbageCollect ( rtsBool force_major_gc )
 183 {
 184   bdescr *bd;
 185   step *stp;
 186   lnat live, allocated, max_copied, avg_copied, slop;
 187   gc_thread *saved_gct;
 188   nat g, s, t, n;
 189
 190   // necessary if we stole a callee-saves register for gct:
 191   saved_gct = gct;
 192
 193 #ifdef PROFILING
 194   CostCentreStack *prev_CCS;
 195 #endif
 196
 197   ACQUIRE_SM_LOCK;
 198
 199 #if defined(RTS_USER_SIGNALS)
 200   if (RtsFlags.MiscFlags.install_signal_handlers) {
 201     // block signals
 202     blockUserSignals();
 203   }
 204 #endif
 205
 206   ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
 207   // otherwise adjust the padding in step_workspace.
 208
 209   // tell the stats department that we've started a GC
 210   stat_startGC();
 211
 212   // tell the STM to discard any cached closures it's hoping to re-use
 213   stmPreGCHook();
 214
 215 #ifdef DEBUG
 216   mutlist_MUTVARS = 0;
 217   mutlist_MUTARRS = 0;
 218   mutlist_OTHERS = 0;
 219 #endif
 220
 221   // attribute any costs to CCS_GC
 222 #ifdef PROFILING
 223   prev_CCS = CCCS;
 224   CCCS = CCS_GC;
 225 #endif
 226
 227   /* Approximate how much we allocated.
 228    * Todo: only when generating stats?
 229    */
 230   allocated = calcAllocated();
 231
 232   /* Figure out which generation to collect
 233    */
 234   n = initialise_N(force_major_gc);
 235
 236   /* Allocate + initialise the gc_thread structures.
 237    */
 238   alloc_gc_threads();
 239
 240   /* Start threads, so they can be spinning up while we finish initialisation.
 241    */
 242   start_gc_threads();
 243
 244   /* How many threads will be participating in this GC?
 245    * We don't try to parallelise minor GC.
 246    */
 247 #if defined(THREADED_RTS)
 248   if (n < (4*1024*1024 / BLOCK_SIZE)) {
 249       n_gc_threads = 1;
 250   } else {
 251       n_gc_threads = RtsFlags.ParFlags.gcThreads;
 252   }
 253 #else
 254   n_gc_threads = 1;
 255 #endif
 256   trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 257         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 258
 259 #ifdef RTS_GTK_FRONTPANEL
 260   if (RtsFlags.GcFlags.frontpanel) {
 261       updateFrontPanelBeforeGC(N);
 262   }
 263 #endif
 264
 265 #ifdef DEBUG
 266   // check for memory leaks if DEBUG is on
 267   memInventory(traceClass(DEBUG_gc));
 268 #endif
 269
 270   // check stack sanity *before* GC (ToDo: check all threads)
 271   IF_DEBUG(sanity, checkFreeListSanity());
 272
 273   // Initialise all our gc_thread structures
 274   for (t = 0; t < n_gc_threads; t++) {
 275       init_gc_thread(gc_threads[t]);
 276   }
 277
 278   // Initialise all the generations/steps that we're collecting.
 279   for (g = 0; g <= N; g++) {
 280       init_collected_gen(g,n_gc_threads);
 281   }
 282
 283   // Initialise all the generations/steps that we're *not* collecting.
 284   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 285       init_uncollected_gen(g,n_gc_threads);
 286   }
 287
 288   /* Allocate a mark stack if we're doing a major collection.
 289    */
 290   if (major_gc) {
 291       mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
 292       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 293       mark_sp    = mark_stack;
 294       mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
 295   } else {
 296       mark_stack_bdescr = NULL;
 297   }
 298
 299   // this is the main thread
 300   gct = gc_threads[0];
 301
 302   /* -----------------------------------------------------------------------
 303    * follow all the roots that we know about:
 304    *   - mutable lists from each generation > N
 305    * we want to *scavenge* these roots, not evacuate them: they're not
 306    * going to move in this GC.
 307    * Also do them in reverse generation order, for the usual reason:
 308    * namely to reduce the likelihood of spurious old->new pointers.
 309    */
 310   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 311       generations[g].saved_mut_list = generations[g].mut_list;
 312       generations[g].mut_list = allocBlock();
 313       // mut_list always has at least one block.
 314   }
 315
 316   // the main thread is running: this prevents any other threads from
 317   // exiting prematurely, so we can start them now.
 318   // NB. do this after the mutable lists have been saved above, otherwise
 319   // the other GC threads will be writing into the old mutable lists.
 320   inc_running();
 321   wakeup_gc_threads(n_gc_threads);
 322
 323   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 324       scavenge_mutable_list(&generations[g]);
 325   }
 326
 327   // follow roots from the CAF list (used by GHCi)
 328   gct->evac_step = 0;
 329   markCAFs(mark_root, gct);
 330
 331   // follow all the roots that the application knows about.
 332   gct->evac_step = 0;
 333   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads);
 334
 335 #if defined(RTS_USER_SIGNALS)
 336   // mark the signal handlers (signals should be already blocked)
 337   markSignalHandlers(mark_root, gct);
 338 #endif
 339
 340   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 341   markWeakPtrList();
 342   initWeakForGC();
 343
 344   // Mark the stable pointer table.
 345   markStablePtrTable(mark_root, gct);
 346
 347   /* -------------------------------------------------------------------------
 348    * Repeatedly scavenge all the areas we know about until there's no
 349    * more scavenging to be done.
 350    */
 351   for (;;)
 352   {
 353       scavenge_until_all_done();
 354       // The other threads are now stopped.  We might recurse back to
 355       // here, but from now on this is the only thread.
 356
 357       // if any blackholes are alive, make the threads that wait on
 358       // them alive too.
 359       if (traverseBlackholeQueue()) {
 360           inc_running();
 361           continue;
 362       }
 363
 364       // must be last...  invariant is that everything is fully
 365       // scavenged at this point.
 366       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 367           inc_running();
 368           continue;
 369       }
 370
 371       // If we get to here, there's really nothing left to do.
 372       break;
 373   }
 374
 375   shutdown_gc_threads(n_gc_threads);
 376
 377   // Update pointers from the Task list
 378   update_task_list();
 379
 380   // Update pointers from capabilities (probably just the spark queues)
 381   updateCapabilitiesPostGC();
 382
 383   // Now see which stable names are still alive.
 384   gcStablePtrTable();
 385
 386 #ifdef PROFILING
 387   // We call processHeapClosureForDead() on every closure destroyed during
 388   // the current garbage collection, so we invoke LdvCensusForDead().
 389   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 390       || RtsFlags.ProfFlags.bioSelector != NULL)
 391     LdvCensusForDead(N);
 392 #endif
 393
 394   // NO MORE EVACUATION AFTER THIS POINT!
 395
 396   // Two-space collector: free the old to-space.
 397   // g0s0->old_blocks is the old nursery
 398   // g0s0->blocks is to-space from the previous GC
 399   if (RtsFlags.GcFlags.generations == 1) {
 400       if (g0s0->blocks != NULL) {
 401           freeChain(g0s0->blocks);
 402           g0s0->blocks = NULL;
 403       }
 404   }
 405
 406   // For each workspace, in each thread, move the copied blocks to the step
 407   {
 408       gc_thread *thr;
 409       step_workspace *ws;
 410       bdescr *prev, *next;
 411
 412       for (t = 0; t < n_gc_threads; t++) {
 413           thr = gc_threads[t];
 414
 415           // not step 0
 416           if (RtsFlags.GcFlags.generations == 1) {
 417               s = 0;
 418           } else {
 419               s = 1;
 420           }
 421           for (; s < total_steps; s++) {
 422               ws = &thr->steps[s];
 423
 424               // Push the final block
 425               if (ws->todo_bd) {
 426                   push_scanned_block(ws->todo_bd, ws);
 427               }
 428
 429               ASSERT(gct->scan_bd == NULL);
 430               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 431
 432               prev = NULL;
 433               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 434                   ws->step->n_words += bd->free - bd->start;
 435                   prev = bd;
 436               }
 437               if (prev != NULL) {
 438                   prev->link = ws->step->blocks;
 439                   ws->step->blocks = ws->scavd_list;
 440               }
 441               ws->step->n_blocks += ws->n_scavd_blocks;
 442           }
 443       }
 444
 445       // Add all the partial blocks *after* we've added all the full
 446       // blocks.  This is so that we can grab the partial blocks back
 447       // again and try to fill them up in the next GC.
 448       for (t = 0; t < n_gc_threads; t++) {
 449           thr = gc_threads[t];
 450
 451           // not step 0
 452           if (RtsFlags.GcFlags.generations == 1) {
 453               s = 0;
 454           } else {
 455               s = 1;
 456           }
 457           for (; s < total_steps; s++) {
 458               ws = &thr->steps[s];
 459
 460               prev = NULL;
 461               for (bd = ws->part_list; bd != NULL; bd = next) {
 462                   next = bd->link;
 463                   if (bd->free == bd->start) {
 464                       if (prev == NULL) {
 465                           ws->part_list = next;
 466                       } else {
 467                           prev->link = next;
 468                       }
 469                       freeGroup(bd);
 470                       ws->n_part_blocks--;
 471                   } else {
 472                       ws->step->n_words += bd->free - bd->start;
 473                       prev = bd;
 474                   }
 475               }
 476               if (prev != NULL) {
 477                   prev->link = ws->step->blocks;
 478                   ws->step->blocks = ws->part_list;
 479               }
 480               ws->step->n_blocks += ws->n_part_blocks;
 481
 482               ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
 483               ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
 484           }
 485       }
 486   }
 487
 488   // Finally: compaction of the oldest generation.
 489   if (major_gc && oldest_gen->steps[0].is_compacted) {
 490       compact(gct->scavenged_static_objects);
 491   }
 492
 493   IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
 494
 495   /* run through all the generations/steps and tidy up
 496    */
 497   copied = 0;
 498   max_copied = 0;
 499   avg_copied = 0;
 500   {
 501       nat i;
 502       for (i=0; i < n_gc_threads; i++) {
 503           if (n_gc_threads > 1) {
 504               trace(TRACE_gc,"thread %d:", i);
 505               trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 506               trace(TRACE_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 507               trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
 508               trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
 509               trace(TRACE_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 510           }
 511           copied += gc_threads[i]->copied;
 512           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 513       }
 514       if (n_gc_threads == 1) {
 515           max_copied = 0;
 516           avg_copied = 0;
 517       } else {
 518           avg_copied = copied;
 519       }
 520   }
 521
 522   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 523
 524     if (g == N) {
 525       generations[g].collections++; // for stats
 526       if (n_gc_threads > 1) generations[g].par_collections++;
 527     }
 528
 529     // Count the mutable list as bytes "copied" for the purposes of
 530     // stats.  Every mutable list is copied during every GC.
 531     if (g > 0) {
 532         nat mut_list_size = 0;
 533         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 534             mut_list_size += bd->free - bd->start;
 535         }
 536         copied +=  mut_list_size;
 537
 538         debugTrace(DEBUG_gc,
 539                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 540                    (unsigned long)(mut_list_size * sizeof(W_)),
 541                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 542     }
 543
 544     for (s = 0; s < generations[g].n_steps; s++) {
 545       bdescr *next;
 546       stp = &generations[g].steps[s];
 547
 548       // for generations we collected...
 549       if (g <= N) {
 550
 551         /* free old memory and shift to-space into from-space for all
 552          * the collected steps (except the allocation area).  These
 553          * freed blocks will probaby be quickly recycled.
 554          */
 555         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 556             if (stp->is_compacted)
 557             {
 558                 // tack the new blocks on the end of the existing blocks
 559                 if (stp->old_blocks != NULL) {
 560                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 561                         stp->n_words += bd->free - bd->start;
 562
 563                         // NB. this step might not be compacted next
 564                         // time, so reset the BF_COMPACTED flags.
 565                         // They are set before GC if we're going to
 566                         // compact.  (search for BF_COMPACTED above).
 567                         bd->flags &= ~BF_COMPACTED;
 568
 569                         // between GCs, all blocks in the heap except
 570                         // for the nursery have the BF_EVACUATED flag set.
 571                         bd->flags |= BF_EVACUATED;
 572
 573                         next = bd->link;
 574                         if (next == NULL) {
 575                             bd->link = stp->blocks;
 576                         }
 577                     }
 578                     stp->blocks = stp->old_blocks;
 579                 }
 580                 // add the new blocks to the block tally
 581                 stp->n_blocks += stp->n_old_blocks;
 582                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 583                 ASSERT(countOccupied(stp->blocks) == stp->n_words);
 584             }
 585             else // not copacted
 586             {
 587                 freeChain(stp->old_blocks);
 588             }
 589             stp->old_blocks = NULL;
 590             stp->n_old_blocks = 0;
 591         }
 592
 593         /* LARGE OBJECTS.  The current live large objects are chained on
 594          * scavenged_large, having been moved during garbage
 595          * collection from large_objects.  Any objects left on
 596          * large_objects list are therefore dead, so we free them here.
 597          */
 598         for (bd = stp->large_objects; bd != NULL; bd = next) {
 599           next = bd->link;
 600           freeGroup(bd);
 601           bd = next;
 602         }
 603
 604         stp->large_objects  = stp->scavenged_large_objects;
 605         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 606
 607       }
 608       else // for older generations...
 609       {
 610         /* For older generations, we need to append the
 611          * scavenged_large_object list (i.e. large objects that have been
 612          * promoted during this GC) to the large_object list for that step.
 613          */
 614         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 615           next = bd->link;
 616           dbl_link_onto(bd, &stp->large_objects);
 617         }
 618
 619         // add the new blocks we promoted during this GC
 620         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 621       }
 622     }
 623   }
 624
 625   // update the max size of older generations after a major GC
 626   resize_generations();
 627
 628   // Calculate the amount of live data for stats.
 629   live = calcLiveWords();
 630
 631   // Free the small objects allocated via allocate(), since this will
 632   // all have been copied into G0S1 now.
 633   if (RtsFlags.GcFlags.generations > 1) {
 634       if (g0s0->blocks != NULL) {
 635           freeChain(g0s0->blocks);
 636           g0s0->blocks = NULL;
 637       }
 638       g0s0->n_blocks = 0;
 639       g0s0->n_words = 0;
 640   }
 641   alloc_blocks = 0;
 642   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 643
 644   // Start a new pinned_object_block
 645   pinned_object_block = NULL;
 646
 647   // Free the mark stack.
 648   if (mark_stack_bdescr != NULL) {
 649       freeGroup(mark_stack_bdescr);
 650   }
 651
 652   // Free any bitmaps.
 653   for (g = 0; g <= N; g++) {
 654       for (s = 0; s < generations[g].n_steps; s++) {
 655           stp = &generations[g].steps[s];
 656           if (stp->bitmap != NULL) {
 657               freeGroup(stp->bitmap);
 658               stp->bitmap = NULL;
 659           }
 660       }
 661   }
 662
 663   resize_nursery();
 664
 665  // mark the garbage collected CAFs as dead
 666 #if 0 && defined(DEBUG) // doesn't work at the moment
 667   if (major_gc) { gcCAFs(); }
 668 #endif
 669
 670 #ifdef PROFILING
 671   // resetStaticObjectForRetainerProfiling() must be called before
 672   // zeroing below.
 673   if (n_gc_threads > 1) {
 674       barf("profiling is currently broken with multi-threaded GC");
 675       // ToDo: fix the gct->scavenged_static_objects below
 676   }
 677   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 678 #endif
 679
 680   // zero the scavenged static object list
 681   if (major_gc) {
 682       nat i;
 683       for (i = 0; i < n_gc_threads; i++) {
 684           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 685       }
 686   }
 687
 688   // Reset the nursery
 689   resetNurseries();
 690
 691   // start any pending finalizers
 692   RELEASE_SM_LOCK;
 693   scheduleFinalizers(last_free_capability, old_weak_ptr_list);
 694   ACQUIRE_SM_LOCK;
 695
 696   // send exceptions to any threads which were about to die
 697   RELEASE_SM_LOCK;
 698   resurrectThreads(resurrected_threads);
 699   performPendingThrowTos(exception_threads);
 700   ACQUIRE_SM_LOCK;
 701
 702   // Update the stable pointer hash table.
 703   updateStablePtrTable(major_gc);
 704
 705   // check sanity after GC
 706   IF_DEBUG(sanity, checkSanity());
 707
 708   // extra GC trace info
 709   if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
 710
 711 #ifdef DEBUG
 712   // symbol-table based profiling
 713   /*  heapCensus(to_blocks); */ /* ToDo */
 714 #endif
 715
 716   // restore enclosing cost centre
 717 #ifdef PROFILING
 718   CCCS = prev_CCS;
 719 #endif
 720
 721 #ifdef DEBUG
 722   // check for memory leaks if DEBUG is on
 723   memInventory(traceClass(DEBUG_gc));
 724 #endif
 725
 726 #ifdef RTS_GTK_FRONTPANEL
 727   if (RtsFlags.GcFlags.frontpanel) {
 728       updateFrontPanelAfterGC( N, live );
 729   }
 730 #endif
 731
 732   // ok, GC over: tell the stats department what happened.
 733   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 734   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 735
 736 #if defined(RTS_USER_SIGNALS)
 737   if (RtsFlags.MiscFlags.install_signal_handlers) {
 738     // unblock signals again
 739     unblockUserSignals();
 740   }
 741 #endif
 742
 743   RELEASE_SM_LOCK;
 744
 745   gct = saved_gct;
 746 }
 747
 748 /* -----------------------------------------------------------------------------
 749    Figure out which generation to collect, initialise N and major_gc.
 750
 751    Also returns the total number of blocks in generations that will be
 752    collected.
 753    -------------------------------------------------------------------------- */
 754
 755 static nat
 756 initialise_N (rtsBool force_major_gc)
 757 {
 758     int g;
 759     nat s, blocks, blocks_total;
 760
 761     blocks = 0;
 762     blocks_total = 0;
 763
 764     if (force_major_gc) {
 765         N = RtsFlags.GcFlags.generations - 1;
 766     } else {
 767         N = 0;
 768     }
 769
 770     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 771         blocks = 0;
 772         for (s = 0; s < generations[g].n_steps; s++) {
 773             blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
 774             blocks += generations[g].steps[s].n_large_blocks;
 775         }
 776         if (blocks >= generations[g].max_blocks) {
 777             N = stg_max(N,g);
 778         }
 779         if ((nat)g <= N) {
 780             blocks_total += blocks;
 781         }
 782     }
 783
 784     blocks_total += countNurseryBlocks();
 785
 786     major_gc = (N == RtsFlags.GcFlags.generations-1);
 787     return blocks_total;
 788 }
 789
 790 /* -----------------------------------------------------------------------------
 791    Initialise the gc_thread structures.
 792    -------------------------------------------------------------------------- */
 793
 794 static gc_thread *
 795 alloc_gc_thread (int n)
 796 {
 797     nat s;
 798     step_workspace *ws;
 799     gc_thread *t;
 800
 801     t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 802                        "alloc_gc_thread");
 803
 804 #ifdef THREADED_RTS
 805     t->id = 0;
 806     initCondition(&t->wake_cond);
 807     initMutex(&t->wake_mutex);
 808     t->wakeup = rtsTrue;  // starts true, so we can wait for the
 809                           // thread to start up, see wakeup_gc_threads
 810     t->exit   = rtsFalse;
 811 #endif
 812
 813     t->thread_index = n;
 814     t->free_blocks = NULL;
 815     t->gc_count = 0;
 816
 817     init_gc_thread(t);
 818
 819 #ifdef USE_PAPI
 820     t->papi_events = -1;
 821 #endif
 822
 823     for (s = 0; s < total_steps; s++)
 824     {
 825         ws = &t->steps[s];
 826         ws->step = &all_steps[s];
 827         ASSERT(s == ws->step->abs_no);
 828         ws->gct = t;
 829
 830         ws->todo_bd = NULL;
 831         ws->buffer_todo_bd = NULL;
 832
 833         ws->part_list = NULL;
 834         ws->n_part_blocks = 0;
 835
 836         ws->scavd_list = NULL;
 837         ws->n_scavd_blocks = 0;
 838     }
 839
 840     return t;
 841 }
 842
 843
 844 static void
 845 alloc_gc_threads (void)
 846 {
 847     if (gc_threads == NULL) {
 848 #if defined(THREADED_RTS)
 849         nat i;
 850         gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
 851                                      sizeof(gc_thread*),
 852                                      "alloc_gc_threads");
 853
 854         for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
 855             gc_threads[i] = alloc_gc_thread(i);
 856         }
 857 #else
 858         gc_threads = stgMallocBytes (sizeof(gc_thread*),
 859                                      "alloc_gc_threads");
 860
 861         gc_threads[0] = alloc_gc_thread(0);
 862 #endif
 863     }
 864 }
 865
 866 /* ----------------------------------------------------------------------------
 867    Start GC threads
 868    ------------------------------------------------------------------------- */
 869
 870 static nat gc_running_threads;
 871
 872 #if defined(THREADED_RTS)
 873 static Mutex gc_running_mutex;
 874 #endif
 875
 876 static nat
 877 inc_running (void)
 878 {
 879     nat n_running;
 880     ACQUIRE_LOCK(&gc_running_mutex);
 881     n_running = ++gc_running_threads;
 882     RELEASE_LOCK(&gc_running_mutex);
 883     ASSERT(n_running <= n_gc_threads);
 884     return n_running;
 885 }
 886
 887 static nat
 888 dec_running (void)
 889 {
 890     nat n_running;
 891     ACQUIRE_LOCK(&gc_running_mutex);
 892     ASSERT(n_gc_threads != 0);
 893     n_running = --gc_running_threads;
 894     RELEASE_LOCK(&gc_running_mutex);
 895     return n_running;
 896 }
 897
 898 static void
 899 scavenge_until_all_done (void)
 900 {
 901     nat r;
 902
 903     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
 904
 905 loop:
 906     scavenge_loop();
 907     // scavenge_loop() only exits when there's no work to do
 908     r = dec_running();
 909
 910     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
 911                gct->thread_index, r);
 912
 913     while (gc_running_threads != 0) {
 914         // usleep(1);
 915         if (any_work()) {
 916             inc_running();
 917             goto loop;
 918         }
 919         // any_work() does not remove the work from the queue, it
 920         // just checks for the presence of work.  If we find any,
 921         // then we increment gc_running_threads and go back to
 922         // scavenge_loop() to perform any pending work.
 923     }
 924
 925     // All threads are now stopped
 926     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
 927 }
 928
 929 #if defined(THREADED_RTS)
 930 //
 931 // gc_thread_work(): Scavenge until there's no work left to do and all
 932 // the running threads are idle.
 933 //
 934 static void
 935 gc_thread_work (void)
 936 {
 937     // gc_running_threads has already been incremented for us; this is
 938     // a worker thread and the main thread bumped gc_running_threads
 939     // before waking us up.
 940
 941     // Every thread evacuates some roots.
 942     gct->evac_step = 0;
 943     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads);
 944
 945     scavenge_until_all_done();
 946 }
 947
 948
 949 static void
 950 gc_thread_mainloop (void)
 951 {
 952     while (!gct->exit) {
 953
 954         // Wait until we're told to wake up
 955         ACQUIRE_LOCK(&gct->wake_mutex);
 956         gct->wakeup = rtsFalse;
 957         while (!gct->wakeup) {
 958             debugTrace(DEBUG_gc, "GC thread %d standing by...",
 959                        gct->thread_index);
 960             waitCondition(&gct->wake_cond, &gct->wake_mutex);
 961         }
 962         RELEASE_LOCK(&gct->wake_mutex);
 963         if (gct->exit) break;
 964
 965 #ifdef USE_PAPI
 966         // start performance counters in this thread...
 967         if (gct->papi_events == -1) {
 968             papi_init_eventset(&gct->papi_events);
 969         }
 970         papi_thread_start_gc1_count(gct->papi_events);
 971 #endif
 972
 973         gc_thread_work();
 974
 975 #ifdef USE_PAPI
 976         // count events in this thread towards the GC totals
 977         papi_thread_stop_gc1_count(gct->papi_events);
 978 #endif
 979     }
 980 }
 981 #endif
 982
 983 #if defined(THREADED_RTS)
 984 static void
 985 gc_thread_entry (gc_thread *my_gct)
 986 {
 987     gct = my_gct;
 988     debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
 989     gct->id = osThreadId();
 990     gc_thread_mainloop();
 991 }
 992 #endif
 993
 994 static void
 995 start_gc_threads (void)
 996 {
 997 #if defined(THREADED_RTS)
 998     nat i;
 999     OSThreadId id;
1000     static rtsBool done = rtsFalse;
1001
1002     gc_running_threads = 0;
1003     initMutex(&gc_running_mutex);
1004
1005     if (!done) {
1006         // Start from 1: the main thread is 0
1007         for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
1008             createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
1009                            gc_threads[i]);
1010         }
1011         done = rtsTrue;
1012     }
1013 #endif
1014 }
1015
1016 static void
1017 wakeup_gc_threads (nat n_threads USED_IF_THREADS)
1018 {
1019 #if defined(THREADED_RTS)
1020     nat i;
1021     for (i=1; i < n_threads; i++) {
1022         inc_running();
1023         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1024         do {
1025             ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1026             if (gc_threads[i]->wakeup) {
1027                 RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1028                 continue;
1029             } else {
1030                 break;
1031             }
1032         } while (1);
1033         gc_threads[i]->wakeup = rtsTrue;
1034         signalCondition(&gc_threads[i]->wake_cond);
1035         RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1036     }
1037 #endif
1038 }
1039
1040 // After GC is complete, we must wait for all GC threads to enter the
1041 // standby state, otherwise they may still be executing inside
1042 // any_work(), and may even remain awake until the next GC starts.
1043 static void
1044 shutdown_gc_threads (nat n_threads USED_IF_THREADS)
1045 {
1046 #if defined(THREADED_RTS)
1047     nat i;
1048     rtsBool wakeup;
1049     for (i=1; i < n_threads; i++) {
1050         do {
1051             ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1052             wakeup = gc_threads[i]->wakeup;
1053             // wakeup is false while the thread is waiting
1054             RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1055         } while (wakeup);
1056     }
1057 #endif
1058 }
1059
1060 /* ----------------------------------------------------------------------------
1061    Initialise a generation that is to be collected
1062    ------------------------------------------------------------------------- */
1063
1064 static void
1065 init_collected_gen (nat g, nat n_threads)
1066 {
1067     nat s, t, i;
1068     step_workspace *ws;
1069     step *stp;
1070     bdescr *bd;
1071
1072     // Throw away the current mutable list.  Invariant: the mutable
1073     // list always has at least one block; this means we can avoid a
1074     // check for NULL in recordMutable().
1075     if (g != 0) {
1076         freeChain(generations[g].mut_list);
1077         generations[g].mut_list = allocBlock();
1078         for (i = 0; i < n_capabilities; i++) {
1079             freeChain(capabilities[i].mut_lists[g]);
1080             capabilities[i].mut_lists[g] = allocBlock();
1081         }
1082     }
1083
1084     for (s = 0; s < generations[g].n_steps; s++) {
1085
1086         stp = &generations[g].steps[s];
1087         ASSERT(stp->gen_no == g);
1088
1089         // we'll construct a new list of threads in this step
1090         // during GC, throw away the current list.
1091         stp->old_threads = stp->threads;
1092         stp->threads = END_TSO_QUEUE;
1093
1094         // generation 0, step 0 doesn't need to-space
1095         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1096             continue;
1097         }
1098
1099         // deprecate the existing blocks
1100         stp->old_blocks   = stp->blocks;
1101         stp->n_old_blocks = stp->n_blocks;
1102         stp->blocks       = NULL;
1103         stp->n_blocks     = 0;
1104         stp->n_words      = 0;
1105
1106         // we don't have any to-be-scavenged blocks yet
1107         stp->todos = NULL;
1108         stp->todos_last = NULL;
1109         stp->n_todos = 0;
1110
1111         // initialise the large object queues.
1112         stp->scavenged_large_objects = NULL;
1113         stp->n_scavenged_large_blocks = 0;
1114
1115         // mark the small objects as from-space
1116         for (bd = stp->old_blocks; bd; bd = bd->link) {
1117             bd->flags &= ~BF_EVACUATED;
1118         }
1119
1120         // mark the large objects as from-space
1121         for (bd = stp->large_objects; bd; bd = bd->link) {
1122             bd->flags &= ~BF_EVACUATED;
1123         }
1124
1125         // for a compacted step, we need to allocate the bitmap
1126         if (stp->is_compacted) {
1127             nat bitmap_size; // in bytes
1128             bdescr *bitmap_bdescr;
1129             StgWord *bitmap;
1130
1131             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1132
1133             if (bitmap_size > 0) {
1134                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1135                                            / BLOCK_SIZE);
1136                 stp->bitmap = bitmap_bdescr;
1137                 bitmap = bitmap_bdescr->start;
1138
1139                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1140                            bitmap_size, bitmap);
1141
1142                 // don't forget to fill it with zeros!
1143                 memset(bitmap, 0, bitmap_size);
1144
1145                 // For each block in this step, point to its bitmap from the
1146                 // block descriptor.
1147                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1148                     bd->u.bitmap = bitmap;
1149                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1150
1151                     // Also at this point we set the BF_COMPACTED flag
1152                     // for this block.  The invariant is that
1153                     // BF_COMPACTED is always unset, except during GC
1154                     // when it is set on those blocks which will be
1155                     // compacted.
1156                     bd->flags |= BF_COMPACTED;
1157                 }
1158             }
1159         }
1160     }
1161
1162     // For each GC thread, for each step, allocate a "todo" block to
1163     // store evacuated objects to be scavenged, and a block to store
1164     // evacuated objects that do not need to be scavenged.
1165     for (t = 0; t < n_threads; t++) {
1166         for (s = 0; s < generations[g].n_steps; s++) {
1167
1168             // we don't copy objects into g0s0, unless -G0
1169             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1170
1171             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1172
1173             ws->todo_large_objects = NULL;
1174
1175             ws->part_list = NULL;
1176             ws->n_part_blocks = 0;
1177
1178             // allocate the first to-space block; extra blocks will be
1179             // chained on as necessary.
1180             ws->todo_bd = NULL;
1181             ws->buffer_todo_bd = NULL;
1182             alloc_todo_block(ws,0);
1183
1184             ws->scavd_list = NULL;
1185             ws->n_scavd_blocks = 0;
1186         }
1187     }
1188 }
1189
1190
1191 /* ----------------------------------------------------------------------------
1192    Initialise a generation that is *not* to be collected
1193    ------------------------------------------------------------------------- */
1194
1195 static void
1196 init_uncollected_gen (nat g, nat threads)
1197 {
1198     nat s, t, i;
1199     step_workspace *ws;
1200     step *stp;
1201     bdescr *bd;
1202
1203     for (s = 0; s < generations[g].n_steps; s++) {
1204         stp = &generations[g].steps[s];
1205         stp->scavenged_large_objects = NULL;
1206         stp->n_scavenged_large_blocks = 0;
1207     }
1208
1209     for (s = 0; s < generations[g].n_steps; s++) {
1210
1211         stp = &generations[g].steps[s];
1212
1213         for (t = 0; t < threads; t++) {
1214             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1215
1216             ws->buffer_todo_bd = NULL;
1217             ws->todo_large_objects = NULL;
1218
1219             ws->part_list = NULL;
1220             ws->n_part_blocks = 0;
1221
1222             ws->scavd_list = NULL;
1223             ws->n_scavd_blocks = 0;
1224
1225             // If the block at the head of the list in this generation
1226             // is less than 3/4 full, then use it as a todo block.
1227             if (stp->blocks && isPartiallyFull(stp->blocks))
1228             {
1229                 ws->todo_bd = stp->blocks;
1230                 ws->todo_free = ws->todo_bd->free;
1231                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1232                 stp->blocks = stp->blocks->link;
1233                 stp->n_blocks -= 1;
1234                 stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1235                 ws->todo_bd->link = NULL;
1236                 // we must scan from the current end point.
1237                 ws->todo_bd->u.scan = ws->todo_bd->free;
1238             }
1239             else
1240             {
1241                 ws->todo_bd = NULL;
1242                 alloc_todo_block(ws,0);
1243             }
1244         }
1245
1246         // deal out any more partial blocks to the threads' part_lists
1247         t = 0;
1248         while (stp->blocks && isPartiallyFull(stp->blocks))
1249         {
1250             bd = stp->blocks;
1251             stp->blocks = bd->link;
1252             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1253             bd->link = ws->part_list;
1254             ws->part_list = bd;
1255             ws->n_part_blocks += 1;
1256             bd->u.scan = bd->free;
1257             stp->n_blocks -= 1;
1258             stp->n_words -= bd->free - bd->start;
1259             t++;
1260             if (t == n_gc_threads) t = 0;
1261         }
1262     }
1263
1264
1265     // Move the private mutable lists from each capability onto the
1266     // main mutable list for the generation.
1267     for (i = 0; i < n_capabilities; i++) {
1268         for (bd = capabilities[i].mut_lists[g];
1269              bd->link != NULL; bd = bd->link) {
1270             /* nothing */
1271         }
1272         bd->link = generations[g].mut_list;
1273         generations[g].mut_list = capabilities[i].mut_lists[g];
1274         capabilities[i].mut_lists[g] = allocBlock();
1275     }
1276 }
1277
1278 /* -----------------------------------------------------------------------------
1279    Initialise a gc_thread before GC
1280    -------------------------------------------------------------------------- */
1281
1282 static void
1283 init_gc_thread (gc_thread *t)
1284 {
1285     t->static_objects = END_OF_STATIC_LIST;
1286     t->scavenged_static_objects = END_OF_STATIC_LIST;
1287     t->scan_bd = NULL;
1288     t->evac_step = 0;
1289     t->failed_to_evac = rtsFalse;
1290     t->eager_promotion = rtsTrue;
1291     t->thunk_selector_depth = 0;
1292     t->copied = 0;
1293     t->scanned = 0;
1294     t->any_work = 0;
1295     t->no_work = 0;
1296     t->scav_find_work = 0;
1297 }
1298
1299 /* -----------------------------------------------------------------------------
1300    Function we pass to evacuate roots.
1301    -------------------------------------------------------------------------- */
1302
1303 static void
1304 mark_root(void *user, StgClosure **root)
1305 {
1306     // we stole a register for gct, but this function is called from
1307     // *outside* the GC where the register variable is not in effect,
1308     // so we need to save and restore it here.  NB. only call
1309     // mark_root() from the main GC thread, otherwise gct will be
1310     // incorrect.
1311     gc_thread *saved_gct;
1312     saved_gct = gct;
1313     gct = user;
1314
1315     evacuate(root);
1316
1317     gct = saved_gct;
1318 }
1319
1320 /* -----------------------------------------------------------------------------
1321    Initialising the static object & mutable lists
1322    -------------------------------------------------------------------------- */
1323
1324 static void
1325 zero_static_object_list(StgClosure* first_static)
1326 {
1327   StgClosure* p;
1328   StgClosure* link;
1329   const StgInfoTable *info;
1330
1331   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1332     info = get_itbl(p);
1333     link = *STATIC_LINK(info, p);
1334     *STATIC_LINK(info,p) = NULL;
1335   }
1336 }
1337
1338 /* ----------------------------------------------------------------------------
1339    Update the pointers from the task list
1340
1341    These are treated as weak pointers because we want to allow a main
1342    thread to get a BlockedOnDeadMVar exception in the same way as any
1343    other thread.  Note that the threads should all have been retained
1344    by GC by virtue of being on the all_threads list, we're just
1345    updating pointers here.
1346    ------------------------------------------------------------------------- */
1347
1348 static void
1349 update_task_list (void)
1350 {
1351     Task *task;
1352     StgTSO *tso;
1353     for (task = all_tasks; task != NULL; task = task->all_link) {
1354         if (!task->stopped && task->tso) {
1355             ASSERT(task->tso->bound == task);
1356             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1357             if (tso == NULL) {
1358                 barf("task %p: main thread %d has been GC'd",
1359 #ifdef THREADED_RTS
1360                      (void *)task->id,
1361 #else
1362                      (void *)task,
1363 #endif
1364                      task->tso->id);
1365             }
1366             task->tso = tso;
1367         }
1368     }
1369 }
1370
1371 /* ----------------------------------------------------------------------------
1372    Reset the sizes of the older generations when we do a major
1373    collection.
1374
1375    CURRENT STRATEGY: make all generations except zero the same size.
1376    We have to stay within the maximum heap size, and leave a certain
1377    percentage of the maximum heap size available to allocate into.
1378    ------------------------------------------------------------------------- */
1379
1380 static void
1381 resize_generations (void)
1382 {
1383     nat g;
1384
1385     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1386         nat live, size, min_alloc;
1387         nat max  = RtsFlags.GcFlags.maxHeapSize;
1388         nat gens = RtsFlags.GcFlags.generations;
1389
1390         // live in the oldest generations
1391         live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+
1392             oldest_gen->steps[0].n_large_blocks;
1393
1394         // default max size for all generations except zero
1395         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1396                        RtsFlags.GcFlags.minOldGenSize);
1397
1398         // minimum size for generation zero
1399         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1400                             RtsFlags.GcFlags.minAllocAreaSize);
1401
1402         // Auto-enable compaction when the residency reaches a
1403         // certain percentage of the maximum heap size (default: 30%).
1404         if (RtsFlags.GcFlags.generations > 1 &&
1405             (RtsFlags.GcFlags.compact ||
1406              (max > 0 &&
1407               oldest_gen->steps[0].n_blocks >
1408               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1409             oldest_gen->steps[0].is_compacted = 1;
1410 //        debugBelch("compaction: on\n", live);
1411         } else {
1412             oldest_gen->steps[0].is_compacted = 0;
1413 //        debugBelch("compaction: off\n", live);
1414         }
1415
1416         // if we're going to go over the maximum heap size, reduce the
1417         // size of the generations accordingly.  The calculation is
1418         // different if compaction is turned on, because we don't need
1419         // to double the space required to collect the old generation.
1420         if (max != 0) {
1421
1422             // this test is necessary to ensure that the calculations
1423             // below don't have any negative results - we're working
1424             // with unsigned values here.
1425             if (max < min_alloc) {
1426                 heapOverflow();
1427             }
1428
1429             if (oldest_gen->steps[0].is_compacted) {
1430                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1431                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1432                 }
1433             } else {
1434                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1435                     size = (max - min_alloc) / ((gens - 1) * 2);
1436                 }
1437             }
1438
1439             if (size < live) {
1440                 heapOverflow();
1441             }
1442         }
1443
1444 #if 0
1445         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1446                    min_alloc, size, max);
1447 #endif
1448
1449         for (g = 0; g < gens; g++) {
1450             generations[g].max_blocks = size;
1451         }
1452     }
1453 }
1454
1455 /* -----------------------------------------------------------------------------
1456    Calculate the new size of the nursery, and resize it.
1457    -------------------------------------------------------------------------- */
1458
1459 static void
1460 resize_nursery (void)
1461 {
1462     if (RtsFlags.GcFlags.generations == 1)
1463     {   // Two-space collector:
1464         nat blocks;
1465
1466         /* set up a new nursery.  Allocate a nursery size based on a
1467          * function of the amount of live data (by default a factor of 2)
1468          * Use the blocks from the old nursery if possible, freeing up any
1469          * left over blocks.
1470          *
1471          * If we get near the maximum heap size, then adjust our nursery
1472          * size accordingly.  If the nursery is the same size as the live
1473          * data (L), then we need 3L bytes.  We can reduce the size of the
1474          * nursery to bring the required memory down near 2L bytes.
1475          *
1476          * A normal 2-space collector would need 4L bytes to give the same
1477          * performance we get from 3L bytes, reducing to the same
1478          * performance at 2L bytes.
1479          */
1480         blocks = g0s0->n_blocks;
1481
1482         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1483              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1484              RtsFlags.GcFlags.maxHeapSize )
1485         {
1486             long adjusted_blocks;  // signed on purpose
1487             int pc_free;
1488
1489             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1490
1491             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1492                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1493
1494             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1495             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1496             {
1497                 heapOverflow();
1498             }
1499             blocks = adjusted_blocks;
1500         }
1501         else
1502         {
1503             blocks *= RtsFlags.GcFlags.oldGenFactor;
1504             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1505             {
1506                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1507             }
1508         }
1509         resizeNurseries(blocks);
1510     }
1511     else  // Generational collector
1512     {
1513         /*
1514          * If the user has given us a suggested heap size, adjust our
1515          * allocation area to make best use of the memory available.
1516          */
1517         if (RtsFlags.GcFlags.heapSizeSuggestion)
1518         {
1519             long blocks;
1520             nat needed = calcNeeded();  // approx blocks needed at next GC
1521
1522             /* Guess how much will be live in generation 0 step 0 next time.
1523              * A good approximation is obtained by finding the
1524              * percentage of g0s0 that was live at the last minor GC.
1525              *
1526              * We have an accurate figure for the amount of copied data in
1527              * 'copied', but we must convert this to a number of blocks, with
1528              * a small adjustment for estimated slop at the end of a block
1529              * (- 10 words).
1530              */
1531             if (N == 0)
1532             {
1533                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1534                     / countNurseryBlocks();
1535             }
1536
1537             /* Estimate a size for the allocation area based on the
1538              * information available.  We might end up going slightly under
1539              * or over the suggested heap size, but we should be pretty
1540              * close on average.
1541              *
1542              * Formula:            suggested - needed
1543              *                ----------------------------
1544              *                    1 + g0s0_pcnt_kept/100
1545              *
1546              * where 'needed' is the amount of memory needed at the next
1547              * collection for collecting all steps except g0s0.
1548              */
1549             blocks =
1550                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1551                 (100 + (long)g0s0_pcnt_kept);
1552
1553             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1554                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1555             }
1556
1557             resizeNurseries((nat)blocks);
1558         }
1559         else
1560         {
1561             // we might have added extra large blocks to the nursery, so
1562             // resize back to minAllocAreaSize again.
1563             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1564         }
1565     }
1566 }
1567
1568 /* -----------------------------------------------------------------------------
1569    Sanity code for CAF garbage collection.
1570
1571    With DEBUG turned on, we manage a CAF list in addition to the SRT
1572    mechanism.  After GC, we run down the CAF list and blackhole any
1573    CAFs which have been garbage collected.  This means we get an error
1574    whenever the program tries to enter a garbage collected CAF.
1575
1576    Any garbage collected CAFs are taken off the CAF list at the same
1577    time.
1578    -------------------------------------------------------------------------- */
1579
1580 #if 0 && defined(DEBUG)
1581
1582 static void
1583 gcCAFs(void)
1584 {
1585   StgClosure*  p;
1586   StgClosure** pp;
1587   const StgInfoTable *info;
1588   nat i;
1589
1590   i = 0;
1591   p = caf_list;
1592   pp = &caf_list;
1593
1594   while (p != NULL) {
1595
1596     info = get_itbl(p);
1597
1598     ASSERT(info->type == IND_STATIC);
1599
1600     if (STATIC_LINK(info,p) == NULL) {
1601         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1602         // black hole it
1603         SET_INFO(p,&stg_BLACKHOLE_info);
1604         p = STATIC_LINK2(info,p);
1605         *pp = p;
1606     }
1607     else {
1608       pp = &STATIC_LINK2(info,p);
1609       p = *pp;
1610       i++;
1611     }
1612
1613   }
1614
1615   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1616 }
1617 #endif