rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 // #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsFlags.h"
  17 #include "RtsUtils.h"
  18 #include "Apply.h"
  19 #include "OSThreads.h"
  20 #include "LdvProfile.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "MBlock.h"
  27 #include "ProfHeap.h"
  28 #include "SchedAPI.h"
  29 #include "Weak.h"
  30 #include "Prelude.h"
  31 #include "ParTicky.h"           // ToDo: move into Rts.h
  32 #include "RtsSignals.h"
  33 #include "STM.h"
  34 #include "HsFFI.h"
  35 #include "Linker.h"
  36 #if defined(RTS_GTK_FRONTPANEL)
  37 #include "FrontPanel.h"
  38 #endif
  39 #include "Trace.h"
  40 #include "RetainerProfile.h"
  41 #include "RaiseAsync.h"
  42 #include "Sparks.h"
  43 #include "Papi.h"
  44
  45 #include "GC.h"
  46 #include "GCThread.h"
  47 #include "Compact.h"
  48 #include "Evac.h"
  49 #include "Scav.h"
  50 #include "GCUtils.h"
  51 #include "MarkWeak.h"
  52 #include "Sparks.h"
  53
  54 #include <string.h> // for memset()
  55 #include <unistd.h>
  56
  57 /* -----------------------------------------------------------------------------
  58    Global variables
  59    -------------------------------------------------------------------------- */
  60
  61 /* STATIC OBJECT LIST.
  62  *
  63  * During GC:
  64  * We maintain a linked list of static objects that are still live.
  65  * The requirements for this list are:
  66  *
  67  *  - we need to scan the list while adding to it, in order to
  68  *    scavenge all the static objects (in the same way that
  69  *    breadth-first scavenging works for dynamic objects).
  70  *
  71  *  - we need to be able to tell whether an object is already on
  72  *    the list, to break loops.
  73  *
  74  * Each static object has a "static link field", which we use for
  75  * linking objects on to the list.  We use a stack-type list, consing
  76  * objects on the front as they are added (this means that the
  77  * scavenge phase is depth-first, not breadth-first, but that
  78  * shouldn't matter).
  79  *
  80  * A separate list is kept for objects that have been scavenged
  81  * already - this is so that we can zero all the marks afterwards.
  82  *
  83  * An object is on the list if its static link field is non-zero; this
  84  * means that we have to mark the end of the list with '1', not NULL.
  85  *
  86  * Extra notes for generational GC:
  87  *
  88  * Each generation has a static object list associated with it.  When
  89  * collecting generations up to N, we treat the static object lists
  90  * from generations > N as roots.
  91  *
  92  * We build up a static object list while collecting generations 0..N,
  93  * which is then appended to the static object list of generation N+1.
  94  */
  95
  96 /* N is the oldest generation being collected, where the generations
  97  * are numbered starting at 0.  A major GC (indicated by the major_gc
  98  * flag) is when we're collecting all generations.  We only attempt to
  99  * deal with static objects and GC CAFs when doing a major GC.
 100  */
 101 nat N;
 102 rtsBool major_gc;
 103
 104 /* Data used for allocation area sizing.
 105  */
 106 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 107
 108 /* Mut-list stats */
 109 #ifdef DEBUG
 110 nat mutlist_MUTVARS,
 111     mutlist_MUTARRS,
 112     mutlist_MVARS,
 113     mutlist_OTHERS;
 114 #endif
 115
 116 /* Thread-local data for each GC thread
 117  */
 118 gc_thread **gc_threads = NULL;
 119 // gc_thread *gct = NULL;  // this thread's gct TODO: make thread-local
 120
 121 // Number of threads running in *this* GC.  Affects how many
 122 // step->todos[] lists we have to look in to find work.
 123 nat n_gc_threads;
 124
 125 // For stats:
 126 long copied;        // *words* copied & scavenged during this GC
 127
 128 #ifdef THREADED_RTS
 129 SpinLock recordMutableGen_sync;
 130 #endif
 131
 132 /* -----------------------------------------------------------------------------
 133    Static function declarations
 134    -------------------------------------------------------------------------- */
 135
 136 static void mark_root               (void *user, StgClosure **root);
 137 static void zero_static_object_list (StgClosure* first_static);
 138 static nat  initialise_N            (rtsBool force_major_gc);
 139 static void alloc_gc_threads        (void);
 140 static void init_collected_gen      (nat g, nat threads);
 141 static void init_uncollected_gen    (nat g, nat threads);
 142 static void init_gc_thread          (gc_thread *t);
 143 static void update_task_list        (void);
 144 static void resize_generations      (void);
 145 static void resize_nursery          (void);
 146 static void start_gc_threads        (void);
 147 static void scavenge_until_all_done (void);
 148 static nat  inc_running             (void);
 149 static nat  dec_running             (void);
 150 static void wakeup_gc_threads       (nat n_threads);
 151 static void shutdown_gc_threads     (nat n_threads);
 152
 153 #if 0 && defined(DEBUG)
 154 static void gcCAFs                  (void);
 155 #endif
 156
 157 /* -----------------------------------------------------------------------------
 158    The mark bitmap & stack.
 159    -------------------------------------------------------------------------- */
 160
 161 #define MARK_STACK_BLOCKS 4
 162
 163 bdescr *mark_stack_bdescr;
 164 StgPtr *mark_stack;
 165 StgPtr *mark_sp;
 166 StgPtr *mark_splim;
 167
 168 // Flag and pointers used for falling back to a linear scan when the
 169 // mark stack overflows.
 170 rtsBool mark_stack_overflowed;
 171 bdescr *oldgen_scan_bd;
 172 StgPtr  oldgen_scan;
 173
 174 /* -----------------------------------------------------------------------------
 175    GarbageCollect: the main entry point to the garbage collector.
 176
 177    Locks held: all capabilities are held throughout GarbageCollect().
 178    -------------------------------------------------------------------------- */
 179
 180 void
 181 GarbageCollect ( rtsBool force_major_gc )
 182 {
 183   bdescr *bd;
 184   step *stp;
 185   lnat live, allocated, max_copied, avg_copied, slop;
 186   lnat oldgen_saved_blocks = 0;
 187   gc_thread *saved_gct;
 188   nat g, s, t, n;
 189
 190   // necessary if we stole a callee-saves register for gct:
 191   saved_gct = gct;
 192
 193 #ifdef PROFILING
 194   CostCentreStack *prev_CCS;
 195 #endif
 196
 197   ACQUIRE_SM_LOCK;
 198
 199 #if defined(RTS_USER_SIGNALS)
 200   if (RtsFlags.MiscFlags.install_signal_handlers) {
 201     // block signals
 202     blockUserSignals();
 203   }
 204 #endif
 205
 206   ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
 207   // otherwise adjust the padding in step_workspace.
 208
 209   // tell the stats department that we've started a GC
 210   stat_startGC();
 211
 212   // tell the STM to discard any cached closures it's hoping to re-use
 213   stmPreGCHook();
 214
 215 #ifdef DEBUG
 216   mutlist_MUTVARS = 0;
 217   mutlist_MUTARRS = 0;
 218   mutlist_OTHERS = 0;
 219 #endif
 220
 221   // attribute any costs to CCS_GC
 222 #ifdef PROFILING
 223   prev_CCS = CCCS;
 224   CCCS = CCS_GC;
 225 #endif
 226
 227   /* Approximate how much we allocated.
 228    * Todo: only when generating stats?
 229    */
 230   allocated = calcAllocated();
 231
 232   /* Figure out which generation to collect
 233    */
 234   n = initialise_N(force_major_gc);
 235
 236   /* Allocate + initialise the gc_thread structures.
 237    */
 238   alloc_gc_threads();
 239
 240   /* Start threads, so they can be spinning up while we finish initialisation.
 241    */
 242   start_gc_threads();
 243
 244   /* How many threads will be participating in this GC?
 245    * We don't try to parallelise minor GC.
 246    */
 247 #if defined(THREADED_RTS)
 248   if (n < (4*1024*1024 / BLOCK_SIZE)) {
 249       n_gc_threads = 1;
 250   } else {
 251       n_gc_threads = RtsFlags.ParFlags.gcThreads;
 252   }
 253 #else
 254   n_gc_threads = 1;
 255 #endif
 256   trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 257         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 258
 259 #ifdef RTS_GTK_FRONTPANEL
 260   if (RtsFlags.GcFlags.frontpanel) {
 261       updateFrontPanelBeforeGC(N);
 262   }
 263 #endif
 264
 265 #ifdef DEBUG
 266   // check for memory leaks if DEBUG is on
 267   memInventory(traceClass(DEBUG_gc));
 268 #endif
 269
 270   // check stack sanity *before* GC (ToDo: check all threads)
 271   IF_DEBUG(sanity, checkFreeListSanity());
 272
 273   // Initialise all our gc_thread structures
 274   for (t = 0; t < n_gc_threads; t++) {
 275       init_gc_thread(gc_threads[t]);
 276   }
 277
 278   // Initialise all the generations/steps that we're collecting.
 279   for (g = 0; g <= N; g++) {
 280       init_collected_gen(g,n_gc_threads);
 281   }
 282
 283   // Initialise all the generations/steps that we're *not* collecting.
 284   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 285       init_uncollected_gen(g,n_gc_threads);
 286   }
 287
 288   /* Allocate a mark stack if we're doing a major collection.
 289    */
 290   if (major_gc) {
 291       mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
 292       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 293       mark_sp    = mark_stack;
 294       mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
 295   } else {
 296       mark_stack_bdescr = NULL;
 297   }
 298
 299   // this is the main thread
 300   gct = gc_threads[0];
 301
 302   /* -----------------------------------------------------------------------
 303    * follow all the roots that we know about:
 304    *   - mutable lists from each generation > N
 305    * we want to *scavenge* these roots, not evacuate them: they're not
 306    * going to move in this GC.
 307    * Also do them in reverse generation order, for the usual reason:
 308    * namely to reduce the likelihood of spurious old->new pointers.
 309    */
 310   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 311       generations[g].saved_mut_list = generations[g].mut_list;
 312       generations[g].mut_list = allocBlock();
 313       // mut_list always has at least one block.
 314   }
 315
 316   // the main thread is running: this prevents any other threads from
 317   // exiting prematurely, so we can start them now.
 318   // NB. do this after the mutable lists have been saved above, otherwise
 319   // the other GC threads will be writing into the old mutable lists.
 320   inc_running();
 321   wakeup_gc_threads(n_gc_threads);
 322
 323   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 324       scavenge_mutable_list(&generations[g]);
 325   }
 326
 327   // follow roots from the CAF list (used by GHCi)
 328   gct->evac_step = 0;
 329   markCAFs(mark_root, gct);
 330
 331   // follow all the roots that the application knows about.
 332   gct->evac_step = 0;
 333   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads);
 334
 335 #if defined(RTS_USER_SIGNALS)
 336   // mark the signal handlers (signals should be already blocked)
 337   markSignalHandlers(mark_root, gct);
 338 #endif
 339
 340   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 341   markWeakPtrList();
 342   initWeakForGC();
 343
 344   // Mark the stable pointer table.
 345   markStablePtrTable(mark_root, gct);
 346
 347   /* -------------------------------------------------------------------------
 348    * Repeatedly scavenge all the areas we know about until there's no
 349    * more scavenging to be done.
 350    */
 351   for (;;)
 352   {
 353       scavenge_until_all_done();
 354       // The other threads are now stopped.  We might recurse back to
 355       // here, but from now on this is the only thread.
 356
 357       // if any blackholes are alive, make the threads that wait on
 358       // them alive too.
 359       if (traverseBlackholeQueue()) {
 360           inc_running();
 361           continue;
 362       }
 363
 364       // must be last...  invariant is that everything is fully
 365       // scavenged at this point.
 366       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 367           inc_running();
 368           continue;
 369       }
 370
 371       // If we get to here, there's really nothing left to do.
 372       break;
 373   }
 374
 375   shutdown_gc_threads(n_gc_threads);
 376
 377   // Update pointers from the Task list
 378   update_task_list();
 379
 380   // Now see which stable names are still alive.
 381   gcStablePtrTable();
 382
 383 #ifdef PROFILING
 384   // We call processHeapClosureForDead() on every closure destroyed during
 385   // the current garbage collection, so we invoke LdvCensusForDead().
 386   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 387       || RtsFlags.ProfFlags.bioSelector != NULL)
 388     LdvCensusForDead(N);
 389 #endif
 390
 391   // NO MORE EVACUATION AFTER THIS POINT!
 392   // Finally: compaction of the oldest generation.
 393   if (major_gc && oldest_gen->steps[0].is_compacted) {
 394       // save number of blocks for stats
 395       oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks;
 396       compact(gct->scavenged_static_objects);
 397   }
 398
 399   IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
 400
 401   // Two-space collector: free the old to-space.
 402   // g0s0->old_blocks is the old nursery
 403   // g0s0->blocks is to-space from the previous GC
 404   if (RtsFlags.GcFlags.generations == 1) {
 405       if (g0s0->blocks != NULL) {
 406           freeChain(g0s0->blocks);
 407           g0s0->blocks = NULL;
 408       }
 409   }
 410
 411   // For each workspace, in each thread, move the copied blocks to the step
 412   {
 413       gc_thread *thr;
 414       step_workspace *ws;
 415       bdescr *prev, *next;
 416
 417       for (t = 0; t < n_gc_threads; t++) {
 418           thr = gc_threads[t];
 419
 420           // not step 0
 421           if (RtsFlags.GcFlags.generations == 1) {
 422               s = 0;
 423           } else {
 424               s = 1;
 425           }
 426           for (; s < total_steps; s++) {
 427               ws = &thr->steps[s];
 428
 429               // Push the final block
 430               if (ws->todo_bd) {
 431                   push_scanned_block(ws->todo_bd, ws);
 432               }
 433
 434               ASSERT(gct->scan_bd == NULL);
 435               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 436
 437               prev = NULL;
 438               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 439                   ws->step->n_words += bd->free - bd->start;
 440                   prev = bd;
 441               }
 442               if (prev != NULL) {
 443                   prev->link = ws->step->blocks;
 444                   ws->step->blocks = ws->scavd_list;
 445               }
 446               ws->step->n_blocks += ws->n_scavd_blocks;
 447           }
 448       }
 449
 450       // Add all the partial blocks *after* we've added all the full
 451       // blocks.  This is so that we can grab the partial blocks back
 452       // again and try to fill them up in the next GC.
 453       for (t = 0; t < n_gc_threads; t++) {
 454           thr = gc_threads[t];
 455
 456           // not step 0
 457           if (RtsFlags.GcFlags.generations == 1) {
 458               s = 0;
 459           } else {
 460               s = 1;
 461           }
 462           for (; s < total_steps; s++) {
 463               ws = &thr->steps[s];
 464
 465               prev = NULL;
 466               for (bd = ws->part_list; bd != NULL; bd = next) {
 467                   next = bd->link;
 468                   if (bd->free == bd->start) {
 469                       if (prev == NULL) {
 470                           ws->part_list = next;
 471                       } else {
 472                           prev->link = next;
 473                       }
 474                       freeGroup(bd);
 475                       ws->n_part_blocks--;
 476                   } else {
 477                       ws->step->n_words += bd->free - bd->start;
 478                       prev = bd;
 479                   }
 480               }
 481               if (prev != NULL) {
 482                   prev->link = ws->step->blocks;
 483                   ws->step->blocks = ws->part_list;
 484               }
 485               ws->step->n_blocks += ws->n_part_blocks;
 486
 487               ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
 488               ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
 489           }
 490       }
 491   }
 492
 493   /* run through all the generations/steps and tidy up
 494    */
 495   copied = 0;
 496   max_copied = 0;
 497   avg_copied = 0;
 498   {
 499       nat i;
 500       for (i=0; i < n_gc_threads; i++) {
 501           if (n_gc_threads > 1) {
 502               trace(TRACE_gc,"thread %d:", i);
 503               trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 504               trace(TRACE_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 505               trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
 506               trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
 507               trace(TRACE_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 508           }
 509           copied += gc_threads[i]->copied;
 510           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 511       }
 512       if (n_gc_threads == 1) {
 513           max_copied = 0;
 514           avg_copied = 0;
 515       } else {
 516           avg_copied = copied;
 517       }
 518   }
 519
 520   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 521
 522     if (g == N) {
 523       generations[g].collections++; // for stats
 524       if (n_gc_threads > 1) generations[g].par_collections++;
 525     }
 526
 527     // Count the mutable list as bytes "copied" for the purposes of
 528     // stats.  Every mutable list is copied during every GC.
 529     if (g > 0) {
 530         nat mut_list_size = 0;
 531         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 532             mut_list_size += bd->free - bd->start;
 533         }
 534         copied +=  mut_list_size;
 535
 536         debugTrace(DEBUG_gc,
 537                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 538                    (unsigned long)(mut_list_size * sizeof(W_)),
 539                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 540     }
 541
 542     for (s = 0; s < generations[g].n_steps; s++) {
 543       bdescr *next;
 544       stp = &generations[g].steps[s];
 545
 546       // for generations we collected...
 547       if (g <= N) {
 548
 549         /* free old memory and shift to-space into from-space for all
 550          * the collected steps (except the allocation area).  These
 551          * freed blocks will probaby be quickly recycled.
 552          */
 553         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 554             if (stp->is_compacted)
 555             {
 556                 // for a compacted step, just shift the new to-space
 557                 // onto the front of the now-compacted existing blocks.
 558                 for (bd = stp->blocks; bd != NULL; bd = bd->link) {
 559                     stp->n_words += bd->free - bd->start;
 560                 }
 561                 // tack the new blocks on the end of the existing blocks
 562                 if (stp->old_blocks != NULL) {
 563                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 564                         // NB. this step might not be compacted next
 565                         // time, so reset the BF_COMPACTED flags.
 566                         // They are set before GC if we're going to
 567                         // compact.  (search for BF_COMPACTED above).
 568                         bd->flags &= ~BF_COMPACTED;
 569                         next = bd->link;
 570                         if (next == NULL) {
 571                             bd->link = stp->blocks;
 572                         }
 573                     }
 574                     stp->blocks = stp->old_blocks;
 575                 }
 576                 // add the new blocks to the block tally
 577                 stp->n_blocks += stp->n_old_blocks;
 578                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 579                 ASSERT(countOccupied(stp->blocks) == stp->n_words);
 580             }
 581             else // not copacted
 582             {
 583                 freeChain(stp->old_blocks);
 584             }
 585             stp->old_blocks = NULL;
 586             stp->n_old_blocks = 0;
 587         }
 588
 589         /* LARGE OBJECTS.  The current live large objects are chained on
 590          * scavenged_large, having been moved during garbage
 591          * collection from large_objects.  Any objects left on
 592          * large_objects list are therefore dead, so we free them here.
 593          */
 594         for (bd = stp->large_objects; bd != NULL; bd = next) {
 595           next = bd->link;
 596           freeGroup(bd);
 597           bd = next;
 598         }
 599
 600         stp->large_objects  = stp->scavenged_large_objects;
 601         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 602
 603       }
 604       else // for older generations...
 605       {
 606         /* For older generations, we need to append the
 607          * scavenged_large_object list (i.e. large objects that have been
 608          * promoted during this GC) to the large_object list for that step.
 609          */
 610         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 611           next = bd->link;
 612           dbl_link_onto(bd, &stp->large_objects);
 613         }
 614
 615         // add the new blocks we promoted during this GC
 616         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 617       }
 618     }
 619   }
 620
 621   // update the max size of older generations after a major GC
 622   resize_generations();
 623
 624   // Calculate the amount of live data for stats.
 625   live = calcLiveWords();
 626
 627   // Free the small objects allocated via allocate(), since this will
 628   // all have been copied into G0S1 now.
 629   if (RtsFlags.GcFlags.generations > 1) {
 630       if (g0s0->blocks != NULL) {
 631           freeChain(g0s0->blocks);
 632           g0s0->blocks = NULL;
 633       }
 634       g0s0->n_blocks = 0;
 635       g0s0->n_words = 0;
 636   }
 637   alloc_blocks = 0;
 638   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 639
 640   // Start a new pinned_object_block
 641   pinned_object_block = NULL;
 642
 643   // Free the mark stack.
 644   if (mark_stack_bdescr != NULL) {
 645       freeGroup(mark_stack_bdescr);
 646   }
 647
 648   // Free any bitmaps.
 649   for (g = 0; g <= N; g++) {
 650       for (s = 0; s < generations[g].n_steps; s++) {
 651           stp = &generations[g].steps[s];
 652           if (stp->bitmap != NULL) {
 653               freeGroup(stp->bitmap);
 654               stp->bitmap = NULL;
 655           }
 656       }
 657   }
 658
 659   resize_nursery();
 660
 661  // mark the garbage collected CAFs as dead
 662 #if 0 && defined(DEBUG) // doesn't work at the moment
 663   if (major_gc) { gcCAFs(); }
 664 #endif
 665
 666 #ifdef PROFILING
 667   // resetStaticObjectForRetainerProfiling() must be called before
 668   // zeroing below.
 669   if (n_gc_threads > 1) {
 670       barf("profiling is currently broken with multi-threaded GC");
 671       // ToDo: fix the gct->scavenged_static_objects below
 672   }
 673   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 674 #endif
 675
 676   // zero the scavenged static object list
 677   if (major_gc) {
 678       nat i;
 679       for (i = 0; i < n_gc_threads; i++) {
 680           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 681       }
 682   }
 683
 684   // Reset the nursery
 685   resetNurseries();
 686
 687   // start any pending finalizers
 688   RELEASE_SM_LOCK;
 689   scheduleFinalizers(last_free_capability, old_weak_ptr_list);
 690   ACQUIRE_SM_LOCK;
 691
 692   // send exceptions to any threads which were about to die
 693   RELEASE_SM_LOCK;
 694   resurrectThreads(resurrected_threads);
 695   performPendingThrowTos(exception_threads);
 696   ACQUIRE_SM_LOCK;
 697
 698   // Update the stable pointer hash table.
 699   updateStablePtrTable(major_gc);
 700
 701   // check sanity after GC
 702   IF_DEBUG(sanity, checkSanity());
 703
 704   // extra GC trace info
 705   if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
 706
 707 #ifdef DEBUG
 708   // symbol-table based profiling
 709   /*  heapCensus(to_blocks); */ /* ToDo */
 710 #endif
 711
 712   // restore enclosing cost centre
 713 #ifdef PROFILING
 714   CCCS = prev_CCS;
 715 #endif
 716
 717 #ifdef DEBUG
 718   // check for memory leaks if DEBUG is on
 719   memInventory(traceClass(DEBUG_gc));
 720 #endif
 721
 722 #ifdef RTS_GTK_FRONTPANEL
 723   if (RtsFlags.GcFlags.frontpanel) {
 724       updateFrontPanelAfterGC( N, live );
 725   }
 726 #endif
 727
 728   // ok, GC over: tell the stats department what happened.
 729   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 730   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 731
 732 #if defined(RTS_USER_SIGNALS)
 733   if (RtsFlags.MiscFlags.install_signal_handlers) {
 734     // unblock signals again
 735     unblockUserSignals();
 736   }
 737 #endif
 738
 739   RELEASE_SM_LOCK;
 740
 741   gct = saved_gct;
 742 }
 743
 744 /* -----------------------------------------------------------------------------
 745    Figure out which generation to collect, initialise N and major_gc.
 746
 747    Also returns the total number of blocks in generations that will be
 748    collected.
 749    -------------------------------------------------------------------------- */
 750
 751 static nat
 752 initialise_N (rtsBool force_major_gc)
 753 {
 754     int g;
 755     nat s, blocks, blocks_total;
 756
 757     blocks = 0;
 758     blocks_total = 0;
 759
 760     if (force_major_gc) {
 761         N = RtsFlags.GcFlags.generations - 1;
 762     } else {
 763         N = 0;
 764     }
 765
 766     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 767         blocks = 0;
 768         for (s = 0; s < generations[g].n_steps; s++) {
 769             blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
 770             blocks += generations[g].steps[s].n_large_blocks;
 771         }
 772         if (blocks >= generations[g].max_blocks) {
 773             N = stg_max(N,g);
 774         }
 775         if ((nat)g <= N) {
 776             blocks_total += blocks;
 777         }
 778     }
 779
 780     blocks_total += countNurseryBlocks();
 781
 782     major_gc = (N == RtsFlags.GcFlags.generations-1);
 783     return blocks_total;
 784 }
 785
 786 /* -----------------------------------------------------------------------------
 787    Initialise the gc_thread structures.
 788    -------------------------------------------------------------------------- */
 789
 790 static gc_thread *
 791 alloc_gc_thread (int n)
 792 {
 793     nat s;
 794     step_workspace *ws;
 795     gc_thread *t;
 796
 797     t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 798                        "alloc_gc_thread");
 799
 800 #ifdef THREADED_RTS
 801     t->id = 0;
 802     initCondition(&t->wake_cond);
 803     initMutex(&t->wake_mutex);
 804     t->wakeup = rtsTrue;  // starts true, so we can wait for the
 805                           // thread to start up, see wakeup_gc_threads
 806     t->exit   = rtsFalse;
 807 #endif
 808
 809     t->thread_index = n;
 810     t->free_blocks = NULL;
 811     t->gc_count = 0;
 812
 813     init_gc_thread(t);
 814
 815 #ifdef USE_PAPI
 816     t->papi_events = -1;
 817 #endif
 818
 819     for (s = 0; s < total_steps; s++)
 820     {
 821         ws = &t->steps[s];
 822         ws->step = &all_steps[s];
 823         ASSERT(s == ws->step->abs_no);
 824         ws->gct = t;
 825
 826         ws->todo_bd = NULL;
 827         ws->buffer_todo_bd = NULL;
 828
 829         ws->part_list = NULL;
 830         ws->n_part_blocks = 0;
 831
 832         ws->scavd_list = NULL;
 833         ws->n_scavd_blocks = 0;
 834     }
 835
 836     return t;
 837 }
 838
 839
 840 static void
 841 alloc_gc_threads (void)
 842 {
 843     if (gc_threads == NULL) {
 844 #if defined(THREADED_RTS)
 845         nat i;
 846         gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
 847                                      sizeof(gc_thread*),
 848                                      "alloc_gc_threads");
 849
 850         for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
 851             gc_threads[i] = alloc_gc_thread(i);
 852         }
 853 #else
 854         gc_threads = stgMallocBytes (sizeof(gc_thread*),
 855                                      "alloc_gc_threads");
 856
 857         gc_threads[0] = alloc_gc_thread(0);
 858 #endif
 859     }
 860 }
 861
 862 /* ----------------------------------------------------------------------------
 863    Start GC threads
 864    ------------------------------------------------------------------------- */
 865
 866 static nat gc_running_threads;
 867
 868 #if defined(THREADED_RTS)
 869 static Mutex gc_running_mutex;
 870 #endif
 871
 872 static nat
 873 inc_running (void)
 874 {
 875     nat n_running;
 876     ACQUIRE_LOCK(&gc_running_mutex);
 877     n_running = ++gc_running_threads;
 878     RELEASE_LOCK(&gc_running_mutex);
 879     ASSERT(n_running <= n_gc_threads);
 880     return n_running;
 881 }
 882
 883 static nat
 884 dec_running (void)
 885 {
 886     nat n_running;
 887     ACQUIRE_LOCK(&gc_running_mutex);
 888     ASSERT(n_gc_threads != 0);
 889     n_running = --gc_running_threads;
 890     RELEASE_LOCK(&gc_running_mutex);
 891     return n_running;
 892 }
 893
 894 static void
 895 scavenge_until_all_done (void)
 896 {
 897     nat r;
 898
 899     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
 900
 901 loop:
 902     scavenge_loop();
 903     // scavenge_loop() only exits when there's no work to do
 904     r = dec_running();
 905
 906     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
 907                gct->thread_index, r);
 908
 909     while (gc_running_threads != 0) {
 910         usleep(1);
 911         if (any_work()) {
 912             inc_running();
 913             goto loop;
 914         }
 915         // any_work() does not remove the work from the queue, it
 916         // just checks for the presence of work.  If we find any,
 917         // then we increment gc_running_threads and go back to
 918         // scavenge_loop() to perform any pending work.
 919     }
 920
 921     // All threads are now stopped
 922     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
 923 }
 924
 925 #if defined(THREADED_RTS)
 926 //
 927 // gc_thread_work(): Scavenge until there's no work left to do and all
 928 // the running threads are idle.
 929 //
 930 static void
 931 gc_thread_work (void)
 932 {
 933     // gc_running_threads has already been incremented for us; this is
 934     // a worker thread and the main thread bumped gc_running_threads
 935     // before waking us up.
 936
 937     // Every thread evacuates some roots.
 938     gct->evac_step = 0;
 939     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads);
 940
 941     scavenge_until_all_done();
 942 }
 943
 944
 945 static void
 946 gc_thread_mainloop (void)
 947 {
 948     while (!gct->exit) {
 949
 950         // Wait until we're told to wake up
 951         ACQUIRE_LOCK(&gct->wake_mutex);
 952         gct->wakeup = rtsFalse;
 953         while (!gct->wakeup) {
 954             debugTrace(DEBUG_gc, "GC thread %d standing by...",
 955                        gct->thread_index);
 956             waitCondition(&gct->wake_cond, &gct->wake_mutex);
 957         }
 958         RELEASE_LOCK(&gct->wake_mutex);
 959         if (gct->exit) break;
 960
 961 #ifdef USE_PAPI
 962         // start performance counters in this thread...
 963         if (gct->papi_events == -1) {
 964             papi_init_eventset(&gct->papi_events);
 965         }
 966         papi_thread_start_gc1_count(gct->papi_events);
 967 #endif
 968
 969         gc_thread_work();
 970
 971 #ifdef USE_PAPI
 972         // count events in this thread towards the GC totals
 973         papi_thread_stop_gc1_count(gct->papi_events);
 974 #endif
 975     }
 976 }
 977 #endif
 978
 979 #if defined(THREADED_RTS)
 980 static void
 981 gc_thread_entry (gc_thread *my_gct)
 982 {
 983     gct = my_gct;
 984     debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
 985     gct->id = osThreadId();
 986     gc_thread_mainloop();
 987 }
 988 #endif
 989
 990 static void
 991 start_gc_threads (void)
 992 {
 993 #if defined(THREADED_RTS)
 994     nat i;
 995     OSThreadId id;
 996     static rtsBool done = rtsFalse;
 997
 998     gc_running_threads = 0;
 999     initMutex(&gc_running_mutex);
1000
1001     if (!done) {
1002         // Start from 1: the main thread is 0
1003         for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
1004             createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
1005                            gc_threads[i]);
1006         }
1007         done = rtsTrue;
1008     }
1009 #endif
1010 }
1011
1012 static void
1013 wakeup_gc_threads (nat n_threads USED_IF_THREADS)
1014 {
1015 #if defined(THREADED_RTS)
1016     nat i;
1017     for (i=1; i < n_threads; i++) {
1018         inc_running();
1019         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1020         do {
1021             ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1022             if (gc_threads[i]->wakeup) {
1023                 RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1024                 continue;
1025             } else {
1026                 break;
1027             }
1028         } while (1);
1029         gc_threads[i]->wakeup = rtsTrue;
1030         signalCondition(&gc_threads[i]->wake_cond);
1031         RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1032     }
1033 #endif
1034 }
1035
1036 // After GC is complete, we must wait for all GC threads to enter the
1037 // standby state, otherwise they may still be executing inside
1038 // any_work(), and may even remain awake until the next GC starts.
1039 static void
1040 shutdown_gc_threads (nat n_threads USED_IF_THREADS)
1041 {
1042 #if defined(THREADED_RTS)
1043     nat i;
1044     rtsBool wakeup;
1045     for (i=1; i < n_threads; i++) {
1046         do {
1047             ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1048             wakeup = gc_threads[i]->wakeup;
1049             // wakeup is false while the thread is waiting
1050             RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1051         } while (wakeup);
1052     }
1053 #endif
1054 }
1055
1056 /* ----------------------------------------------------------------------------
1057    Initialise a generation that is to be collected
1058    ------------------------------------------------------------------------- */
1059
1060 static void
1061 init_collected_gen (nat g, nat n_threads)
1062 {
1063     nat s, t, i;
1064     step_workspace *ws;
1065     step *stp;
1066     bdescr *bd;
1067
1068     // Throw away the current mutable list.  Invariant: the mutable
1069     // list always has at least one block; this means we can avoid a
1070     // check for NULL in recordMutable().
1071     if (g != 0) {
1072         freeChain(generations[g].mut_list);
1073         generations[g].mut_list = allocBlock();
1074         for (i = 0; i < n_capabilities; i++) {
1075             freeChain(capabilities[i].mut_lists[g]);
1076             capabilities[i].mut_lists[g] = allocBlock();
1077         }
1078     }
1079
1080     for (s = 0; s < generations[g].n_steps; s++) {
1081
1082         stp = &generations[g].steps[s];
1083         ASSERT(stp->gen_no == g);
1084
1085         // we'll construct a new list of threads in this step
1086         // during GC, throw away the current list.
1087         stp->old_threads = stp->threads;
1088         stp->threads = END_TSO_QUEUE;
1089
1090         // generation 0, step 0 doesn't need to-space
1091         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1092             continue;
1093         }
1094
1095         // deprecate the existing blocks
1096         stp->old_blocks   = stp->blocks;
1097         stp->n_old_blocks = stp->n_blocks;
1098         stp->blocks       = NULL;
1099         stp->n_blocks     = 0;
1100         stp->n_words      = 0;
1101
1102         // we don't have any to-be-scavenged blocks yet
1103         stp->todos = NULL;
1104         stp->todos_last = NULL;
1105         stp->n_todos = 0;
1106
1107         // initialise the large object queues.
1108         stp->scavenged_large_objects = NULL;
1109         stp->n_scavenged_large_blocks = 0;
1110
1111         // mark the small objects as from-space
1112         for (bd = stp->old_blocks; bd; bd = bd->link) {
1113             bd->flags &= ~BF_EVACUATED;
1114         }
1115
1116         // mark the large objects as from-space
1117         for (bd = stp->large_objects; bd; bd = bd->link) {
1118             bd->flags &= ~BF_EVACUATED;
1119         }
1120
1121         // for a compacted step, we need to allocate the bitmap
1122         if (stp->is_compacted) {
1123             nat bitmap_size; // in bytes
1124             bdescr *bitmap_bdescr;
1125             StgWord *bitmap;
1126
1127             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1128
1129             if (bitmap_size > 0) {
1130                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1131                                            / BLOCK_SIZE);
1132                 stp->bitmap = bitmap_bdescr;
1133                 bitmap = bitmap_bdescr->start;
1134
1135                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1136                            bitmap_size, bitmap);
1137
1138                 // don't forget to fill it with zeros!
1139                 memset(bitmap, 0, bitmap_size);
1140
1141                 // For each block in this step, point to its bitmap from the
1142                 // block descriptor.
1143                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1144                     bd->u.bitmap = bitmap;
1145                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1146
1147                     // Also at this point we set the BF_COMPACTED flag
1148                     // for this block.  The invariant is that
1149                     // BF_COMPACTED is always unset, except during GC
1150                     // when it is set on those blocks which will be
1151                     // compacted.
1152                     bd->flags |= BF_COMPACTED;
1153                 }
1154             }
1155         }
1156     }
1157
1158     // For each GC thread, for each step, allocate a "todo" block to
1159     // store evacuated objects to be scavenged, and a block to store
1160     // evacuated objects that do not need to be scavenged.
1161     for (t = 0; t < n_threads; t++) {
1162         for (s = 0; s < generations[g].n_steps; s++) {
1163
1164             // we don't copy objects into g0s0, unless -G0
1165             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1166
1167             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1168
1169             ws->todo_large_objects = NULL;
1170
1171             ws->part_list = NULL;
1172             ws->n_part_blocks = 0;
1173
1174             // allocate the first to-space block; extra blocks will be
1175             // chained on as necessary.
1176             ws->todo_bd = NULL;
1177             ws->buffer_todo_bd = NULL;
1178             alloc_todo_block(ws,0);
1179
1180             ws->scavd_list = NULL;
1181             ws->n_scavd_blocks = 0;
1182         }
1183     }
1184 }
1185
1186
1187 /* ----------------------------------------------------------------------------
1188    Initialise a generation that is *not* to be collected
1189    ------------------------------------------------------------------------- */
1190
1191 static void
1192 init_uncollected_gen (nat g, nat threads)
1193 {
1194     nat s, t, i;
1195     step_workspace *ws;
1196     step *stp;
1197     bdescr *bd;
1198
1199     for (s = 0; s < generations[g].n_steps; s++) {
1200         stp = &generations[g].steps[s];
1201         stp->scavenged_large_objects = NULL;
1202         stp->n_scavenged_large_blocks = 0;
1203     }
1204
1205     for (s = 0; s < generations[g].n_steps; s++) {
1206
1207         stp = &generations[g].steps[s];
1208
1209         for (t = 0; t < threads; t++) {
1210             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1211
1212             ws->buffer_todo_bd = NULL;
1213             ws->todo_large_objects = NULL;
1214
1215             ws->part_list = NULL;
1216             ws->n_part_blocks = 0;
1217
1218             ws->scavd_list = NULL;
1219             ws->n_scavd_blocks = 0;
1220
1221             // If the block at the head of the list in this generation
1222             // is less than 3/4 full, then use it as a todo block.
1223             if (stp->blocks && isPartiallyFull(stp->blocks))
1224             {
1225                 ws->todo_bd = stp->blocks;
1226                 ws->todo_free = ws->todo_bd->free;
1227                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1228                 stp->blocks = stp->blocks->link;
1229                 stp->n_blocks -= 1;
1230                 stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1231                 ws->todo_bd->link = NULL;
1232                 // we must scan from the current end point.
1233                 ws->todo_bd->u.scan = ws->todo_bd->free;
1234             }
1235             else
1236             {
1237                 ws->todo_bd = NULL;
1238                 alloc_todo_block(ws,0);
1239             }
1240         }
1241
1242         // deal out any more partial blocks to the threads' part_lists
1243         t = 0;
1244         while (stp->blocks && isPartiallyFull(stp->blocks))
1245         {
1246             bd = stp->blocks;
1247             stp->blocks = bd->link;
1248             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1249             bd->link = ws->part_list;
1250             ws->part_list = bd;
1251             ws->n_part_blocks += 1;
1252             bd->u.scan = bd->free;
1253             stp->n_blocks -= 1;
1254             stp->n_words -= bd->free - bd->start;
1255             t++;
1256             if (t == n_gc_threads) t = 0;
1257         }
1258     }
1259
1260
1261     // Move the private mutable lists from each capability onto the
1262     // main mutable list for the generation.
1263     for (i = 0; i < n_capabilities; i++) {
1264         for (bd = capabilities[i].mut_lists[g];
1265              bd->link != NULL; bd = bd->link) {
1266             /* nothing */
1267         }
1268         bd->link = generations[g].mut_list;
1269         generations[g].mut_list = capabilities[i].mut_lists[g];
1270         capabilities[i].mut_lists[g] = allocBlock();
1271     }
1272 }
1273
1274 /* -----------------------------------------------------------------------------
1275    Initialise a gc_thread before GC
1276    -------------------------------------------------------------------------- */
1277
1278 static void
1279 init_gc_thread (gc_thread *t)
1280 {
1281     t->static_objects = END_OF_STATIC_LIST;
1282     t->scavenged_static_objects = END_OF_STATIC_LIST;
1283     t->scan_bd = NULL;
1284     t->evac_step = 0;
1285     t->failed_to_evac = rtsFalse;
1286     t->eager_promotion = rtsTrue;
1287     t->thunk_selector_depth = 0;
1288     t->copied = 0;
1289     t->scanned = 0;
1290     t->any_work = 0;
1291     t->no_work = 0;
1292     t->scav_find_work = 0;
1293 }
1294
1295 /* -----------------------------------------------------------------------------
1296    Function we pass to evacuate roots.
1297    -------------------------------------------------------------------------- */
1298
1299 static void
1300 mark_root(void *user, StgClosure **root)
1301 {
1302     // we stole a register for gct, but this function is called from
1303     // *outside* the GC where the register variable is not in effect,
1304     // so we need to save and restore it here.  NB. only call
1305     // mark_root() from the main GC thread, otherwise gct will be
1306     // incorrect.
1307     gc_thread *saved_gct;
1308     saved_gct = gct;
1309     gct = user;
1310
1311     evacuate(root);
1312
1313     gct = saved_gct;
1314 }
1315
1316 /* -----------------------------------------------------------------------------
1317    Initialising the static object & mutable lists
1318    -------------------------------------------------------------------------- */
1319
1320 static void
1321 zero_static_object_list(StgClosure* first_static)
1322 {
1323   StgClosure* p;
1324   StgClosure* link;
1325   const StgInfoTable *info;
1326
1327   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1328     info = get_itbl(p);
1329     link = *STATIC_LINK(info, p);
1330     *STATIC_LINK(info,p) = NULL;
1331   }
1332 }
1333
1334 /* ----------------------------------------------------------------------------
1335    Update the pointers from the task list
1336
1337    These are treated as weak pointers because we want to allow a main
1338    thread to get a BlockedOnDeadMVar exception in the same way as any
1339    other thread.  Note that the threads should all have been retained
1340    by GC by virtue of being on the all_threads list, we're just
1341    updating pointers here.
1342    ------------------------------------------------------------------------- */
1343
1344 static void
1345 update_task_list (void)
1346 {
1347     Task *task;
1348     StgTSO *tso;
1349     for (task = all_tasks; task != NULL; task = task->all_link) {
1350         if (!task->stopped && task->tso) {
1351             ASSERT(task->tso->bound == task);
1352             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1353             if (tso == NULL) {
1354                 barf("task %p: main thread %d has been GC'd",
1355 #ifdef THREADED_RTS
1356                      (void *)task->id,
1357 #else
1358                      (void *)task,
1359 #endif
1360                      task->tso->id);
1361             }
1362             task->tso = tso;
1363         }
1364     }
1365 }
1366
1367 /* ----------------------------------------------------------------------------
1368    Reset the sizes of the older generations when we do a major
1369    collection.
1370
1371    CURRENT STRATEGY: make all generations except zero the same size.
1372    We have to stay within the maximum heap size, and leave a certain
1373    percentage of the maximum heap size available to allocate into.
1374    ------------------------------------------------------------------------- */
1375
1376 static void
1377 resize_generations (void)
1378 {
1379     nat g;
1380
1381     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1382         nat live, size, min_alloc;
1383         nat max  = RtsFlags.GcFlags.maxHeapSize;
1384         nat gens = RtsFlags.GcFlags.generations;
1385
1386         // live in the oldest generations
1387         live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+
1388             oldest_gen->steps[0].n_large_blocks;
1389
1390         // default max size for all generations except zero
1391         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1392                        RtsFlags.GcFlags.minOldGenSize);
1393
1394         // minimum size for generation zero
1395         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1396                             RtsFlags.GcFlags.minAllocAreaSize);
1397
1398         // Auto-enable compaction when the residency reaches a
1399         // certain percentage of the maximum heap size (default: 30%).
1400         if (RtsFlags.GcFlags.generations > 1 &&
1401             (RtsFlags.GcFlags.compact ||
1402              (max > 0 &&
1403               oldest_gen->steps[0].n_blocks >
1404               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1405             oldest_gen->steps[0].is_compacted = 1;
1406 //        debugBelch("compaction: on\n", live);
1407         } else {
1408             oldest_gen->steps[0].is_compacted = 0;
1409 //        debugBelch("compaction: off\n", live);
1410         }
1411
1412         // if we're going to go over the maximum heap size, reduce the
1413         // size of the generations accordingly.  The calculation is
1414         // different if compaction is turned on, because we don't need
1415         // to double the space required to collect the old generation.
1416         if (max != 0) {
1417
1418             // this test is necessary to ensure that the calculations
1419             // below don't have any negative results - we're working
1420             // with unsigned values here.
1421             if (max < min_alloc) {
1422                 heapOverflow();
1423             }
1424
1425             if (oldest_gen->steps[0].is_compacted) {
1426                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1427                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1428                 }
1429             } else {
1430                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1431                     size = (max - min_alloc) / ((gens - 1) * 2);
1432                 }
1433             }
1434
1435             if (size < live) {
1436                 heapOverflow();
1437             }
1438         }
1439
1440 #if 0
1441         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1442                    min_alloc, size, max);
1443 #endif
1444
1445         for (g = 0; g < gens; g++) {
1446             generations[g].max_blocks = size;
1447         }
1448     }
1449 }
1450
1451 /* -----------------------------------------------------------------------------
1452    Calculate the new size of the nursery, and resize it.
1453    -------------------------------------------------------------------------- */
1454
1455 static void
1456 resize_nursery (void)
1457 {
1458     if (RtsFlags.GcFlags.generations == 1)
1459     {   // Two-space collector:
1460         nat blocks;
1461
1462         /* set up a new nursery.  Allocate a nursery size based on a
1463          * function of the amount of live data (by default a factor of 2)
1464          * Use the blocks from the old nursery if possible, freeing up any
1465          * left over blocks.
1466          *
1467          * If we get near the maximum heap size, then adjust our nursery
1468          * size accordingly.  If the nursery is the same size as the live
1469          * data (L), then we need 3L bytes.  We can reduce the size of the
1470          * nursery to bring the required memory down near 2L bytes.
1471          *
1472          * A normal 2-space collector would need 4L bytes to give the same
1473          * performance we get from 3L bytes, reducing to the same
1474          * performance at 2L bytes.
1475          */
1476         blocks = g0s0->n_blocks;
1477
1478         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1479              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1480              RtsFlags.GcFlags.maxHeapSize )
1481         {
1482             long adjusted_blocks;  // signed on purpose
1483             int pc_free;
1484
1485             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1486
1487             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1488                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1489
1490             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1491             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1492             {
1493                 heapOverflow();
1494             }
1495             blocks = adjusted_blocks;
1496         }
1497         else
1498         {
1499             blocks *= RtsFlags.GcFlags.oldGenFactor;
1500             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1501             {
1502                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1503             }
1504         }
1505         resizeNurseries(blocks);
1506     }
1507     else  // Generational collector
1508     {
1509         /*
1510          * If the user has given us a suggested heap size, adjust our
1511          * allocation area to make best use of the memory available.
1512          */
1513         if (RtsFlags.GcFlags.heapSizeSuggestion)
1514         {
1515             long blocks;
1516             nat needed = calcNeeded();  // approx blocks needed at next GC
1517
1518             /* Guess how much will be live in generation 0 step 0 next time.
1519              * A good approximation is obtained by finding the
1520              * percentage of g0s0 that was live at the last minor GC.
1521              *
1522              * We have an accurate figure for the amount of copied data in
1523              * 'copied', but we must convert this to a number of blocks, with
1524              * a small adjustment for estimated slop at the end of a block
1525              * (- 10 words).
1526              */
1527             if (N == 0)
1528             {
1529                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1530                     / countNurseryBlocks();
1531             }
1532
1533             /* Estimate a size for the allocation area based on the
1534              * information available.  We might end up going slightly under
1535              * or over the suggested heap size, but we should be pretty
1536              * close on average.
1537              *
1538              * Formula:            suggested - needed
1539              *                ----------------------------
1540              *                    1 + g0s0_pcnt_kept/100
1541              *
1542              * where 'needed' is the amount of memory needed at the next
1543              * collection for collecting all steps except g0s0.
1544              */
1545             blocks =
1546                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1547                 (100 + (long)g0s0_pcnt_kept);
1548
1549             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1550                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1551             }
1552
1553             resizeNurseries((nat)blocks);
1554         }
1555         else
1556         {
1557             // we might have added extra large blocks to the nursery, so
1558             // resize back to minAllocAreaSize again.
1559             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1560         }
1561     }
1562 }
1563
1564 /* -----------------------------------------------------------------------------
1565    Sanity code for CAF garbage collection.
1566
1567    With DEBUG turned on, we manage a CAF list in addition to the SRT
1568    mechanism.  After GC, we run down the CAF list and blackhole any
1569    CAFs which have been garbage collected.  This means we get an error
1570    whenever the program tries to enter a garbage collected CAF.
1571
1572    Any garbage collected CAFs are taken off the CAF list at the same
1573    time.
1574    -------------------------------------------------------------------------- */
1575
1576 #if 0 && defined(DEBUG)
1577
1578 static void
1579 gcCAFs(void)
1580 {
1581   StgClosure*  p;
1582   StgClosure** pp;
1583   const StgInfoTable *info;
1584   nat i;
1585
1586   i = 0;
1587   p = caf_list;
1588   pp = &caf_list;
1589
1590   while (p != NULL) {
1591
1592     info = get_itbl(p);
1593
1594     ASSERT(info->type == IND_STATIC);
1595
1596     if (STATIC_LINK(info,p) == NULL) {
1597         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1598         // black hole it
1599         SET_INFO(p,&stg_BLACKHOLE_info);
1600         p = STATIC_LINK2(info,p);
1601         *pp = p;
1602     }
1603     else {
1604       pp = &STATIC_LINK2(info,p);
1605       p = *pp;
1606       i++;
1607     }
1608
1609   }
1610
1611   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1612 }
1613 #endif