rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2006
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsFlags.h"
  17 #include "RtsUtils.h"
  18 #include "Apply.h"
  19 #include "OSThreads.h"
  20 #include "LdvProfile.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "MBlock.h"
  27 #include "ProfHeap.h"
  28 #include "SchedAPI.h"
  29 #include "Weak.h"
  30 #include "Prelude.h"
  31 #include "ParTicky.h"           // ToDo: move into Rts.h
  32 #include "RtsSignals.h"
  33 #include "STM.h"
  34 #include "HsFFI.h"
  35 #include "Linker.h"
  36 #if defined(RTS_GTK_FRONTPANEL)
  37 #include "FrontPanel.h"
  38 #endif
  39 #include "Trace.h"
  40 #include "RetainerProfile.h"
  41 #include "RaiseAsync.h"
  42 #include "Sparks.h"
  43 #include "Papi.h"
  44
  45 #include "GC.h"
  46 #include "Compact.h"
  47 #include "Evac.h"
  48 #include "Scav.h"
  49 #include "GCUtils.h"
  50 #include "MarkWeak.h"
  51 #include "Sparks.h"
  52
  53 #include <string.h> // for memset()
  54
  55 /* -----------------------------------------------------------------------------
  56    Global variables
  57    -------------------------------------------------------------------------- */
  58
  59 /* STATIC OBJECT LIST.
  60  *
  61  * During GC:
  62  * We maintain a linked list of static objects that are still live.
  63  * The requirements for this list are:
  64  *
  65  *  - we need to scan the list while adding to it, in order to
  66  *    scavenge all the static objects (in the same way that
  67  *    breadth-first scavenging works for dynamic objects).
  68  *
  69  *  - we need to be able to tell whether an object is already on
  70  *    the list, to break loops.
  71  *
  72  * Each static object has a "static link field", which we use for
  73  * linking objects on to the list.  We use a stack-type list, consing
  74  * objects on the front as they are added (this means that the
  75  * scavenge phase is depth-first, not breadth-first, but that
  76  * shouldn't matter).
  77  *
  78  * A separate list is kept for objects that have been scavenged
  79  * already - this is so that we can zero all the marks afterwards.
  80  *
  81  * An object is on the list if its static link field is non-zero; this
  82  * means that we have to mark the end of the list with '1', not NULL.
  83  *
  84  * Extra notes for generational GC:
  85  *
  86  * Each generation has a static object list associated with it.  When
  87  * collecting generations up to N, we treat the static object lists
  88  * from generations > N as roots.
  89  *
  90  * We build up a static object list while collecting generations 0..N,
  91  * which is then appended to the static object list of generation N+1.
  92  */
  93
  94 /* N is the oldest generation being collected, where the generations
  95  * are numbered starting at 0.  A major GC (indicated by the major_gc
  96  * flag) is when we're collecting all generations.  We only attempt to
  97  * deal with static objects and GC CAFs when doing a major GC.
  98  */
  99 nat N;
 100 rtsBool major_gc;
 101
 102 /* Data used for allocation area sizing.
 103  */
 104 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 105
 106 /* Mut-list stats */
 107 #ifdef DEBUG
 108 nat mutlist_MUTVARS,
 109     mutlist_MUTARRS,
 110     mutlist_MVARS,
 111     mutlist_OTHERS;
 112 #endif
 113
 114 /* Thread-local data for each GC thread
 115  */
 116 gc_thread **gc_threads = NULL;
 117 // gc_thread *gct = NULL;  // this thread's gct TODO: make thread-local
 118
 119 // Number of threads running in *this* GC.  Affects how many
 120 // step->todos[] lists we have to look in to find work.
 121 nat n_gc_threads;
 122
 123 // For stats:
 124 long copied;        // *words* copied & scavenged during this GC
 125
 126 #ifdef THREADED_RTS
 127 SpinLock recordMutableGen_sync;
 128 #endif
 129
 130 /* -----------------------------------------------------------------------------
 131    Static function declarations
 132    -------------------------------------------------------------------------- */
 133
 134 static void mark_root               (StgClosure **root);
 135 static void zero_static_object_list (StgClosure* first_static);
 136 static void initialise_N            (rtsBool force_major_gc);
 137 static void alloc_gc_threads        (void);
 138 static void init_collected_gen      (nat g, nat threads);
 139 static void init_uncollected_gen    (nat g, nat threads);
 140 static void init_gc_thread          (gc_thread *t);
 141 static void update_task_list        (void);
 142 static void resize_generations      (void);
 143 static void resize_nursery          (void);
 144 static void start_gc_threads        (void);
 145 static void gc_thread_work          (void);
 146 static nat  inc_running             (void);
 147 static nat  dec_running             (void);
 148 static void wakeup_gc_threads       (nat n_threads);
 149
 150 #if 0 && defined(DEBUG)
 151 static void gcCAFs                  (void);
 152 #endif
 153
 154 /* -----------------------------------------------------------------------------
 155    The mark bitmap & stack.
 156    -------------------------------------------------------------------------- */
 157
 158 #define MARK_STACK_BLOCKS 4
 159
 160 bdescr *mark_stack_bdescr;
 161 StgPtr *mark_stack;
 162 StgPtr *mark_sp;
 163 StgPtr *mark_splim;
 164
 165 // Flag and pointers used for falling back to a linear scan when the
 166 // mark stack overflows.
 167 rtsBool mark_stack_overflowed;
 168 bdescr *oldgen_scan_bd;
 169 StgPtr  oldgen_scan;
 170
 171 /* -----------------------------------------------------------------------------
 172    GarbageCollect: the main entry point to the garbage collector.
 173
 174    Locks held: all capabilities are held throughout GarbageCollect().
 175    -------------------------------------------------------------------------- */
 176
 177 void
 178 GarbageCollect ( rtsBool force_major_gc )
 179 {
 180   bdescr *bd;
 181   step *stp;
 182   lnat live, allocated;
 183   lnat oldgen_saved_blocks = 0;
 184   gc_thread *saved_gct;
 185   nat g, s, t;
 186
 187   // necessary if we stole a callee-saves register for gct:
 188   saved_gct = gct;
 189
 190 #ifdef PROFILING
 191   CostCentreStack *prev_CCS;
 192 #endif
 193
 194   ACQUIRE_SM_LOCK;
 195
 196   debugTrace(DEBUG_gc, "starting GC");
 197
 198 #if defined(RTS_USER_SIGNALS)
 199   if (RtsFlags.MiscFlags.install_signal_handlers) {
 200     // block signals
 201     blockUserSignals();
 202   }
 203 #endif
 204
 205   // tell the stats department that we've started a GC
 206   stat_startGC();
 207
 208   // tell the STM to discard any cached closures it's hoping to re-use
 209   stmPreGCHook();
 210
 211 #ifdef DEBUG
 212   mutlist_MUTVARS = 0;
 213   mutlist_MUTARRS = 0;
 214   mutlist_OTHERS = 0;
 215 #endif
 216
 217   // attribute any costs to CCS_GC
 218 #ifdef PROFILING
 219   prev_CCS = CCCS;
 220   CCCS = CCS_GC;
 221 #endif
 222
 223   /* Approximate how much we allocated.
 224    * Todo: only when generating stats?
 225    */
 226   allocated = calcAllocated();
 227
 228   /* Figure out which generation to collect
 229    */
 230   initialise_N(force_major_gc);
 231
 232   /* Allocate + initialise the gc_thread structures.
 233    */
 234   alloc_gc_threads();
 235
 236   /* Start threads, so they can be spinning up while we finish initialisation.
 237    */
 238   start_gc_threads();
 239
 240   /* How many threads will be participating in this GC?
 241    * We don't try to parallelise minor GC.
 242    */
 243 #if defined(THREADED_RTS)
 244   if (N == 0) {
 245       n_gc_threads = 1;
 246   } else {
 247       n_gc_threads = RtsFlags.ParFlags.gcThreads;
 248   }
 249 #else
 250   n_gc_threads = 1;
 251 #endif
 252
 253 #ifdef RTS_GTK_FRONTPANEL
 254   if (RtsFlags.GcFlags.frontpanel) {
 255       updateFrontPanelBeforeGC(N);
 256   }
 257 #endif
 258
 259 #ifdef DEBUG
 260   // check for memory leaks if DEBUG is on
 261   memInventory(traceClass(DEBUG_gc));
 262 #endif
 263
 264   // check stack sanity *before* GC (ToDo: check all threads)
 265   IF_DEBUG(sanity, checkFreeListSanity());
 266
 267   // Initialise all the generations/steps that we're collecting.
 268   for (g = 0; g <= N; g++) {
 269       init_collected_gen(g,n_gc_threads);
 270   }
 271
 272   // Initialise all the generations/steps that we're *not* collecting.
 273   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 274       init_uncollected_gen(g,n_gc_threads);
 275   }
 276
 277   /* Allocate a mark stack if we're doing a major collection.
 278    */
 279   if (major_gc) {
 280       mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
 281       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 282       mark_sp    = mark_stack;
 283       mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
 284   } else {
 285       mark_stack_bdescr = NULL;
 286   }
 287
 288   // Initialise all our gc_thread structures
 289   for (t = 0; t < n_gc_threads; t++) {
 290       init_gc_thread(gc_threads[t]);
 291   }
 292
 293   // the main thread is running: this prevents any other threads from
 294   // exiting prematurely, so we can start them now.
 295   inc_running();
 296   wakeup_gc_threads(n_gc_threads);
 297
 298   // this is the main thread
 299   gct = gc_threads[0];
 300
 301   /* -----------------------------------------------------------------------
 302    * follow all the roots that we know about:
 303    *   - mutable lists from each generation > N
 304    * we want to *scavenge* these roots, not evacuate them: they're not
 305    * going to move in this GC.
 306    * Also do them in reverse generation order, for the usual reason:
 307    * namely to reduce the likelihood of spurious old->new pointers.
 308    */
 309   {
 310     for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 311       generations[g].saved_mut_list = generations[g].mut_list;
 312       generations[g].mut_list = allocBlock();
 313         // mut_list always has at least one block.
 314     }
 315     for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 316       scavenge_mutable_list(&generations[g]);
 317     }
 318   }
 319
 320   // follow roots from the CAF list (used by GHCi)
 321   gct->evac_step = 0;
 322   markCAFs(mark_root);
 323
 324   // follow all the roots that the application knows about.
 325   gct->evac_step = 0;
 326   GetRoots(mark_root);
 327
 328 #if defined(RTS_USER_SIGNALS)
 329   // mark the signal handlers (signals should be already blocked)
 330   markSignalHandlers(mark_root);
 331 #endif
 332
 333   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 334   markWeakPtrList();
 335   initWeakForGC();
 336
 337   // Mark the stable pointer table.
 338   markStablePtrTable(mark_root);
 339
 340   /* -------------------------------------------------------------------------
 341    * Repeatedly scavenge all the areas we know about until there's no
 342    * more scavenging to be done.
 343    */
 344   for (;;)
 345   {
 346       gc_thread_work();
 347       // The other threads are now stopped.  We might recurse back to
 348       // here, but from now on this is the only thread.
 349
 350       // if any blackholes are alive, make the threads that wait on
 351       // them alive too.
 352       if (traverseBlackholeQueue()) {
 353           inc_running();
 354           continue;
 355       }
 356
 357       // must be last...  invariant is that everything is fully
 358       // scavenged at this point.
 359       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 360           inc_running();
 361           continue;
 362       }
 363
 364       // If we get to here, there's really nothing left to do.
 365       break;
 366   }
 367
 368   // Update pointers from the Task list
 369   update_task_list();
 370
 371   // Now see which stable names are still alive.
 372   gcStablePtrTable();
 373
 374 #ifdef PROFILING
 375   // We call processHeapClosureForDead() on every closure destroyed during
 376   // the current garbage collection, so we invoke LdvCensusForDead().
 377   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 378       || RtsFlags.ProfFlags.bioSelector != NULL)
 379     LdvCensusForDead(N);
 380 #endif
 381
 382   // NO MORE EVACUATION AFTER THIS POINT!
 383   // Finally: compaction of the oldest generation.
 384   if (major_gc && oldest_gen->steps[0].is_compacted) {
 385       // save number of blocks for stats
 386       oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks;
 387       compact();
 388   }
 389
 390   IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
 391
 392   // Two-space collector: free the old to-space.
 393   // g0s0->old_blocks is the old nursery
 394   // g0s0->blocks is to-space from the previous GC
 395   if (RtsFlags.GcFlags.generations == 1) {
 396       if (g0s0->blocks != NULL) {
 397           freeChain(g0s0->blocks);
 398           g0s0->blocks = NULL;
 399       }
 400   }
 401
 402   // For each workspace, in each thread:
 403   //    * clear the BF_EVACUATED flag from each copied block
 404   //    * move the copied blocks to the step
 405   {
 406       gc_thread *thr;
 407       step_workspace *ws;
 408       bdescr *prev;
 409
 410       for (t = 0; t < n_gc_threads; t++) {
 411           thr = gc_threads[t];
 412
 413           // not step 0
 414           for (s = 1; s < total_steps; s++) {
 415               ws = &thr->steps[s];
 416               // Not true?
 417               // ASSERT( ws->scan_bd == ws->todo_bd );
 418               ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
 419
 420               // Push the final block
 421               if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
 422
 423               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 424
 425               prev = ws->scavd_list;
 426               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 427                   bd->flags &= ~BF_EVACUATED;    // now from-space
 428                   prev = bd;
 429               }
 430               prev->link = ws->stp->blocks;
 431               ws->stp->blocks = ws->scavd_list;
 432               ws->stp->n_blocks += ws->n_scavd_blocks;
 433               ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks);
 434           }
 435       }
 436   }
 437
 438   // Two-space collector: swap the semi-spaces around.
 439   // Currently: g0s0->old_blocks is the old nursery
 440   //            g0s0->blocks is to-space from this GC
 441   // We want these the other way around.
 442   if (RtsFlags.GcFlags.generations == 1) {
 443       bdescr *nursery_blocks = g0s0->old_blocks;
 444       nat n_nursery_blocks = g0s0->n_old_blocks;
 445       g0s0->old_blocks = g0s0->blocks;
 446       g0s0->n_old_blocks = g0s0->n_blocks;
 447       g0s0->blocks = nursery_blocks;
 448       g0s0->n_blocks = n_nursery_blocks;
 449   }
 450
 451   /* run through all the generations/steps and tidy up
 452    */
 453   copied = 0;
 454   {
 455       nat i;
 456       for (i=0; i < n_gc_threads; i++) {
 457           if (major_gc) {
 458               trace(TRACE_gc,"thread %d:", i);
 459               trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 460               trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
 461               trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
 462               trace(TRACE_gc,"   scav_global_work %ld", gc_threads[i]->scav_global_work);
 463               trace(TRACE_gc,"   scav_local_work  %ld", gc_threads[i]->scav_local_work);
 464           }
 465           copied += gc_threads[i]->copied;
 466       }
 467   }
 468
 469   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 470
 471     if (g <= N) {
 472       generations[g].collections++; // for stats
 473     }
 474
 475     // Count the mutable list as bytes "copied" for the purposes of
 476     // stats.  Every mutable list is copied during every GC.
 477     if (g > 0) {
 478         nat mut_list_size = 0;
 479         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 480             mut_list_size += bd->free - bd->start;
 481         }
 482         copied +=  mut_list_size;
 483
 484         debugTrace(DEBUG_gc,
 485                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 486                    (unsigned long)(mut_list_size * sizeof(W_)),
 487                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 488     }
 489
 490     for (s = 0; s < generations[g].n_steps; s++) {
 491       bdescr *next;
 492       stp = &generations[g].steps[s];
 493
 494       // for generations we collected...
 495       if (g <= N) {
 496
 497         /* free old memory and shift to-space into from-space for all
 498          * the collected steps (except the allocation area).  These
 499          * freed blocks will probaby be quickly recycled.
 500          */
 501         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 502             if (stp->is_compacted)
 503             {
 504                 // for a compacted step, just shift the new to-space
 505                 // onto the front of the now-compacted existing blocks.
 506                 for (bd = stp->blocks; bd != NULL; bd = bd->link) {
 507                     bd->flags &= ~BF_EVACUATED;  // now from-space
 508                 }
 509                 // tack the new blocks on the end of the existing blocks
 510                 if (stp->old_blocks != NULL) {
 511                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 512                         // NB. this step might not be compacted next
 513                         // time, so reset the BF_COMPACTED flags.
 514                         // They are set before GC if we're going to
 515                         // compact.  (search for BF_COMPACTED above).
 516                         bd->flags &= ~BF_COMPACTED;
 517                         next = bd->link;
 518                         if (next == NULL) {
 519                             bd->link = stp->blocks;
 520                         }
 521                     }
 522                     stp->blocks = stp->old_blocks;
 523                 }
 524                 // add the new blocks to the block tally
 525                 stp->n_blocks += stp->n_old_blocks;
 526                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 527             }
 528             else // not copacted
 529             {
 530                 freeChain(stp->old_blocks);
 531             }
 532             stp->old_blocks = NULL;
 533             stp->n_old_blocks = 0;
 534         }
 535
 536         /* LARGE OBJECTS.  The current live large objects are chained on
 537          * scavenged_large, having been moved during garbage
 538          * collection from large_objects.  Any objects left on
 539          * large_objects list are therefore dead, so we free them here.
 540          */
 541         for (bd = stp->large_objects; bd != NULL; bd = next) {
 542           next = bd->link;
 543           freeGroup(bd);
 544           bd = next;
 545         }
 546
 547         // update the count of blocks used by large objects
 548         for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) {
 549           bd->flags &= ~BF_EVACUATED;
 550         }
 551         stp->large_objects  = stp->scavenged_large_objects;
 552         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 553
 554       }
 555       else // for older generations...
 556       {
 557         /* For older generations, we need to append the
 558          * scavenged_large_object list (i.e. large objects that have been
 559          * promoted during this GC) to the large_object list for that step.
 560          */
 561         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 562           next = bd->link;
 563           bd->flags &= ~BF_EVACUATED;
 564           dbl_link_onto(bd, &stp->large_objects);
 565         }
 566
 567         // add the new blocks we promoted during this GC
 568         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 569       }
 570     }
 571   }
 572
 573   // update the max size of older generations after a major GC
 574   resize_generations();
 575
 576   // Guess the amount of live data for stats.
 577   live = calcLiveBlocks() * BLOCK_SIZE_W;
 578   debugTrace(DEBUG_gc, "Slop: %ldKB",
 579              (live - calcLiveWords()) / (1024/sizeof(W_)));
 580
 581   // Free the small objects allocated via allocate(), since this will
 582   // all have been copied into G0S1 now.
 583   if (RtsFlags.GcFlags.generations > 1) {
 584       if (g0s0->blocks != NULL) {
 585           freeChain(g0s0->blocks);
 586           g0s0->blocks = NULL;
 587       }
 588       g0s0->n_blocks = 0;
 589   }
 590   alloc_blocks = 0;
 591   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 592
 593   // Start a new pinned_object_block
 594   pinned_object_block = NULL;
 595
 596   // Free the mark stack.
 597   if (mark_stack_bdescr != NULL) {
 598       freeGroup(mark_stack_bdescr);
 599   }
 600
 601   // Free any bitmaps.
 602   for (g = 0; g <= N; g++) {
 603       for (s = 0; s < generations[g].n_steps; s++) {
 604           stp = &generations[g].steps[s];
 605           if (stp->bitmap != NULL) {
 606               freeGroup(stp->bitmap);
 607               stp->bitmap = NULL;
 608           }
 609       }
 610   }
 611
 612   resize_nursery();
 613
 614  // mark the garbage collected CAFs as dead
 615 #if 0 && defined(DEBUG) // doesn't work at the moment
 616   if (major_gc) { gcCAFs(); }
 617 #endif
 618
 619 #ifdef PROFILING
 620   // resetStaticObjectForRetainerProfiling() must be called before
 621   // zeroing below.
 622   if (n_gc_threads > 1) {
 623       barf("profiling is currently broken with multi-threaded GC");
 624       // ToDo: fix the gct->scavenged_static_objects below
 625   }
 626   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 627 #endif
 628
 629   // zero the scavenged static object list
 630   if (major_gc) {
 631       nat i;
 632       for (i = 0; i < n_gc_threads; i++) {
 633           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 634       }
 635   }
 636
 637   // Reset the nursery
 638   resetNurseries();
 639
 640   // start any pending finalizers
 641   RELEASE_SM_LOCK;
 642   scheduleFinalizers(last_free_capability, old_weak_ptr_list);
 643   ACQUIRE_SM_LOCK;
 644
 645   // send exceptions to any threads which were about to die
 646   RELEASE_SM_LOCK;
 647   resurrectThreads(resurrected_threads);
 648   ACQUIRE_SM_LOCK;
 649
 650   // Update the stable pointer hash table.
 651   updateStablePtrTable(major_gc);
 652
 653   // check sanity after GC
 654   IF_DEBUG(sanity, checkSanity());
 655
 656   // extra GC trace info
 657   if (traceClass(TRACE_gc)) statDescribeGens();
 658
 659 #ifdef DEBUG
 660   // symbol-table based profiling
 661   /*  heapCensus(to_blocks); */ /* ToDo */
 662 #endif
 663
 664   // restore enclosing cost centre
 665 #ifdef PROFILING
 666   CCCS = prev_CCS;
 667 #endif
 668
 669 #ifdef DEBUG
 670   // check for memory leaks if DEBUG is on
 671   memInventory(traceClass(DEBUG_gc));
 672 #endif
 673
 674 #ifdef RTS_GTK_FRONTPANEL
 675   if (RtsFlags.GcFlags.frontpanel) {
 676       updateFrontPanelAfterGC( N, live );
 677   }
 678 #endif
 679
 680   // ok, GC over: tell the stats department what happened.
 681   stat_endGC(allocated, live, copied, N);
 682
 683 #if defined(RTS_USER_SIGNALS)
 684   if (RtsFlags.MiscFlags.install_signal_handlers) {
 685     // unblock signals again
 686     unblockUserSignals();
 687   }
 688 #endif
 689
 690   RELEASE_SM_LOCK;
 691
 692   gct = saved_gct;
 693 }
 694
 695 /* -----------------------------------------------------------------------------
 696  * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an
 697  * implicit slide i.e. after marking all sparks are at the beginning of the
 698  * spark pool and the spark pool only contains sparkable closures
 699  * -------------------------------------------------------------------------- */
 700
 701 #ifdef THREADED_RTS
 702 static void
 703 markSparkQueue (evac_fn evac, Capability *cap)
 704 {
 705     StgClosure **sparkp, **to_sparkp;
 706     nat n, pruned_sparks; // stats only
 707     StgSparkPool *pool;
 708
 709     PAR_TICKY_MARK_SPARK_QUEUE_START();
 710
 711     n = 0;
 712     pruned_sparks = 0;
 713
 714     pool = &(cap->r.rSparks);
 715
 716     ASSERT_SPARK_POOL_INVARIANTS(pool);
 717
 718 #if defined(PARALLEL_HASKELL)
 719     // stats only
 720     n = 0;
 721     pruned_sparks = 0;
 722 #endif
 723
 724     sparkp = pool->hd;
 725     to_sparkp = pool->hd;
 726     while (sparkp != pool->tl) {
 727         ASSERT(*sparkp!=NULL);
 728         ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp)));
 729         // ToDo?: statistics gathering here (also for GUM!)
 730         if (closure_SHOULD_SPARK(*sparkp)) {
 731             evac(sparkp);
 732             *to_sparkp++ = *sparkp;
 733             if (to_sparkp == pool->lim) {
 734                 to_sparkp = pool->base;
 735             }
 736             n++;
 737         } else {
 738             pruned_sparks++;
 739         }
 740         sparkp++;
 741         if (sparkp == pool->lim) {
 742             sparkp = pool->base;
 743         }
 744     }
 745     pool->tl = to_sparkp;
 746
 747     PAR_TICKY_MARK_SPARK_QUEUE_END(n);
 748
 749 #if defined(PARALLEL_HASKELL)
 750     debugTrace(DEBUG_sched,
 751                "marked %d sparks and pruned %d sparks on [%x]",
 752                n, pruned_sparks, mytid);
 753 #else
 754     debugTrace(DEBUG_sched,
 755                "marked %d sparks and pruned %d sparks",
 756                n, pruned_sparks);
 757 #endif
 758
 759     debugTrace(DEBUG_sched,
 760                "new spark queue len=%d; (hd=%p; tl=%p)\n",
 761                sparkPoolSize(pool), pool->hd, pool->tl);
 762 }
 763 #endif
 764
 765 /* ---------------------------------------------------------------------------
 766    Where are the roots that we know about?
 767
 768         - all the threads on the runnable queue
 769         - all the threads on the blocked queue
 770         - all the threads on the sleeping queue
 771         - all the thread currently executing a _ccall_GC
 772         - all the "main threads"
 773
 774    ------------------------------------------------------------------------ */
 775
 776 void
 777 GetRoots( evac_fn evac )
 778 {
 779     nat i;
 780     Capability *cap;
 781     Task *task;
 782
 783     // Each GC thread is responsible for following roots from the
 784     // Capability of the same number.  There will usually be the same
 785     // or fewer Capabilities as GC threads, but just in case there
 786     // are more, we mark every Capability whose number is the GC
 787     // thread's index plus a multiple of the number of GC threads.
 788     for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) {
 789         cap = &capabilities[i];
 790         evac((StgClosure **)(void *)&cap->run_queue_hd);
 791         evac((StgClosure **)(void *)&cap->run_queue_tl);
 792 #if defined(THREADED_RTS)
 793         evac((StgClosure **)(void *)&cap->wakeup_queue_hd);
 794         evac((StgClosure **)(void *)&cap->wakeup_queue_tl);
 795 #endif
 796         for (task = cap->suspended_ccalling_tasks; task != NULL;
 797              task=task->next) {
 798             debugTrace(DEBUG_sched,
 799                        "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id);
 800             evac((StgClosure **)(void *)&task->suspended_tso);
 801         }
 802
 803 #if defined(THREADED_RTS)
 804         markSparkQueue(evac,cap);
 805 #endif
 806     }
 807
 808 #if !defined(THREADED_RTS)
 809     evac((StgClosure **)(void *)&blocked_queue_hd);
 810     evac((StgClosure **)(void *)&blocked_queue_tl);
 811     evac((StgClosure **)(void *)&sleeping_queue);
 812 #endif
 813 }
 814
 815 /* -----------------------------------------------------------------------------
 816    isAlive determines whether the given closure is still alive (after
 817    a garbage collection) or not.  It returns the new address of the
 818    closure if it is alive, or NULL otherwise.
 819
 820    NOTE: Use it before compaction only!
 821          It untags and (if needed) retags pointers to closures.
 822    -------------------------------------------------------------------------- */
 823
 824
 825 StgClosure *
 826 isAlive(StgClosure *p)
 827 {
 828   const StgInfoTable *info;
 829   bdescr *bd;
 830   StgWord tag;
 831   StgClosure *q;
 832
 833   while (1) {
 834     /* The tag and the pointer are split, to be merged later when needed. */
 835     tag = GET_CLOSURE_TAG(p);
 836     q = UNTAG_CLOSURE(p);
 837
 838     ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
 839     info = get_itbl(q);
 840
 841     // ignore static closures
 842     //
 843     // ToDo: for static closures, check the static link field.
 844     // Problem here is that we sometimes don't set the link field, eg.
 845     // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.
 846     //
 847     if (!HEAP_ALLOCED(q)) {
 848         return p;
 849     }
 850
 851     // ignore closures in generations that we're not collecting.
 852     bd = Bdescr((P_)q);
 853     if (bd->gen_no > N) {
 854         return p;
 855     }
 856
 857     // if it's a pointer into to-space, then we're done
 858     if (bd->flags & BF_EVACUATED) {
 859         return p;
 860     }
 861
 862     // large objects use the evacuated flag
 863     if (bd->flags & BF_LARGE) {
 864         return NULL;
 865     }
 866
 867     // check the mark bit for compacted steps
 868     if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) {
 869         return p;
 870     }
 871
 872     switch (info->type) {
 873
 874     case IND:
 875     case IND_STATIC:
 876     case IND_PERM:
 877     case IND_OLDGEN:            // rely on compatible layout with StgInd
 878     case IND_OLDGEN_PERM:
 879       // follow indirections
 880       p = ((StgInd *)q)->indirectee;
 881       continue;
 882
 883     case EVACUATED:
 884       // alive!
 885       return ((StgEvacuated *)q)->evacuee;
 886
 887     case TSO:
 888       if (((StgTSO *)q)->what_next == ThreadRelocated) {
 889         p = (StgClosure *)((StgTSO *)q)->link;
 890         continue;
 891       }
 892       return NULL;
 893
 894     default:
 895       // dead.
 896       return NULL;
 897     }
 898   }
 899 }
 900
 901 /* -----------------------------------------------------------------------------
 902    Figure out which generation to collect, initialise N and major_gc.
 903    -------------------------------------------------------------------------- */
 904
 905 static void
 906 initialise_N (rtsBool force_major_gc)
 907 {
 908     nat g;
 909
 910     if (force_major_gc) {
 911         N = RtsFlags.GcFlags.generations - 1;
 912         major_gc = rtsTrue;
 913     } else {
 914         N = 0;
 915         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 916             if (generations[g].steps[0].n_blocks +
 917                 generations[g].steps[0].n_large_blocks
 918                 >= generations[g].max_blocks) {
 919                 N = g;
 920             }
 921         }
 922         major_gc = (N == RtsFlags.GcFlags.generations-1);
 923     }
 924 }
 925
 926 /* -----------------------------------------------------------------------------
 927    Initialise the gc_thread structures.
 928    -------------------------------------------------------------------------- */
 929
 930 static gc_thread *
 931 alloc_gc_thread (int n)
 932 {
 933     nat s;
 934     step_workspace *ws;
 935     gc_thread *t;
 936
 937     t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 938                        "alloc_gc_thread");
 939
 940 #ifdef THREADED_RTS
 941     t->id = 0;
 942     initCondition(&t->wake_cond);
 943     initMutex(&t->wake_mutex);
 944     t->wakeup = rtsFalse;
 945     t->exit   = rtsFalse;
 946 #endif
 947
 948     t->thread_index = n;
 949     t->free_blocks = NULL;
 950     t->gc_count = 0;
 951
 952     init_gc_thread(t);
 953
 954 #ifdef USE_PAPI
 955     t->papi_events = -1;
 956 #endif
 957
 958     for (s = 0; s < total_steps; s++)
 959     {
 960         ws = &t->steps[s];
 961         ws->stp = &all_steps[s];
 962         ASSERT(s == ws->stp->abs_no);
 963         ws->gct = t;
 964
 965         ws->scan_bd = NULL;
 966         ws->scan = NULL;
 967
 968         ws->todo_bd = NULL;
 969         ws->buffer_todo_bd = NULL;
 970
 971         ws->scavd_list = NULL;
 972         ws->n_scavd_blocks = 0;
 973     }
 974
 975     return t;
 976 }
 977
 978
 979 static void
 980 alloc_gc_threads (void)
 981 {
 982     if (gc_threads == NULL) {
 983 #if defined(THREADED_RTS)
 984         nat i;
 985         gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
 986                                      sizeof(gc_thread*),
 987                                      "alloc_gc_threads");
 988
 989         for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
 990             gc_threads[i] = alloc_gc_thread(i);
 991         }
 992 #else
 993         gc_threads = stgMallocBytes (sizeof(gc_thread*),
 994                                      "alloc_gc_threads");
 995
 996         gc_threads[0] = alloc_gc_thread(0);
 997 #endif
 998     }
 999 }
1000
1001 /* ----------------------------------------------------------------------------
1002    Start GC threads
1003    ------------------------------------------------------------------------- */
1004
1005 static nat gc_running_threads;
1006
1007 #if defined(THREADED_RTS)
1008 static Mutex gc_running_mutex;
1009 #endif
1010
1011 static nat
1012 inc_running (void)
1013 {
1014     nat n_running;
1015     ACQUIRE_LOCK(&gc_running_mutex);
1016     n_running = ++gc_running_threads;
1017     RELEASE_LOCK(&gc_running_mutex);
1018     return n_running;
1019 }
1020
1021 static nat
1022 dec_running (void)
1023 {
1024     nat n_running;
1025     ACQUIRE_LOCK(&gc_running_mutex);
1026     n_running = --gc_running_threads;
1027     RELEASE_LOCK(&gc_running_mutex);
1028     return n_running;
1029 }
1030
1031 //
1032 // gc_thread_work(): Scavenge until there's no work left to do and all
1033 // the running threads are idle.
1034 //
1035 static void
1036 gc_thread_work (void)
1037 {
1038     nat r;
1039
1040     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
1041
1042     // gc_running_threads has already been incremented for us; either
1043     // this is the main thread and we incremented it inside
1044     // GarbageCollect(), or this is a worker thread and the main
1045     // thread bumped gc_running_threads before waking us up.
1046
1047     // Every thread evacuates some roots.
1048     gct->evac_step = 0;
1049     GetRoots(mark_root);
1050
1051 loop:
1052     scavenge_loop();
1053     // scavenge_loop() only exits when there's no work to do
1054     r = dec_running();
1055
1056     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
1057                gct->thread_index, r);
1058
1059     while (gc_running_threads != 0) {
1060         if (any_work()) {
1061             inc_running();
1062             goto loop;
1063         }
1064         // any_work() does not remove the work from the queue, it
1065         // just checks for the presence of work.  If we find any,
1066         // then we increment gc_running_threads and go back to
1067         // scavenge_loop() to perform any pending work.
1068     }
1069
1070     // All threads are now stopped
1071     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
1072 }
1073
1074
1075 #if defined(THREADED_RTS)
1076 static void
1077 gc_thread_mainloop (void)
1078 {
1079     while (!gct->exit) {
1080
1081         // Wait until we're told to wake up
1082         ACQUIRE_LOCK(&gct->wake_mutex);
1083         while (!gct->wakeup) {
1084             debugTrace(DEBUG_gc, "GC thread %d standing by...",
1085                        gct->thread_index);
1086             waitCondition(&gct->wake_cond, &gct->wake_mutex);
1087         }
1088         RELEASE_LOCK(&gct->wake_mutex);
1089         gct->wakeup = rtsFalse;
1090         if (gct->exit) break;
1091
1092 #ifdef USE_PAPI
1093         // start performance counters in this thread...
1094         if (gct->papi_events == -1) {
1095             papi_init_eventset(&gct->papi_events);
1096         }
1097         papi_thread_start_gc1_count(gct->papi_events);
1098 #endif
1099
1100         gc_thread_work();
1101
1102 #ifdef USE_PAPI
1103         // count events in this thread towards the GC totals
1104         papi_thread_stop_gc1_count(gct->papi_events);
1105 #endif
1106     }
1107 }
1108 #endif
1109
1110 #if defined(THREADED_RTS)
1111 static void
1112 gc_thread_entry (gc_thread *my_gct)
1113 {
1114     gct = my_gct;
1115     debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
1116     gct->id = osThreadId();
1117     gc_thread_mainloop();
1118 }
1119 #endif
1120
1121 static void
1122 start_gc_threads (void)
1123 {
1124 #if defined(THREADED_RTS)
1125     nat i;
1126     OSThreadId id;
1127     static rtsBool done = rtsFalse;
1128
1129     gc_running_threads = 0;
1130     initMutex(&gc_running_mutex);
1131
1132     if (!done) {
1133         // Start from 1: the main thread is 0
1134         for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
1135             createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
1136                            gc_threads[i]);
1137         }
1138         done = rtsTrue;
1139     }
1140 #endif
1141 }
1142
1143 static void
1144 wakeup_gc_threads (nat n_threads USED_IF_THREADS)
1145 {
1146 #if defined(THREADED_RTS)
1147     nat i;
1148     for (i=1; i < n_threads; i++) {
1149         inc_running();
1150         ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1151         gc_threads[i]->wakeup = rtsTrue;
1152         signalCondition(&gc_threads[i]->wake_cond);
1153         RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1154     }
1155 #endif
1156 }
1157
1158 /* ----------------------------------------------------------------------------
1159    Initialise a generation that is to be collected
1160    ------------------------------------------------------------------------- */
1161
1162 static void
1163 init_collected_gen (nat g, nat n_threads)
1164 {
1165     nat s, t, i;
1166     step_workspace *ws;
1167     step *stp;
1168     bdescr *bd;
1169
1170     // Throw away the current mutable list.  Invariant: the mutable
1171     // list always has at least one block; this means we can avoid a
1172     // check for NULL in recordMutable().
1173     if (g != 0) {
1174         freeChain(generations[g].mut_list);
1175         generations[g].mut_list = allocBlock();
1176         for (i = 0; i < n_capabilities; i++) {
1177             freeChain(capabilities[i].mut_lists[g]);
1178             capabilities[i].mut_lists[g] = allocBlock();
1179         }
1180     }
1181
1182     for (s = 0; s < generations[g].n_steps; s++) {
1183
1184         // generation 0, step 0 doesn't need to-space
1185         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1186             continue;
1187         }
1188
1189         stp = &generations[g].steps[s];
1190         ASSERT(stp->gen_no == g);
1191
1192         // deprecate the existing blocks
1193         stp->old_blocks   = stp->blocks;
1194         stp->n_old_blocks = stp->n_blocks;
1195         stp->blocks       = NULL;
1196         stp->n_blocks     = 0;
1197
1198         // we don't have any to-be-scavenged blocks yet
1199         stp->todos = NULL;
1200         stp->n_todos = 0;
1201
1202         // initialise the large object queues.
1203         stp->scavenged_large_objects = NULL;
1204         stp->n_scavenged_large_blocks = 0;
1205
1206         // mark the large objects as not evacuated yet
1207         for (bd = stp->large_objects; bd; bd = bd->link) {
1208             bd->flags &= ~BF_EVACUATED;
1209         }
1210
1211         // for a compacted step, we need to allocate the bitmap
1212         if (stp->is_compacted) {
1213             nat bitmap_size; // in bytes
1214             bdescr *bitmap_bdescr;
1215             StgWord *bitmap;
1216
1217             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1218
1219             if (bitmap_size > 0) {
1220                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1221                                            / BLOCK_SIZE);
1222                 stp->bitmap = bitmap_bdescr;
1223                 bitmap = bitmap_bdescr->start;
1224
1225                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1226                            bitmap_size, bitmap);
1227
1228                 // don't forget to fill it with zeros!
1229                 memset(bitmap, 0, bitmap_size);
1230
1231                 // For each block in this step, point to its bitmap from the
1232                 // block descriptor.
1233                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1234                     bd->u.bitmap = bitmap;
1235                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1236
1237                     // Also at this point we set the BF_COMPACTED flag
1238                     // for this block.  The invariant is that
1239                     // BF_COMPACTED is always unset, except during GC
1240                     // when it is set on those blocks which will be
1241                     // compacted.
1242                     bd->flags |= BF_COMPACTED;
1243                 }
1244             }
1245         }
1246     }
1247
1248     // For each GC thread, for each step, allocate a "todo" block to
1249     // store evacuated objects to be scavenged, and a block to store
1250     // evacuated objects that do not need to be scavenged.
1251     for (t = 0; t < n_threads; t++) {
1252         for (s = 0; s < generations[g].n_steps; s++) {
1253
1254             // we don't copy objects into g0s0, unless -G0
1255             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1256
1257             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1258
1259             ws->scan_bd = NULL;
1260             ws->scan = NULL;
1261
1262             ws->todo_large_objects = NULL;
1263
1264             // allocate the first to-space block; extra blocks will be
1265             // chained on as necessary.
1266             ws->todo_bd = NULL;
1267             ws->buffer_todo_bd = NULL;
1268             gc_alloc_todo_block(ws);
1269
1270             ws->scavd_list = NULL;
1271             ws->n_scavd_blocks = 0;
1272         }
1273     }
1274 }
1275
1276
1277 /* ----------------------------------------------------------------------------
1278    Initialise a generation that is *not* to be collected
1279    ------------------------------------------------------------------------- */
1280
1281 static void
1282 init_uncollected_gen (nat g, nat threads)
1283 {
1284     nat s, t, i;
1285     step_workspace *ws;
1286     step *stp;
1287     bdescr *bd;
1288
1289     for (s = 0; s < generations[g].n_steps; s++) {
1290         stp = &generations[g].steps[s];
1291         stp->scavenged_large_objects = NULL;
1292         stp->n_scavenged_large_blocks = 0;
1293     }
1294
1295     for (t = 0; t < threads; t++) {
1296         for (s = 0; s < generations[g].n_steps; s++) {
1297
1298             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1299             stp = ws->stp;
1300
1301             ws->buffer_todo_bd = NULL;
1302             ws->todo_large_objects = NULL;
1303
1304             ws->scavd_list = NULL;
1305             ws->n_scavd_blocks = 0;
1306
1307             // If the block at the head of the list in this generation
1308             // is less than 3/4 full, then use it as a todo block.
1309             if (stp->blocks && isPartiallyFull(stp->blocks))
1310             {
1311                 ws->todo_bd = stp->blocks;
1312                 ws->todo_free = ws->todo_bd->free;
1313                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1314                 stp->blocks = stp->blocks->link;
1315                 stp->n_blocks -= 1;
1316                 ws->todo_bd->link = NULL;
1317
1318                 // this block is also the scan block; we must scan
1319                 // from the current end point.
1320                 ws->scan_bd = ws->todo_bd;
1321                 ws->scan = ws->scan_bd->free;
1322
1323                 // subtract the contents of this block from the stats,
1324                 // because we'll count the whole block later.
1325                 copied -= ws->scan_bd->free - ws->scan_bd->start;
1326             }
1327             else
1328             {
1329                 ws->scan_bd = NULL;
1330                 ws->scan = NULL;
1331                 ws->todo_bd = NULL;
1332                 gc_alloc_todo_block(ws);
1333             }
1334         }
1335     }
1336
1337     // Move the private mutable lists from each capability onto the
1338     // main mutable list for the generation.
1339     for (i = 0; i < n_capabilities; i++) {
1340         for (bd = capabilities[i].mut_lists[g];
1341              bd->link != NULL; bd = bd->link) {
1342             /* nothing */
1343         }
1344         bd->link = generations[g].mut_list;
1345         generations[g].mut_list = capabilities[i].mut_lists[g];
1346         capabilities[i].mut_lists[g] = allocBlock();
1347     }
1348 }
1349
1350 /* -----------------------------------------------------------------------------
1351    Initialise a gc_thread before GC
1352    -------------------------------------------------------------------------- */
1353
1354 static void
1355 init_gc_thread (gc_thread *t)
1356 {
1357     t->static_objects = END_OF_STATIC_LIST;
1358     t->scavenged_static_objects = END_OF_STATIC_LIST;
1359     t->evac_step = 0;
1360     t->failed_to_evac = rtsFalse;
1361     t->eager_promotion = rtsTrue;
1362     t->thunk_selector_depth = 0;
1363     t->copied = 0;
1364     t->any_work = 0;
1365     t->no_work = 0;
1366     t->scav_global_work = 0;
1367     t->scav_local_work = 0;
1368
1369 }
1370
1371 /* -----------------------------------------------------------------------------
1372    Function we pass to GetRoots to evacuate roots.
1373    -------------------------------------------------------------------------- */
1374
1375 static void
1376 mark_root(StgClosure **root)
1377 {
1378   evacuate(root);
1379 }
1380
1381 /* -----------------------------------------------------------------------------
1382    Initialising the static object & mutable lists
1383    -------------------------------------------------------------------------- */
1384
1385 static void
1386 zero_static_object_list(StgClosure* first_static)
1387 {
1388   StgClosure* p;
1389   StgClosure* link;
1390   const StgInfoTable *info;
1391
1392   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1393     info = get_itbl(p);
1394     link = *STATIC_LINK(info, p);
1395     *STATIC_LINK(info,p) = NULL;
1396   }
1397 }
1398
1399 /* -----------------------------------------------------------------------------
1400    Reverting CAFs
1401    -------------------------------------------------------------------------- */
1402
1403 void
1404 revertCAFs( void )
1405 {
1406     StgIndStatic *c;
1407
1408     for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
1409          c = (StgIndStatic *)c->static_link)
1410     {
1411         SET_INFO(c, c->saved_info);
1412         c->saved_info = NULL;
1413         // could, but not necessary: c->static_link = NULL;
1414     }
1415     revertible_caf_list = NULL;
1416 }
1417
1418 void
1419 markCAFs( evac_fn evac )
1420 {
1421     StgIndStatic *c;
1422
1423     for (c = (StgIndStatic *)caf_list; c != NULL;
1424          c = (StgIndStatic *)c->static_link)
1425     {
1426         evac(&c->indirectee);
1427     }
1428     for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
1429          c = (StgIndStatic *)c->static_link)
1430     {
1431         evac(&c->indirectee);
1432     }
1433 }
1434
1435 /* ----------------------------------------------------------------------------
1436    Update the pointers from the task list
1437
1438    These are treated as weak pointers because we want to allow a main
1439    thread to get a BlockedOnDeadMVar exception in the same way as any
1440    other thread.  Note that the threads should all have been retained
1441    by GC by virtue of being on the all_threads list, we're just
1442    updating pointers here.
1443    ------------------------------------------------------------------------- */
1444
1445 static void
1446 update_task_list (void)
1447 {
1448     Task *task;
1449     StgTSO *tso;
1450     for (task = all_tasks; task != NULL; task = task->all_link) {
1451         if (!task->stopped && task->tso) {
1452             ASSERT(task->tso->bound == task);
1453             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1454             if (tso == NULL) {
1455                 barf("task %p: main thread %d has been GC'd",
1456 #ifdef THREADED_RTS
1457                      (void *)task->id,
1458 #else
1459                      (void *)task,
1460 #endif
1461                      task->tso->id);
1462             }
1463             task->tso = tso;
1464         }
1465     }
1466 }
1467
1468 /* ----------------------------------------------------------------------------
1469    Reset the sizes of the older generations when we do a major
1470    collection.
1471
1472    CURRENT STRATEGY: make all generations except zero the same size.
1473    We have to stay within the maximum heap size, and leave a certain
1474    percentage of the maximum heap size available to allocate into.
1475    ------------------------------------------------------------------------- */
1476
1477 static void
1478 resize_generations (void)
1479 {
1480     nat g;
1481
1482     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1483         nat live, size, min_alloc;
1484         nat max  = RtsFlags.GcFlags.maxHeapSize;
1485         nat gens = RtsFlags.GcFlags.generations;
1486
1487         // live in the oldest generations
1488         live = oldest_gen->steps[0].n_blocks +
1489             oldest_gen->steps[0].n_large_blocks;
1490
1491         // default max size for all generations except zero
1492         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1493                        RtsFlags.GcFlags.minOldGenSize);
1494
1495         // minimum size for generation zero
1496         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1497                             RtsFlags.GcFlags.minAllocAreaSize);
1498
1499         // Auto-enable compaction when the residency reaches a
1500         // certain percentage of the maximum heap size (default: 30%).
1501         if (RtsFlags.GcFlags.generations > 1 &&
1502             (RtsFlags.GcFlags.compact ||
1503              (max > 0 &&
1504               oldest_gen->steps[0].n_blocks >
1505               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1506             oldest_gen->steps[0].is_compacted = 1;
1507 //        debugBelch("compaction: on\n", live);
1508         } else {
1509             oldest_gen->steps[0].is_compacted = 0;
1510 //        debugBelch("compaction: off\n", live);
1511         }
1512
1513         // if we're going to go over the maximum heap size, reduce the
1514         // size of the generations accordingly.  The calculation is
1515         // different if compaction is turned on, because we don't need
1516         // to double the space required to collect the old generation.
1517         if (max != 0) {
1518
1519             // this test is necessary to ensure that the calculations
1520             // below don't have any negative results - we're working
1521             // with unsigned values here.
1522             if (max < min_alloc) {
1523                 heapOverflow();
1524             }
1525
1526             if (oldest_gen->steps[0].is_compacted) {
1527                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1528                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1529                 }
1530             } else {
1531                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1532                     size = (max - min_alloc) / ((gens - 1) * 2);
1533                 }
1534             }
1535
1536             if (size < live) {
1537                 heapOverflow();
1538             }
1539         }
1540
1541 #if 0
1542         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1543                    min_alloc, size, max);
1544 #endif
1545
1546         for (g = 0; g < gens; g++) {
1547             generations[g].max_blocks = size;
1548         }
1549     }
1550 }
1551
1552 /* -----------------------------------------------------------------------------
1553    Calculate the new size of the nursery, and resize it.
1554    -------------------------------------------------------------------------- */
1555
1556 static void
1557 resize_nursery (void)
1558 {
1559     if (RtsFlags.GcFlags.generations == 1)
1560     {   // Two-space collector:
1561         nat blocks;
1562
1563         /* set up a new nursery.  Allocate a nursery size based on a
1564          * function of the amount of live data (by default a factor of 2)
1565          * Use the blocks from the old nursery if possible, freeing up any
1566          * left over blocks.
1567          *
1568          * If we get near the maximum heap size, then adjust our nursery
1569          * size accordingly.  If the nursery is the same size as the live
1570          * data (L), then we need 3L bytes.  We can reduce the size of the
1571          * nursery to bring the required memory down near 2L bytes.
1572          *
1573          * A normal 2-space collector would need 4L bytes to give the same
1574          * performance we get from 3L bytes, reducing to the same
1575          * performance at 2L bytes.
1576          */
1577         blocks = g0s0->n_old_blocks;
1578
1579         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1580              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1581              RtsFlags.GcFlags.maxHeapSize )
1582         {
1583             long adjusted_blocks;  // signed on purpose
1584             int pc_free;
1585
1586             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1587
1588             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1589                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1590
1591             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1592             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1593             {
1594                 heapOverflow();
1595             }
1596             blocks = adjusted_blocks;
1597         }
1598         else
1599         {
1600             blocks *= RtsFlags.GcFlags.oldGenFactor;
1601             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1602             {
1603                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1604             }
1605         }
1606         resizeNurseries(blocks);
1607     }
1608     else  // Generational collector
1609     {
1610         /*
1611          * If the user has given us a suggested heap size, adjust our
1612          * allocation area to make best use of the memory available.
1613          */
1614         if (RtsFlags.GcFlags.heapSizeSuggestion)
1615         {
1616             long blocks;
1617             nat needed = calcNeeded();  // approx blocks needed at next GC
1618
1619             /* Guess how much will be live in generation 0 step 0 next time.
1620              * A good approximation is obtained by finding the
1621              * percentage of g0s0 that was live at the last minor GC.
1622              *
1623              * We have an accurate figure for the amount of copied data in
1624              * 'copied', but we must convert this to a number of blocks, with
1625              * a small adjustment for estimated slop at the end of a block
1626              * (- 10 words).
1627              */
1628             if (N == 0)
1629             {
1630                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1631                     / countNurseryBlocks();
1632             }
1633
1634             /* Estimate a size for the allocation area based on the
1635              * information available.  We might end up going slightly under
1636              * or over the suggested heap size, but we should be pretty
1637              * close on average.
1638              *
1639              * Formula:            suggested - needed
1640              *                ----------------------------
1641              *                    1 + g0s0_pcnt_kept/100
1642              *
1643              * where 'needed' is the amount of memory needed at the next
1644              * collection for collecting all steps except g0s0.
1645              */
1646             blocks =
1647                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1648                 (100 + (long)g0s0_pcnt_kept);
1649
1650             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1651                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1652             }
1653
1654             resizeNurseries((nat)blocks);
1655         }
1656         else
1657         {
1658             // we might have added extra large blocks to the nursery, so
1659             // resize back to minAllocAreaSize again.
1660             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1661         }
1662     }
1663 }
1664
1665 /* -----------------------------------------------------------------------------
1666    Sanity code for CAF garbage collection.
1667
1668    With DEBUG turned on, we manage a CAF list in addition to the SRT
1669    mechanism.  After GC, we run down the CAF list and blackhole any
1670    CAFs which have been garbage collected.  This means we get an error
1671    whenever the program tries to enter a garbage collected CAF.
1672
1673    Any garbage collected CAFs are taken off the CAF list at the same
1674    time.
1675    -------------------------------------------------------------------------- */
1676
1677 #if 0 && defined(DEBUG)
1678
1679 static void
1680 gcCAFs(void)
1681 {
1682   StgClosure*  p;
1683   StgClosure** pp;
1684   const StgInfoTable *info;
1685   nat i;
1686
1687   i = 0;
1688   p = caf_list;
1689   pp = &caf_list;
1690
1691   while (p != NULL) {
1692
1693     info = get_itbl(p);
1694
1695     ASSERT(info->type == IND_STATIC);
1696
1697     if (STATIC_LINK(info,p) == NULL) {
1698         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1699         // black hole it
1700         SET_INFO(p,&stg_BLACKHOLE_info);
1701         p = STATIC_LINK2(info,p);
1702         *pp = p;
1703     }
1704     else {
1705       pp = &STATIC_LINK2(info,p);
1706       p = *pp;
1707       i++;
1708     }
1709
1710   }
1711
1712   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1713 }
1714 #endif