rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 // #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsFlags.h"
  17 #include "RtsUtils.h"
  18 #include "Apply.h"
  19 #include "OSThreads.h"
  20 #include "LdvProfile.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "MBlock.h"
  27 #include "ProfHeap.h"
  28 #include "SchedAPI.h"
  29 #include "Weak.h"
  30 #include "Prelude.h"
  31 #include "ParTicky.h"           // ToDo: move into Rts.h
  32 #include "RtsSignals.h"
  33 #include "STM.h"
  34 #include "HsFFI.h"
  35 #include "Linker.h"
  36 #if defined(RTS_GTK_FRONTPANEL)
  37 #include "FrontPanel.h"
  38 #endif
  39 #include "Trace.h"
  40 #include "RetainerProfile.h"
  41 #include "RaiseAsync.h"
  42 #include "Papi.h"
  43
  44 #include "GC.h"
  45 #include "GCThread.h"
  46 #include "Compact.h"
  47 #include "Evac.h"
  48 #include "Scav.h"
  49 #include "GCUtils.h"
  50 #include "MarkWeak.h"
  51 #include "Sparks.h"
  52 #include "Sweep.h"
  53
  54 #include <string.h> // for memset()
  55 #include <unistd.h>
  56
  57 /* -----------------------------------------------------------------------------
  58    Global variables
  59    -------------------------------------------------------------------------- */
  60
  61 /* STATIC OBJECT LIST.
  62  *
  63  * During GC:
  64  * We maintain a linked list of static objects that are still live.
  65  * The requirements for this list are:
  66  *
  67  *  - we need to scan the list while adding to it, in order to
  68  *    scavenge all the static objects (in the same way that
  69  *    breadth-first scavenging works for dynamic objects).
  70  *
  71  *  - we need to be able to tell whether an object is already on
  72  *    the list, to break loops.
  73  *
  74  * Each static object has a "static link field", which we use for
  75  * linking objects on to the list.  We use a stack-type list, consing
  76  * objects on the front as they are added (this means that the
  77  * scavenge phase is depth-first, not breadth-first, but that
  78  * shouldn't matter).
  79  *
  80  * A separate list is kept for objects that have been scavenged
  81  * already - this is so that we can zero all the marks afterwards.
  82  *
  83  * An object is on the list if its static link field is non-zero; this
  84  * means that we have to mark the end of the list with '1', not NULL.
  85  *
  86  * Extra notes for generational GC:
  87  *
  88  * Each generation has a static object list associated with it.  When
  89  * collecting generations up to N, we treat the static object lists
  90  * from generations > N as roots.
  91  *
  92  * We build up a static object list while collecting generations 0..N,
  93  * which is then appended to the static object list of generation N+1.
  94  */
  95
  96 /* N is the oldest generation being collected, where the generations
  97  * are numbered starting at 0.  A major GC (indicated by the major_gc
  98  * flag) is when we're collecting all generations.  We only attempt to
  99  * deal with static objects and GC CAFs when doing a major GC.
 100  */
 101 nat N;
 102 rtsBool major_gc;
 103
 104 /* Data used for allocation area sizing.
 105  */
 106 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 107
 108 /* Mut-list stats */
 109 #ifdef DEBUG
 110 nat mutlist_MUTVARS,
 111     mutlist_MUTARRS,
 112     mutlist_MVARS,
 113     mutlist_OTHERS;
 114 #endif
 115
 116 /* Thread-local data for each GC thread
 117  */
 118 gc_thread **gc_threads = NULL;
 119
 120 #if !defined(THREADED_RTS)
 121 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)];
 122 #endif
 123
 124 // Number of threads running in *this* GC.  Affects how many
 125 // step->todos[] lists we have to look in to find work.
 126 nat n_gc_threads;
 127
 128 // For stats:
 129 long copied;        // *words* copied & scavenged during this GC
 130
 131 rtsBool work_stealing;
 132
 133 DECLARE_GCT
 134
 135 /* -----------------------------------------------------------------------------
 136    Static function declarations
 137    -------------------------------------------------------------------------- */
 138
 139 static void mark_root               (void *user, StgClosure **root);
 140 static void zero_static_object_list (StgClosure* first_static);
 141 static nat  initialise_N            (rtsBool force_major_gc);
 142 static void init_collected_gen      (nat g, nat threads);
 143 static void init_uncollected_gen    (nat g, nat threads);
 144 static void init_gc_thread          (gc_thread *t);
 145 static void update_task_list        (void);
 146 static void resize_generations      (void);
 147 static void resize_nursery          (void);
 148 static void start_gc_threads        (void);
 149 static void scavenge_until_all_done (void);
 150 static StgWord inc_running          (void);
 151 static StgWord dec_running          (void);
 152 static void wakeup_gc_threads       (nat n_threads, nat me);
 153 static void shutdown_gc_threads     (nat n_threads, nat me);
 154
 155 #if 0 && defined(DEBUG)
 156 static void gcCAFs                  (void);
 157 #endif
 158
 159 /* -----------------------------------------------------------------------------
 160    The mark bitmap & stack.
 161    -------------------------------------------------------------------------- */
 162
 163 #define MARK_STACK_BLOCKS 4
 164
 165 bdescr *mark_stack_bdescr;
 166 StgPtr *mark_stack;
 167 StgPtr *mark_sp;
 168 StgPtr *mark_splim;
 169
 170 // Flag and pointers used for falling back to a linear scan when the
 171 // mark stack overflows.
 172 rtsBool mark_stack_overflowed;
 173 bdescr *oldgen_scan_bd;
 174 StgPtr  oldgen_scan;
 175
 176 /* -----------------------------------------------------------------------------
 177    GarbageCollect: the main entry point to the garbage collector.
 178
 179    Locks held: all capabilities are held throughout GarbageCollect().
 180    -------------------------------------------------------------------------- */
 181
 182 void
 183 GarbageCollect (rtsBool force_major_gc,
 184                 nat gc_type USED_IF_THREADS,
 185                 Capability *cap)
 186 {
 187   bdescr *bd;
 188   step *stp;
 189   lnat live, allocated, max_copied, avg_copied, slop;
 190   gc_thread *saved_gct;
 191   nat g, s, t, n;
 192
 193   // necessary if we stole a callee-saves register for gct:
 194   saved_gct = gct;
 195
 196 #ifdef PROFILING
 197   CostCentreStack *prev_CCS;
 198 #endif
 199
 200   ACQUIRE_SM_LOCK;
 201
 202 #if defined(RTS_USER_SIGNALS)
 203   if (RtsFlags.MiscFlags.install_signal_handlers) {
 204     // block signals
 205     blockUserSignals();
 206   }
 207 #endif
 208
 209   ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
 210   // otherwise adjust the padding in step_workspace.
 211
 212   // tell the stats department that we've started a GC
 213   stat_startGC();
 214
 215   // tell the STM to discard any cached closures it's hoping to re-use
 216   stmPreGCHook();
 217
 218   // lock the StablePtr table
 219   stablePtrPreGC();
 220
 221 #ifdef DEBUG
 222   mutlist_MUTVARS = 0;
 223   mutlist_MUTARRS = 0;
 224   mutlist_OTHERS = 0;
 225 #endif
 226
 227   // attribute any costs to CCS_GC
 228 #ifdef PROFILING
 229   prev_CCS = CCCS;
 230   CCCS = CCS_GC;
 231 #endif
 232
 233   /* Approximate how much we allocated.
 234    * Todo: only when generating stats?
 235    */
 236   allocated = calcAllocated();
 237
 238   /* Figure out which generation to collect
 239    */
 240   n = initialise_N(force_major_gc);
 241
 242 #if defined(THREADED_RTS)
 243   work_stealing = RtsFlags.ParFlags.parGcLoadBalancing;
 244       // It's not always a good idea to do load balancing in parallel
 245       // GC.  In particular, for a parallel program we don't want to
 246       // lose locality by moving cached data into another CPU's cache
 247       // (this effect can be quite significant).
 248       //
 249       // We could have a more complex way to deterimine whether to do
 250       // work stealing or not, e.g. it might be a good idea to do it
 251       // if the heap is big.  For now, we just turn it on or off with
 252       // a flag.
 253 #endif
 254
 255   /* Start threads, so they can be spinning up while we finish initialisation.
 256    */
 257   start_gc_threads();
 258
 259 #if defined(THREADED_RTS)
 260   /* How many threads will be participating in this GC?
 261    * We don't try to parallelise minor GCs (unless the user asks for
 262    * it with +RTS -gn0), or mark/compact/sweep GC.
 263    */
 264   if (gc_type == PENDING_GC_PAR) {
 265       n_gc_threads = RtsFlags.ParFlags.nNodes;
 266   } else {
 267       n_gc_threads = 1;
 268   }
 269 #else
 270   n_gc_threads = 1;
 271 #endif
 272
 273   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 274         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 275
 276 #ifdef RTS_GTK_FRONTPANEL
 277   if (RtsFlags.GcFlags.frontpanel) {
 278       updateFrontPanelBeforeGC(N);
 279   }
 280 #endif
 281
 282 #ifdef DEBUG
 283   // check for memory leaks if DEBUG is on
 284   memInventory(traceClass(DEBUG_gc));
 285 #endif
 286
 287   // check stack sanity *before* GC
 288   IF_DEBUG(sanity, checkFreeListSanity());
 289   IF_DEBUG(sanity, checkMutableLists(rtsTrue));
 290
 291   // Initialise all our gc_thread structures
 292   for (t = 0; t < n_gc_threads; t++) {
 293       init_gc_thread(gc_threads[t]);
 294   }
 295
 296   // Initialise all the generations/steps that we're collecting.
 297   for (g = 0; g <= N; g++) {
 298       init_collected_gen(g,n_gc_threads);
 299   }
 300
 301   // Initialise all the generations/steps that we're *not* collecting.
 302   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 303       init_uncollected_gen(g,n_gc_threads);
 304   }
 305
 306   /* Allocate a mark stack if we're doing a major collection.
 307    */
 308   if (major_gc && oldest_gen->steps[0].mark) {
 309       nat mark_stack_blocks;
 310       mark_stack_blocks = stg_max(MARK_STACK_BLOCKS,
 311                                   oldest_gen->steps[0].n_old_blocks / 100);
 312       mark_stack_bdescr = allocGroup(mark_stack_blocks);
 313       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 314       mark_sp    = mark_stack;
 315       mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W);
 316   } else {
 317       mark_stack_bdescr = NULL;
 318   }
 319
 320   // this is the main thread
 321 #ifdef THREADED_RTS
 322   if (n_gc_threads == 1) {
 323       SET_GCT(gc_threads[0]);
 324   } else {
 325       SET_GCT(gc_threads[cap->no]);
 326   }
 327 #else
 328 SET_GCT(gc_threads[0]);
 329 #endif
 330
 331   /* -----------------------------------------------------------------------
 332    * follow all the roots that we know about:
 333    */
 334
 335   // the main thread is running: this prevents any other threads from
 336   // exiting prematurely, so we can start them now.
 337   // NB. do this after the mutable lists have been saved above, otherwise
 338   // the other GC threads will be writing into the old mutable lists.
 339   inc_running();
 340   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 341
 342   // Mutable lists from each generation > N
 343   // we want to *scavenge* these roots, not evacuate them: they're not
 344   // going to move in this GC.
 345   // Also do them in reverse generation order, for the usual reason:
 346   // namely to reduce the likelihood of spurious old->new pointers.
 347   //
 348   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 349       scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 350       freeChain_sync(generations[g].saved_mut_list);
 351       generations[g].saved_mut_list = NULL;
 352
 353   }
 354
 355   // scavenge the capability-private mutable lists.  This isn't part
 356   // of markSomeCapabilities() because markSomeCapabilities() can only
 357   // call back into the GC via mark_root() (due to the gct register
 358   // variable).
 359   if (n_gc_threads == 1) {
 360       for (n = 0; n < n_capabilities; n++) {
 361           scavenge_capability_mut_lists(&capabilities[n]);
 362       }
 363   } else {
 364       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 365   }
 366
 367   // follow roots from the CAF list (used by GHCi)
 368   gct->evac_step = 0;
 369   markCAFs(mark_root, gct);
 370
 371   // follow all the roots that the application knows about.
 372   gct->evac_step = 0;
 373   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 374                        rtsTrue/*prune sparks*/);
 375
 376 #if defined(RTS_USER_SIGNALS)
 377   // mark the signal handlers (signals should be already blocked)
 378   markSignalHandlers(mark_root, gct);
 379 #endif
 380
 381   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 382   markWeakPtrList();
 383   initWeakForGC();
 384
 385   // Mark the stable pointer table.
 386   markStablePtrTable(mark_root, gct);
 387
 388   /* -------------------------------------------------------------------------
 389    * Repeatedly scavenge all the areas we know about until there's no
 390    * more scavenging to be done.
 391    */
 392   for (;;)
 393   {
 394       scavenge_until_all_done();
 395       // The other threads are now stopped.  We might recurse back to
 396       // here, but from now on this is the only thread.
 397
 398       // if any blackholes are alive, make the threads that wait on
 399       // them alive too.
 400       if (traverseBlackholeQueue()) {
 401           inc_running();
 402           continue;
 403       }
 404
 405       // must be last...  invariant is that everything is fully
 406       // scavenged at this point.
 407       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 408           inc_running();
 409           continue;
 410       }
 411
 412       // If we get to here, there's really nothing left to do.
 413       break;
 414   }
 415
 416   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 417
 418   // Update pointers from the Task list
 419   update_task_list();
 420
 421   // Now see which stable names are still alive.
 422   gcStablePtrTable();
 423
 424 #ifdef PROFILING
 425   // We call processHeapClosureForDead() on every closure destroyed during
 426   // the current garbage collection, so we invoke LdvCensusForDead().
 427   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 428       || RtsFlags.ProfFlags.bioSelector != NULL)
 429     LdvCensusForDead(N);
 430 #endif
 431
 432   // NO MORE EVACUATION AFTER THIS POINT!
 433
 434   // Two-space collector: free the old to-space.
 435   // g0s0->old_blocks is the old nursery
 436   // g0s0->blocks is to-space from the previous GC
 437   if (RtsFlags.GcFlags.generations == 1) {
 438       if (g0s0->blocks != NULL) {
 439           freeChain(g0s0->blocks);
 440           g0s0->blocks = NULL;
 441       }
 442   }
 443
 444   // For each workspace, in each thread, move the copied blocks to the step
 445   {
 446       gc_thread *thr;
 447       step_workspace *ws;
 448       bdescr *prev, *next;
 449
 450       for (t = 0; t < n_gc_threads; t++) {
 451           thr = gc_threads[t];
 452
 453           // not step 0
 454           if (RtsFlags.GcFlags.generations == 1) {
 455               s = 0;
 456           } else {
 457               s = 1;
 458           }
 459           for (; s < total_steps; s++) {
 460               ws = &thr->steps[s];
 461
 462               // Push the final block
 463               if (ws->todo_bd) {
 464                   push_scanned_block(ws->todo_bd, ws);
 465               }
 466
 467               ASSERT(gct->scan_bd == NULL);
 468               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 469
 470               prev = NULL;
 471               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 472                   ws->step->n_words += bd->free - bd->start;
 473                   prev = bd;
 474               }
 475               if (prev != NULL) {
 476                   prev->link = ws->step->blocks;
 477                   ws->step->blocks = ws->scavd_list;
 478               }
 479               ws->step->n_blocks += ws->n_scavd_blocks;
 480           }
 481       }
 482
 483       // Add all the partial blocks *after* we've added all the full
 484       // blocks.  This is so that we can grab the partial blocks back
 485       // again and try to fill them up in the next GC.
 486       for (t = 0; t < n_gc_threads; t++) {
 487           thr = gc_threads[t];
 488
 489           // not step 0
 490           if (RtsFlags.GcFlags.generations == 1) {
 491               s = 0;
 492           } else {
 493               s = 1;
 494           }
 495           for (; s < total_steps; s++) {
 496               ws = &thr->steps[s];
 497
 498               prev = NULL;
 499               for (bd = ws->part_list; bd != NULL; bd = next) {
 500                   next = bd->link;
 501                   if (bd->free == bd->start) {
 502                       if (prev == NULL) {
 503                           ws->part_list = next;
 504                       } else {
 505                           prev->link = next;
 506                       }
 507                       freeGroup(bd);
 508                       ws->n_part_blocks--;
 509                   } else {
 510                       ws->step->n_words += bd->free - bd->start;
 511                       prev = bd;
 512                   }
 513               }
 514               if (prev != NULL) {
 515                   prev->link = ws->step->blocks;
 516                   ws->step->blocks = ws->part_list;
 517               }
 518               ws->step->n_blocks += ws->n_part_blocks;
 519
 520               ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
 521               ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
 522           }
 523       }
 524   }
 525
 526   // Finally: compact or sweep the oldest generation.
 527   if (major_gc && oldest_gen->steps[0].mark) {
 528       if (oldest_gen->steps[0].compact)
 529           compact(gct->scavenged_static_objects);
 530       else
 531           sweep(&oldest_gen->steps[0]);
 532   }
 533
 534   /* run through all the generations/steps and tidy up
 535    */
 536   copied = 0;
 537   max_copied = 0;
 538   avg_copied = 0;
 539   {
 540       nat i;
 541       for (i=0; i < n_gc_threads; i++) {
 542           if (n_gc_threads > 1) {
 543               debugTrace(DEBUG_gc,"thread %d:", i);
 544               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 545               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 546               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 547               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 548               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 549           }
 550           copied += gc_threads[i]->copied;
 551           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 552       }
 553       if (n_gc_threads == 1) {
 554           max_copied = 0;
 555           avg_copied = 0;
 556       } else {
 557           avg_copied = copied;
 558       }
 559   }
 560
 561   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 562
 563     if (g == N) {
 564       generations[g].collections++; // for stats
 565       if (n_gc_threads > 1) generations[g].par_collections++;
 566     }
 567
 568     // Count the mutable list as bytes "copied" for the purposes of
 569     // stats.  Every mutable list is copied during every GC.
 570     if (g > 0) {
 571         nat mut_list_size = 0;
 572         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 573             mut_list_size += bd->free - bd->start;
 574         }
 575         for (n = 0; n < n_capabilities; n++) {
 576             for (bd = capabilities[n].mut_lists[g];
 577                  bd != NULL; bd = bd->link) {
 578                 mut_list_size += bd->free - bd->start;
 579             }
 580         }
 581         copied +=  mut_list_size;
 582
 583         debugTrace(DEBUG_gc,
 584                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 585                    (unsigned long)(mut_list_size * sizeof(W_)),
 586                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 587     }
 588
 589     for (s = 0; s < generations[g].n_steps; s++) {
 590       bdescr *next, *prev;
 591       stp = &generations[g].steps[s];
 592
 593       // for generations we collected...
 594       if (g <= N) {
 595
 596         /* free old memory and shift to-space into from-space for all
 597          * the collected steps (except the allocation area).  These
 598          * freed blocks will probaby be quickly recycled.
 599          */
 600         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 601             if (stp->mark)
 602             {
 603                 // tack the new blocks on the end of the existing blocks
 604                 if (stp->old_blocks != NULL) {
 605
 606                     prev = NULL;
 607                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 608
 609                         next = bd->link;
 610
 611                         if (!(bd->flags & BF_MARKED))
 612                         {
 613                             if (prev == NULL) {
 614                                 stp->old_blocks = next;
 615                             } else {
 616                                 prev->link = next;
 617                             }
 618                             freeGroup(bd);
 619                             stp->n_old_blocks--;
 620                         }
 621                         else
 622                         {
 623                             stp->n_words += bd->free - bd->start;
 624
 625                             // NB. this step might not be compacted next
 626                             // time, so reset the BF_MARKED flags.
 627                             // They are set before GC if we're going to
 628                             // compact.  (search for BF_MARKED above).
 629                             bd->flags &= ~BF_MARKED;
 630
 631                             // between GCs, all blocks in the heap except
 632                             // for the nursery have the BF_EVACUATED flag set.
 633                             bd->flags |= BF_EVACUATED;
 634
 635                             prev = bd;
 636                         }
 637                     }
 638
 639                     if (prev != NULL) {
 640                         prev->link = stp->blocks;
 641                         stp->blocks = stp->old_blocks;
 642                     }
 643                 }
 644                 // add the new blocks to the block tally
 645                 stp->n_blocks += stp->n_old_blocks;
 646                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 647                 ASSERT(countOccupied(stp->blocks) == stp->n_words);
 648             }
 649             else // not copacted
 650             {
 651                 freeChain(stp->old_blocks);
 652             }
 653             stp->old_blocks = NULL;
 654             stp->n_old_blocks = 0;
 655         }
 656
 657         /* LARGE OBJECTS.  The current live large objects are chained on
 658          * scavenged_large, having been moved during garbage
 659          * collection from large_objects.  Any objects left on
 660          * large_objects list are therefore dead, so we free them here.
 661          */
 662         for (bd = stp->large_objects; bd != NULL; bd = next) {
 663           next = bd->link;
 664           freeGroup(bd);
 665           bd = next;
 666         }
 667
 668         stp->large_objects  = stp->scavenged_large_objects;
 669         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 670
 671       }
 672       else // for older generations...
 673       {
 674         /* For older generations, we need to append the
 675          * scavenged_large_object list (i.e. large objects that have been
 676          * promoted during this GC) to the large_object list for that step.
 677          */
 678         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 679           next = bd->link;
 680           dbl_link_onto(bd, &stp->large_objects);
 681         }
 682
 683         // add the new blocks we promoted during this GC
 684         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 685       }
 686     }
 687   }
 688
 689   // update the max size of older generations after a major GC
 690   resize_generations();
 691
 692   // Calculate the amount of live data for stats.
 693   live = calcLiveWords();
 694
 695   // Free the small objects allocated via allocate(), since this will
 696   // all have been copied into G0S1 now.
 697   if (RtsFlags.GcFlags.generations > 1) {
 698       if (g0s0->blocks != NULL) {
 699           freeChain(g0s0->blocks);
 700           g0s0->blocks = NULL;
 701       }
 702       g0s0->n_blocks = 0;
 703       g0s0->n_words = 0;
 704   }
 705   alloc_blocks = 0;
 706   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 707
 708   // Start a new pinned_object_block
 709   pinned_object_block = NULL;
 710
 711   // Free the mark stack.
 712   if (mark_stack_bdescr != NULL) {
 713       freeGroup(mark_stack_bdescr);
 714   }
 715
 716   // Free any bitmaps.
 717   for (g = 0; g <= N; g++) {
 718       for (s = 0; s < generations[g].n_steps; s++) {
 719           stp = &generations[g].steps[s];
 720           if (stp->bitmap != NULL) {
 721               freeGroup(stp->bitmap);
 722               stp->bitmap = NULL;
 723           }
 724       }
 725   }
 726
 727   resize_nursery();
 728
 729  // mark the garbage collected CAFs as dead
 730 #if 0 && defined(DEBUG) // doesn't work at the moment
 731   if (major_gc) { gcCAFs(); }
 732 #endif
 733
 734 #ifdef PROFILING
 735   // resetStaticObjectForRetainerProfiling() must be called before
 736   // zeroing below.
 737   if (n_gc_threads > 1) {
 738       barf("profiling is currently broken with multi-threaded GC");
 739       // ToDo: fix the gct->scavenged_static_objects below
 740   }
 741   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 742 #endif
 743
 744   // zero the scavenged static object list
 745   if (major_gc) {
 746       nat i;
 747       for (i = 0; i < n_gc_threads; i++) {
 748           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 749       }
 750   }
 751
 752   // Reset the nursery
 753   resetNurseries();
 754
 755   // start any pending finalizers
 756   RELEASE_SM_LOCK;
 757   scheduleFinalizers(cap, old_weak_ptr_list);
 758   ACQUIRE_SM_LOCK;
 759
 760   // send exceptions to any threads which were about to die
 761   RELEASE_SM_LOCK;
 762   resurrectThreads(resurrected_threads);
 763   performPendingThrowTos(exception_threads);
 764   ACQUIRE_SM_LOCK;
 765
 766   // Update the stable pointer hash table.
 767   updateStablePtrTable(major_gc);
 768
 769   // check sanity after GC
 770   IF_DEBUG(sanity, checkSanity());
 771
 772   // extra GC trace info
 773   IF_DEBUG(gc, statDescribeGens());
 774
 775 #ifdef DEBUG
 776   // symbol-table based profiling
 777   /*  heapCensus(to_blocks); */ /* ToDo */
 778 #endif
 779
 780   // restore enclosing cost centre
 781 #ifdef PROFILING
 782   CCCS = prev_CCS;
 783 #endif
 784
 785 #ifdef DEBUG
 786   // check for memory leaks if DEBUG is on
 787   memInventory(traceClass(DEBUG_gc));
 788 #endif
 789
 790 #ifdef RTS_GTK_FRONTPANEL
 791   if (RtsFlags.GcFlags.frontpanel) {
 792       updateFrontPanelAfterGC( N, live );
 793   }
 794 #endif
 795
 796   // ok, GC over: tell the stats department what happened.
 797   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 798   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 799
 800   // unlock the StablePtr table
 801   stablePtrPostGC();
 802
 803   // Guess which generation we'll collect *next* time
 804   initialise_N(force_major_gc);
 805
 806 #if defined(RTS_USER_SIGNALS)
 807   if (RtsFlags.MiscFlags.install_signal_handlers) {
 808     // unblock signals again
 809     unblockUserSignals();
 810   }
 811 #endif
 812
 813   RELEASE_SM_LOCK;
 814
 815   SET_GCT(saved_gct);
 816 }
 817
 818 /* -----------------------------------------------------------------------------
 819    Figure out which generation to collect, initialise N and major_gc.
 820
 821    Also returns the total number of blocks in generations that will be
 822    collected.
 823    -------------------------------------------------------------------------- */
 824
 825 static nat
 826 initialise_N (rtsBool force_major_gc)
 827 {
 828     int g;
 829     nat s, blocks, blocks_total;
 830
 831     blocks = 0;
 832     blocks_total = 0;
 833
 834     if (force_major_gc) {
 835         N = RtsFlags.GcFlags.generations - 1;
 836     } else {
 837         N = 0;
 838     }
 839
 840     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 841         blocks = 0;
 842         for (s = 0; s < generations[g].n_steps; s++) {
 843             blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
 844             blocks += generations[g].steps[s].n_large_blocks;
 845         }
 846         if (blocks >= generations[g].max_blocks) {
 847             N = stg_max(N,g);
 848         }
 849         if ((nat)g <= N) {
 850             blocks_total += blocks;
 851         }
 852     }
 853
 854     blocks_total += countNurseryBlocks();
 855
 856     major_gc = (N == RtsFlags.GcFlags.generations-1);
 857     return blocks_total;
 858 }
 859
 860 /* -----------------------------------------------------------------------------
 861    Initialise the gc_thread structures.
 862    -------------------------------------------------------------------------- */
 863
 864 #define GC_THREAD_INACTIVE             0
 865 #define GC_THREAD_STANDING_BY          1
 866 #define GC_THREAD_RUNNING              2
 867 #define GC_THREAD_WAITING_TO_CONTINUE  3
 868
 869 static void
 870 new_gc_thread (nat n, gc_thread *t)
 871 {
 872     nat s;
 873     step_workspace *ws;
 874
 875 #ifdef THREADED_RTS
 876     t->id = 0;
 877     initSpinLock(&t->gc_spin);
 878     initSpinLock(&t->mut_spin);
 879     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 880     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 881                           // thread to start up, see wakeup_gc_threads
 882 #endif
 883
 884     t->thread_index = n;
 885     t->free_blocks = NULL;
 886     t->gc_count = 0;
 887
 888     init_gc_thread(t);
 889
 890 #ifdef USE_PAPI
 891     t->papi_events = -1;
 892 #endif
 893
 894     for (s = 0; s < total_steps; s++)
 895     {
 896         ws = &t->steps[s];
 897         ws->step = &all_steps[s];
 898         ASSERT(s == ws->step->abs_no);
 899         ws->my_gct = t;
 900
 901         ws->todo_bd = NULL;
 902         ws->todo_q = newWSDeque(128);
 903         ws->todo_overflow = NULL;
 904         ws->n_todo_overflow = 0;
 905
 906         ws->part_list = NULL;
 907         ws->n_part_blocks = 0;
 908
 909         ws->scavd_list = NULL;
 910         ws->n_scavd_blocks = 0;
 911     }
 912 }
 913
 914
 915 void
 916 initGcThreads (void)
 917 {
 918     if (gc_threads == NULL) {
 919 #if defined(THREADED_RTS)
 920         nat i;
 921         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 922                                      sizeof(gc_thread*),
 923                                      "alloc_gc_threads");
 924
 925         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 926             gc_threads[i] =
 927                 stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 928                                "alloc_gc_threads");
 929
 930             new_gc_thread(i, gc_threads[i]);
 931         }
 932 #else
 933         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 934         gc_threads[0] = gct;
 935         new_gc_thread(0,gc_threads[0]);
 936 #endif
 937     }
 938 }
 939
 940 void
 941 freeGcThreads (void)
 942 {
 943     if (gc_threads != NULL) {
 944 #if defined(THREADED_RTS)
 945         nat i;
 946         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 947             stgFree (gc_threads[i]);
 948         }
 949         stgFree (gc_threads);
 950 #else
 951         stgFree (gc_threads);
 952 #endif
 953         gc_threads = NULL;
 954     }
 955 }
 956
 957 /* ----------------------------------------------------------------------------
 958    Start GC threads
 959    ------------------------------------------------------------------------- */
 960
 961 static volatile StgWord gc_running_threads;
 962
 963 static StgWord
 964 inc_running (void)
 965 {
 966     StgWord new;
 967     new = atomic_inc(&gc_running_threads);
 968     ASSERT(new <= n_gc_threads);
 969     return new;
 970 }
 971
 972 static StgWord
 973 dec_running (void)
 974 {
 975     ASSERT(gc_running_threads != 0);
 976     return atomic_dec(&gc_running_threads);
 977 }
 978
 979 static rtsBool
 980 any_work (void)
 981 {
 982     int s;
 983     step_workspace *ws;
 984
 985     gct->any_work++;
 986
 987     write_barrier();
 988
 989     // scavenge objects in compacted generation
 990     if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
 991         (mark_stack_bdescr != NULL && !mark_stack_empty())) {
 992         return rtsTrue;
 993     }
 994
 995     // Check for global work in any step.  We don't need to check for
 996     // local work, because we have already exited scavenge_loop(),
 997     // which means there is no local work for this thread.
 998     for (s = total_steps-1; s >= 0; s--) {
 999         if (s == 0 && RtsFlags.GcFlags.generations > 1) {
1000             continue;
1001         }
1002         ws = &gct->steps[s];
1003         if (ws->todo_large_objects) return rtsTrue;
1004         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1005         if (ws->todo_overflow) return rtsTrue;
1006     }
1007
1008 #if defined(THREADED_RTS)
1009     if (work_stealing) {
1010         nat n;
1011         // look for work to steal
1012         for (n = 0; n < n_gc_threads; n++) {
1013             if (n == gct->thread_index) continue;
1014             for (s = total_steps-1; s >= 0; s--) {
1015                 ws = &gc_threads[n]->steps[s];
1016                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1017             }
1018         }
1019     }
1020 #endif
1021
1022     gct->no_work++;
1023
1024     return rtsFalse;
1025 }
1026
1027 static void
1028 scavenge_until_all_done (void)
1029 {
1030     nat r;
1031
1032     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
1033
1034 loop:
1035 #if defined(THREADED_RTS)
1036     if (n_gc_threads > 1) {
1037         scavenge_loop();
1038     } else {
1039         scavenge_loop1();
1040     }
1041 #else
1042     scavenge_loop();
1043 #endif
1044
1045     // scavenge_loop() only exits when there's no work to do
1046     r = dec_running();
1047
1048     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
1049                gct->thread_index, r);
1050
1051     while (gc_running_threads != 0) {
1052         // usleep(1);
1053         if (any_work()) {
1054             inc_running();
1055             goto loop;
1056         }
1057         // any_work() does not remove the work from the queue, it
1058         // just checks for the presence of work.  If we find any,
1059         // then we increment gc_running_threads and go back to
1060         // scavenge_loop() to perform any pending work.
1061     }
1062
1063     // All threads are now stopped
1064     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
1065 }
1066
1067 #if defined(THREADED_RTS)
1068
1069 void
1070 gcWorkerThread (Capability *cap)
1071 {
1072     cap->in_gc = rtsTrue;
1073
1074     gct = gc_threads[cap->no];
1075     gct->id = osThreadId();
1076
1077     // Wait until we're told to wake up
1078     RELEASE_SPIN_LOCK(&gct->mut_spin);
1079     gct->wakeup = GC_THREAD_STANDING_BY;
1080     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1081     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1082
1083 #ifdef USE_PAPI
1084     // start performance counters in this thread...
1085     if (gct->papi_events == -1) {
1086         papi_init_eventset(&gct->papi_events);
1087     }
1088     papi_thread_start_gc1_count(gct->papi_events);
1089 #endif
1090
1091     // Every thread evacuates some roots.
1092     gct->evac_step = 0;
1093     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1094                          rtsTrue/*prune sparks*/);
1095     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1096
1097     scavenge_until_all_done();
1098
1099 #ifdef USE_PAPI
1100     // count events in this thread towards the GC totals
1101     papi_thread_stop_gc1_count(gct->papi_events);
1102 #endif
1103
1104     // Wait until we're told to continue
1105     RELEASE_SPIN_LOCK(&gct->gc_spin);
1106     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1107     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1108                gct->thread_index);
1109     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1110     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1111 }
1112
1113 #endif
1114
1115 void
1116 waitForGcThreads (Capability *cap USED_IF_THREADS)
1117 {
1118 #if defined(THREADED_RTS)
1119     nat n_threads = RtsFlags.ParFlags.nNodes;
1120     nat me = cap->no;
1121     nat i, j;
1122     rtsBool retry = rtsTrue;
1123
1124     while(retry) {
1125         for (i=0; i < n_threads; i++) {
1126             if (i == me) continue;
1127             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1128                 prodCapability(&capabilities[i], cap->running_task);
1129             }
1130         }
1131         for (j=0; j < 10000000; j++) {
1132             retry = rtsFalse;
1133             for (i=0; i < n_threads; i++) {
1134                 if (i == me) continue;
1135                 write_barrier();
1136                 setContextSwitches();
1137                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1138                     retry = rtsTrue;
1139                 }
1140             }
1141             if (!retry) break;
1142         }
1143     }
1144 #endif
1145 }
1146
1147 static void
1148 start_gc_threads (void)
1149 {
1150 #if defined(THREADED_RTS)
1151     gc_running_threads = 0;
1152 #endif
1153 }
1154
1155 static void
1156 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1157 {
1158 #if defined(THREADED_RTS)
1159     nat i;
1160     for (i=0; i < n_threads; i++) {
1161         if (i == me) continue;
1162         inc_running();
1163         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1164         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1165
1166         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1167         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1168         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1169     }
1170 #endif
1171 }
1172
1173 // After GC is complete, we must wait for all GC threads to enter the
1174 // standby state, otherwise they may still be executing inside
1175 // any_work(), and may even remain awake until the next GC starts.
1176 static void
1177 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1178 {
1179 #if defined(THREADED_RTS)
1180     nat i;
1181     for (i=0; i < n_threads; i++) {
1182         if (i == me) continue;
1183         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1184     }
1185 #endif
1186 }
1187
1188 void
1189 releaseGCThreads (Capability *cap USED_IF_THREADS)
1190 {
1191 #if defined(THREADED_RTS)
1192     nat n_threads = RtsFlags.ParFlags.nNodes;
1193     nat me = cap->no;
1194     nat i;
1195     for (i=0; i < n_threads; i++) {
1196         if (i == me) continue;
1197         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1198             barf("releaseGCThreads");
1199
1200         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1201         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1202         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1203     }
1204 #endif
1205 }
1206
1207 /* ----------------------------------------------------------------------------
1208    Initialise a generation that is to be collected
1209    ------------------------------------------------------------------------- */
1210
1211 static void
1212 init_collected_gen (nat g, nat n_threads)
1213 {
1214     nat s, t, i;
1215     step_workspace *ws;
1216     step *stp;
1217     bdescr *bd;
1218
1219     // Throw away the current mutable list.  Invariant: the mutable
1220     // list always has at least one block; this means we can avoid a
1221     // check for NULL in recordMutable().
1222     if (g != 0) {
1223         freeChain(generations[g].mut_list);
1224         generations[g].mut_list = allocBlock();
1225         for (i = 0; i < n_capabilities; i++) {
1226             freeChain(capabilities[i].mut_lists[g]);
1227             capabilities[i].mut_lists[g] = allocBlock();
1228         }
1229     }
1230
1231     for (s = 0; s < generations[g].n_steps; s++) {
1232
1233         stp = &generations[g].steps[s];
1234         ASSERT(stp->gen_no == g);
1235
1236         // we'll construct a new list of threads in this step
1237         // during GC, throw away the current list.
1238         stp->old_threads = stp->threads;
1239         stp->threads = END_TSO_QUEUE;
1240
1241         // generation 0, step 0 doesn't need to-space
1242         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1243             continue;
1244         }
1245
1246         // deprecate the existing blocks
1247         stp->old_blocks   = stp->blocks;
1248         stp->n_old_blocks = stp->n_blocks;
1249         stp->blocks       = NULL;
1250         stp->n_blocks     = 0;
1251         stp->n_words      = 0;
1252         stp->live_estimate = 0;
1253
1254         // initialise the large object queues.
1255         stp->scavenged_large_objects = NULL;
1256         stp->n_scavenged_large_blocks = 0;
1257
1258         // mark the small objects as from-space
1259         for (bd = stp->old_blocks; bd; bd = bd->link) {
1260             bd->flags &= ~BF_EVACUATED;
1261         }
1262
1263         // mark the large objects as from-space
1264         for (bd = stp->large_objects; bd; bd = bd->link) {
1265             bd->flags &= ~BF_EVACUATED;
1266         }
1267
1268         // for a compacted step, we need to allocate the bitmap
1269         if (stp->mark) {
1270             nat bitmap_size; // in bytes
1271             bdescr *bitmap_bdescr;
1272             StgWord *bitmap;
1273
1274             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1275
1276             if (bitmap_size > 0) {
1277                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1278                                            / BLOCK_SIZE);
1279                 stp->bitmap = bitmap_bdescr;
1280                 bitmap = bitmap_bdescr->start;
1281
1282                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1283                            bitmap_size, bitmap);
1284
1285                 // don't forget to fill it with zeros!
1286                 memset(bitmap, 0, bitmap_size);
1287
1288                 // For each block in this step, point to its bitmap from the
1289                 // block descriptor.
1290                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1291                     bd->u.bitmap = bitmap;
1292                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1293
1294                     // Also at this point we set the BF_MARKED flag
1295                     // for this block.  The invariant is that
1296                     // BF_MARKED is always unset, except during GC
1297                     // when it is set on those blocks which will be
1298                     // compacted.
1299                     if (!(bd->flags & BF_FRAGMENTED)) {
1300                         bd->flags |= BF_MARKED;
1301                     }
1302                 }
1303             }
1304         }
1305     }
1306
1307     // For each GC thread, for each step, allocate a "todo" block to
1308     // store evacuated objects to be scavenged, and a block to store
1309     // evacuated objects that do not need to be scavenged.
1310     for (t = 0; t < n_threads; t++) {
1311         for (s = 0; s < generations[g].n_steps; s++) {
1312
1313             // we don't copy objects into g0s0, unless -G0
1314             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1315
1316             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1317
1318             ws->todo_large_objects = NULL;
1319
1320             ws->part_list = NULL;
1321             ws->n_part_blocks = 0;
1322
1323             // allocate the first to-space block; extra blocks will be
1324             // chained on as necessary.
1325             ws->todo_bd = NULL;
1326             ASSERT(looksEmptyWSDeque(ws->todo_q));
1327             alloc_todo_block(ws,0);
1328
1329             ws->todo_overflow = NULL;
1330             ws->n_todo_overflow = 0;
1331
1332             ws->scavd_list = NULL;
1333             ws->n_scavd_blocks = 0;
1334         }
1335     }
1336 }
1337
1338
1339 /* ----------------------------------------------------------------------------
1340    Initialise a generation that is *not* to be collected
1341    ------------------------------------------------------------------------- */
1342
1343 static void
1344 init_uncollected_gen (nat g, nat threads)
1345 {
1346     nat s, t, n;
1347     step_workspace *ws;
1348     step *stp;
1349     bdescr *bd;
1350
1351     // save the current mutable lists for this generation, and
1352     // allocate a fresh block for each one.  We'll traverse these
1353     // mutable lists as roots early on in the GC.
1354     generations[g].saved_mut_list = generations[g].mut_list;
1355     generations[g].mut_list = allocBlock();
1356     for (n = 0; n < n_capabilities; n++) {
1357         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
1358         capabilities[n].mut_lists[g] = allocBlock();
1359     }
1360
1361     for (s = 0; s < generations[g].n_steps; s++) {
1362         stp = &generations[g].steps[s];
1363         stp->scavenged_large_objects = NULL;
1364         stp->n_scavenged_large_blocks = 0;
1365     }
1366
1367     for (s = 0; s < generations[g].n_steps; s++) {
1368
1369         stp = &generations[g].steps[s];
1370
1371         for (t = 0; t < threads; t++) {
1372             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1373
1374             ASSERT(looksEmptyWSDeque(ws->todo_q));
1375             ws->todo_large_objects = NULL;
1376
1377             ws->part_list = NULL;
1378             ws->n_part_blocks = 0;
1379
1380             ws->scavd_list = NULL;
1381             ws->n_scavd_blocks = 0;
1382
1383             // If the block at the head of the list in this generation
1384             // is less than 3/4 full, then use it as a todo block.
1385             if (stp->blocks && isPartiallyFull(stp->blocks))
1386             {
1387                 ws->todo_bd = stp->blocks;
1388                 ws->todo_free = ws->todo_bd->free;
1389                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1390                 stp->blocks = stp->blocks->link;
1391                 stp->n_blocks -= 1;
1392                 stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1393                 ws->todo_bd->link = NULL;
1394                 // we must scan from the current end point.
1395                 ws->todo_bd->u.scan = ws->todo_bd->free;
1396             }
1397             else
1398             {
1399                 ws->todo_bd = NULL;
1400                 alloc_todo_block(ws,0);
1401             }
1402         }
1403
1404         // deal out any more partial blocks to the threads' part_lists
1405         t = 0;
1406         while (stp->blocks && isPartiallyFull(stp->blocks))
1407         {
1408             bd = stp->blocks;
1409             stp->blocks = bd->link;
1410             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1411             bd->link = ws->part_list;
1412             ws->part_list = bd;
1413             ws->n_part_blocks += 1;
1414             bd->u.scan = bd->free;
1415             stp->n_blocks -= 1;
1416             stp->n_words -= bd->free - bd->start;
1417             t++;
1418             if (t == n_gc_threads) t = 0;
1419         }
1420     }
1421 }
1422
1423 /* -----------------------------------------------------------------------------
1424    Initialise a gc_thread before GC
1425    -------------------------------------------------------------------------- */
1426
1427 static void
1428 init_gc_thread (gc_thread *t)
1429 {
1430     t->static_objects = END_OF_STATIC_LIST;
1431     t->scavenged_static_objects = END_OF_STATIC_LIST;
1432     t->scan_bd = NULL;
1433     t->mut_lists = capabilities[t->thread_index].mut_lists;
1434     t->evac_step = 0;
1435     t->failed_to_evac = rtsFalse;
1436     t->eager_promotion = rtsTrue;
1437     t->thunk_selector_depth = 0;
1438     t->copied = 0;
1439     t->scanned = 0;
1440     t->any_work = 0;
1441     t->no_work = 0;
1442     t->scav_find_work = 0;
1443 }
1444
1445 /* -----------------------------------------------------------------------------
1446    Function we pass to evacuate roots.
1447    -------------------------------------------------------------------------- */
1448
1449 static void
1450 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1451 {
1452     // we stole a register for gct, but this function is called from
1453     // *outside* the GC where the register variable is not in effect,
1454     // so we need to save and restore it here.  NB. only call
1455     // mark_root() from the main GC thread, otherwise gct will be
1456     // incorrect.
1457     gc_thread *saved_gct;
1458     saved_gct = gct;
1459     SET_GCT(user);
1460
1461     evacuate(root);
1462
1463     SET_GCT(saved_gct);
1464 }
1465
1466 /* -----------------------------------------------------------------------------
1467    Initialising the static object & mutable lists
1468    -------------------------------------------------------------------------- */
1469
1470 static void
1471 zero_static_object_list(StgClosure* first_static)
1472 {
1473   StgClosure* p;
1474   StgClosure* link;
1475   const StgInfoTable *info;
1476
1477   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1478     info = get_itbl(p);
1479     link = *STATIC_LINK(info, p);
1480     *STATIC_LINK(info,p) = NULL;
1481   }
1482 }
1483
1484 /* ----------------------------------------------------------------------------
1485    Update the pointers from the task list
1486
1487    These are treated as weak pointers because we want to allow a main
1488    thread to get a BlockedOnDeadMVar exception in the same way as any
1489    other thread.  Note that the threads should all have been retained
1490    by GC by virtue of being on the all_threads list, we're just
1491    updating pointers here.
1492    ------------------------------------------------------------------------- */
1493
1494 static void
1495 update_task_list (void)
1496 {
1497     Task *task;
1498     StgTSO *tso;
1499     for (task = all_tasks; task != NULL; task = task->all_link) {
1500         if (!task->stopped && task->tso) {
1501             ASSERT(task->tso->bound == task);
1502             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1503             if (tso == NULL) {
1504                 barf("task %p: main thread %d has been GC'd",
1505 #ifdef THREADED_RTS
1506                      (void *)task->id,
1507 #else
1508                      (void *)task,
1509 #endif
1510                      task->tso->id);
1511             }
1512             task->tso = tso;
1513         }
1514     }
1515 }
1516
1517 /* ----------------------------------------------------------------------------
1518    Reset the sizes of the older generations when we do a major
1519    collection.
1520
1521    CURRENT STRATEGY: make all generations except zero the same size.
1522    We have to stay within the maximum heap size, and leave a certain
1523    percentage of the maximum heap size available to allocate into.
1524    ------------------------------------------------------------------------- */
1525
1526 static void
1527 resize_generations (void)
1528 {
1529     nat g;
1530
1531     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1532         nat live, size, min_alloc, words;
1533         nat max  = RtsFlags.GcFlags.maxHeapSize;
1534         nat gens = RtsFlags.GcFlags.generations;
1535
1536         // live in the oldest generations
1537         if (oldest_gen->steps[0].live_estimate != 0) {
1538             words = oldest_gen->steps[0].live_estimate;
1539         } else {
1540             words = oldest_gen->steps[0].n_words;
1541         }
1542         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1543             oldest_gen->steps[0].n_large_blocks;
1544
1545         // default max size for all generations except zero
1546         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1547                        RtsFlags.GcFlags.minOldGenSize);
1548
1549         // minimum size for generation zero
1550         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1551                             RtsFlags.GcFlags.minAllocAreaSize);
1552
1553         // Auto-enable compaction when the residency reaches a
1554         // certain percentage of the maximum heap size (default: 30%).
1555         if (RtsFlags.GcFlags.generations > 1 &&
1556             (RtsFlags.GcFlags.compact ||
1557              (max > 0 &&
1558               oldest_gen->steps[0].n_blocks >
1559               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1560             oldest_gen->steps[0].mark = 1;
1561             oldest_gen->steps[0].compact = 1;
1562 //        debugBelch("compaction: on\n", live);
1563         } else {
1564             oldest_gen->steps[0].mark = 0;
1565             oldest_gen->steps[0].compact = 0;
1566 //        debugBelch("compaction: off\n", live);
1567         }
1568
1569         if (RtsFlags.GcFlags.sweep) {
1570             oldest_gen->steps[0].mark = 1;
1571         }
1572
1573         // if we're going to go over the maximum heap size, reduce the
1574         // size of the generations accordingly.  The calculation is
1575         // different if compaction is turned on, because we don't need
1576         // to double the space required to collect the old generation.
1577         if (max != 0) {
1578
1579             // this test is necessary to ensure that the calculations
1580             // below don't have any negative results - we're working
1581             // with unsigned values here.
1582             if (max < min_alloc) {
1583                 heapOverflow();
1584             }
1585
1586             if (oldest_gen->steps[0].compact) {
1587                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1588                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1589                 }
1590             } else {
1591                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1592                     size = (max - min_alloc) / ((gens - 1) * 2);
1593                 }
1594             }
1595
1596             if (size < live) {
1597                 heapOverflow();
1598             }
1599         }
1600
1601 #if 0
1602         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1603                    min_alloc, size, max);
1604 #endif
1605
1606         for (g = 0; g < gens; g++) {
1607             generations[g].max_blocks = size;
1608         }
1609     }
1610 }
1611
1612 /* -----------------------------------------------------------------------------
1613    Calculate the new size of the nursery, and resize it.
1614    -------------------------------------------------------------------------- */
1615
1616 static void
1617 resize_nursery (void)
1618 {
1619     if (RtsFlags.GcFlags.generations == 1)
1620     {   // Two-space collector:
1621         nat blocks;
1622
1623         /* set up a new nursery.  Allocate a nursery size based on a
1624          * function of the amount of live data (by default a factor of 2)
1625          * Use the blocks from the old nursery if possible, freeing up any
1626          * left over blocks.
1627          *
1628          * If we get near the maximum heap size, then adjust our nursery
1629          * size accordingly.  If the nursery is the same size as the live
1630          * data (L), then we need 3L bytes.  We can reduce the size of the
1631          * nursery to bring the required memory down near 2L bytes.
1632          *
1633          * A normal 2-space collector would need 4L bytes to give the same
1634          * performance we get from 3L bytes, reducing to the same
1635          * performance at 2L bytes.
1636          */
1637         blocks = g0s0->n_blocks;
1638
1639         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1640              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1641              RtsFlags.GcFlags.maxHeapSize )
1642         {
1643             long adjusted_blocks;  // signed on purpose
1644             int pc_free;
1645
1646             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1647
1648             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1649                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1650
1651             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1652             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1653             {
1654                 heapOverflow();
1655             }
1656             blocks = adjusted_blocks;
1657         }
1658         else
1659         {
1660             blocks *= RtsFlags.GcFlags.oldGenFactor;
1661             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1662             {
1663                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1664             }
1665         }
1666         resizeNurseries(blocks);
1667     }
1668     else  // Generational collector
1669     {
1670         /*
1671          * If the user has given us a suggested heap size, adjust our
1672          * allocation area to make best use of the memory available.
1673          */
1674         if (RtsFlags.GcFlags.heapSizeSuggestion)
1675         {
1676             long blocks;
1677             nat needed = calcNeeded();  // approx blocks needed at next GC
1678
1679             /* Guess how much will be live in generation 0 step 0 next time.
1680              * A good approximation is obtained by finding the
1681              * percentage of g0s0 that was live at the last minor GC.
1682              *
1683              * We have an accurate figure for the amount of copied data in
1684              * 'copied', but we must convert this to a number of blocks, with
1685              * a small adjustment for estimated slop at the end of a block
1686              * (- 10 words).
1687              */
1688             if (N == 0)
1689             {
1690                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1691                     / countNurseryBlocks();
1692             }
1693
1694             /* Estimate a size for the allocation area based on the
1695              * information available.  We might end up going slightly under
1696              * or over the suggested heap size, but we should be pretty
1697              * close on average.
1698              *
1699              * Formula:            suggested - needed
1700              *                ----------------------------
1701              *                    1 + g0s0_pcnt_kept/100
1702              *
1703              * where 'needed' is the amount of memory needed at the next
1704              * collection for collecting all steps except g0s0.
1705              */
1706             blocks =
1707                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1708                 (100 + (long)g0s0_pcnt_kept);
1709
1710             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1711                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1712             }
1713
1714             resizeNurseries((nat)blocks);
1715         }
1716         else
1717         {
1718             // we might have added extra large blocks to the nursery, so
1719             // resize back to minAllocAreaSize again.
1720             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1721         }
1722     }
1723 }
1724
1725 /* -----------------------------------------------------------------------------
1726    Sanity code for CAF garbage collection.
1727
1728    With DEBUG turned on, we manage a CAF list in addition to the SRT
1729    mechanism.  After GC, we run down the CAF list and blackhole any
1730    CAFs which have been garbage collected.  This means we get an error
1731    whenever the program tries to enter a garbage collected CAF.
1732
1733    Any garbage collected CAFs are taken off the CAF list at the same
1734    time.
1735    -------------------------------------------------------------------------- */
1736
1737 #if 0 && defined(DEBUG)
1738
1739 static void
1740 gcCAFs(void)
1741 {
1742   StgClosure*  p;
1743   StgClosure** pp;
1744   const StgInfoTable *info;
1745   nat i;
1746
1747   i = 0;
1748   p = caf_list;
1749   pp = &caf_list;
1750
1751   while (p != NULL) {
1752
1753     info = get_itbl(p);
1754
1755     ASSERT(info->type == IND_STATIC);
1756
1757     if (STATIC_LINK(info,p) == NULL) {
1758         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1759         // black hole it
1760         SET_INFO(p,&stg_BLACKHOLE_info);
1761         p = STATIC_LINK2(info,p);
1762         *pp = p;
1763     }
1764     else {
1765       pp = &STATIC_LINK2(info,p);
1766       p = *pp;
1767       i++;
1768     }
1769
1770   }
1771
1772   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1773 }
1774 #endif