rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2008
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "HsFFI.h"
  17
  18 #include "Storage.h"
  19 #include "RtsUtils.h"
  20 #include "Apply.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "ProfHeap.h"
  27 #include "Weak.h"
  28 #include "Prelude.h"
  29 #include "RtsSignals.h"
  30 #include "STM.h"
  31 #if defined(RTS_GTK_FRONTPANEL)
  32 #include "FrontPanel.h"
  33 #endif
  34 #include "Trace.h"
  35 #include "RetainerProfile.h"
  36 #include "LdvProfile.h"
  37 #include "RaiseAsync.h"
  38 #include "Papi.h"
  39 #include "Stable.h"
  40
  41 #include "GC.h"
  42 #include "GCThread.h"
  43 #include "Compact.h"
  44 #include "Evac.h"
  45 #include "Scav.h"
  46 #include "GCUtils.h"
  47 #include "MarkWeak.h"
  48 #include "Sparks.h"
  49 #include "Sweep.h"
  50
  51 #include <string.h> // for memset()
  52 #include <unistd.h>
  53
  54 /* -----------------------------------------------------------------------------
  55    Global variables
  56    -------------------------------------------------------------------------- */
  57
  58 /* STATIC OBJECT LIST.
  59  *
  60  * During GC:
  61  * We maintain a linked list of static objects that are still live.
  62  * The requirements for this list are:
  63  *
  64  *  - we need to scan the list while adding to it, in order to
  65  *    scavenge all the static objects (in the same way that
  66  *    breadth-first scavenging works for dynamic objects).
  67  *
  68  *  - we need to be able to tell whether an object is already on
  69  *    the list, to break loops.
  70  *
  71  * Each static object has a "static link field", which we use for
  72  * linking objects on to the list.  We use a stack-type list, consing
  73  * objects on the front as they are added (this means that the
  74  * scavenge phase is depth-first, not breadth-first, but that
  75  * shouldn't matter).
  76  *
  77  * A separate list is kept for objects that have been scavenged
  78  * already - this is so that we can zero all the marks afterwards.
  79  *
  80  * An object is on the list if its static link field is non-zero; this
  81  * means that we have to mark the end of the list with '1', not NULL.
  82  *
  83  * Extra notes for generational GC:
  84  *
  85  * Each generation has a static object list associated with it.  When
  86  * collecting generations up to N, we treat the static object lists
  87  * from generations > N as roots.
  88  *
  89  * We build up a static object list while collecting generations 0..N,
  90  * which is then appended to the static object list of generation N+1.
  91  */
  92
  93 /* N is the oldest generation being collected, where the generations
  94  * are numbered starting at 0.  A major GC (indicated by the major_gc
  95  * flag) is when we're collecting all generations.  We only attempt to
  96  * deal with static objects and GC CAFs when doing a major GC.
  97  */
  98 nat N;
  99 rtsBool major_gc;
 100
 101 /* Data used for allocation area sizing.
 102  */
 103 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 104
 105 /* Mut-list stats */
 106 #ifdef DEBUG
 107 nat mutlist_MUTVARS,
 108     mutlist_MUTARRS,
 109     mutlist_MVARS,
 110     mutlist_OTHERS;
 111 #endif
 112
 113 /* Thread-local data for each GC thread
 114  */
 115 gc_thread **gc_threads = NULL;
 116
 117 #if !defined(THREADED_RTS)
 118 StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)];
 119 #endif
 120
 121 // Number of threads running in *this* GC.  Affects how many
 122 // step->todos[] lists we have to look in to find work.
 123 nat n_gc_threads;
 124
 125 // For stats:
 126 long copied;        // *words* copied & scavenged during this GC
 127
 128 rtsBool work_stealing;
 129
 130 DECLARE_GCT
 131
 132 /* -----------------------------------------------------------------------------
 133    Static function declarations
 134    -------------------------------------------------------------------------- */
 135
 136 static void mark_root               (void *user, StgClosure **root);
 137 static void zero_static_object_list (StgClosure* first_static);
 138 static nat  initialise_N            (rtsBool force_major_gc);
 139 static void init_collected_gen      (nat g, nat threads);
 140 static void init_uncollected_gen    (nat g, nat threads);
 141 static void init_gc_thread          (gc_thread *t);
 142 static void update_task_list        (void);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void scavenge_until_all_done (void);
 147 static StgWord inc_running          (void);
 148 static StgWord dec_running          (void);
 149 static void wakeup_gc_threads       (nat n_threads, nat me);
 150 static void shutdown_gc_threads     (nat n_threads, nat me);
 151
 152 #if 0 && defined(DEBUG)
 153 static void gcCAFs                  (void);
 154 #endif
 155
 156 /* -----------------------------------------------------------------------------
 157    The mark bitmap & stack.
 158    -------------------------------------------------------------------------- */
 159
 160 #define MARK_STACK_BLOCKS 4
 161
 162 bdescr *mark_stack_bdescr;
 163 StgPtr *mark_stack;
 164 StgPtr *mark_sp;
 165 StgPtr *mark_splim;
 166
 167 // Flag and pointers used for falling back to a linear scan when the
 168 // mark stack overflows.
 169 rtsBool mark_stack_overflowed;
 170 bdescr *oldgen_scan_bd;
 171 StgPtr  oldgen_scan;
 172
 173 /* -----------------------------------------------------------------------------
 174    GarbageCollect: the main entry point to the garbage collector.
 175
 176    Locks held: all capabilities are held throughout GarbageCollect().
 177    -------------------------------------------------------------------------- */
 178
 179 void
 180 GarbageCollect (rtsBool force_major_gc,
 181                 nat gc_type USED_IF_THREADS,
 182                 Capability *cap)
 183 {
 184   bdescr *bd;
 185   step *stp;
 186   lnat live, allocated, max_copied, avg_copied, slop;
 187   gc_thread *saved_gct;
 188   nat g, s, t, n;
 189
 190   // necessary if we stole a callee-saves register for gct:
 191   saved_gct = gct;
 192
 193 #ifdef PROFILING
 194   CostCentreStack *prev_CCS;
 195 #endif
 196
 197   ACQUIRE_SM_LOCK;
 198
 199 #if defined(RTS_USER_SIGNALS)
 200   if (RtsFlags.MiscFlags.install_signal_handlers) {
 201     // block signals
 202     blockUserSignals();
 203   }
 204 #endif
 205
 206   ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
 207   // otherwise adjust the padding in step_workspace.
 208
 209   // tell the stats department that we've started a GC
 210   stat_startGC();
 211
 212   // tell the STM to discard any cached closures it's hoping to re-use
 213   stmPreGCHook();
 214
 215   // lock the StablePtr table
 216   stablePtrPreGC();
 217
 218 #ifdef DEBUG
 219   mutlist_MUTVARS = 0;
 220   mutlist_MUTARRS = 0;
 221   mutlist_OTHERS = 0;
 222 #endif
 223
 224   // attribute any costs to CCS_GC
 225 #ifdef PROFILING
 226   prev_CCS = CCCS;
 227   CCCS = CCS_GC;
 228 #endif
 229
 230   /* Approximate how much we allocated.
 231    * Todo: only when generating stats?
 232    */
 233   allocated = calcAllocated();
 234
 235   /* Figure out which generation to collect
 236    */
 237   n = initialise_N(force_major_gc);
 238
 239 #if defined(THREADED_RTS)
 240   work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
 241                   N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
 242       // It's not always a good idea to do load balancing in parallel
 243       // GC.  In particular, for a parallel program we don't want to
 244       // lose locality by moving cached data into another CPU's cache
 245       // (this effect can be quite significant).
 246       //
 247       // We could have a more complex way to deterimine whether to do
 248       // work stealing or not, e.g. it might be a good idea to do it
 249       // if the heap is big.  For now, we just turn it on or off with
 250       // a flag.
 251 #endif
 252
 253   /* Start threads, so they can be spinning up while we finish initialisation.
 254    */
 255   start_gc_threads();
 256
 257 #if defined(THREADED_RTS)
 258   /* How many threads will be participating in this GC?
 259    * We don't try to parallelise minor GCs (unless the user asks for
 260    * it with +RTS -gn0), or mark/compact/sweep GC.
 261    */
 262   if (gc_type == PENDING_GC_PAR) {
 263       n_gc_threads = RtsFlags.ParFlags.nNodes;
 264   } else {
 265       n_gc_threads = 1;
 266   }
 267 #else
 268   n_gc_threads = 1;
 269 #endif
 270
 271   debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
 272         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 273
 274 #ifdef RTS_GTK_FRONTPANEL
 275   if (RtsFlags.GcFlags.frontpanel) {
 276       updateFrontPanelBeforeGC(N);
 277   }
 278 #endif
 279
 280 #ifdef DEBUG
 281   // check for memory leaks if DEBUG is on
 282   memInventory(DEBUG_gc);
 283 #endif
 284
 285   // check stack sanity *before* GC
 286   IF_DEBUG(sanity, checkFreeListSanity());
 287   IF_DEBUG(sanity, checkMutableLists(rtsTrue));
 288
 289   // Initialise all our gc_thread structures
 290   for (t = 0; t < n_gc_threads; t++) {
 291       init_gc_thread(gc_threads[t]);
 292   }
 293
 294   // Initialise all the generations/steps that we're collecting.
 295   for (g = 0; g <= N; g++) {
 296       init_collected_gen(g,n_gc_threads);
 297   }
 298
 299   // Initialise all the generations/steps that we're *not* collecting.
 300   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 301       init_uncollected_gen(g,n_gc_threads);
 302   }
 303
 304   /* Allocate a mark stack if we're doing a major collection.
 305    */
 306   if (major_gc && oldest_gen->steps[0].mark) {
 307       nat mark_stack_blocks;
 308       mark_stack_blocks = stg_max(MARK_STACK_BLOCKS,
 309                                   oldest_gen->steps[0].n_old_blocks / 100);
 310       mark_stack_bdescr = allocGroup(mark_stack_blocks);
 311       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 312       mark_sp    = mark_stack;
 313       mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W);
 314   } else {
 315       mark_stack_bdescr = NULL;
 316   }
 317
 318   // this is the main thread
 319 #ifdef THREADED_RTS
 320   if (n_gc_threads == 1) {
 321       SET_GCT(gc_threads[0]);
 322   } else {
 323       SET_GCT(gc_threads[cap->no]);
 324   }
 325 #else
 326 SET_GCT(gc_threads[0]);
 327 #endif
 328
 329   /* -----------------------------------------------------------------------
 330    * follow all the roots that we know about:
 331    */
 332
 333   // the main thread is running: this prevents any other threads from
 334   // exiting prematurely, so we can start them now.
 335   // NB. do this after the mutable lists have been saved above, otherwise
 336   // the other GC threads will be writing into the old mutable lists.
 337   inc_running();
 338   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 339
 340   // Mutable lists from each generation > N
 341   // we want to *scavenge* these roots, not evacuate them: they're not
 342   // going to move in this GC.
 343   // Also do them in reverse generation order, for the usual reason:
 344   // namely to reduce the likelihood of spurious old->new pointers.
 345   //
 346   for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 347       scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
 348       freeChain_sync(generations[g].saved_mut_list);
 349       generations[g].saved_mut_list = NULL;
 350
 351   }
 352
 353   // scavenge the capability-private mutable lists.  This isn't part
 354   // of markSomeCapabilities() because markSomeCapabilities() can only
 355   // call back into the GC via mark_root() (due to the gct register
 356   // variable).
 357   if (n_gc_threads == 1) {
 358       for (n = 0; n < n_capabilities; n++) {
 359           scavenge_capability_mut_lists(&capabilities[n]);
 360       }
 361   } else {
 362       scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
 363   }
 364
 365   // follow roots from the CAF list (used by GHCi)
 366   gct->evac_step = 0;
 367   markCAFs(mark_root, gct);
 368
 369   // follow all the roots that the application knows about.
 370   gct->evac_step = 0;
 371   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
 372                        rtsTrue/*prune sparks*/);
 373
 374 #if defined(RTS_USER_SIGNALS)
 375   // mark the signal handlers (signals should be already blocked)
 376   markSignalHandlers(mark_root, gct);
 377 #endif
 378
 379   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 380   markWeakPtrList();
 381   initWeakForGC();
 382
 383   // Mark the stable pointer table.
 384   markStablePtrTable(mark_root, gct);
 385
 386   /* -------------------------------------------------------------------------
 387    * Repeatedly scavenge all the areas we know about until there's no
 388    * more scavenging to be done.
 389    */
 390   for (;;)
 391   {
 392       scavenge_until_all_done();
 393       // The other threads are now stopped.  We might recurse back to
 394       // here, but from now on this is the only thread.
 395
 396       // if any blackholes are alive, make the threads that wait on
 397       // them alive too.
 398       if (traverseBlackholeQueue()) {
 399           inc_running();
 400           continue;
 401       }
 402
 403       // must be last...  invariant is that everything is fully
 404       // scavenged at this point.
 405       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 406           inc_running();
 407           continue;
 408       }
 409
 410       // If we get to here, there's really nothing left to do.
 411       break;
 412   }
 413
 414   shutdown_gc_threads(n_gc_threads, gct->thread_index);
 415
 416   // Update pointers from the Task list
 417   update_task_list();
 418
 419   // Now see which stable names are still alive.
 420   gcStablePtrTable();
 421
 422 #ifdef PROFILING
 423   // We call processHeapClosureForDead() on every closure destroyed during
 424   // the current garbage collection, so we invoke LdvCensusForDead().
 425   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 426       || RtsFlags.ProfFlags.bioSelector != NULL)
 427     LdvCensusForDead(N);
 428 #endif
 429
 430   // NO MORE EVACUATION AFTER THIS POINT!
 431
 432   // Two-space collector: free the old to-space.
 433   // g0s0->old_blocks is the old nursery
 434   // g0s0->blocks is to-space from the previous GC
 435   if (RtsFlags.GcFlags.generations == 1) {
 436       if (g0s0->blocks != NULL) {
 437           freeChain(g0s0->blocks);
 438           g0s0->blocks = NULL;
 439       }
 440   }
 441
 442   // For each workspace, in each thread, move the copied blocks to the step
 443   {
 444       gc_thread *thr;
 445       step_workspace *ws;
 446       bdescr *prev, *next;
 447
 448       for (t = 0; t < n_gc_threads; t++) {
 449           thr = gc_threads[t];
 450
 451           // not step 0
 452           if (RtsFlags.GcFlags.generations == 1) {
 453               s = 0;
 454           } else {
 455               s = 1;
 456           }
 457           for (; s < total_steps; s++) {
 458               ws = &thr->steps[s];
 459
 460               // Push the final block
 461               if (ws->todo_bd) {
 462                   push_scanned_block(ws->todo_bd, ws);
 463               }
 464
 465               ASSERT(gct->scan_bd == NULL);
 466               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 467
 468               prev = NULL;
 469               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 470                   ws->step->n_words += bd->free - bd->start;
 471                   prev = bd;
 472               }
 473               if (prev != NULL) {
 474                   prev->link = ws->step->blocks;
 475                   ws->step->blocks = ws->scavd_list;
 476               }
 477               ws->step->n_blocks += ws->n_scavd_blocks;
 478           }
 479       }
 480
 481       // Add all the partial blocks *after* we've added all the full
 482       // blocks.  This is so that we can grab the partial blocks back
 483       // again and try to fill them up in the next GC.
 484       for (t = 0; t < n_gc_threads; t++) {
 485           thr = gc_threads[t];
 486
 487           // not step 0
 488           if (RtsFlags.GcFlags.generations == 1) {
 489               s = 0;
 490           } else {
 491               s = 1;
 492           }
 493           for (; s < total_steps; s++) {
 494               ws = &thr->steps[s];
 495
 496               prev = NULL;
 497               for (bd = ws->part_list; bd != NULL; bd = next) {
 498                   next = bd->link;
 499                   if (bd->free == bd->start) {
 500                       if (prev == NULL) {
 501                           ws->part_list = next;
 502                       } else {
 503                           prev->link = next;
 504                       }
 505                       freeGroup(bd);
 506                       ws->n_part_blocks--;
 507                   } else {
 508                       ws->step->n_words += bd->free - bd->start;
 509                       prev = bd;
 510                   }
 511               }
 512               if (prev != NULL) {
 513                   prev->link = ws->step->blocks;
 514                   ws->step->blocks = ws->part_list;
 515               }
 516               ws->step->n_blocks += ws->n_part_blocks;
 517
 518               ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
 519               ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words);
 520           }
 521       }
 522   }
 523
 524   // Finally: compact or sweep the oldest generation.
 525   if (major_gc && oldest_gen->steps[0].mark) {
 526       if (oldest_gen->steps[0].compact)
 527           compact(gct->scavenged_static_objects);
 528       else
 529           sweep(&oldest_gen->steps[0]);
 530   }
 531
 532   /* run through all the generations/steps and tidy up
 533    */
 534   copied = 0;
 535   max_copied = 0;
 536   avg_copied = 0;
 537   {
 538       nat i;
 539       for (i=0; i < n_gc_threads; i++) {
 540           if (n_gc_threads > 1) {
 541               debugTrace(DEBUG_gc,"thread %d:", i);
 542               debugTrace(DEBUG_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 543               debugTrace(DEBUG_gc,"   scanned          %ld", gc_threads[i]->scanned * sizeof(W_));
 544               debugTrace(DEBUG_gc,"   any_work         %ld", gc_threads[i]->any_work);
 545               debugTrace(DEBUG_gc,"   no_work          %ld", gc_threads[i]->no_work);
 546               debugTrace(DEBUG_gc,"   scav_find_work %ld",   gc_threads[i]->scav_find_work);
 547           }
 548           copied += gc_threads[i]->copied;
 549           max_copied = stg_max(gc_threads[i]->copied, max_copied);
 550       }
 551       if (n_gc_threads == 1) {
 552           max_copied = 0;
 553           avg_copied = 0;
 554       } else {
 555           avg_copied = copied;
 556       }
 557   }
 558
 559   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 560
 561     if (g == N) {
 562       generations[g].collections++; // for stats
 563       if (n_gc_threads > 1) generations[g].par_collections++;
 564     }
 565
 566     // Count the mutable list as bytes "copied" for the purposes of
 567     // stats.  Every mutable list is copied during every GC.
 568     if (g > 0) {
 569         nat mut_list_size = 0;
 570         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 571             mut_list_size += bd->free - bd->start;
 572         }
 573         for (n = 0; n < n_capabilities; n++) {
 574             for (bd = capabilities[n].mut_lists[g];
 575                  bd != NULL; bd = bd->link) {
 576                 mut_list_size += bd->free - bd->start;
 577             }
 578         }
 579         copied +=  mut_list_size;
 580
 581         debugTrace(DEBUG_gc,
 582                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 583                    (unsigned long)(mut_list_size * sizeof(W_)),
 584                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 585     }
 586
 587     for (s = 0; s < generations[g].n_steps; s++) {
 588       bdescr *next, *prev;
 589       stp = &generations[g].steps[s];
 590
 591       // for generations we collected...
 592       if (g <= N) {
 593
 594         /* free old memory and shift to-space into from-space for all
 595          * the collected steps (except the allocation area).  These
 596          * freed blocks will probaby be quickly recycled.
 597          */
 598         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 599             if (stp->mark)
 600             {
 601                 // tack the new blocks on the end of the existing blocks
 602                 if (stp->old_blocks != NULL) {
 603
 604                     prev = NULL;
 605                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 606
 607                         next = bd->link;
 608
 609                         if (!(bd->flags & BF_MARKED))
 610                         {
 611                             if (prev == NULL) {
 612                                 stp->old_blocks = next;
 613                             } else {
 614                                 prev->link = next;
 615                             }
 616                             freeGroup(bd);
 617                             stp->n_old_blocks--;
 618                         }
 619                         else
 620                         {
 621                             stp->n_words += bd->free - bd->start;
 622
 623                             // NB. this step might not be compacted next
 624                             // time, so reset the BF_MARKED flags.
 625                             // They are set before GC if we're going to
 626                             // compact.  (search for BF_MARKED above).
 627                             bd->flags &= ~BF_MARKED;
 628
 629                             // between GCs, all blocks in the heap except
 630                             // for the nursery have the BF_EVACUATED flag set.
 631                             bd->flags |= BF_EVACUATED;
 632
 633                             prev = bd;
 634                         }
 635                     }
 636
 637                     if (prev != NULL) {
 638                         prev->link = stp->blocks;
 639                         stp->blocks = stp->old_blocks;
 640                     }
 641                 }
 642                 // add the new blocks to the block tally
 643                 stp->n_blocks += stp->n_old_blocks;
 644                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 645                 ASSERT(countOccupied(stp->blocks) == stp->n_words);
 646             }
 647             else // not copacted
 648             {
 649                 freeChain(stp->old_blocks);
 650             }
 651             stp->old_blocks = NULL;
 652             stp->n_old_blocks = 0;
 653         }
 654
 655         /* LARGE OBJECTS.  The current live large objects are chained on
 656          * scavenged_large, having been moved during garbage
 657          * collection from large_objects.  Any objects left on
 658          * large_objects list are therefore dead, so we free them here.
 659          */
 660         for (bd = stp->large_objects; bd != NULL; bd = next) {
 661           next = bd->link;
 662           freeGroup(bd);
 663           bd = next;
 664         }
 665
 666         stp->large_objects  = stp->scavenged_large_objects;
 667         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 668
 669       }
 670       else // for older generations...
 671       {
 672         /* For older generations, we need to append the
 673          * scavenged_large_object list (i.e. large objects that have been
 674          * promoted during this GC) to the large_object list for that step.
 675          */
 676         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 677           next = bd->link;
 678           dbl_link_onto(bd, &stp->large_objects);
 679         }
 680
 681         // add the new blocks we promoted during this GC
 682         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 683       }
 684     }
 685   }
 686
 687   // update the max size of older generations after a major GC
 688   resize_generations();
 689
 690   // Calculate the amount of live data for stats.
 691   live = calcLiveWords();
 692
 693   // Free the small objects allocated via allocate(), since this will
 694   // all have been copied into G0S1 now.
 695   if (RtsFlags.GcFlags.generations > 1) {
 696       if (g0s0->blocks != NULL) {
 697           freeChain(g0s0->blocks);
 698           g0s0->blocks = NULL;
 699       }
 700       g0s0->n_blocks = 0;
 701       g0s0->n_words = 0;
 702   }
 703   alloc_blocks = 0;
 704   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 705
 706   // Start a new pinned_object_block
 707   pinned_object_block = NULL;
 708
 709   // Free the mark stack.
 710   if (mark_stack_bdescr != NULL) {
 711       freeGroup(mark_stack_bdescr);
 712   }
 713
 714   // Free any bitmaps.
 715   for (g = 0; g <= N; g++) {
 716       for (s = 0; s < generations[g].n_steps; s++) {
 717           stp = &generations[g].steps[s];
 718           if (stp->bitmap != NULL) {
 719               freeGroup(stp->bitmap);
 720               stp->bitmap = NULL;
 721           }
 722       }
 723   }
 724
 725   resize_nursery();
 726
 727  // mark the garbage collected CAFs as dead
 728 #if 0 && defined(DEBUG) // doesn't work at the moment
 729   if (major_gc) { gcCAFs(); }
 730 #endif
 731
 732 #ifdef PROFILING
 733   // resetStaticObjectForRetainerProfiling() must be called before
 734   // zeroing below.
 735   if (n_gc_threads > 1) {
 736       barf("profiling is currently broken with multi-threaded GC");
 737       // ToDo: fix the gct->scavenged_static_objects below
 738   }
 739   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 740 #endif
 741
 742   // zero the scavenged static object list
 743   if (major_gc) {
 744       nat i;
 745       for (i = 0; i < n_gc_threads; i++) {
 746           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 747       }
 748   }
 749
 750   // Reset the nursery
 751   resetNurseries();
 752
 753   // start any pending finalizers
 754   RELEASE_SM_LOCK;
 755   scheduleFinalizers(cap, old_weak_ptr_list);
 756   ACQUIRE_SM_LOCK;
 757
 758   // send exceptions to any threads which were about to die
 759   RELEASE_SM_LOCK;
 760   resurrectThreads(resurrected_threads);
 761   performPendingThrowTos(exception_threads);
 762   ACQUIRE_SM_LOCK;
 763
 764   // Update the stable pointer hash table.
 765   updateStablePtrTable(major_gc);
 766
 767   // check sanity after GC
 768   IF_DEBUG(sanity, checkSanity());
 769
 770   // extra GC trace info
 771   IF_DEBUG(gc, statDescribeGens());
 772
 773 #ifdef DEBUG
 774   // symbol-table based profiling
 775   /*  heapCensus(to_blocks); */ /* ToDo */
 776 #endif
 777
 778   // restore enclosing cost centre
 779 #ifdef PROFILING
 780   CCCS = prev_CCS;
 781 #endif
 782
 783 #ifdef DEBUG
 784   // check for memory leaks if DEBUG is on
 785   memInventory(DEBUG_gc);
 786 #endif
 787
 788 #ifdef RTS_GTK_FRONTPANEL
 789   if (RtsFlags.GcFlags.frontpanel) {
 790       updateFrontPanelAfterGC( N, live );
 791   }
 792 #endif
 793
 794   // ok, GC over: tell the stats department what happened.
 795   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
 796   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 797
 798   // unlock the StablePtr table
 799   stablePtrPostGC();
 800
 801   // Guess which generation we'll collect *next* time
 802   initialise_N(force_major_gc);
 803
 804 #if defined(RTS_USER_SIGNALS)
 805   if (RtsFlags.MiscFlags.install_signal_handlers) {
 806     // unblock signals again
 807     unblockUserSignals();
 808   }
 809 #endif
 810
 811   RELEASE_SM_LOCK;
 812
 813   SET_GCT(saved_gct);
 814 }
 815
 816 /* -----------------------------------------------------------------------------
 817    Figure out which generation to collect, initialise N and major_gc.
 818
 819    Also returns the total number of blocks in generations that will be
 820    collected.
 821    -------------------------------------------------------------------------- */
 822
 823 static nat
 824 initialise_N (rtsBool force_major_gc)
 825 {
 826     int g;
 827     nat s, blocks, blocks_total;
 828
 829     blocks = 0;
 830     blocks_total = 0;
 831
 832     if (force_major_gc) {
 833         N = RtsFlags.GcFlags.generations - 1;
 834     } else {
 835         N = 0;
 836     }
 837
 838     for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
 839         blocks = 0;
 840         for (s = 0; s < generations[g].n_steps; s++) {
 841             blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W;
 842             blocks += generations[g].steps[s].n_large_blocks;
 843         }
 844         if (blocks >= generations[g].max_blocks) {
 845             N = stg_max(N,g);
 846         }
 847         if ((nat)g <= N) {
 848             blocks_total += blocks;
 849         }
 850     }
 851
 852     blocks_total += countNurseryBlocks();
 853
 854     major_gc = (N == RtsFlags.GcFlags.generations-1);
 855     return blocks_total;
 856 }
 857
 858 /* -----------------------------------------------------------------------------
 859    Initialise the gc_thread structures.
 860    -------------------------------------------------------------------------- */
 861
 862 #define GC_THREAD_INACTIVE             0
 863 #define GC_THREAD_STANDING_BY          1
 864 #define GC_THREAD_RUNNING              2
 865 #define GC_THREAD_WAITING_TO_CONTINUE  3
 866
 867 static void
 868 new_gc_thread (nat n, gc_thread *t)
 869 {
 870     nat s;
 871     step_workspace *ws;
 872
 873 #ifdef THREADED_RTS
 874     t->id = 0;
 875     initSpinLock(&t->gc_spin);
 876     initSpinLock(&t->mut_spin);
 877     ACQUIRE_SPIN_LOCK(&t->gc_spin);
 878     t->wakeup = GC_THREAD_INACTIVE;  // starts true, so we can wait for the
 879                           // thread to start up, see wakeup_gc_threads
 880 #endif
 881
 882     t->thread_index = n;
 883     t->free_blocks = NULL;
 884     t->gc_count = 0;
 885
 886     init_gc_thread(t);
 887
 888 #ifdef USE_PAPI
 889     t->papi_events = -1;
 890 #endif
 891
 892     for (s = 0; s < total_steps; s++)
 893     {
 894         ws = &t->steps[s];
 895         ws->step = &all_steps[s];
 896         ASSERT(s == ws->step->abs_no);
 897         ws->my_gct = t;
 898
 899         ws->todo_bd = NULL;
 900         ws->todo_q = newWSDeque(128);
 901         ws->todo_overflow = NULL;
 902         ws->n_todo_overflow = 0;
 903
 904         ws->part_list = NULL;
 905         ws->n_part_blocks = 0;
 906
 907         ws->scavd_list = NULL;
 908         ws->n_scavd_blocks = 0;
 909     }
 910 }
 911
 912
 913 void
 914 initGcThreads (void)
 915 {
 916     if (gc_threads == NULL) {
 917 #if defined(THREADED_RTS)
 918         nat i;
 919         gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
 920                                      sizeof(gc_thread*),
 921                                      "alloc_gc_threads");
 922
 923         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 924             gc_threads[i] =
 925                 stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 926                                "alloc_gc_threads");
 927
 928             new_gc_thread(i, gc_threads[i]);
 929         }
 930 #else
 931         gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
 932         gc_threads[0] = gct;
 933         new_gc_thread(0,gc_threads[0]);
 934 #endif
 935     }
 936 }
 937
 938 void
 939 freeGcThreads (void)
 940 {
 941     if (gc_threads != NULL) {
 942 #if defined(THREADED_RTS)
 943         nat i;
 944         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
 945             stgFree (gc_threads[i]);
 946         }
 947         stgFree (gc_threads);
 948 #else
 949         stgFree (gc_threads);
 950 #endif
 951         gc_threads = NULL;
 952     }
 953 }
 954
 955 /* ----------------------------------------------------------------------------
 956    Start GC threads
 957    ------------------------------------------------------------------------- */
 958
 959 static volatile StgWord gc_running_threads;
 960
 961 static StgWord
 962 inc_running (void)
 963 {
 964     StgWord new;
 965     new = atomic_inc(&gc_running_threads);
 966     ASSERT(new <= n_gc_threads);
 967     return new;
 968 }
 969
 970 static StgWord
 971 dec_running (void)
 972 {
 973     ASSERT(gc_running_threads != 0);
 974     return atomic_dec(&gc_running_threads);
 975 }
 976
 977 static rtsBool
 978 any_work (void)
 979 {
 980     int s;
 981     step_workspace *ws;
 982
 983     gct->any_work++;
 984
 985     write_barrier();
 986
 987     // scavenge objects in compacted generation
 988     if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
 989         (mark_stack_bdescr != NULL && !mark_stack_empty())) {
 990         return rtsTrue;
 991     }
 992
 993     // Check for global work in any step.  We don't need to check for
 994     // local work, because we have already exited scavenge_loop(),
 995     // which means there is no local work for this thread.
 996     for (s = total_steps-1; s >= 0; s--) {
 997         if (s == 0 && RtsFlags.GcFlags.generations > 1) {
 998             continue;
 999         }
1000         ws = &gct->steps[s];
1001         if (ws->todo_large_objects) return rtsTrue;
1002         if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1003         if (ws->todo_overflow) return rtsTrue;
1004     }
1005
1006 #if defined(THREADED_RTS)
1007     if (work_stealing) {
1008         nat n;
1009         // look for work to steal
1010         for (n = 0; n < n_gc_threads; n++) {
1011             if (n == gct->thread_index) continue;
1012             for (s = total_steps-1; s >= 0; s--) {
1013                 ws = &gc_threads[n]->steps[s];
1014                 if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
1015             }
1016         }
1017     }
1018 #endif
1019
1020     gct->no_work++;
1021
1022     return rtsFalse;
1023 }
1024
1025 static void
1026 scavenge_until_all_done (void)
1027 {
1028     nat r;
1029
1030     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
1031
1032 loop:
1033 #if defined(THREADED_RTS)
1034     if (n_gc_threads > 1) {
1035         scavenge_loop();
1036     } else {
1037         scavenge_loop1();
1038     }
1039 #else
1040     scavenge_loop();
1041 #endif
1042
1043     // scavenge_loop() only exits when there's no work to do
1044     r = dec_running();
1045
1046     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
1047                gct->thread_index, r);
1048
1049     while (gc_running_threads != 0) {
1050         // usleep(1);
1051         if (any_work()) {
1052             inc_running();
1053             goto loop;
1054         }
1055         // any_work() does not remove the work from the queue, it
1056         // just checks for the presence of work.  If we find any,
1057         // then we increment gc_running_threads and go back to
1058         // scavenge_loop() to perform any pending work.
1059     }
1060
1061     // All threads are now stopped
1062     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
1063 }
1064
1065 #if defined(THREADED_RTS)
1066
1067 void
1068 gcWorkerThread (Capability *cap)
1069 {
1070     gc_thread *saved_gct;
1071
1072     // necessary if we stole a callee-saves register for gct:
1073     saved_gct = gct;
1074
1075     cap->in_gc = rtsTrue;
1076
1077     gct = gc_threads[cap->no];
1078     gct->id = osThreadId();
1079
1080     // Wait until we're told to wake up
1081     RELEASE_SPIN_LOCK(&gct->mut_spin);
1082     gct->wakeup = GC_THREAD_STANDING_BY;
1083     debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
1084     ACQUIRE_SPIN_LOCK(&gct->gc_spin);
1085
1086 #ifdef USE_PAPI
1087     // start performance counters in this thread...
1088     if (gct->papi_events == -1) {
1089         papi_init_eventset(&gct->papi_events);
1090     }
1091     papi_thread_start_gc1_count(gct->papi_events);
1092 #endif
1093
1094     // Every thread evacuates some roots.
1095     gct->evac_step = 0;
1096     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
1097                          rtsTrue/*prune sparks*/);
1098     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
1099
1100     scavenge_until_all_done();
1101
1102 #ifdef USE_PAPI
1103     // count events in this thread towards the GC totals
1104     papi_thread_stop_gc1_count(gct->papi_events);
1105 #endif
1106
1107     // Wait until we're told to continue
1108     RELEASE_SPIN_LOCK(&gct->gc_spin);
1109     gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
1110     debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
1111                gct->thread_index);
1112     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
1113     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
1114
1115     SET_GCT(saved_gct);
1116 }
1117
1118 #endif
1119
1120 #if defined(THREADED_RTS)
1121
1122 void
1123 waitForGcThreads (Capability *cap USED_IF_THREADS)
1124 {
1125     nat n_threads = RtsFlags.ParFlags.nNodes;
1126     nat me = cap->no;
1127     nat i, j;
1128     rtsBool retry = rtsTrue;
1129
1130     while(retry) {
1131         for (i=0; i < n_threads; i++) {
1132             if (i == me) continue;
1133             if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1134                 prodCapability(&capabilities[i], cap->running_task);
1135             }
1136         }
1137         for (j=0; j < 10000000; j++) {
1138             retry = rtsFalse;
1139             for (i=0; i < n_threads; i++) {
1140                 if (i == me) continue;
1141                 write_barrier();
1142                 setContextSwitches();
1143                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
1144                     retry = rtsTrue;
1145                 }
1146             }
1147             if (!retry) break;
1148         }
1149     }
1150 }
1151
1152 #endif // THREADED_RTS
1153
1154 static void
1155 start_gc_threads (void)
1156 {
1157 #if defined(THREADED_RTS)
1158     gc_running_threads = 0;
1159 #endif
1160 }
1161
1162 static void
1163 wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1164 {
1165 #if defined(THREADED_RTS)
1166     nat i;
1167     for (i=0; i < n_threads; i++) {
1168         if (i == me) continue;
1169         inc_running();
1170         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
1171         if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
1172
1173         gc_threads[i]->wakeup = GC_THREAD_RUNNING;
1174         ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1175         RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1176     }
1177 #endif
1178 }
1179
1180 // After GC is complete, we must wait for all GC threads to enter the
1181 // standby state, otherwise they may still be executing inside
1182 // any_work(), and may even remain awake until the next GC starts.
1183 static void
1184 shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
1185 {
1186 #if defined(THREADED_RTS)
1187     nat i;
1188     for (i=0; i < n_threads; i++) {
1189         if (i == me) continue;
1190         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
1191     }
1192 #endif
1193 }
1194
1195 #if defined(THREADED_RTS)
1196 void
1197 releaseGCThreads (Capability *cap USED_IF_THREADS)
1198 {
1199     nat n_threads = RtsFlags.ParFlags.nNodes;
1200     nat me = cap->no;
1201     nat i;
1202     for (i=0; i < n_threads; i++) {
1203         if (i == me) continue;
1204         if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
1205             barf("releaseGCThreads");
1206
1207         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
1208         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
1209         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
1210     }
1211 }
1212 #endif
1213
1214 /* ----------------------------------------------------------------------------
1215    Initialise a generation that is to be collected
1216    ------------------------------------------------------------------------- */
1217
1218 static void
1219 init_collected_gen (nat g, nat n_threads)
1220 {
1221     nat s, t, i;
1222     step_workspace *ws;
1223     step *stp;
1224     bdescr *bd;
1225
1226     // Throw away the current mutable list.  Invariant: the mutable
1227     // list always has at least one block; this means we can avoid a
1228     // check for NULL in recordMutable().
1229     if (g != 0) {
1230         freeChain(generations[g].mut_list);
1231         generations[g].mut_list = allocBlock();
1232         for (i = 0; i < n_capabilities; i++) {
1233             freeChain(capabilities[i].mut_lists[g]);
1234             capabilities[i].mut_lists[g] = allocBlock();
1235         }
1236     }
1237
1238     for (s = 0; s < generations[g].n_steps; s++) {
1239
1240         stp = &generations[g].steps[s];
1241         ASSERT(stp->gen_no == g);
1242
1243         // we'll construct a new list of threads in this step
1244         // during GC, throw away the current list.
1245         stp->old_threads = stp->threads;
1246         stp->threads = END_TSO_QUEUE;
1247
1248         // generation 0, step 0 doesn't need to-space
1249         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1250             continue;
1251         }
1252
1253         // deprecate the existing blocks
1254         stp->old_blocks   = stp->blocks;
1255         stp->n_old_blocks = stp->n_blocks;
1256         stp->blocks       = NULL;
1257         stp->n_blocks     = 0;
1258         stp->n_words      = 0;
1259         stp->live_estimate = 0;
1260
1261         // initialise the large object queues.
1262         stp->scavenged_large_objects = NULL;
1263         stp->n_scavenged_large_blocks = 0;
1264
1265         // mark the small objects as from-space
1266         for (bd = stp->old_blocks; bd; bd = bd->link) {
1267             bd->flags &= ~BF_EVACUATED;
1268         }
1269
1270         // mark the large objects as from-space
1271         for (bd = stp->large_objects; bd; bd = bd->link) {
1272             bd->flags &= ~BF_EVACUATED;
1273         }
1274
1275         // for a compacted step, we need to allocate the bitmap
1276         if (stp->mark) {
1277             nat bitmap_size; // in bytes
1278             bdescr *bitmap_bdescr;
1279             StgWord *bitmap;
1280
1281             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1282
1283             if (bitmap_size > 0) {
1284                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1285                                            / BLOCK_SIZE);
1286                 stp->bitmap = bitmap_bdescr;
1287                 bitmap = bitmap_bdescr->start;
1288
1289                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1290                            bitmap_size, bitmap);
1291
1292                 // don't forget to fill it with zeros!
1293                 memset(bitmap, 0, bitmap_size);
1294
1295                 // For each block in this step, point to its bitmap from the
1296                 // block descriptor.
1297                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1298                     bd->u.bitmap = bitmap;
1299                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1300
1301                     // Also at this point we set the BF_MARKED flag
1302                     // for this block.  The invariant is that
1303                     // BF_MARKED is always unset, except during GC
1304                     // when it is set on those blocks which will be
1305                     // compacted.
1306                     if (!(bd->flags & BF_FRAGMENTED)) {
1307                         bd->flags |= BF_MARKED;
1308                     }
1309                 }
1310             }
1311         }
1312     }
1313
1314     // For each GC thread, for each step, allocate a "todo" block to
1315     // store evacuated objects to be scavenged, and a block to store
1316     // evacuated objects that do not need to be scavenged.
1317     for (t = 0; t < n_threads; t++) {
1318         for (s = 0; s < generations[g].n_steps; s++) {
1319
1320             // we don't copy objects into g0s0, unless -G0
1321             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1322
1323             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1324
1325             ws->todo_large_objects = NULL;
1326
1327             ws->part_list = NULL;
1328             ws->n_part_blocks = 0;
1329
1330             // allocate the first to-space block; extra blocks will be
1331             // chained on as necessary.
1332             ws->todo_bd = NULL;
1333             ASSERT(looksEmptyWSDeque(ws->todo_q));
1334             alloc_todo_block(ws,0);
1335
1336             ws->todo_overflow = NULL;
1337             ws->n_todo_overflow = 0;
1338
1339             ws->scavd_list = NULL;
1340             ws->n_scavd_blocks = 0;
1341         }
1342     }
1343 }
1344
1345
1346 /* ----------------------------------------------------------------------------
1347    Initialise a generation that is *not* to be collected
1348    ------------------------------------------------------------------------- */
1349
1350 static void
1351 init_uncollected_gen (nat g, nat threads)
1352 {
1353     nat s, t, n;
1354     step_workspace *ws;
1355     step *stp;
1356     bdescr *bd;
1357
1358     // save the current mutable lists for this generation, and
1359     // allocate a fresh block for each one.  We'll traverse these
1360     // mutable lists as roots early on in the GC.
1361     generations[g].saved_mut_list = generations[g].mut_list;
1362     generations[g].mut_list = allocBlock();
1363     for (n = 0; n < n_capabilities; n++) {
1364         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
1365         capabilities[n].mut_lists[g] = allocBlock();
1366     }
1367
1368     for (s = 0; s < generations[g].n_steps; s++) {
1369         stp = &generations[g].steps[s];
1370         stp->scavenged_large_objects = NULL;
1371         stp->n_scavenged_large_blocks = 0;
1372     }
1373
1374     for (s = 0; s < generations[g].n_steps; s++) {
1375
1376         stp = &generations[g].steps[s];
1377
1378         for (t = 0; t < threads; t++) {
1379             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1380
1381             ASSERT(looksEmptyWSDeque(ws->todo_q));
1382             ws->todo_large_objects = NULL;
1383
1384             ws->part_list = NULL;
1385             ws->n_part_blocks = 0;
1386
1387             ws->scavd_list = NULL;
1388             ws->n_scavd_blocks = 0;
1389
1390             // If the block at the head of the list in this generation
1391             // is less than 3/4 full, then use it as a todo block.
1392             if (stp->blocks && isPartiallyFull(stp->blocks))
1393             {
1394                 ws->todo_bd = stp->blocks;
1395                 ws->todo_free = ws->todo_bd->free;
1396                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1397                 stp->blocks = stp->blocks->link;
1398                 stp->n_blocks -= 1;
1399                 stp->n_words -= ws->todo_bd->free - ws->todo_bd->start;
1400                 ws->todo_bd->link = NULL;
1401                 // we must scan from the current end point.
1402                 ws->todo_bd->u.scan = ws->todo_bd->free;
1403             }
1404             else
1405             {
1406                 ws->todo_bd = NULL;
1407                 alloc_todo_block(ws,0);
1408             }
1409         }
1410
1411         // deal out any more partial blocks to the threads' part_lists
1412         t = 0;
1413         while (stp->blocks && isPartiallyFull(stp->blocks))
1414         {
1415             bd = stp->blocks;
1416             stp->blocks = bd->link;
1417             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1418             bd->link = ws->part_list;
1419             ws->part_list = bd;
1420             ws->n_part_blocks += 1;
1421             bd->u.scan = bd->free;
1422             stp->n_blocks -= 1;
1423             stp->n_words -= bd->free - bd->start;
1424             t++;
1425             if (t == n_gc_threads) t = 0;
1426         }
1427     }
1428 }
1429
1430 /* -----------------------------------------------------------------------------
1431    Initialise a gc_thread before GC
1432    -------------------------------------------------------------------------- */
1433
1434 static void
1435 init_gc_thread (gc_thread *t)
1436 {
1437     t->static_objects = END_OF_STATIC_LIST;
1438     t->scavenged_static_objects = END_OF_STATIC_LIST;
1439     t->scan_bd = NULL;
1440     t->mut_lists = capabilities[t->thread_index].mut_lists;
1441     t->evac_step = 0;
1442     t->failed_to_evac = rtsFalse;
1443     t->eager_promotion = rtsTrue;
1444     t->thunk_selector_depth = 0;
1445     t->copied = 0;
1446     t->scanned = 0;
1447     t->any_work = 0;
1448     t->no_work = 0;
1449     t->scav_find_work = 0;
1450 }
1451
1452 /* -----------------------------------------------------------------------------
1453    Function we pass to evacuate roots.
1454    -------------------------------------------------------------------------- */
1455
1456 static void
1457 mark_root(void *user USED_IF_THREADS, StgClosure **root)
1458 {
1459     // we stole a register for gct, but this function is called from
1460     // *outside* the GC where the register variable is not in effect,
1461     // so we need to save and restore it here.  NB. only call
1462     // mark_root() from the main GC thread, otherwise gct will be
1463     // incorrect.
1464     gc_thread *saved_gct;
1465     saved_gct = gct;
1466     SET_GCT(user);
1467
1468     evacuate(root);
1469
1470     SET_GCT(saved_gct);
1471 }
1472
1473 /* -----------------------------------------------------------------------------
1474    Initialising the static object & mutable lists
1475    -------------------------------------------------------------------------- */
1476
1477 static void
1478 zero_static_object_list(StgClosure* first_static)
1479 {
1480   StgClosure* p;
1481   StgClosure* link;
1482   const StgInfoTable *info;
1483
1484   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1485     info = get_itbl(p);
1486     link = *STATIC_LINK(info, p);
1487     *STATIC_LINK(info,p) = NULL;
1488   }
1489 }
1490
1491 /* ----------------------------------------------------------------------------
1492    Update the pointers from the task list
1493
1494    These are treated as weak pointers because we want to allow a main
1495    thread to get a BlockedOnDeadMVar exception in the same way as any
1496    other thread.  Note that the threads should all have been retained
1497    by GC by virtue of being on the all_threads list, we're just
1498    updating pointers here.
1499    ------------------------------------------------------------------------- */
1500
1501 static void
1502 update_task_list (void)
1503 {
1504     Task *task;
1505     StgTSO *tso;
1506     for (task = all_tasks; task != NULL; task = task->all_link) {
1507         if (!task->stopped && task->tso) {
1508             ASSERT(task->tso->bound == task);
1509             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1510             if (tso == NULL) {
1511                 barf("task %p: main thread %d has been GC'd",
1512 #ifdef THREADED_RTS
1513                      (void *)task->id,
1514 #else
1515                      (void *)task,
1516 #endif
1517                      task->tso->id);
1518             }
1519             task->tso = tso;
1520         }
1521     }
1522 }
1523
1524 /* ----------------------------------------------------------------------------
1525    Reset the sizes of the older generations when we do a major
1526    collection.
1527
1528    CURRENT STRATEGY: make all generations except zero the same size.
1529    We have to stay within the maximum heap size, and leave a certain
1530    percentage of the maximum heap size available to allocate into.
1531    ------------------------------------------------------------------------- */
1532
1533 static void
1534 resize_generations (void)
1535 {
1536     nat g;
1537
1538     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1539         nat live, size, min_alloc, words;
1540         nat max  = RtsFlags.GcFlags.maxHeapSize;
1541         nat gens = RtsFlags.GcFlags.generations;
1542
1543         // live in the oldest generations
1544         if (oldest_gen->steps[0].live_estimate != 0) {
1545             words = oldest_gen->steps[0].live_estimate;
1546         } else {
1547             words = oldest_gen->steps[0].n_words;
1548         }
1549         live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
1550             oldest_gen->steps[0].n_large_blocks;
1551
1552         // default max size for all generations except zero
1553         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1554                        RtsFlags.GcFlags.minOldGenSize);
1555
1556         // minimum size for generation zero
1557         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1558                             RtsFlags.GcFlags.minAllocAreaSize);
1559
1560         // Auto-enable compaction when the residency reaches a
1561         // certain percentage of the maximum heap size (default: 30%).
1562         if (RtsFlags.GcFlags.generations > 1 &&
1563             (RtsFlags.GcFlags.compact ||
1564              (max > 0 &&
1565               oldest_gen->steps[0].n_blocks >
1566               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1567             oldest_gen->steps[0].mark = 1;
1568             oldest_gen->steps[0].compact = 1;
1569 //        debugBelch("compaction: on\n", live);
1570         } else {
1571             oldest_gen->steps[0].mark = 0;
1572             oldest_gen->steps[0].compact = 0;
1573 //        debugBelch("compaction: off\n", live);
1574         }
1575
1576         if (RtsFlags.GcFlags.sweep) {
1577             oldest_gen->steps[0].mark = 1;
1578         }
1579
1580         // if we're going to go over the maximum heap size, reduce the
1581         // size of the generations accordingly.  The calculation is
1582         // different if compaction is turned on, because we don't need
1583         // to double the space required to collect the old generation.
1584         if (max != 0) {
1585
1586             // this test is necessary to ensure that the calculations
1587             // below don't have any negative results - we're working
1588             // with unsigned values here.
1589             if (max < min_alloc) {
1590                 heapOverflow();
1591             }
1592
1593             if (oldest_gen->steps[0].compact) {
1594                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1595                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1596                 }
1597             } else {
1598                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1599                     size = (max - min_alloc) / ((gens - 1) * 2);
1600                 }
1601             }
1602
1603             if (size < live) {
1604                 heapOverflow();
1605             }
1606         }
1607
1608 #if 0
1609         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1610                    min_alloc, size, max);
1611 #endif
1612
1613         for (g = 0; g < gens; g++) {
1614             generations[g].max_blocks = size;
1615         }
1616     }
1617 }
1618
1619 /* -----------------------------------------------------------------------------
1620    Calculate the new size of the nursery, and resize it.
1621    -------------------------------------------------------------------------- */
1622
1623 static void
1624 resize_nursery (void)
1625 {
1626     lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
1627
1628     if (RtsFlags.GcFlags.generations == 1)
1629     {   // Two-space collector:
1630         nat blocks;
1631
1632         /* set up a new nursery.  Allocate a nursery size based on a
1633          * function of the amount of live data (by default a factor of 2)
1634          * Use the blocks from the old nursery if possible, freeing up any
1635          * left over blocks.
1636          *
1637          * If we get near the maximum heap size, then adjust our nursery
1638          * size accordingly.  If the nursery is the same size as the live
1639          * data (L), then we need 3L bytes.  We can reduce the size of the
1640          * nursery to bring the required memory down near 2L bytes.
1641          *
1642          * A normal 2-space collector would need 4L bytes to give the same
1643          * performance we get from 3L bytes, reducing to the same
1644          * performance at 2L bytes.
1645          */
1646         blocks = g0s0->n_blocks;
1647
1648         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1649              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1650              RtsFlags.GcFlags.maxHeapSize )
1651         {
1652             long adjusted_blocks;  // signed on purpose
1653             int pc_free;
1654
1655             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1656
1657             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1658                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1659
1660             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1661             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1662             {
1663                 heapOverflow();
1664             }
1665             blocks = adjusted_blocks;
1666         }
1667         else
1668         {
1669             blocks *= RtsFlags.GcFlags.oldGenFactor;
1670             if (blocks < min_nursery)
1671             {
1672                 blocks = min_nursery;
1673             }
1674         }
1675         resizeNurseries(blocks);
1676     }
1677     else  // Generational collector
1678     {
1679         /*
1680          * If the user has given us a suggested heap size, adjust our
1681          * allocation area to make best use of the memory available.
1682          */
1683         if (RtsFlags.GcFlags.heapSizeSuggestion)
1684         {
1685             long blocks;
1686             nat needed = calcNeeded();  // approx blocks needed at next GC
1687
1688             /* Guess how much will be live in generation 0 step 0 next time.
1689              * A good approximation is obtained by finding the
1690              * percentage of g0s0 that was live at the last minor GC.
1691              *
1692              * We have an accurate figure for the amount of copied data in
1693              * 'copied', but we must convert this to a number of blocks, with
1694              * a small adjustment for estimated slop at the end of a block
1695              * (- 10 words).
1696              */
1697             if (N == 0)
1698             {
1699                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1700                     / countNurseryBlocks();
1701             }
1702
1703             /* Estimate a size for the allocation area based on the
1704              * information available.  We might end up going slightly under
1705              * or over the suggested heap size, but we should be pretty
1706              * close on average.
1707              *
1708              * Formula:            suggested - needed
1709              *                ----------------------------
1710              *                    1 + g0s0_pcnt_kept/100
1711              *
1712              * where 'needed' is the amount of memory needed at the next
1713              * collection for collecting all steps except g0s0.
1714              */
1715             blocks =
1716                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1717                 (100 + (long)g0s0_pcnt_kept);
1718
1719             if (blocks < (long)min_nursery) {
1720                 blocks = min_nursery;
1721             }
1722
1723             resizeNurseries((nat)blocks);
1724         }
1725         else
1726         {
1727             // we might have added extra large blocks to the nursery, so
1728             // resize back to minAllocAreaSize again.
1729             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1730         }
1731     }
1732 }
1733
1734 /* -----------------------------------------------------------------------------
1735    Sanity code for CAF garbage collection.
1736
1737    With DEBUG turned on, we manage a CAF list in addition to the SRT
1738    mechanism.  After GC, we run down the CAF list and blackhole any
1739    CAFs which have been garbage collected.  This means we get an error
1740    whenever the program tries to enter a garbage collected CAF.
1741
1742    Any garbage collected CAFs are taken off the CAF list at the same
1743    time.
1744    -------------------------------------------------------------------------- */
1745
1746 #if 0 && defined(DEBUG)
1747
1748 static void
1749 gcCAFs(void)
1750 {
1751   StgClosure*  p;
1752   StgClosure** pp;
1753   const StgInfoTable *info;
1754   nat i;
1755
1756   i = 0;
1757   p = caf_list;
1758   pp = &caf_list;
1759
1760   while (p != NULL) {
1761
1762     info = get_itbl(p);
1763
1764     ASSERT(info->type == IND_STATIC);
1765
1766     if (STATIC_LINK(info,p) == NULL) {
1767         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1768         // black hole it
1769         SET_INFO(p,&stg_BLACKHOLE_info);
1770         p = STATIC_LINK2(info,p);
1771         *pp = p;
1772     }
1773     else {
1774       pp = &STATIC_LINK2(info,p);
1775       p = *pp;
1776       i++;
1777     }
1778
1779   }
1780
1781   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1782 }
1783 #endif