rts/sm/GC.c

   1 /* -----------------------------------------------------------------------------
   2  *
   3  * (c) The GHC Team 1998-2006
   4  *
   5  * Generational garbage collector
   6  *
   7  * Documentation on the architecture of the Garbage Collector can be
   8  * found in the online commentary:
   9  *
  10  *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
  11  *
  12  * ---------------------------------------------------------------------------*/
  13
  14 // #include "PosixSource.h"
  15 #include "Rts.h"
  16 #include "RtsFlags.h"
  17 #include "RtsUtils.h"
  18 #include "Apply.h"
  19 #include "OSThreads.h"
  20 #include "LdvProfile.h"
  21 #include "Updates.h"
  22 #include "Stats.h"
  23 #include "Schedule.h"
  24 #include "Sanity.h"
  25 #include "BlockAlloc.h"
  26 #include "MBlock.h"
  27 #include "ProfHeap.h"
  28 #include "SchedAPI.h"
  29 #include "Weak.h"
  30 #include "Prelude.h"
  31 #include "ParTicky.h"           // ToDo: move into Rts.h
  32 #include "RtsSignals.h"
  33 #include "STM.h"
  34 #include "HsFFI.h"
  35 #include "Linker.h"
  36 #if defined(RTS_GTK_FRONTPANEL)
  37 #include "FrontPanel.h"
  38 #endif
  39 #include "Trace.h"
  40 #include "RetainerProfile.h"
  41 #include "RaiseAsync.h"
  42 #include "Sparks.h"
  43 #include "Papi.h"
  44
  45 #include "GC.h"
  46 #include "Compact.h"
  47 #include "Evac.h"
  48 #include "Scav.h"
  49 #include "GCUtils.h"
  50 #include "MarkWeak.h"
  51 #include "Sparks.h"
  52
  53 #include <string.h> // for memset()
  54 #include <unistd.h>
  55
  56 /* -----------------------------------------------------------------------------
  57    Global variables
  58    -------------------------------------------------------------------------- */
  59
  60 /* STATIC OBJECT LIST.
  61  *
  62  * During GC:
  63  * We maintain a linked list of static objects that are still live.
  64  * The requirements for this list are:
  65  *
  66  *  - we need to scan the list while adding to it, in order to
  67  *    scavenge all the static objects (in the same way that
  68  *    breadth-first scavenging works for dynamic objects).
  69  *
  70  *  - we need to be able to tell whether an object is already on
  71  *    the list, to break loops.
  72  *
  73  * Each static object has a "static link field", which we use for
  74  * linking objects on to the list.  We use a stack-type list, consing
  75  * objects on the front as they are added (this means that the
  76  * scavenge phase is depth-first, not breadth-first, but that
  77  * shouldn't matter).
  78  *
  79  * A separate list is kept for objects that have been scavenged
  80  * already - this is so that we can zero all the marks afterwards.
  81  *
  82  * An object is on the list if its static link field is non-zero; this
  83  * means that we have to mark the end of the list with '1', not NULL.
  84  *
  85  * Extra notes for generational GC:
  86  *
  87  * Each generation has a static object list associated with it.  When
  88  * collecting generations up to N, we treat the static object lists
  89  * from generations > N as roots.
  90  *
  91  * We build up a static object list while collecting generations 0..N,
  92  * which is then appended to the static object list of generation N+1.
  93  */
  94
  95 /* N is the oldest generation being collected, where the generations
  96  * are numbered starting at 0.  A major GC (indicated by the major_gc
  97  * flag) is when we're collecting all generations.  We only attempt to
  98  * deal with static objects and GC CAFs when doing a major GC.
  99  */
 100 nat N;
 101 rtsBool major_gc;
 102
 103 /* Data used for allocation area sizing.
 104  */
 105 static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC
 106
 107 /* Mut-list stats */
 108 #ifdef DEBUG
 109 nat mutlist_MUTVARS,
 110     mutlist_MUTARRS,
 111     mutlist_MVARS,
 112     mutlist_OTHERS;
 113 #endif
 114
 115 /* Thread-local data for each GC thread
 116  */
 117 gc_thread **gc_threads = NULL;
 118 // gc_thread *gct = NULL;  // this thread's gct TODO: make thread-local
 119
 120 // Number of threads running in *this* GC.  Affects how many
 121 // step->todos[] lists we have to look in to find work.
 122 nat n_gc_threads;
 123
 124 // For stats:
 125 long copied;        // *words* copied & scavenged during this GC
 126
 127 #ifdef THREADED_RTS
 128 SpinLock recordMutableGen_sync;
 129 #endif
 130
 131 /* -----------------------------------------------------------------------------
 132    Static function declarations
 133    -------------------------------------------------------------------------- */
 134
 135 static void mark_root               (StgClosure **root);
 136 static void zero_static_object_list (StgClosure* first_static);
 137 static void initialise_N            (rtsBool force_major_gc);
 138 static void alloc_gc_threads        (void);
 139 static void init_collected_gen      (nat g, nat threads);
 140 static void init_uncollected_gen    (nat g, nat threads);
 141 static void init_gc_thread          (gc_thread *t);
 142 static void update_task_list        (void);
 143 static void resize_generations      (void);
 144 static void resize_nursery          (void);
 145 static void start_gc_threads        (void);
 146 static void gc_thread_work          (void);
 147 static nat  inc_running             (void);
 148 static nat  dec_running             (void);
 149 static void wakeup_gc_threads       (nat n_threads);
 150
 151 #if 0 && defined(DEBUG)
 152 static void gcCAFs                  (void);
 153 #endif
 154
 155 /* -----------------------------------------------------------------------------
 156    The mark bitmap & stack.
 157    -------------------------------------------------------------------------- */
 158
 159 #define MARK_STACK_BLOCKS 4
 160
 161 bdescr *mark_stack_bdescr;
 162 StgPtr *mark_stack;
 163 StgPtr *mark_sp;
 164 StgPtr *mark_splim;
 165
 166 // Flag and pointers used for falling back to a linear scan when the
 167 // mark stack overflows.
 168 rtsBool mark_stack_overflowed;
 169 bdescr *oldgen_scan_bd;
 170 StgPtr  oldgen_scan;
 171
 172 /* -----------------------------------------------------------------------------
 173    GarbageCollect: the main entry point to the garbage collector.
 174
 175    Locks held: all capabilities are held throughout GarbageCollect().
 176    -------------------------------------------------------------------------- */
 177
 178 void
 179 GarbageCollect ( rtsBool force_major_gc )
 180 {
 181   bdescr *bd;
 182   step *stp;
 183   lnat live, allocated;
 184   lnat oldgen_saved_blocks = 0;
 185   gc_thread *saved_gct;
 186   nat g, s, t;
 187
 188   // necessary if we stole a callee-saves register for gct:
 189   saved_gct = gct;
 190
 191 #ifdef PROFILING
 192   CostCentreStack *prev_CCS;
 193 #endif
 194
 195   ACQUIRE_SM_LOCK;
 196
 197   debugTrace(DEBUG_gc, "starting GC");
 198
 199 #if defined(RTS_USER_SIGNALS)
 200   if (RtsFlags.MiscFlags.install_signal_handlers) {
 201     // block signals
 202     blockUserSignals();
 203   }
 204 #endif
 205
 206   // tell the stats department that we've started a GC
 207   stat_startGC();
 208
 209   // tell the STM to discard any cached closures it's hoping to re-use
 210   stmPreGCHook();
 211
 212 #ifdef DEBUG
 213   mutlist_MUTVARS = 0;
 214   mutlist_MUTARRS = 0;
 215   mutlist_OTHERS = 0;
 216 #endif
 217
 218   // attribute any costs to CCS_GC
 219 #ifdef PROFILING
 220   prev_CCS = CCCS;
 221   CCCS = CCS_GC;
 222 #endif
 223
 224   /* Approximate how much we allocated.
 225    * Todo: only when generating stats?
 226    */
 227   allocated = calcAllocated();
 228
 229   /* Figure out which generation to collect
 230    */
 231   initialise_N(force_major_gc);
 232
 233   /* Allocate + initialise the gc_thread structures.
 234    */
 235   alloc_gc_threads();
 236
 237   /* Start threads, so they can be spinning up while we finish initialisation.
 238    */
 239   start_gc_threads();
 240
 241   /* How many threads will be participating in this GC?
 242    * We don't try to parallelise minor GC.
 243    */
 244 #if defined(THREADED_RTS)
 245   if (N == 0) {
 246       n_gc_threads = 1;
 247   } else {
 248       n_gc_threads = RtsFlags.ParFlags.gcThreads;
 249   }
 250 #else
 251   n_gc_threads = 1;
 252 #endif
 253
 254 #ifdef RTS_GTK_FRONTPANEL
 255   if (RtsFlags.GcFlags.frontpanel) {
 256       updateFrontPanelBeforeGC(N);
 257   }
 258 #endif
 259
 260 #ifdef DEBUG
 261   // check for memory leaks if DEBUG is on
 262   memInventory(traceClass(DEBUG_gc));
 263 #endif
 264
 265   // check stack sanity *before* GC (ToDo: check all threads)
 266   IF_DEBUG(sanity, checkFreeListSanity());
 267
 268   // Initialise all the generations/steps that we're collecting.
 269   for (g = 0; g <= N; g++) {
 270       init_collected_gen(g,n_gc_threads);
 271   }
 272
 273   // Initialise all the generations/steps that we're *not* collecting.
 274   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
 275       init_uncollected_gen(g,n_gc_threads);
 276   }
 277
 278   /* Allocate a mark stack if we're doing a major collection.
 279    */
 280   if (major_gc) {
 281       mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
 282       mark_stack = (StgPtr *)mark_stack_bdescr->start;
 283       mark_sp    = mark_stack;
 284       mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
 285   } else {
 286       mark_stack_bdescr = NULL;
 287   }
 288
 289   // Initialise all our gc_thread structures
 290   for (t = 0; t < n_gc_threads; t++) {
 291       init_gc_thread(gc_threads[t]);
 292   }
 293
 294   // the main thread is running: this prevents any other threads from
 295   // exiting prematurely, so we can start them now.
 296   inc_running();
 297   wakeup_gc_threads(n_gc_threads);
 298
 299   // this is the main thread
 300   gct = gc_threads[0];
 301
 302   /* -----------------------------------------------------------------------
 303    * follow all the roots that we know about:
 304    *   - mutable lists from each generation > N
 305    * we want to *scavenge* these roots, not evacuate them: they're not
 306    * going to move in this GC.
 307    * Also do them in reverse generation order, for the usual reason:
 308    * namely to reduce the likelihood of spurious old->new pointers.
 309    */
 310   {
 311     for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 312       generations[g].saved_mut_list = generations[g].mut_list;
 313       generations[g].mut_list = allocBlock();
 314         // mut_list always has at least one block.
 315     }
 316     for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
 317       scavenge_mutable_list(&generations[g]);
 318     }
 319   }
 320
 321   // follow roots from the CAF list (used by GHCi)
 322   gct->evac_step = 0;
 323   markCAFs(mark_root);
 324
 325   // follow all the roots that the application knows about.
 326   gct->evac_step = 0;
 327   GetRoots(mark_root);
 328
 329 #if defined(RTS_USER_SIGNALS)
 330   // mark the signal handlers (signals should be already blocked)
 331   markSignalHandlers(mark_root);
 332 #endif
 333
 334   // Mark the weak pointer list, and prepare to detect dead weak pointers.
 335   markWeakPtrList();
 336   initWeakForGC();
 337
 338   // Mark the stable pointer table.
 339   markStablePtrTable(mark_root);
 340
 341   /* -------------------------------------------------------------------------
 342    * Repeatedly scavenge all the areas we know about until there's no
 343    * more scavenging to be done.
 344    */
 345   for (;;)
 346   {
 347       gc_thread_work();
 348       // The other threads are now stopped.  We might recurse back to
 349       // here, but from now on this is the only thread.
 350
 351       // if any blackholes are alive, make the threads that wait on
 352       // them alive too.
 353       if (traverseBlackholeQueue()) {
 354           inc_running();
 355           continue;
 356       }
 357
 358       // must be last...  invariant is that everything is fully
 359       // scavenged at this point.
 360       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
 361           inc_running();
 362           continue;
 363       }
 364
 365       // If we get to here, there's really nothing left to do.
 366       break;
 367   }
 368
 369   // Update pointers from the Task list
 370   update_task_list();
 371
 372   // Now see which stable names are still alive.
 373   gcStablePtrTable();
 374
 375 #ifdef PROFILING
 376   // We call processHeapClosureForDead() on every closure destroyed during
 377   // the current garbage collection, so we invoke LdvCensusForDead().
 378   if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_LDV
 379       || RtsFlags.ProfFlags.bioSelector != NULL)
 380     LdvCensusForDead(N);
 381 #endif
 382
 383   // NO MORE EVACUATION AFTER THIS POINT!
 384   // Finally: compaction of the oldest generation.
 385   if (major_gc && oldest_gen->steps[0].is_compacted) {
 386       // save number of blocks for stats
 387       oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks;
 388       compact();
 389   }
 390
 391   IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
 392
 393   // Two-space collector: free the old to-space.
 394   // g0s0->old_blocks is the old nursery
 395   // g0s0->blocks is to-space from the previous GC
 396   if (RtsFlags.GcFlags.generations == 1) {
 397       if (g0s0->blocks != NULL) {
 398           freeChain(g0s0->blocks);
 399           g0s0->blocks = NULL;
 400       }
 401   }
 402
 403   // For each workspace, in each thread:
 404   //    * clear the BF_EVACUATED flag from each copied block
 405   //    * move the copied blocks to the step
 406   {
 407       gc_thread *thr;
 408       step_workspace *ws;
 409       bdescr *prev;
 410
 411       for (t = 0; t < n_gc_threads; t++) {
 412           thr = gc_threads[t];
 413
 414           // not step 0
 415           for (s = 1; s < total_steps; s++) {
 416               ws = &thr->steps[s];
 417               // Not true?
 418               // ASSERT( ws->scan_bd == ws->todo_bd );
 419               ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
 420
 421               // Push the final block
 422               if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
 423
 424               ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 425
 426               prev = ws->scavd_list;
 427               for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
 428                   bd->flags &= ~BF_EVACUATED;    // now from-space
 429                   prev = bd;
 430               }
 431               prev->link = ws->stp->blocks;
 432               ws->stp->blocks = ws->scavd_list;
 433               ws->stp->n_blocks += ws->n_scavd_blocks;
 434               ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks);
 435           }
 436       }
 437   }
 438
 439   // Two-space collector: swap the semi-spaces around.
 440   // Currently: g0s0->old_blocks is the old nursery
 441   //            g0s0->blocks is to-space from this GC
 442   // We want these the other way around.
 443   if (RtsFlags.GcFlags.generations == 1) {
 444       bdescr *nursery_blocks = g0s0->old_blocks;
 445       nat n_nursery_blocks = g0s0->n_old_blocks;
 446       g0s0->old_blocks = g0s0->blocks;
 447       g0s0->n_old_blocks = g0s0->n_blocks;
 448       g0s0->blocks = nursery_blocks;
 449       g0s0->n_blocks = n_nursery_blocks;
 450   }
 451
 452   /* run through all the generations/steps and tidy up
 453    */
 454   copied = 0;
 455   {
 456       nat i;
 457       for (i=0; i < n_gc_threads; i++) {
 458           if (major_gc) {
 459               trace(TRACE_gc,"thread %d:", i);
 460               trace(TRACE_gc,"   copied           %ld", gc_threads[i]->copied * sizeof(W_));
 461               trace(TRACE_gc,"   any_work         %ld", gc_threads[i]->any_work);
 462               trace(TRACE_gc,"   no_work          %ld", gc_threads[i]->no_work);
 463               trace(TRACE_gc,"   scav_global_work %ld", gc_threads[i]->scav_global_work);
 464               trace(TRACE_gc,"   scav_local_work  %ld", gc_threads[i]->scav_local_work);
 465           }
 466           copied += gc_threads[i]->copied;
 467       }
 468   }
 469
 470   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 471
 472     if (g <= N) {
 473       generations[g].collections++; // for stats
 474     }
 475
 476     // Count the mutable list as bytes "copied" for the purposes of
 477     // stats.  Every mutable list is copied during every GC.
 478     if (g > 0) {
 479         nat mut_list_size = 0;
 480         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
 481             mut_list_size += bd->free - bd->start;
 482         }
 483         copied +=  mut_list_size;
 484
 485         debugTrace(DEBUG_gc,
 486                    "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)",
 487                    (unsigned long)(mut_list_size * sizeof(W_)),
 488                    mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS);
 489     }
 490
 491     for (s = 0; s < generations[g].n_steps; s++) {
 492       bdescr *next;
 493       stp = &generations[g].steps[s];
 494
 495       // for generations we collected...
 496       if (g <= N) {
 497
 498         /* free old memory and shift to-space into from-space for all
 499          * the collected steps (except the allocation area).  These
 500          * freed blocks will probaby be quickly recycled.
 501          */
 502         if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
 503             if (stp->is_compacted)
 504             {
 505                 // for a compacted step, just shift the new to-space
 506                 // onto the front of the now-compacted existing blocks.
 507                 for (bd = stp->blocks; bd != NULL; bd = bd->link) {
 508                     bd->flags &= ~BF_EVACUATED;  // now from-space
 509                 }
 510                 // tack the new blocks on the end of the existing blocks
 511                 if (stp->old_blocks != NULL) {
 512                     for (bd = stp->old_blocks; bd != NULL; bd = next) {
 513                         // NB. this step might not be compacted next
 514                         // time, so reset the BF_COMPACTED flags.
 515                         // They are set before GC if we're going to
 516                         // compact.  (search for BF_COMPACTED above).
 517                         bd->flags &= ~BF_COMPACTED;
 518                         next = bd->link;
 519                         if (next == NULL) {
 520                             bd->link = stp->blocks;
 521                         }
 522                     }
 523                     stp->blocks = stp->old_blocks;
 524                 }
 525                 // add the new blocks to the block tally
 526                 stp->n_blocks += stp->n_old_blocks;
 527                 ASSERT(countBlocks(stp->blocks) == stp->n_blocks);
 528             }
 529             else // not copacted
 530             {
 531                 freeChain(stp->old_blocks);
 532             }
 533             stp->old_blocks = NULL;
 534             stp->n_old_blocks = 0;
 535         }
 536
 537         /* LARGE OBJECTS.  The current live large objects are chained on
 538          * scavenged_large, having been moved during garbage
 539          * collection from large_objects.  Any objects left on
 540          * large_objects list are therefore dead, so we free them here.
 541          */
 542         for (bd = stp->large_objects; bd != NULL; bd = next) {
 543           next = bd->link;
 544           freeGroup(bd);
 545           bd = next;
 546         }
 547
 548         // update the count of blocks used by large objects
 549         for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) {
 550           bd->flags &= ~BF_EVACUATED;
 551         }
 552         stp->large_objects  = stp->scavenged_large_objects;
 553         stp->n_large_blocks = stp->n_scavenged_large_blocks;
 554
 555       }
 556       else // for older generations...
 557       {
 558         /* For older generations, we need to append the
 559          * scavenged_large_object list (i.e. large objects that have been
 560          * promoted during this GC) to the large_object list for that step.
 561          */
 562         for (bd = stp->scavenged_large_objects; bd; bd = next) {
 563           next = bd->link;
 564           bd->flags &= ~BF_EVACUATED;
 565           dbl_link_onto(bd, &stp->large_objects);
 566         }
 567
 568         // add the new blocks we promoted during this GC
 569         stp->n_large_blocks += stp->n_scavenged_large_blocks;
 570       }
 571     }
 572   }
 573
 574   // update the max size of older generations after a major GC
 575   resize_generations();
 576
 577   // Guess the amount of live data for stats.
 578   live = calcLiveBlocks() * BLOCK_SIZE_W;
 579   debugTrace(DEBUG_gc, "Slop: %ldKB",
 580              (live - calcLiveWords()) / (1024/sizeof(W_)));
 581
 582   // Free the small objects allocated via allocate(), since this will
 583   // all have been copied into G0S1 now.
 584   if (RtsFlags.GcFlags.generations > 1) {
 585       if (g0s0->blocks != NULL) {
 586           freeChain(g0s0->blocks);
 587           g0s0->blocks = NULL;
 588       }
 589       g0s0->n_blocks = 0;
 590   }
 591   alloc_blocks = 0;
 592   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 593
 594   // Start a new pinned_object_block
 595   pinned_object_block = NULL;
 596
 597   // Free the mark stack.
 598   if (mark_stack_bdescr != NULL) {
 599       freeGroup(mark_stack_bdescr);
 600   }
 601
 602   // Free any bitmaps.
 603   for (g = 0; g <= N; g++) {
 604       for (s = 0; s < generations[g].n_steps; s++) {
 605           stp = &generations[g].steps[s];
 606           if (stp->bitmap != NULL) {
 607               freeGroup(stp->bitmap);
 608               stp->bitmap = NULL;
 609           }
 610       }
 611   }
 612
 613   resize_nursery();
 614
 615  // mark the garbage collected CAFs as dead
 616 #if 0 && defined(DEBUG) // doesn't work at the moment
 617   if (major_gc) { gcCAFs(); }
 618 #endif
 619
 620 #ifdef PROFILING
 621   // resetStaticObjectForRetainerProfiling() must be called before
 622   // zeroing below.
 623   if (n_gc_threads > 1) {
 624       barf("profiling is currently broken with multi-threaded GC");
 625       // ToDo: fix the gct->scavenged_static_objects below
 626   }
 627   resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects);
 628 #endif
 629
 630   // zero the scavenged static object list
 631   if (major_gc) {
 632       nat i;
 633       for (i = 0; i < n_gc_threads; i++) {
 634           zero_static_object_list(gc_threads[i]->scavenged_static_objects);
 635       }
 636   }
 637
 638   // Reset the nursery
 639   resetNurseries();
 640
 641   // start any pending finalizers
 642   RELEASE_SM_LOCK;
 643   scheduleFinalizers(last_free_capability, old_weak_ptr_list);
 644   ACQUIRE_SM_LOCK;
 645
 646   // send exceptions to any threads which were about to die
 647   RELEASE_SM_LOCK;
 648   resurrectThreads(resurrected_threads);
 649   ACQUIRE_SM_LOCK;
 650
 651   // Update the stable pointer hash table.
 652   updateStablePtrTable(major_gc);
 653
 654   // check sanity after GC
 655   IF_DEBUG(sanity, checkSanity());
 656
 657   // extra GC trace info
 658   if (traceClass(TRACE_gc)) statDescribeGens();
 659
 660 #ifdef DEBUG
 661   // symbol-table based profiling
 662   /*  heapCensus(to_blocks); */ /* ToDo */
 663 #endif
 664
 665   // restore enclosing cost centre
 666 #ifdef PROFILING
 667   CCCS = prev_CCS;
 668 #endif
 669
 670 #ifdef DEBUG
 671   // check for memory leaks if DEBUG is on
 672   memInventory(traceClass(DEBUG_gc));
 673 #endif
 674
 675 #ifdef RTS_GTK_FRONTPANEL
 676   if (RtsFlags.GcFlags.frontpanel) {
 677       updateFrontPanelAfterGC( N, live );
 678   }
 679 #endif
 680
 681   // ok, GC over: tell the stats department what happened.
 682   stat_endGC(allocated, live, copied, N);
 683
 684 #if defined(RTS_USER_SIGNALS)
 685   if (RtsFlags.MiscFlags.install_signal_handlers) {
 686     // unblock signals again
 687     unblockUserSignals();
 688   }
 689 #endif
 690
 691   RELEASE_SM_LOCK;
 692
 693   gct = saved_gct;
 694 }
 695
 696 /* -----------------------------------------------------------------------------
 697  * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an
 698  * implicit slide i.e. after marking all sparks are at the beginning of the
 699  * spark pool and the spark pool only contains sparkable closures
 700  * -------------------------------------------------------------------------- */
 701
 702 #ifdef THREADED_RTS
 703 static void
 704 markSparkQueue (evac_fn evac, Capability *cap)
 705 {
 706     StgClosure **sparkp, **to_sparkp;
 707     nat n, pruned_sparks; // stats only
 708     StgSparkPool *pool;
 709
 710     PAR_TICKY_MARK_SPARK_QUEUE_START();
 711
 712     n = 0;
 713     pruned_sparks = 0;
 714
 715     pool = &(cap->r.rSparks);
 716
 717     ASSERT_SPARK_POOL_INVARIANTS(pool);
 718
 719 #if defined(PARALLEL_HASKELL)
 720     // stats only
 721     n = 0;
 722     pruned_sparks = 0;
 723 #endif
 724
 725     sparkp = pool->hd;
 726     to_sparkp = pool->hd;
 727     while (sparkp != pool->tl) {
 728         ASSERT(*sparkp!=NULL);
 729         ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp)));
 730         // ToDo?: statistics gathering here (also for GUM!)
 731         if (closure_SHOULD_SPARK(*sparkp)) {
 732             evac(sparkp);
 733             *to_sparkp++ = *sparkp;
 734             if (to_sparkp == pool->lim) {
 735                 to_sparkp = pool->base;
 736             }
 737             n++;
 738         } else {
 739             pruned_sparks++;
 740         }
 741         sparkp++;
 742         if (sparkp == pool->lim) {
 743             sparkp = pool->base;
 744         }
 745     }
 746     pool->tl = to_sparkp;
 747
 748     PAR_TICKY_MARK_SPARK_QUEUE_END(n);
 749
 750 #if defined(PARALLEL_HASKELL)
 751     debugTrace(DEBUG_sched,
 752                "marked %d sparks and pruned %d sparks on [%x]",
 753                n, pruned_sparks, mytid);
 754 #else
 755     debugTrace(DEBUG_sched,
 756                "marked %d sparks and pruned %d sparks",
 757                n, pruned_sparks);
 758 #endif
 759
 760     debugTrace(DEBUG_sched,
 761                "new spark queue len=%d; (hd=%p; tl=%p)\n",
 762                sparkPoolSize(pool), pool->hd, pool->tl);
 763 }
 764 #endif
 765
 766 /* ---------------------------------------------------------------------------
 767    Where are the roots that we know about?
 768
 769         - all the threads on the runnable queue
 770         - all the threads on the blocked queue
 771         - all the threads on the sleeping queue
 772         - all the thread currently executing a _ccall_GC
 773         - all the "main threads"
 774
 775    ------------------------------------------------------------------------ */
 776
 777 void
 778 GetRoots( evac_fn evac )
 779 {
 780     nat i;
 781     Capability *cap;
 782     Task *task;
 783
 784     // Each GC thread is responsible for following roots from the
 785     // Capability of the same number.  There will usually be the same
 786     // or fewer Capabilities as GC threads, but just in case there
 787     // are more, we mark every Capability whose number is the GC
 788     // thread's index plus a multiple of the number of GC threads.
 789     for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) {
 790         cap = &capabilities[i];
 791         evac((StgClosure **)(void *)&cap->run_queue_hd);
 792         evac((StgClosure **)(void *)&cap->run_queue_tl);
 793 #if defined(THREADED_RTS)
 794         evac((StgClosure **)(void *)&cap->wakeup_queue_hd);
 795         evac((StgClosure **)(void *)&cap->wakeup_queue_tl);
 796 #endif
 797         for (task = cap->suspended_ccalling_tasks; task != NULL;
 798              task=task->next) {
 799             debugTrace(DEBUG_sched,
 800                        "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id);
 801             evac((StgClosure **)(void *)&task->suspended_tso);
 802         }
 803
 804 #if defined(THREADED_RTS)
 805         markSparkQueue(evac,cap);
 806 #endif
 807     }
 808
 809 #if !defined(THREADED_RTS)
 810     evac((StgClosure **)(void *)&blocked_queue_hd);
 811     evac((StgClosure **)(void *)&blocked_queue_tl);
 812     evac((StgClosure **)(void *)&sleeping_queue);
 813 #endif
 814 }
 815
 816 /* -----------------------------------------------------------------------------
 817    isAlive determines whether the given closure is still alive (after
 818    a garbage collection) or not.  It returns the new address of the
 819    closure if it is alive, or NULL otherwise.
 820
 821    NOTE: Use it before compaction only!
 822          It untags and (if needed) retags pointers to closures.
 823    -------------------------------------------------------------------------- */
 824
 825
 826 StgClosure *
 827 isAlive(StgClosure *p)
 828 {
 829   const StgInfoTable *info;
 830   bdescr *bd;
 831   StgWord tag;
 832   StgClosure *q;
 833
 834   while (1) {
 835     /* The tag and the pointer are split, to be merged later when needed. */
 836     tag = GET_CLOSURE_TAG(p);
 837     q = UNTAG_CLOSURE(p);
 838
 839     ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
 840     info = get_itbl(q);
 841
 842     // ignore static closures
 843     //
 844     // ToDo: for static closures, check the static link field.
 845     // Problem here is that we sometimes don't set the link field, eg.
 846     // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.
 847     //
 848     if (!HEAP_ALLOCED(q)) {
 849         return p;
 850     }
 851
 852     // ignore closures in generations that we're not collecting.
 853     bd = Bdescr((P_)q);
 854     if (bd->gen_no > N) {
 855         return p;
 856     }
 857
 858     // if it's a pointer into to-space, then we're done
 859     if (bd->flags & BF_EVACUATED) {
 860         return p;
 861     }
 862
 863     // large objects use the evacuated flag
 864     if (bd->flags & BF_LARGE) {
 865         return NULL;
 866     }
 867
 868     // check the mark bit for compacted steps
 869     if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) {
 870         return p;
 871     }
 872
 873     switch (info->type) {
 874
 875     case IND:
 876     case IND_STATIC:
 877     case IND_PERM:
 878     case IND_OLDGEN:            // rely on compatible layout with StgInd
 879     case IND_OLDGEN_PERM:
 880       // follow indirections
 881       p = ((StgInd *)q)->indirectee;
 882       continue;
 883
 884     case EVACUATED:
 885       // alive!
 886       return ((StgEvacuated *)q)->evacuee;
 887
 888     case TSO:
 889       if (((StgTSO *)q)->what_next == ThreadRelocated) {
 890         p = (StgClosure *)((StgTSO *)q)->link;
 891         continue;
 892       }
 893       return NULL;
 894
 895     default:
 896       // dead.
 897       return NULL;
 898     }
 899   }
 900 }
 901
 902 /* -----------------------------------------------------------------------------
 903    Figure out which generation to collect, initialise N and major_gc.
 904    -------------------------------------------------------------------------- */
 905
 906 static void
 907 initialise_N (rtsBool force_major_gc)
 908 {
 909     nat g;
 910
 911     if (force_major_gc) {
 912         N = RtsFlags.GcFlags.generations - 1;
 913         major_gc = rtsTrue;
 914     } else {
 915         N = 0;
 916         for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
 917             if (generations[g].steps[0].n_blocks +
 918                 generations[g].steps[0].n_large_blocks
 919                 >= generations[g].max_blocks) {
 920                 N = g;
 921             }
 922         }
 923         major_gc = (N == RtsFlags.GcFlags.generations-1);
 924     }
 925 }
 926
 927 /* -----------------------------------------------------------------------------
 928    Initialise the gc_thread structures.
 929    -------------------------------------------------------------------------- */
 930
 931 static gc_thread *
 932 alloc_gc_thread (int n)
 933 {
 934     nat s;
 935     step_workspace *ws;
 936     gc_thread *t;
 937
 938     t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
 939                        "alloc_gc_thread");
 940
 941 #ifdef THREADED_RTS
 942     t->id = 0;
 943     initCondition(&t->wake_cond);
 944     initMutex(&t->wake_mutex);
 945     t->wakeup = rtsFalse;
 946     t->exit   = rtsFalse;
 947 #endif
 948
 949     t->thread_index = n;
 950     t->free_blocks = NULL;
 951     t->gc_count = 0;
 952
 953     init_gc_thread(t);
 954
 955 #ifdef USE_PAPI
 956     t->papi_events = -1;
 957 #endif
 958
 959     for (s = 0; s < total_steps; s++)
 960     {
 961         ws = &t->steps[s];
 962         ws->stp = &all_steps[s];
 963         ASSERT(s == ws->stp->abs_no);
 964         ws->gct = t;
 965
 966         ws->scan_bd = NULL;
 967         ws->scan = NULL;
 968
 969         ws->todo_bd = NULL;
 970         ws->buffer_todo_bd = NULL;
 971
 972         ws->scavd_list = NULL;
 973         ws->n_scavd_blocks = 0;
 974     }
 975
 976     return t;
 977 }
 978
 979
 980 static void
 981 alloc_gc_threads (void)
 982 {
 983     if (gc_threads == NULL) {
 984 #if defined(THREADED_RTS)
 985         nat i;
 986         gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
 987                                      sizeof(gc_thread*),
 988                                      "alloc_gc_threads");
 989
 990         for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
 991             gc_threads[i] = alloc_gc_thread(i);
 992         }
 993 #else
 994         gc_threads = stgMallocBytes (sizeof(gc_thread*),
 995                                      "alloc_gc_threads");
 996
 997         gc_threads[0] = alloc_gc_thread(0);
 998 #endif
 999     }
1000 }
1001
1002 /* ----------------------------------------------------------------------------
1003    Start GC threads
1004    ------------------------------------------------------------------------- */
1005
1006 static nat gc_running_threads;
1007
1008 #if defined(THREADED_RTS)
1009 static Mutex gc_running_mutex;
1010 #endif
1011
1012 static nat
1013 inc_running (void)
1014 {
1015     nat n_running;
1016     ACQUIRE_LOCK(&gc_running_mutex);
1017     n_running = ++gc_running_threads;
1018     RELEASE_LOCK(&gc_running_mutex);
1019     return n_running;
1020 }
1021
1022 static nat
1023 dec_running (void)
1024 {
1025     nat n_running;
1026     ACQUIRE_LOCK(&gc_running_mutex);
1027     n_running = --gc_running_threads;
1028     RELEASE_LOCK(&gc_running_mutex);
1029     return n_running;
1030 }
1031
1032 //
1033 // gc_thread_work(): Scavenge until there's no work left to do and all
1034 // the running threads are idle.
1035 //
1036 static void
1037 gc_thread_work (void)
1038 {
1039     nat r;
1040
1041     debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
1042
1043     // gc_running_threads has already been incremented for us; either
1044     // this is the main thread and we incremented it inside
1045     // GarbageCollect(), or this is a worker thread and the main
1046     // thread bumped gc_running_threads before waking us up.
1047
1048     // Every thread evacuates some roots.
1049     gct->evac_step = 0;
1050     GetRoots(mark_root);
1051
1052 loop:
1053     scavenge_loop();
1054     // scavenge_loop() only exits when there's no work to do
1055     r = dec_running();
1056
1057     debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
1058                gct->thread_index, r);
1059
1060     while (gc_running_threads != 0) {
1061         usleep(1);
1062         if (any_work()) {
1063             inc_running();
1064             goto loop;
1065         }
1066         // any_work() does not remove the work from the queue, it
1067         // just checks for the presence of work.  If we find any,
1068         // then we increment gc_running_threads and go back to
1069         // scavenge_loop() to perform any pending work.
1070     }
1071
1072     // All threads are now stopped
1073     debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
1074 }
1075
1076
1077 #if defined(THREADED_RTS)
1078 static void
1079 gc_thread_mainloop (void)
1080 {
1081     while (!gct->exit) {
1082
1083         // Wait until we're told to wake up
1084         ACQUIRE_LOCK(&gct->wake_mutex);
1085         while (!gct->wakeup) {
1086             debugTrace(DEBUG_gc, "GC thread %d standing by...",
1087                        gct->thread_index);
1088             waitCondition(&gct->wake_cond, &gct->wake_mutex);
1089         }
1090         RELEASE_LOCK(&gct->wake_mutex);
1091         gct->wakeup = rtsFalse;
1092         if (gct->exit) break;
1093
1094 #ifdef USE_PAPI
1095         // start performance counters in this thread...
1096         if (gct->papi_events == -1) {
1097             papi_init_eventset(&gct->papi_events);
1098         }
1099         papi_thread_start_gc1_count(gct->papi_events);
1100 #endif
1101
1102         gc_thread_work();
1103
1104 #ifdef USE_PAPI
1105         // count events in this thread towards the GC totals
1106         papi_thread_stop_gc1_count(gct->papi_events);
1107 #endif
1108     }
1109 }
1110 #endif
1111
1112 #if defined(THREADED_RTS)
1113 static void
1114 gc_thread_entry (gc_thread *my_gct)
1115 {
1116     gct = my_gct;
1117     debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
1118     gct->id = osThreadId();
1119     gc_thread_mainloop();
1120 }
1121 #endif
1122
1123 static void
1124 start_gc_threads (void)
1125 {
1126 #if defined(THREADED_RTS)
1127     nat i;
1128     OSThreadId id;
1129     static rtsBool done = rtsFalse;
1130
1131     gc_running_threads = 0;
1132     initMutex(&gc_running_mutex);
1133
1134     if (!done) {
1135         // Start from 1: the main thread is 0
1136         for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
1137             createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
1138                            gc_threads[i]);
1139         }
1140         done = rtsTrue;
1141     }
1142 #endif
1143 }
1144
1145 static void
1146 wakeup_gc_threads (nat n_threads USED_IF_THREADS)
1147 {
1148 #if defined(THREADED_RTS)
1149     nat i;
1150     for (i=1; i < n_threads; i++) {
1151         inc_running();
1152         ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
1153         gc_threads[i]->wakeup = rtsTrue;
1154         signalCondition(&gc_threads[i]->wake_cond);
1155         RELEASE_LOCK(&gc_threads[i]->wake_mutex);
1156     }
1157 #endif
1158 }
1159
1160 /* ----------------------------------------------------------------------------
1161    Initialise a generation that is to be collected
1162    ------------------------------------------------------------------------- */
1163
1164 static void
1165 init_collected_gen (nat g, nat n_threads)
1166 {
1167     nat s, t, i;
1168     step_workspace *ws;
1169     step *stp;
1170     bdescr *bd;
1171
1172     // Throw away the current mutable list.  Invariant: the mutable
1173     // list always has at least one block; this means we can avoid a
1174     // check for NULL in recordMutable().
1175     if (g != 0) {
1176         freeChain(generations[g].mut_list);
1177         generations[g].mut_list = allocBlock();
1178         for (i = 0; i < n_capabilities; i++) {
1179             freeChain(capabilities[i].mut_lists[g]);
1180             capabilities[i].mut_lists[g] = allocBlock();
1181         }
1182     }
1183
1184     for (s = 0; s < generations[g].n_steps; s++) {
1185
1186         // generation 0, step 0 doesn't need to-space
1187         if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
1188             continue;
1189         }
1190
1191         stp = &generations[g].steps[s];
1192         ASSERT(stp->gen_no == g);
1193
1194         // deprecate the existing blocks
1195         stp->old_blocks   = stp->blocks;
1196         stp->n_old_blocks = stp->n_blocks;
1197         stp->blocks       = NULL;
1198         stp->n_blocks     = 0;
1199
1200         // we don't have any to-be-scavenged blocks yet
1201         stp->todos = NULL;
1202         stp->todos_last = NULL;
1203         stp->n_todos = 0;
1204
1205         // initialise the large object queues.
1206         stp->scavenged_large_objects = NULL;
1207         stp->n_scavenged_large_blocks = 0;
1208
1209         // mark the large objects as not evacuated yet
1210         for (bd = stp->large_objects; bd; bd = bd->link) {
1211             bd->flags &= ~BF_EVACUATED;
1212         }
1213
1214         // for a compacted step, we need to allocate the bitmap
1215         if (stp->is_compacted) {
1216             nat bitmap_size; // in bytes
1217             bdescr *bitmap_bdescr;
1218             StgWord *bitmap;
1219
1220             bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
1221
1222             if (bitmap_size > 0) {
1223                 bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
1224                                            / BLOCK_SIZE);
1225                 stp->bitmap = bitmap_bdescr;
1226                 bitmap = bitmap_bdescr->start;
1227
1228                 debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
1229                            bitmap_size, bitmap);
1230
1231                 // don't forget to fill it with zeros!
1232                 memset(bitmap, 0, bitmap_size);
1233
1234                 // For each block in this step, point to its bitmap from the
1235                 // block descriptor.
1236                 for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
1237                     bd->u.bitmap = bitmap;
1238                     bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
1239
1240                     // Also at this point we set the BF_COMPACTED flag
1241                     // for this block.  The invariant is that
1242                     // BF_COMPACTED is always unset, except during GC
1243                     // when it is set on those blocks which will be
1244                     // compacted.
1245                     bd->flags |= BF_COMPACTED;
1246                 }
1247             }
1248         }
1249     }
1250
1251     // For each GC thread, for each step, allocate a "todo" block to
1252     // store evacuated objects to be scavenged, and a block to store
1253     // evacuated objects that do not need to be scavenged.
1254     for (t = 0; t < n_threads; t++) {
1255         for (s = 0; s < generations[g].n_steps; s++) {
1256
1257             // we don't copy objects into g0s0, unless -G0
1258             if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
1259
1260             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1261
1262             ws->scan_bd = NULL;
1263             ws->scan = NULL;
1264
1265             ws->todo_large_objects = NULL;
1266
1267             // allocate the first to-space block; extra blocks will be
1268             // chained on as necessary.
1269             ws->todo_bd = NULL;
1270             ws->buffer_todo_bd = NULL;
1271             gc_alloc_todo_block(ws);
1272
1273             ws->scavd_list = NULL;
1274             ws->n_scavd_blocks = 0;
1275         }
1276     }
1277 }
1278
1279
1280 /* ----------------------------------------------------------------------------
1281    Initialise a generation that is *not* to be collected
1282    ------------------------------------------------------------------------- */
1283
1284 static void
1285 init_uncollected_gen (nat g, nat threads)
1286 {
1287     nat s, t, i;
1288     step_workspace *ws;
1289     step *stp;
1290     bdescr *bd;
1291
1292     for (s = 0; s < generations[g].n_steps; s++) {
1293         stp = &generations[g].steps[s];
1294         stp->scavenged_large_objects = NULL;
1295         stp->n_scavenged_large_blocks = 0;
1296     }
1297
1298     for (t = 0; t < threads; t++) {
1299         for (s = 0; s < generations[g].n_steps; s++) {
1300
1301             ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
1302             stp = ws->stp;
1303
1304             ws->buffer_todo_bd = NULL;
1305             ws->todo_large_objects = NULL;
1306
1307             ws->scavd_list = NULL;
1308             ws->n_scavd_blocks = 0;
1309
1310             // If the block at the head of the list in this generation
1311             // is less than 3/4 full, then use it as a todo block.
1312             if (stp->blocks && isPartiallyFull(stp->blocks))
1313             {
1314                 ws->todo_bd = stp->blocks;
1315                 ws->todo_free = ws->todo_bd->free;
1316                 ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
1317                 stp->blocks = stp->blocks->link;
1318                 stp->n_blocks -= 1;
1319                 ws->todo_bd->link = NULL;
1320
1321                 // this block is also the scan block; we must scan
1322                 // from the current end point.
1323                 ws->scan_bd = ws->todo_bd;
1324                 ws->scan = ws->scan_bd->free;
1325
1326                 // subtract the contents of this block from the stats,
1327                 // because we'll count the whole block later.
1328                 copied -= ws->scan_bd->free - ws->scan_bd->start;
1329             }
1330             else
1331             {
1332                 ws->scan_bd = NULL;
1333                 ws->scan = NULL;
1334                 ws->todo_bd = NULL;
1335                 gc_alloc_todo_block(ws);
1336             }
1337         }
1338     }
1339
1340     // Move the private mutable lists from each capability onto the
1341     // main mutable list for the generation.
1342     for (i = 0; i < n_capabilities; i++) {
1343         for (bd = capabilities[i].mut_lists[g];
1344              bd->link != NULL; bd = bd->link) {
1345             /* nothing */
1346         }
1347         bd->link = generations[g].mut_list;
1348         generations[g].mut_list = capabilities[i].mut_lists[g];
1349         capabilities[i].mut_lists[g] = allocBlock();
1350     }
1351 }
1352
1353 /* -----------------------------------------------------------------------------
1354    Initialise a gc_thread before GC
1355    -------------------------------------------------------------------------- */
1356
1357 static void
1358 init_gc_thread (gc_thread *t)
1359 {
1360     t->static_objects = END_OF_STATIC_LIST;
1361     t->scavenged_static_objects = END_OF_STATIC_LIST;
1362     t->evac_step = 0;
1363     t->failed_to_evac = rtsFalse;
1364     t->eager_promotion = rtsTrue;
1365     t->thunk_selector_depth = 0;
1366     t->copied = 0;
1367     t->any_work = 0;
1368     t->no_work = 0;
1369     t->scav_global_work = 0;
1370     t->scav_local_work = 0;
1371
1372 }
1373
1374 /* -----------------------------------------------------------------------------
1375    Function we pass to GetRoots to evacuate roots.
1376    -------------------------------------------------------------------------- */
1377
1378 static void
1379 mark_root(StgClosure **root)
1380 {
1381   evacuate(root);
1382 }
1383
1384 /* -----------------------------------------------------------------------------
1385    Initialising the static object & mutable lists
1386    -------------------------------------------------------------------------- */
1387
1388 static void
1389 zero_static_object_list(StgClosure* first_static)
1390 {
1391   StgClosure* p;
1392   StgClosure* link;
1393   const StgInfoTable *info;
1394
1395   for (p = first_static; p != END_OF_STATIC_LIST; p = link) {
1396     info = get_itbl(p);
1397     link = *STATIC_LINK(info, p);
1398     *STATIC_LINK(info,p) = NULL;
1399   }
1400 }
1401
1402 /* -----------------------------------------------------------------------------
1403    Reverting CAFs
1404    -------------------------------------------------------------------------- */
1405
1406 void
1407 revertCAFs( void )
1408 {
1409     StgIndStatic *c;
1410
1411     for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
1412          c = (StgIndStatic *)c->static_link)
1413     {
1414         SET_INFO(c, c->saved_info);
1415         c->saved_info = NULL;
1416         // could, but not necessary: c->static_link = NULL;
1417     }
1418     revertible_caf_list = NULL;
1419 }
1420
1421 void
1422 markCAFs( evac_fn evac )
1423 {
1424     StgIndStatic *c;
1425
1426     for (c = (StgIndStatic *)caf_list; c != NULL;
1427          c = (StgIndStatic *)c->static_link)
1428     {
1429         evac(&c->indirectee);
1430     }
1431     for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
1432          c = (StgIndStatic *)c->static_link)
1433     {
1434         evac(&c->indirectee);
1435     }
1436 }
1437
1438 /* ----------------------------------------------------------------------------
1439    Update the pointers from the task list
1440
1441    These are treated as weak pointers because we want to allow a main
1442    thread to get a BlockedOnDeadMVar exception in the same way as any
1443    other thread.  Note that the threads should all have been retained
1444    by GC by virtue of being on the all_threads list, we're just
1445    updating pointers here.
1446    ------------------------------------------------------------------------- */
1447
1448 static void
1449 update_task_list (void)
1450 {
1451     Task *task;
1452     StgTSO *tso;
1453     for (task = all_tasks; task != NULL; task = task->all_link) {
1454         if (!task->stopped && task->tso) {
1455             ASSERT(task->tso->bound == task);
1456             tso = (StgTSO *) isAlive((StgClosure *)task->tso);
1457             if (tso == NULL) {
1458                 barf("task %p: main thread %d has been GC'd",
1459 #ifdef THREADED_RTS
1460                      (void *)task->id,
1461 #else
1462                      (void *)task,
1463 #endif
1464                      task->tso->id);
1465             }
1466             task->tso = tso;
1467         }
1468     }
1469 }
1470
1471 /* ----------------------------------------------------------------------------
1472    Reset the sizes of the older generations when we do a major
1473    collection.
1474
1475    CURRENT STRATEGY: make all generations except zero the same size.
1476    We have to stay within the maximum heap size, and leave a certain
1477    percentage of the maximum heap size available to allocate into.
1478    ------------------------------------------------------------------------- */
1479
1480 static void
1481 resize_generations (void)
1482 {
1483     nat g;
1484
1485     if (major_gc && RtsFlags.GcFlags.generations > 1) {
1486         nat live, size, min_alloc;
1487         nat max  = RtsFlags.GcFlags.maxHeapSize;
1488         nat gens = RtsFlags.GcFlags.generations;
1489
1490         // live in the oldest generations
1491         live = oldest_gen->steps[0].n_blocks +
1492             oldest_gen->steps[0].n_large_blocks;
1493
1494         // default max size for all generations except zero
1495         size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
1496                        RtsFlags.GcFlags.minOldGenSize);
1497
1498         // minimum size for generation zero
1499         min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
1500                             RtsFlags.GcFlags.minAllocAreaSize);
1501
1502         // Auto-enable compaction when the residency reaches a
1503         // certain percentage of the maximum heap size (default: 30%).
1504         if (RtsFlags.GcFlags.generations > 1 &&
1505             (RtsFlags.GcFlags.compact ||
1506              (max > 0 &&
1507               oldest_gen->steps[0].n_blocks >
1508               (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
1509             oldest_gen->steps[0].is_compacted = 1;
1510 //        debugBelch("compaction: on\n", live);
1511         } else {
1512             oldest_gen->steps[0].is_compacted = 0;
1513 //        debugBelch("compaction: off\n", live);
1514         }
1515
1516         // if we're going to go over the maximum heap size, reduce the
1517         // size of the generations accordingly.  The calculation is
1518         // different if compaction is turned on, because we don't need
1519         // to double the space required to collect the old generation.
1520         if (max != 0) {
1521
1522             // this test is necessary to ensure that the calculations
1523             // below don't have any negative results - we're working
1524             // with unsigned values here.
1525             if (max < min_alloc) {
1526                 heapOverflow();
1527             }
1528
1529             if (oldest_gen->steps[0].is_compacted) {
1530                 if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
1531                     size = (max - min_alloc) / ((gens - 1) * 2 - 1);
1532                 }
1533             } else {
1534                 if ( (size * (gens - 1) * 2) + min_alloc > max ) {
1535                     size = (max - min_alloc) / ((gens - 1) * 2);
1536                 }
1537             }
1538
1539             if (size < live) {
1540                 heapOverflow();
1541             }
1542         }
1543
1544 #if 0
1545         debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live,
1546                    min_alloc, size, max);
1547 #endif
1548
1549         for (g = 0; g < gens; g++) {
1550             generations[g].max_blocks = size;
1551         }
1552     }
1553 }
1554
1555 /* -----------------------------------------------------------------------------
1556    Calculate the new size of the nursery, and resize it.
1557    -------------------------------------------------------------------------- */
1558
1559 static void
1560 resize_nursery (void)
1561 {
1562     if (RtsFlags.GcFlags.generations == 1)
1563     {   // Two-space collector:
1564         nat blocks;
1565
1566         /* set up a new nursery.  Allocate a nursery size based on a
1567          * function of the amount of live data (by default a factor of 2)
1568          * Use the blocks from the old nursery if possible, freeing up any
1569          * left over blocks.
1570          *
1571          * If we get near the maximum heap size, then adjust our nursery
1572          * size accordingly.  If the nursery is the same size as the live
1573          * data (L), then we need 3L bytes.  We can reduce the size of the
1574          * nursery to bring the required memory down near 2L bytes.
1575          *
1576          * A normal 2-space collector would need 4L bytes to give the same
1577          * performance we get from 3L bytes, reducing to the same
1578          * performance at 2L bytes.
1579          */
1580         blocks = g0s0->n_old_blocks;
1581
1582         if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
1583              blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
1584              RtsFlags.GcFlags.maxHeapSize )
1585         {
1586             long adjusted_blocks;  // signed on purpose
1587             int pc_free;
1588
1589             adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks);
1590
1591             debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld",
1592                        RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks);
1593
1594             pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;
1595             if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */
1596             {
1597                 heapOverflow();
1598             }
1599             blocks = adjusted_blocks;
1600         }
1601         else
1602         {
1603             blocks *= RtsFlags.GcFlags.oldGenFactor;
1604             if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
1605             {
1606                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1607             }
1608         }
1609         resizeNurseries(blocks);
1610     }
1611     else  // Generational collector
1612     {
1613         /*
1614          * If the user has given us a suggested heap size, adjust our
1615          * allocation area to make best use of the memory available.
1616          */
1617         if (RtsFlags.GcFlags.heapSizeSuggestion)
1618         {
1619             long blocks;
1620             nat needed = calcNeeded();  // approx blocks needed at next GC
1621
1622             /* Guess how much will be live in generation 0 step 0 next time.
1623              * A good approximation is obtained by finding the
1624              * percentage of g0s0 that was live at the last minor GC.
1625              *
1626              * We have an accurate figure for the amount of copied data in
1627              * 'copied', but we must convert this to a number of blocks, with
1628              * a small adjustment for estimated slop at the end of a block
1629              * (- 10 words).
1630              */
1631             if (N == 0)
1632             {
1633                 g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100)
1634                     / countNurseryBlocks();
1635             }
1636
1637             /* Estimate a size for the allocation area based on the
1638              * information available.  We might end up going slightly under
1639              * or over the suggested heap size, but we should be pretty
1640              * close on average.
1641              *
1642              * Formula:            suggested - needed
1643              *                ----------------------------
1644              *                    1 + g0s0_pcnt_kept/100
1645              *
1646              * where 'needed' is the amount of memory needed at the next
1647              * collection for collecting all steps except g0s0.
1648              */
1649             blocks =
1650                 (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
1651                 (100 + (long)g0s0_pcnt_kept);
1652
1653             if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
1654                 blocks = RtsFlags.GcFlags.minAllocAreaSize;
1655             }
1656
1657             resizeNurseries((nat)blocks);
1658         }
1659         else
1660         {
1661             // we might have added extra large blocks to the nursery, so
1662             // resize back to minAllocAreaSize again.
1663             resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize);
1664         }
1665     }
1666 }
1667
1668 /* -----------------------------------------------------------------------------
1669    Sanity code for CAF garbage collection.
1670
1671    With DEBUG turned on, we manage a CAF list in addition to the SRT
1672    mechanism.  After GC, we run down the CAF list and blackhole any
1673    CAFs which have been garbage collected.  This means we get an error
1674    whenever the program tries to enter a garbage collected CAF.
1675
1676    Any garbage collected CAFs are taken off the CAF list at the same
1677    time.
1678    -------------------------------------------------------------------------- */
1679
1680 #if 0 && defined(DEBUG)
1681
1682 static void
1683 gcCAFs(void)
1684 {
1685   StgClosure*  p;
1686   StgClosure** pp;
1687   const StgInfoTable *info;
1688   nat i;
1689
1690   i = 0;
1691   p = caf_list;
1692   pp = &caf_list;
1693
1694   while (p != NULL) {
1695
1696     info = get_itbl(p);
1697
1698     ASSERT(info->type == IND_STATIC);
1699
1700     if (STATIC_LINK(info,p) == NULL) {
1701         debugTrace(DEBUG_gccafs, "CAF gc'd at 0x%04lx", (long)p);
1702         // black hole it
1703         SET_INFO(p,&stg_BLACKHOLE_info);
1704         p = STATIC_LINK2(info,p);
1705         *pp = p;
1706     }
1707     else {
1708       pp = &STATIC_LINK2(info,p);
1709       p = *pp;
1710       i++;
1711     }
1712
1713   }
1714
1715   debugTrace(DEBUG_gccafs, "%d CAFs live", i);
1716 }
1717 #endif