X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=978b7a0a20b76bd642a5f6651dbbcd979e35f4d5;hb=49780c2e25cfbe821d585c5a31cb95aa49f41f14;hp=270784e45703a463304d1f83867f086e540817f4;hpb=63b6c9338c67278da8d1d7d8e7e7ce7373ac52da;p=ghc-hetmet.git diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 270784e..978b7a0 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- * - * (c) The GHC Team 1998-2006 + * (c) The GHC Team 1998-2008 * * Generational garbage collector * @@ -11,7 +11,7 @@ * * ---------------------------------------------------------------------------*/ -#include "PosixSource.h" +// #include "PosixSource.h" #include "Rts.h" #include "RtsFlags.h" #include "RtsUtils.h" @@ -39,15 +39,24 @@ #include "Trace.h" #include "RetainerProfile.h" #include "RaiseAsync.h" +#include "Sparks.h" +#include "Papi.h" #include "GC.h" +#include "GCThread.h" #include "Compact.h" #include "Evac.h" #include "Scav.h" #include "GCUtils.h" #include "MarkWeak.h" +#include "Sparks.h" #include // for memset() +#include + +/* ----------------------------------------------------------------------------- + Global variables + -------------------------------------------------------------------------- */ /* STATIC OBJECT LIST. * @@ -83,8 +92,6 @@ * We build up a static object list while collecting generations 0..N, * which is then appended to the static object list of generation N+1. */ -StgClosure* static_objects; // live static objects -StgClosure* scavenged_static_objects; // static objects scavenged so far /* N is the oldest generation being collected, where the generations * are numbered starting at 0. A major GC (indicated by the major_gc @@ -94,53 +101,61 @@ StgClosure* scavenged_static_objects; // static objects scavenged so far nat N; rtsBool major_gc; -/* Youngest generation that objects should be evacuated to in - * evacuate(). (Logically an argument to evacuate, but it's static - * a lot of the time so we optimise it into a global variable). - */ -nat evac_gen; - -/* Whether to do eager promotion or not. - */ -rtsBool eager_promotion; - -/* Flag indicating failure to evacuate an object to the desired - * generation. - */ -rtsBool failed_to_evac; - -/* Saved nursery (used for 2-space collector only) - */ -static bdescr *saved_nursery; -static nat saved_n_blocks; - /* Data used for allocation area sizing. */ -lnat new_blocks; // blocks allocated during this GC -lnat new_scavd_blocks; // ditto, but depth-first blocks static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC /* Mut-list stats */ #ifdef DEBUG nat mutlist_MUTVARS, mutlist_MUTARRS, + mutlist_MVARS, mutlist_OTHERS; #endif +/* Thread-local data for each GC thread + */ +gc_thread **gc_threads = NULL; +// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local + +// Number of threads running in *this* GC. Affects how many +// step->todos[] lists we have to look in to find work. 
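/* Illustrative sketch of the data layout these globals refer to, using
 * invented model_* names; the real gc_thread and step_workspace types are
 * declared in GCThread.h and carry many more fields. Each GC worker owns
 * one gc_thread record and, inside it, one workspace per step of every
 * generation: evacuated objects accumulate in the workspace's todo/part/
 * scavd block lists, and the per-thread `copied` counters are summed into
 * the global `copied` figure after the collection.
 */
#include <stddef.h>

struct model_block { struct model_block *link; };

typedef struct {                          /* one per (GC thread, step) pair */
    struct model_block *todo_bd;          /* block currently filled by evacuate() */
    struct model_block *part_list;        /* partly-filled blocks, kept for reuse */
    struct model_block *scavd_list;       /* full blocks, already scavenged */
    size_t n_part_blocks, n_scavd_blocks;
} model_step_workspace;

typedef struct {
    unsigned thread_index;
    long copied, scanned;                 /* per-thread stats, summed after GC */
    model_step_workspace *steps;          /* total_steps entries */
} model_gc_thread;

static long model_total_copied (model_gc_thread **threads, unsigned n_threads)
{
    long total = 0;
    unsigned i;
    for (i = 0; i < n_threads; i++) total += threads[i]->copied;
    return total;
}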
+nat n_gc_threads; + +// For stats: +long copied; // *words* copied & scavenged during this GC + +#ifdef THREADED_RTS +SpinLock recordMutableGen_sync; +#endif + /* ----------------------------------------------------------------------------- Static function declarations -------------------------------------------------------------------------- */ -static void mark_root ( StgClosure **root ); - -static void zero_static_object_list ( StgClosure* first_static ); +static void mark_root (void *user, StgClosure **root); +static void zero_static_object_list (StgClosure* first_static); +static nat initialise_N (rtsBool force_major_gc); +static void alloc_gc_threads (void); +static void init_collected_gen (nat g, nat threads); +static void init_uncollected_gen (nat g, nat threads); +static void init_gc_thread (gc_thread *t); +static void update_task_list (void); +static void resize_generations (void); +static void resize_nursery (void); +static void start_gc_threads (void); +static void scavenge_until_all_done (void); +static nat inc_running (void); +static nat dec_running (void); +static void wakeup_gc_threads (nat n_threads); +static void shutdown_gc_threads (nat n_threads); #if 0 && defined(DEBUG) -static void gcCAFs ( void ); +static void gcCAFs (void); #endif /* ----------------------------------------------------------------------------- - inline functions etc. for dealing with the mark bitmap & stack. + The mark bitmap & stack. -------------------------------------------------------------------------- */ #define MARK_STACK_BLOCKS 4 @@ -157,37 +172,9 @@ bdescr *oldgen_scan_bd; StgPtr oldgen_scan; /* ----------------------------------------------------------------------------- - GarbageCollect - - Rough outline of the algorithm: for garbage collecting generation N - (and all younger generations): - - - follow all pointers in the root set. the root set includes all - mutable objects in all generations (mutable_list). - - - for each pointer, evacuate the object it points to into either - - + to-space of the step given by step->to, which is the next - highest step in this generation or the first step in the next - generation if this is the last step. - - + to-space of generations[evac_gen]->steps[0], if evac_gen != 0. - When we evacuate an object we attempt to evacuate - everything it points to into the same generation - this is - achieved by setting evac_gen to the desired generation. If - we can't do this, then an entry in the mut list has to - be made for the cross-generation pointer. - - + if the object is already in a generation > N, then leave - it alone. - - - repeatedly scavenge to-space from each step in each generation - being collected until no more objects can be evacuated. - - - free from-space in each step, and set from-space = to-space. + GarbageCollect: the main entry point to the garbage collector. Locks held: all capabilities are held throughout GarbageCollect(). 
- -------------------------------------------------------------------------- */ void @@ -195,33 +182,35 @@ GarbageCollect ( rtsBool force_major_gc ) { bdescr *bd; step *stp; - lnat live, allocated, copied = 0, scavd_copied = 0; + lnat live, allocated, max_copied, avg_copied, slop; lnat oldgen_saved_blocks = 0; - nat g, s, i; + gc_thread *saved_gct; + nat g, s, t, n; - ACQUIRE_SM_LOCK; + // necessary if we stole a callee-saves register for gct: + saved_gct = gct; #ifdef PROFILING CostCentreStack *prev_CCS; #endif - debugTrace(DEBUG_gc, "starting GC"); + ACQUIRE_SM_LOCK; #if defined(RTS_USER_SIGNALS) - // block signals - blockUserSignals(); + if (RtsFlags.MiscFlags.install_signal_handlers) { + // block signals + blockUserSignals(); + } #endif - // tell the STM to discard any cached closures its hoping to re-use - stmPreGCHook(); + ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord)); + // otherwise adjust the padding in step_workspace. // tell the stats department that we've started a GC stat_startGC(); -#ifdef DEBUG - // check for memory leaks if DEBUG is on - memInventory(); -#endif + // tell the STM to discard any cached closures it's hoping to re-use + stmPreGCHook(); #ifdef DEBUG mutlist_MUTVARS = 0; @@ -242,20 +231,30 @@ GarbageCollect ( rtsBool force_major_gc ) /* Figure out which generation to collect */ - if (force_major_gc) { - N = RtsFlags.GcFlags.generations - 1; - major_gc = rtsTrue; + n = initialise_N(force_major_gc); + + /* Allocate + initialise the gc_thread structures. + */ + alloc_gc_threads(); + + /* Start threads, so they can be spinning up while we finish initialisation. + */ + start_gc_threads(); + + /* How many threads will be participating in this GC? + * We don't try to parallelise minor GC. + */ +#if defined(THREADED_RTS) + if (n < (4*1024*1024 / BLOCK_SIZE)) { + n_gc_threads = 1; } else { - N = 0; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - if (generations[g].steps[0].n_blocks + - generations[g].steps[0].n_large_blocks - >= generations[g].max_blocks) { - N = g; - } - } - major_gc = (N == RtsFlags.GcFlags.generations-1); + n_gc_threads = RtsFlags.ParFlags.gcThreads; } +#else + n_gc_threads = 1; +#endif + trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)", + N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads); #ifdef RTS_GTK_FRONTPANEL if (RtsFlags.GcFlags.frontpanel) { @@ -263,161 +262,27 @@ GarbageCollect ( rtsBool force_major_gc ) } #endif +#ifdef DEBUG + // check for memory leaks if DEBUG is on + memInventory(traceClass(DEBUG_gc)); +#endif + // check stack sanity *before* GC (ToDo: check all threads) IF_DEBUG(sanity, checkFreeListSanity()); - /* Initialise the static object lists - */ - static_objects = END_OF_STATIC_LIST; - scavenged_static_objects = END_OF_STATIC_LIST; - - /* Save the nursery if we're doing a two-space collection. - * g0s0->blocks will be used for to-space, so we need to get the - * nursery out of the way. - */ - if (RtsFlags.GcFlags.generations == 1) { - saved_nursery = g0s0->blocks; - saved_n_blocks = g0s0->n_blocks; - g0s0->blocks = NULL; - g0s0->n_blocks = 0; + // Initialise all our gc_thread structures + for (t = 0; t < n_gc_threads; t++) { + init_gc_thread(gc_threads[t]); } - /* Keep a count of how many new blocks we allocated during this GC - * (used for resizing the allocation area, later). - */ - new_blocks = 0; - new_scavd_blocks = 0; - - // Initialise to-space in all the generations/steps that we're - // collecting. 
- // + // Initialise all the generations/steps that we're collecting. for (g = 0; g <= N; g++) { - - // throw away the mutable list. Invariant: the mutable list - // always has at least one block; this means we can avoid a check for - // NULL in recordMutable(). - if (g != 0) { - freeChain(generations[g].mut_list); - generations[g].mut_list = allocBlock(); - for (i = 0; i < n_capabilities; i++) { - freeChain(capabilities[i].mut_lists[g]); - capabilities[i].mut_lists[g] = allocBlock(); - } - } - - for (s = 0; s < generations[g].n_steps; s++) { - - // generation 0, step 0 doesn't need to-space - if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } - - stp = &generations[g].steps[s]; - ASSERT(stp->gen_no == g); - - // start a new to-space for this step. - stp->old_blocks = stp->blocks; - stp->n_old_blocks = stp->n_blocks; - - // allocate the first to-space block; extra blocks will be - // chained on as necessary. - stp->hp_bd = NULL; - bd = gc_alloc_block(stp); - stp->blocks = bd; - stp->n_blocks = 1; - stp->scan = bd->start; - stp->scan_bd = bd; - - // allocate a block for "already scavenged" objects. This goes - // on the front of the stp->blocks list, so it won't be - // traversed by the scavenging sweep. - gc_alloc_scavd_block(stp); - - // initialise the large object queues. - stp->new_large_objects = NULL; - stp->scavenged_large_objects = NULL; - stp->n_scavenged_large_blocks = 0; - - // mark the large objects as not evacuated yet - for (bd = stp->large_objects; bd; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; - } - - // for a compacted step, we need to allocate the bitmap - if (stp->is_compacted) { - nat bitmap_size; // in bytes - bdescr *bitmap_bdescr; - StgWord *bitmap; - - bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); - - if (bitmap_size > 0) { - bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size) - / BLOCK_SIZE); - stp->bitmap = bitmap_bdescr; - bitmap = bitmap_bdescr->start; - - debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p", - bitmap_size, bitmap); - - // don't forget to fill it with zeros! - memset(bitmap, 0, bitmap_size); - - // For each block in this step, point to its bitmap from the - // block descriptor. - for (bd=stp->old_blocks; bd != NULL; bd = bd->link) { - bd->u.bitmap = bitmap; - bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE); - - // Also at this point we set the BF_COMPACTED flag - // for this block. The invariant is that - // BF_COMPACTED is always unset, except during GC - // when it is set on those blocks which will be - // compacted. - bd->flags |= BF_COMPACTED; - } - } - } - } + init_collected_gen(g,n_gc_threads); } - - /* make sure the older generations have at least one block to - * allocate into (this makes things easier for copy(), see below). - */ + + // Initialise all the generations/steps that we're *not* collecting. for (g = N+1; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - stp = &generations[g].steps[s]; - if (stp->hp_bd == NULL) { - ASSERT(stp->blocks == NULL); - bd = gc_alloc_block(stp); - stp->blocks = bd; - stp->n_blocks = 1; - } - if (stp->scavd_hp == NULL) { - gc_alloc_scavd_block(stp); - stp->n_blocks++; - } - /* Set the scan pointer for older generations: remember we - * still have to scavenge objects that have been promoted. 
*/ - stp->scan = stp->hp; - stp->scan_bd = stp->hp_bd; - stp->new_large_objects = NULL; - stp->scavenged_large_objects = NULL; - stp->n_scavenged_large_blocks = 0; - } - - /* Move the private mutable lists from each capability onto the - * main mutable list for the generation. - */ - for (i = 0; i < n_capabilities; i++) { - for (bd = capabilities[i].mut_lists[g]; - bd->link != NULL; bd = bd->link) { - /* nothing */ - } - bd->link = generations[g].mut_list; - generations[g].mut_list = capabilities[i].mut_lists[g]; - capabilities[i].mut_lists[g] = allocBlock(); - } + init_uncollected_gen(g,n_gc_threads); } /* Allocate a mark stack if we're doing a major collection. @@ -431,172 +296,90 @@ GarbageCollect ( rtsBool force_major_gc ) mark_stack_bdescr = NULL; } - eager_promotion = rtsTrue; // for now + // this is the main thread + gct = gc_threads[0]; /* ----------------------------------------------------------------------- * follow all the roots that we know about: * - mutable lists from each generation > N * we want to *scavenge* these roots, not evacuate them: they're not * going to move in this GC. - * Also: do them in reverse generation order. This is because we - * often want to promote objects that are pointed to by older - * generations early, so we don't have to repeatedly copy them. - * Doing the generations in reverse order ensures that we don't end - * up in the situation where we want to evac an object to gen 3 and - * it has already been evaced to gen 2. + * Also do them in reverse generation order, for the usual reason: + * namely to reduce the likelihood of spurious old->new pointers. */ - { - int st; - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { generations[g].saved_mut_list = generations[g].mut_list; generations[g].mut_list = allocBlock(); - // mut_list always has at least one block. - } + // mut_list always has at least one block. + } + + // the main thread is running: this prevents any other threads from + // exiting prematurely, so we can start them now. + // NB. do this after the mutable lists have been saved above, otherwise + // the other GC threads will be writing into the old mutable lists. + inc_running(); + wakeup_gc_threads(n_gc_threads); - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { scavenge_mutable_list(&generations[g]); - evac_gen = g; - for (st = generations[g].n_steps-1; st >= 0; st--) { - scavenge(&generations[g].steps[st]); - } - } } - /* follow roots from the CAF list (used by GHCi) - */ - evac_gen = 0; - markCAFs(mark_root); + // follow roots from the CAF list (used by GHCi) + gct->evac_step = 0; + markCAFs(mark_root, gct); - /* follow all the roots that the application knows about. - */ - evac_gen = 0; - GetRoots(mark_root); + // follow all the roots that the application knows about. + gct->evac_step = 0; + markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads); - /* Mark the weak pointer list, and prepare to detect dead weak - * pointers. - */ +#if defined(RTS_USER_SIGNALS) + // mark the signal handlers (signals should be already blocked) + markSignalHandlers(mark_root, gct); +#endif + + // Mark the weak pointer list, and prepare to detect dead weak pointers. markWeakPtrList(); initWeakForGC(); - /* Mark the stable pointer table. - */ - markStablePtrTable(mark_root); + // Mark the stable pointer table. 
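/* The call below, like the markCAFs/markSomeCapabilities/markSignalHandlers
 * calls above, now passes gct as an opaque `user` argument to the
 * root-marking callback; mark_root() (defined further down) needs it
 * because evacuate() works on the thread-local gct. An illustrative model
 * of that callback shape, with invented model_* names rather than the real
 * evac_fn/StgClosure types:
 */
typedef struct model_closure_ model_closure;
typedef void (*model_evac_fn)(void *user, model_closure **root);

/* A table traversal simply forwards `user` to the callback unchanged, and
 * the callback may update each root in place (to the object's new address). */
static void model_mark_table (model_evac_fn evac, void *user,
                              model_closure *table[], unsigned n)
{
    unsigned i;
    for (i = 0; i < n; i++) {
        if (table[i] != NULL) {
            evac(user, &table[i]);
        }
    }
}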
+ markStablePtrTable(mark_root, gct); /* ------------------------------------------------------------------------- * Repeatedly scavenge all the areas we know about until there's no * more scavenging to be done. */ - { - rtsBool flag; - loop: - flag = rtsFalse; - - // scavenge static objects - if (major_gc && static_objects != END_OF_STATIC_LIST) { - IF_DEBUG(sanity, checkStaticObjects(static_objects)); - scavenge_static(); - } - - /* When scavenging the older generations: Objects may have been - * evacuated from generations <= N into older generations, and we - * need to scavenge these objects. We're going to try to ensure that - * any evacuations that occur move the objects into at least the - * same generation as the object being scavenged, otherwise we - * have to create new entries on the mutable list for the older - * generation. - */ - - // scavenge each step in generations 0..maxgen - { - long gen; - int st; - - loop2: - // scavenge objects in compacted generation - if (mark_stack_overflowed || oldgen_scan_bd != NULL || - (mark_stack_bdescr != NULL && !mark_stack_empty())) { - scavenge_mark_stack(); - flag = rtsTrue; + for (;;) + { + scavenge_until_all_done(); + // The other threads are now stopped. We might recurse back to + // here, but from now on this is the only thread. + + // if any blackholes are alive, make the threads that wait on + // them alive too. + if (traverseBlackholeQueue()) { + inc_running(); + continue; } - - for (gen = RtsFlags.GcFlags.generations; --gen >= 0; ) { - for (st = generations[gen].n_steps; --st >= 0; ) { - if (gen == 0 && st == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } - stp = &generations[gen].steps[st]; - evac_gen = gen; - if (stp->hp_bd != stp->scan_bd || stp->scan < stp->hp) { - scavenge(stp); - flag = rtsTrue; - goto loop2; - } - if (stp->new_large_objects != NULL) { - scavenge_large(stp); - flag = rtsTrue; - goto loop2; - } - } + + // must be last... invariant is that everything is fully + // scavenged at this point. + if (traverseWeakPtrList()) { // returns rtsTrue if evaced something + inc_running(); + continue; } - } - - // if any blackholes are alive, make the threads that wait on - // them alive too. - if (traverseBlackholeQueue()) - flag = rtsTrue; - if (flag) { goto loop; } - - // must be last... invariant is that everything is fully - // scavenged at this point. - if (traverseWeakPtrList()) { // returns rtsTrue if evaced something - goto loop; - } + // If we get to here, there's really nothing left to do. + break; } - /* Update the pointers from the task list - these are - * treated as weak pointers because we want to allow a main thread - * to get a BlockedOnDeadMVar exception in the same way as any other - * thread. Note that the threads should all have been retained by - * GC by virtue of being on the all_threads list, we're just - * updating pointers here. - */ - { - Task *task; - StgTSO *tso; - for (task = all_tasks; task != NULL; task = task->all_link) { - if (!task->stopped && task->tso) { - ASSERT(task->tso->bound == task); - tso = (StgTSO *) isAlive((StgClosure *)task->tso); - if (tso == NULL) { - barf("task %p: main thread %d has been GC'd", -#ifdef THREADED_RTS - (void *)task->id, -#else - (void *)task, -#endif - task->tso->id); - } - task->tso = tso; - } - } - } + shutdown_gc_threads(n_gc_threads); + + // Update pointers from the Task list + update_task_list(); // Now see which stable names are still alive. 
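/* The for(;;) loop above is a fix-point iteration (sketched here with
 * invented names, not the RTS code): once every thread has run out of
 * work, reviving blackhole-blocked threads or processing weak pointers may
 * evacuate fresh objects, so the main thread re-registers itself as
 * running - the inc_running() balancing the dec_running() performed inside
 * scavenge_until_all_done() - and scavenges again, until neither traversal
 * finds anything new.
 */
typedef int (*model_revive_fn)(void);        /* non-zero if it evacuated something */

static void model_scavenge_fixpoint (void (*drain_all_work)(void),
                                     model_revive_fn revivers[],
                                     unsigned n_revivers)
{
    for (;;) {
        unsigned i;
        drain_all_work();                    /* scavenge until no work remains */
        for (i = 0; i < n_revivers; i++) {
            if (revivers[i]()) break;        /* new roots appeared: go round again */
        }
        if (i == n_revivers) break;          /* nothing revived: heap fully scavenged */
    }
}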
gcStablePtrTable(); - // Tidy the end of the to-space chains - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - stp = &generations[g].steps[s]; - if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { - ASSERT(Bdescr(stp->hp) == stp->hp_bd); - stp->hp_bd->free = stp->hp; - Bdescr(stp->scavd_hp)->free = stp->scavd_hp; - } - } - } - #ifdef PROFILING // We call processHeapClosureForDead() on every closure destroyed during // the current garbage collection, so we invoke LdvCensusForDead(). @@ -610,19 +393,122 @@ GarbageCollect ( rtsBool force_major_gc ) if (major_gc && oldest_gen->steps[0].is_compacted) { // save number of blocks for stats oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks; - compact(); + compact(gct->scavenged_static_objects); } IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse)); + // Two-space collector: free the old to-space. + // g0s0->old_blocks is the old nursery + // g0s0->blocks is to-space from the previous GC + if (RtsFlags.GcFlags.generations == 1) { + if (g0s0->blocks != NULL) { + freeChain(g0s0->blocks); + g0s0->blocks = NULL; + } + } + + // For each workspace, in each thread: + // * clear the BF_EVACUATED flag from each copied block + // * move the copied blocks to the step + { + gc_thread *thr; + step_workspace *ws; + bdescr *prev, *next; + + for (t = 0; t < n_gc_threads; t++) { + thr = gc_threads[t]; + + // not step 0 + if (RtsFlags.GcFlags.generations == 1) { + s = 0; + } else { + s = 1; + } + for (; s < total_steps; s++) { + ws = &thr->steps[s]; + + // Push the final block + if (ws->todo_bd) { + push_scanned_block(ws->todo_bd, ws); + } + + ASSERT(gct->scan_bd == NULL); + ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); + + prev = NULL; + for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; // now from-space + ws->step->n_words += bd->free - bd->start; + prev = bd; + } + if (prev != NULL) { + prev->link = ws->step->blocks; + ws->step->blocks = ws->scavd_list; + } + ws->step->n_blocks += ws->n_scavd_blocks; + + prev = NULL; + for (bd = ws->part_list; bd != NULL; bd = next) { + next = bd->link; + if (bd->free == bd->start) { + if (prev == NULL) { + ws->part_list = next; + } else { + prev->link = next; + } + freeGroup(bd); + ws->n_part_blocks--; + } else { + bd->flags &= ~BF_EVACUATED; // now from-space + ws->step->n_words += bd->free - bd->start; + prev = bd; + } + } + if (prev != NULL) { + prev->link = ws->step->blocks; + ws->step->blocks = ws->part_list; + } + ws->step->n_blocks += ws->n_part_blocks; + + ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks); + ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words); + } + } + } + /* run through all the generations/steps and tidy up */ - copied = new_blocks * BLOCK_SIZE_W; - scavd_copied = new_scavd_blocks * BLOCK_SIZE_W; + copied = 0; + max_copied = 0; + avg_copied = 0; + { + nat i; + for (i=0; i < n_gc_threads; i++) { + if (n_gc_threads > 1) { + trace(TRACE_gc,"thread %d:", i); + trace(TRACE_gc," copied %ld", gc_threads[i]->copied * sizeof(W_)); + trace(TRACE_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_)); + trace(TRACE_gc," any_work %ld", gc_threads[i]->any_work); + trace(TRACE_gc," no_work %ld", gc_threads[i]->no_work); + trace(TRACE_gc," scav_find_work %ld", gc_threads[i]->scav_find_work); + } + copied += gc_threads[i]->copied; + max_copied = stg_max(gc_threads[i]->copied, max_copied); + } + if (n_gc_threads == 1) { + max_copied = 0; + avg_copied = 0; + } else { + avg_copied = 
copied; + } + } + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - if (g <= N) { + if (g == N) { generations[g].collections++; // for stats + if (n_gc_threads > 1) generations[g].par_collections++; } // Count the mutable list as bytes "copied" for the purposes of @@ -635,24 +521,15 @@ GarbageCollect ( rtsBool force_major_gc ) copied += mut_list_size; debugTrace(DEBUG_gc, - "mut_list_size: %lu (%d vars, %d arrays, %d others)", + "mut_list_size: %lu (%d vars, %d arrays, %d MVARs, %d others)", (unsigned long)(mut_list_size * sizeof(W_)), - mutlist_MUTVARS, mutlist_MUTARRS, mutlist_OTHERS); + mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS); } for (s = 0; s < generations[g].n_steps; s++) { bdescr *next; stp = &generations[g].steps[s]; - if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { - // stats information: how much we copied - if (g <= N) { - copied -= stp->hp_bd->start + BLOCK_SIZE_W - - stp->hp_bd->free; - scavd_copied -= stp->scavd_hpLim - stp->scavd_hp; - } - } - // for generations we collected... if (g <= N) { @@ -660,12 +537,14 @@ GarbageCollect ( rtsBool force_major_gc ) * the collected steps (except the allocation area). These * freed blocks will probaby be quickly recycled. */ - if (!(g == 0 && s == 0)) { - if (stp->is_compacted) { + if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { + if (stp->is_compacted) + { // for a compacted step, just shift the new to-space // onto the front of the now-compacted existing blocks. for (bd = stp->blocks; bd != NULL; bd = bd->link) { bd->flags &= ~BF_EVACUATED; // now from-space + stp->n_words += bd->free - bd->start; } // tack the new blocks on the end of the existing blocks if (stp->old_blocks != NULL) { @@ -685,11 +564,11 @@ GarbageCollect ( rtsBool force_major_gc ) // add the new blocks to the block tally stp->n_blocks += stp->n_old_blocks; ASSERT(countBlocks(stp->blocks) == stp->n_blocks); - } else { + ASSERT(countOccupied(stp->blocks) == stp->n_words); + } + else // not copacted + { freeChain(stp->old_blocks); - for (bd = stp->blocks; bd != NULL; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; // now from-space - } } stp->old_blocks = NULL; stp->n_old_blocks = 0; @@ -713,9 +592,9 @@ GarbageCollect ( rtsBool force_major_gc ) stp->large_objects = stp->scavenged_large_objects; stp->n_large_blocks = stp->n_scavenged_large_blocks; - } else { - // for older generations... - + } + else // for older generations... + { /* For older generations, we need to append the * scavenged_large_object list (i.e. large objects that have been * promoted during this GC) to the large_object list for that step. @@ -732,108 +611,34 @@ GarbageCollect ( rtsBool force_major_gc ) } } - /* Reset the sizes of the older generations when we do a major - * collection. - * - * CURRENT STRATEGY: make all generations except zero the same size. - * We have to stay within the maximum heap size, and leave a certain - * percentage of the maximum heap size available to allocate into. 
- */ - if (major_gc && RtsFlags.GcFlags.generations > 1) { - nat live, size, min_alloc; - nat max = RtsFlags.GcFlags.maxHeapSize; - nat gens = RtsFlags.GcFlags.generations; - - // live in the oldest generations - live = oldest_gen->steps[0].n_blocks + - oldest_gen->steps[0].n_large_blocks; - - // default max size for all generations except zero - size = stg_max(live * RtsFlags.GcFlags.oldGenFactor, - RtsFlags.GcFlags.minOldGenSize); - - // minimum size for generation zero - min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200, - RtsFlags.GcFlags.minAllocAreaSize); - - // Auto-enable compaction when the residency reaches a - // certain percentage of the maximum heap size (default: 30%). - if (RtsFlags.GcFlags.generations > 1 && - (RtsFlags.GcFlags.compact || - (max > 0 && - oldest_gen->steps[0].n_blocks > - (RtsFlags.GcFlags.compactThreshold * max) / 100))) { - oldest_gen->steps[0].is_compacted = 1; -// debugBelch("compaction: on\n", live); - } else { - oldest_gen->steps[0].is_compacted = 0; -// debugBelch("compaction: off\n", live); - } - - // if we're going to go over the maximum heap size, reduce the - // size of the generations accordingly. The calculation is - // different if compaction is turned on, because we don't need - // to double the space required to collect the old generation. - if (max != 0) { - - // this test is necessary to ensure that the calculations - // below don't have any negative results - we're working - // with unsigned values here. - if (max < min_alloc) { - heapOverflow(); - } - - if (oldest_gen->steps[0].is_compacted) { - if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) { - size = (max - min_alloc) / ((gens - 1) * 2 - 1); - } - } else { - if ( (size * (gens - 1) * 2) + min_alloc > max ) { - size = (max - min_alloc) / ((gens - 1) * 2); - } - } - - if (size < live) { - heapOverflow(); - } - } - -#if 0 - debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live, - min_alloc, size, max); -#endif - - for (g = 0; g < gens; g++) { - generations[g].max_blocks = size; + // update the max size of older generations after a major GC + resize_generations(); + + // Calculate the amount of live data for stats. + live = calcLiveWords(); + + // Free the small objects allocated via allocate(), since this will + // all have been copied into G0S1 now. + if (RtsFlags.GcFlags.generations > 1) { + if (g0s0->blocks != NULL) { + freeChain(g0s0->blocks); + g0s0->blocks = NULL; } + g0s0->n_blocks = 0; + g0s0->n_words = 0; } - - // Guess the amount of live data for stats. - live = calcLive(); - - /* Free the small objects allocated via allocate(), since this will - * all have been copied into G0S1 now. - */ - if (small_alloc_list != NULL) { - freeChain(small_alloc_list); - } - small_alloc_list = NULL; alloc_blocks = 0; - alloc_Hp = NULL; - alloc_HpLim = NULL; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; // Start a new pinned_object_block pinned_object_block = NULL; - /* Free the mark stack. - */ + // Free the mark stack. if (mark_stack_bdescr != NULL) { freeGroup(mark_stack_bdescr); } - /* Free any bitmaps. - */ + // Free any bitmaps. for (g = 0; g <= N; g++) { for (s = 0; s < generations[g].n_steps; s++) { stp = &generations[g].steps[s]; @@ -844,112 +649,7 @@ GarbageCollect ( rtsBool force_major_gc ) } } - /* Two-space collector: - * Free the old to-space, and estimate the amount of live data. 
- */ - if (RtsFlags.GcFlags.generations == 1) { - nat blocks; - - if (g0s0->old_blocks != NULL) { - freeChain(g0s0->old_blocks); - } - for (bd = g0s0->blocks; bd != NULL; bd = bd->link) { - bd->flags = 0; // now from-space - } - g0s0->old_blocks = g0s0->blocks; - g0s0->n_old_blocks = g0s0->n_blocks; - g0s0->blocks = saved_nursery; - g0s0->n_blocks = saved_n_blocks; - - /* For a two-space collector, we need to resize the nursery. */ - - /* set up a new nursery. Allocate a nursery size based on a - * function of the amount of live data (by default a factor of 2) - * Use the blocks from the old nursery if possible, freeing up any - * left over blocks. - * - * If we get near the maximum heap size, then adjust our nursery - * size accordingly. If the nursery is the same size as the live - * data (L), then we need 3L bytes. We can reduce the size of the - * nursery to bring the required memory down near 2L bytes. - * - * A normal 2-space collector would need 4L bytes to give the same - * performance we get from 3L bytes, reducing to the same - * performance at 2L bytes. - */ - blocks = g0s0->n_old_blocks; - - if ( RtsFlags.GcFlags.maxHeapSize != 0 && - blocks * RtsFlags.GcFlags.oldGenFactor * 2 > - RtsFlags.GcFlags.maxHeapSize ) { - long adjusted_blocks; // signed on purpose - int pc_free; - - adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks); - - debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld", - RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks); - - pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize; - if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */ { - heapOverflow(); - } - blocks = adjusted_blocks; - - } else { - blocks *= RtsFlags.GcFlags.oldGenFactor; - if (blocks < RtsFlags.GcFlags.minAllocAreaSize) { - blocks = RtsFlags.GcFlags.minAllocAreaSize; - } - } - resizeNurseries(blocks); - - } else { - /* Generational collector: - * If the user has given us a suggested heap size, adjust our - * allocation area to make best use of the memory available. - */ - - if (RtsFlags.GcFlags.heapSizeSuggestion) { - long blocks; - nat needed = calcNeeded(); // approx blocks needed at next GC - - /* Guess how much will be live in generation 0 step 0 next time. - * A good approximation is obtained by finding the - * percentage of g0s0 that was live at the last minor GC. - */ - if (N == 0) { - g0s0_pcnt_kept = (new_blocks * 100) / countNurseryBlocks(); - } - - /* Estimate a size for the allocation area based on the - * information available. We might end up going slightly under - * or over the suggested heap size, but we should be pretty - * close on average. - * - * Formula: suggested - needed - * ---------------------------- - * 1 + g0s0_pcnt_kept/100 - * - * where 'needed' is the amount of memory needed at the next - * collection for collecting all steps except g0s0. - */ - blocks = - (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) / - (100 + (long)g0s0_pcnt_kept); - - if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) { - blocks = RtsFlags.GcFlags.minAllocAreaSize; - } - - resizeNurseries((nat)blocks); - - } else { - // we might have added extra large blocks to the nursery, so - // resize back to minAllocAreaSize again. 
- resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize); - } - } + resize_nursery(); // mark the garbage collected CAFs as dead #if 0 && defined(DEBUG) // doesn't work at the moment @@ -959,12 +659,19 @@ GarbageCollect ( rtsBool force_major_gc ) #ifdef PROFILING // resetStaticObjectForRetainerProfiling() must be called before // zeroing below. - resetStaticObjectForRetainerProfiling(); + if (n_gc_threads > 1) { + barf("profiling is currently broken with multi-threaded GC"); + // ToDo: fix the gct->scavenged_static_objects below + } + resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects); #endif // zero the scavenged static object list if (major_gc) { - zero_static_object_list(scavenged_static_objects); + nat i; + for (i = 0; i < n_gc_threads; i++) { + zero_static_object_list(gc_threads[i]->scavenged_static_objects); + } } // Reset the nursery @@ -987,7 +694,7 @@ GarbageCollect ( rtsBool force_major_gc ) IF_DEBUG(sanity, checkSanity()); // extra GC trace info - IF_DEBUG(gc, statDescribeGens()); + if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens(); #ifdef DEBUG // symbol-table based profiling @@ -1001,7 +708,7 @@ GarbageCollect ( rtsBool force_major_gc ) #ifdef DEBUG // check for memory leaks if DEBUG is on - memInventory(); + memInventory(traceClass(DEBUG_gc)); #endif #ifdef RTS_GTK_FRONTPANEL @@ -1011,100 +718,562 @@ GarbageCollect ( rtsBool force_major_gc ) #endif // ok, GC over: tell the stats department what happened. - stat_endGC(allocated, live, copied, scavd_copied, N); + slop = calcLiveBlocks() * BLOCK_SIZE_W - live; + stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop); #if defined(RTS_USER_SIGNALS) - // unblock signals again - unblockUserSignals(); + if (RtsFlags.MiscFlags.install_signal_handlers) { + // unblock signals again + unblockUserSignals(); + } #endif RELEASE_SM_LOCK; + + gct = saved_gct; } /* ----------------------------------------------------------------------------- - isAlive determines whether the given closure is still alive (after - a garbage collection) or not. It returns the new address of the - closure if it is alive, or NULL otherwise. + Figure out which generation to collect, initialise N and major_gc. + + Also returns the total number of blocks in generations that will be + collected. + -------------------------------------------------------------------------- */ + +static nat +initialise_N (rtsBool force_major_gc) +{ + int g; + nat s, blocks, blocks_total; + + blocks = 0; + blocks_total = 0; + + if (force_major_gc) { + N = RtsFlags.GcFlags.generations - 1; + } else { + N = 0; + } + + for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) { + blocks = 0; + for (s = 0; s < generations[g].n_steps; s++) { + blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W; + blocks += generations[g].steps[s].n_large_blocks; + } + if (blocks >= generations[g].max_blocks) { + N = stg_max(N,g); + } + if ((nat)g <= N) { + blocks_total += blocks; + } + } - NOTE: Use it before compaction only! + blocks_total += countNurseryBlocks(); + + major_gc = (N == RtsFlags.GcFlags.generations-1); + return blocks_total; +} + +/* ----------------------------------------------------------------------------- + Initialise the gc_thread structures. 
-------------------------------------------------------------------------- */ +static gc_thread * +alloc_gc_thread (int n) +{ + nat s; + step_workspace *ws; + gc_thread *t; + + t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace), + "alloc_gc_thread"); + +#ifdef THREADED_RTS + t->id = 0; + initCondition(&t->wake_cond); + initMutex(&t->wake_mutex); + t->wakeup = rtsTrue; // starts true, so we can wait for the + // thread to start up, see wakeup_gc_threads + t->exit = rtsFalse; +#endif + + t->thread_index = n; + t->free_blocks = NULL; + t->gc_count = 0; + + init_gc_thread(t); + +#ifdef USE_PAPI + t->papi_events = -1; +#endif + + for (s = 0; s < total_steps; s++) + { + ws = &t->steps[s]; + ws->step = &all_steps[s]; + ASSERT(s == ws->step->abs_no); + ws->gct = t; + + ws->todo_bd = NULL; + ws->buffer_todo_bd = NULL; + + ws->part_list = NULL; + ws->n_part_blocks = 0; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; + } + + return t; +} + + +static void +alloc_gc_threads (void) +{ + if (gc_threads == NULL) { +#if defined(THREADED_RTS) + nat i; + gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads * + sizeof(gc_thread*), + "alloc_gc_threads"); + + for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) { + gc_threads[i] = alloc_gc_thread(i); + } +#else + gc_threads = stgMallocBytes (sizeof(gc_thread*), + "alloc_gc_threads"); + + gc_threads[0] = alloc_gc_thread(0); +#endif + } +} + +/* ---------------------------------------------------------------------------- + Start GC threads + ------------------------------------------------------------------------- */ -StgClosure * -isAlive(StgClosure *p) +static nat gc_running_threads; + +#if defined(THREADED_RTS) +static Mutex gc_running_mutex; +#endif + +static nat +inc_running (void) { - const StgInfoTable *info; - bdescr *bd; + nat n_running; + ACQUIRE_LOCK(&gc_running_mutex); + n_running = ++gc_running_threads; + RELEASE_LOCK(&gc_running_mutex); + ASSERT(n_running <= n_gc_threads); + return n_running; +} - while (1) { +static nat +dec_running (void) +{ + nat n_running; + ACQUIRE_LOCK(&gc_running_mutex); + ASSERT(n_gc_threads != 0); + n_running = --gc_running_threads; + RELEASE_LOCK(&gc_running_mutex); + return n_running; +} - ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); - info = get_itbl(p); +static void +scavenge_until_all_done (void) +{ + nat r; + + debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index); + +loop: + scavenge_loop(); + // scavenge_loop() only exits when there's no work to do + r = dec_running(); + + debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)", + gct->thread_index, r); + + while (gc_running_threads != 0) { + usleep(1); + if (any_work()) { + inc_running(); + goto loop; + } + // any_work() does not remove the work from the queue, it + // just checks for the presence of work. If we find any, + // then we increment gc_running_threads and go back to + // scavenge_loop() to perform any pending work. + } + + // All threads are now stopped + debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index); +} + +#if defined(THREADED_RTS) +// +// gc_thread_work(): Scavenge until there's no work left to do and all +// the running threads are idle. +// +static void +gc_thread_work (void) +{ + // gc_running_threads has already been incremented for us; this is + // a worker thread and the main thread bumped gc_running_threads + // before waking us up. + + // Every thread evacuates some roots. 
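/* Sketch of how the call below shares the root set out between GC threads.
 * The stride-based split is an interpretation of the
 * markSomeCapabilities(..., thread_index, n_gc_threads) arguments, not
 * something this patch spells out, and the model_* names are invented:
 * thread i0 visits capabilities i0, i0+delta, i0+2*delta, ... so each
 * capability's roots are evacuated by exactly one GC thread.
 */
static void model_mark_some_capabilities (void (*evac)(void *user, void **root),
                                          void *user,
                                          void **root_of_cap, unsigned n_caps,
                                          unsigned i0, unsigned delta)
{
    unsigned i;
    /* one root per capability here, purely to keep the sketch small */
    for (i = i0; i < n_caps; i += delta) {
        evac(user, &root_of_cap[i]);
    }
}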
+ gct->evac_step = 0; + markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads); + + scavenge_until_all_done(); +} + + +static void +gc_thread_mainloop (void) +{ + while (!gct->exit) { + + // Wait until we're told to wake up + ACQUIRE_LOCK(&gct->wake_mutex); + gct->wakeup = rtsFalse; + while (!gct->wakeup) { + debugTrace(DEBUG_gc, "GC thread %d standing by...", + gct->thread_index); + waitCondition(&gct->wake_cond, &gct->wake_mutex); + } + RELEASE_LOCK(&gct->wake_mutex); + if (gct->exit) break; + +#ifdef USE_PAPI + // start performance counters in this thread... + if (gct->papi_events == -1) { + papi_init_eventset(&gct->papi_events); + } + papi_thread_start_gc1_count(gct->papi_events); +#endif - // ignore static closures - // - // ToDo: for static closures, check the static link field. - // Problem here is that we sometimes don't set the link field, eg. - // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. - // - if (!HEAP_ALLOCED(p)) { - return p; + gc_thread_work(); + +#ifdef USE_PAPI + // count events in this thread towards the GC totals + papi_thread_stop_gc1_count(gct->papi_events); +#endif } +} +#endif - // ignore closures in generations that we're not collecting. - bd = Bdescr((P_)p); - if (bd->gen_no > N) { - return p; +#if defined(THREADED_RTS) +static void +gc_thread_entry (gc_thread *my_gct) +{ + gct = my_gct; + debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index); + gct->id = osThreadId(); + gc_thread_mainloop(); +} +#endif + +static void +start_gc_threads (void) +{ +#if defined(THREADED_RTS) + nat i; + OSThreadId id; + static rtsBool done = rtsFalse; + + gc_running_threads = 0; + initMutex(&gc_running_mutex); + + if (!done) { + // Start from 1: the main thread is 0 + for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) { + createOSThread(&id, (OSThreadProc*)&gc_thread_entry, + gc_threads[i]); + } + done = rtsTrue; } +#endif +} - // if it's a pointer into to-space, then we're done - if (bd->flags & BF_EVACUATED) { - return p; +static void +wakeup_gc_threads (nat n_threads USED_IF_THREADS) +{ +#if defined(THREADED_RTS) + nat i; + for (i=1; i < n_threads; i++) { + inc_running(); + debugTrace(DEBUG_gc, "waking up gc thread %d", i); + do { + ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + if (gc_threads[i]->wakeup) { + RELEASE_LOCK(&gc_threads[i]->wake_mutex); + continue; + } else { + break; + } + } while (1); + gc_threads[i]->wakeup = rtsTrue; + signalCondition(&gc_threads[i]->wake_cond); + RELEASE_LOCK(&gc_threads[i]->wake_mutex); } +#endif +} - // large objects use the evacuated flag - if (bd->flags & BF_LARGE) { - return NULL; +// After GC is complete, we must wait for all GC threads to enter the +// standby state, otherwise they may still be executing inside +// any_work(), and may even remain awake until the next GC starts. 
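/* A self-contained pthreads model of the wakeup/standby handshake used by
 * gc_thread_mainloop() above and wakeup_gc_threads()/shutdown_gc_threads()
 * below. This is a simplified sketch, not the RTS code: the real version
 * goes through the OSThreads wrappers and keeps the flag in gc_thread. The
 * invariant is that `wakeup` is false exactly while the worker is parked in
 * its standby wait, so the coordinator can wait for wakeup == false both
 * before poking a thread and, after GC, to know it has returned to standby.
 */
#include <pthread.h>
#include <stdbool.h>

typedef struct {
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
    bool            wakeup;                 /* false only while the worker is parked */
} model_standby;

static void model_worker_standby_wait (model_standby *s)   /* worker side */
{
    pthread_mutex_lock(&s->mutex);
    s->wakeup = false;                      /* advertise: parked and idle */
    while (!s->wakeup) {
        pthread_cond_wait(&s->cond, &s->mutex);
    }
    pthread_mutex_unlock(&s->mutex);        /* woken: go and do GC work */
}

static void model_coordinator_wake (model_standby *s)      /* main GC thread */
{
    /* the real wakeup_gc_threads() first waits for wakeup to go false, so a
     * thread still busy from the previous cycle is never signalled early */
    pthread_mutex_lock(&s->mutex);
    s->wakeup = true;
    pthread_cond_signal(&s->cond);
    pthread_mutex_unlock(&s->mutex);
}

static void model_coordinator_wait_for_standby (model_standby *s)
{
    bool awake;
    do {                                    /* poll, as shutdown_gc_threads() does */
        pthread_mutex_lock(&s->mutex);
        awake = s->wakeup;
        pthread_mutex_unlock(&s->mutex);
    } while (awake);
}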
+static void +shutdown_gc_threads (nat n_threads USED_IF_THREADS) +{ +#if defined(THREADED_RTS) + nat i; + rtsBool wakeup; + for (i=1; i < n_threads; i++) { + do { + ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + wakeup = gc_threads[i]->wakeup; + // wakeup is false while the thread is waiting + RELEASE_LOCK(&gc_threads[i]->wake_mutex); + } while (wakeup); } +#endif +} + +/* ---------------------------------------------------------------------------- + Initialise a generation that is to be collected + ------------------------------------------------------------------------- */ + +static void +init_collected_gen (nat g, nat n_threads) +{ + nat s, t, i; + step_workspace *ws; + step *stp; + bdescr *bd; - // check the mark bit for compacted steps - if ((bd->flags & BF_COMPACTED) && is_marked((P_)p,bd)) { - return p; + // Throw away the current mutable list. Invariant: the mutable + // list always has at least one block; this means we can avoid a + // check for NULL in recordMutable(). + if (g != 0) { + freeChain(generations[g].mut_list); + generations[g].mut_list = allocBlock(); + for (i = 0; i < n_capabilities; i++) { + freeChain(capabilities[i].mut_lists[g]); + capabilities[i].mut_lists[g] = allocBlock(); + } } - switch (info->type) { - - case IND: - case IND_STATIC: - case IND_PERM: - case IND_OLDGEN: // rely on compatible layout with StgInd - case IND_OLDGEN_PERM: - // follow indirections - p = ((StgInd *)p)->indirectee; - continue; - - case EVACUATED: - // alive! - return ((StgEvacuated *)p)->evacuee; - - case TSO: - if (((StgTSO *)p)->what_next == ThreadRelocated) { - p = (StgClosure *)((StgTSO *)p)->link; - continue; - } - return NULL; - - default: - // dead. - return NULL; + for (s = 0; s < generations[g].n_steps; s++) { + + // generation 0, step 0 doesn't need to-space + if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { + continue; + } + + stp = &generations[g].steps[s]; + ASSERT(stp->gen_no == g); + + // deprecate the existing blocks + stp->old_blocks = stp->blocks; + stp->n_old_blocks = stp->n_blocks; + stp->blocks = NULL; + stp->n_blocks = 0; + stp->n_words = 0; + + // we don't have any to-be-scavenged blocks yet + stp->todos = NULL; + stp->todos_last = NULL; + stp->n_todos = 0; + + // initialise the large object queues. + stp->scavenged_large_objects = NULL; + stp->n_scavenged_large_blocks = 0; + + // mark the large objects as not evacuated yet + for (bd = stp->large_objects; bd; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; + } + + // for a compacted step, we need to allocate the bitmap + if (stp->is_compacted) { + nat bitmap_size; // in bytes + bdescr *bitmap_bdescr; + StgWord *bitmap; + + bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); + + if (bitmap_size > 0) { + bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size) + / BLOCK_SIZE); + stp->bitmap = bitmap_bdescr; + bitmap = bitmap_bdescr->start; + + debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p", + bitmap_size, bitmap); + + // don't forget to fill it with zeros! + memset(bitmap, 0, bitmap_size); + + // For each block in this step, point to its bitmap from the + // block descriptor. + for (bd=stp->old_blocks; bd != NULL; bd = bd->link) { + bd->u.bitmap = bitmap; + bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE); + + // Also at this point we set the BF_COMPACTED flag + // for this block. The invariant is that + // BF_COMPACTED is always unset, except during GC + // when it is set on those blocks which will be + // compacted. 
+ bd->flags |= BF_COMPACTED; + } + } + } } - } + + // For each GC thread, for each step, allocate a "todo" block to + // store evacuated objects to be scavenged, and a block to store + // evacuated objects that do not need to be scavenged. + for (t = 0; t < n_threads; t++) { + for (s = 0; s < generations[g].n_steps; s++) { + + // we don't copy objects into g0s0, unless -G0 + if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue; + + ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; + + ws->todo_large_objects = NULL; + + ws->part_list = NULL; + ws->n_part_blocks = 0; + + // allocate the first to-space block; extra blocks will be + // chained on as necessary. + ws->todo_bd = NULL; + ws->buffer_todo_bd = NULL; + alloc_todo_block(ws,0); + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; + } + } +} + + +/* ---------------------------------------------------------------------------- + Initialise a generation that is *not* to be collected + ------------------------------------------------------------------------- */ + +static void +init_uncollected_gen (nat g, nat threads) +{ + nat s, t, i; + step_workspace *ws; + step *stp; + bdescr *bd; + + for (s = 0; s < generations[g].n_steps; s++) { + stp = &generations[g].steps[s]; + stp->scavenged_large_objects = NULL; + stp->n_scavenged_large_blocks = 0; + } + + for (t = 0; t < threads; t++) { + for (s = 0; s < generations[g].n_steps; s++) { + + ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; + stp = ws->step; + + ws->buffer_todo_bd = NULL; + ws->todo_large_objects = NULL; + + ws->part_list = NULL; + ws->n_part_blocks = 0; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; + + // If the block at the head of the list in this generation + // is less than 3/4 full, then use it as a todo block. + if (stp->blocks && isPartiallyFull(stp->blocks)) + { + ws->todo_bd = stp->blocks; + ws->todo_free = ws->todo_bd->free; + ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; + stp->blocks = stp->blocks->link; + stp->n_blocks -= 1; + stp->n_words -= ws->todo_bd->free - ws->todo_bd->start; + ws->todo_bd->link = NULL; + // we must scan from the current end point. + ws->todo_bd->u.scan = ws->todo_bd->free; + } + else + { + ws->todo_bd = NULL; + alloc_todo_block(ws,0); + } + } + } + + // Move the private mutable lists from each capability onto the + // main mutable list for the generation. + for (i = 0; i < n_capabilities; i++) { + for (bd = capabilities[i].mut_lists[g]; + bd->link != NULL; bd = bd->link) { + /* nothing */ + } + bd->link = generations[g].mut_list; + generations[g].mut_list = capabilities[i].mut_lists[g]; + capabilities[i].mut_lists[g] = allocBlock(); + } +} + +/* ----------------------------------------------------------------------------- + Initialise a gc_thread before GC + -------------------------------------------------------------------------- */ + +static void +init_gc_thread (gc_thread *t) +{ + t->static_objects = END_OF_STATIC_LIST; + t->scavenged_static_objects = END_OF_STATIC_LIST; + t->scan_bd = NULL; + t->evac_step = 0; + t->failed_to_evac = rtsFalse; + t->eager_promotion = rtsTrue; + t->thunk_selector_depth = 0; + t->copied = 0; + t->scanned = 0; + t->any_work = 0; + t->no_work = 0; + t->scav_find_work = 0; } +/* ----------------------------------------------------------------------------- + Function we pass to evacuate roots. 
+ -------------------------------------------------------------------------- */ + static void -mark_root(StgClosure **root) +mark_root(void *user, StgClosure **root) { - *root = evacuate(*root); + // we stole a register for gct, but this function is called from + // *outside* the GC where the register variable is not in effect, + // so we need to save and restore it here. NB. only call + // mark_root() from the main GC thread, otherwise gct will be + // incorrect. + gc_thread *saved_gct; + saved_gct = gct; + gct = user; + + evacuate(root); + + gct = saved_gct; } /* ----------------------------------------------------------------------------- @@ -1125,39 +1294,233 @@ zero_static_object_list(StgClosure* first_static) } } -/* ----------------------------------------------------------------------------- - Reverting CAFs - -------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------------- + Update the pointers from the task list -void -revertCAFs( void ) -{ - StgIndStatic *c; + These are treated as weak pointers because we want to allow a main + thread to get a BlockedOnDeadMVar exception in the same way as any + other thread. Note that the threads should all have been retained + by GC by virtue of being on the all_threads list, we're just + updating pointers here. + ------------------------------------------------------------------------- */ - for (c = (StgIndStatic *)revertible_caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - SET_INFO(c, c->saved_info); - c->saved_info = NULL; - // could, but not necessary: c->static_link = NULL; +static void +update_task_list (void) +{ + Task *task; + StgTSO *tso; + for (task = all_tasks; task != NULL; task = task->all_link) { + if (!task->stopped && task->tso) { + ASSERT(task->tso->bound == task); + tso = (StgTSO *) isAlive((StgClosure *)task->tso); + if (tso == NULL) { + barf("task %p: main thread %d has been GC'd", +#ifdef THREADED_RTS + (void *)task->id, +#else + (void *)task, +#endif + task->tso->id); + } + task->tso = tso; + } } - revertible_caf_list = NULL; } -void -markCAFs( evac_fn evac ) +/* ---------------------------------------------------------------------------- + Reset the sizes of the older generations when we do a major + collection. + + CURRENT STRATEGY: make all generations except zero the same size. + We have to stay within the maximum heap size, and leave a certain + percentage of the maximum heap size available to allocate into. + ------------------------------------------------------------------------- */ + +static void +resize_generations (void) { - StgIndStatic *c; + nat g; - for (c = (StgIndStatic *)caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - evac(&c->indirectee); + if (major_gc && RtsFlags.GcFlags.generations > 1) { + nat live, size, min_alloc; + nat max = RtsFlags.GcFlags.maxHeapSize; + nat gens = RtsFlags.GcFlags.generations; + + // live in the oldest generations + live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+ + oldest_gen->steps[0].n_large_blocks; + + // default max size for all generations except zero + size = stg_max(live * RtsFlags.GcFlags.oldGenFactor, + RtsFlags.GcFlags.minOldGenSize); + + // minimum size for generation zero + min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200, + RtsFlags.GcFlags.minAllocAreaSize); + + // Auto-enable compaction when the residency reaches a + // certain percentage of the maximum heap size (default: 30%). 
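/* Worked example for the sizing logic below (illustrative only; the 4 KB
 * block size is an assumption about the usual RTS build, it is not stated
 * in this patch). With a 256 MB maximum heap, max is 65536 blocks, so the
 * default 30% compactThreshold switches compaction on once the oldest
 * generation holds more than 19660 blocks (roughly 77 MB). The cap applied
 * further down then differs markedly; for gens == 2:
 *
 *   copying:    size*(gens-1)*2 + min_alloc <= max   =>  size <= (max - min_alloc)/2
 *   compacting: size + (size-1)*(gens-2)*2 + min_alloc <= max
 *                                                    =>  size <= max - min_alloc
 *
 * so enabling compaction roughly doubles how large the old generation may
 * grow before heapOverflow() is reported.
 */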
+ if (RtsFlags.GcFlags.generations > 1 && + (RtsFlags.GcFlags.compact || + (max > 0 && + oldest_gen->steps[0].n_blocks > + (RtsFlags.GcFlags.compactThreshold * max) / 100))) { + oldest_gen->steps[0].is_compacted = 1; +// debugBelch("compaction: on\n", live); + } else { + oldest_gen->steps[0].is_compacted = 0; +// debugBelch("compaction: off\n", live); + } + + // if we're going to go over the maximum heap size, reduce the + // size of the generations accordingly. The calculation is + // different if compaction is turned on, because we don't need + // to double the space required to collect the old generation. + if (max != 0) { + + // this test is necessary to ensure that the calculations + // below don't have any negative results - we're working + // with unsigned values here. + if (max < min_alloc) { + heapOverflow(); + } + + if (oldest_gen->steps[0].is_compacted) { + if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) { + size = (max - min_alloc) / ((gens - 1) * 2 - 1); + } + } else { + if ( (size * (gens - 1) * 2) + min_alloc > max ) { + size = (max - min_alloc) / ((gens - 1) * 2); + } + } + + if (size < live) { + heapOverflow(); + } + } + +#if 0 + debugBelch("live: %d, min_alloc: %d, size : %d, max = %d\n", live, + min_alloc, size, max); +#endif + + for (g = 0; g < gens; g++) { + generations[g].max_blocks = size; + } } - for (c = (StgIndStatic *)revertible_caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) +} + +/* ----------------------------------------------------------------------------- + Calculate the new size of the nursery, and resize it. + -------------------------------------------------------------------------- */ + +static void +resize_nursery (void) +{ + if (RtsFlags.GcFlags.generations == 1) + { // Two-space collector: + nat blocks; + + /* set up a new nursery. Allocate a nursery size based on a + * function of the amount of live data (by default a factor of 2) + * Use the blocks from the old nursery if possible, freeing up any + * left over blocks. + * + * If we get near the maximum heap size, then adjust our nursery + * size accordingly. If the nursery is the same size as the live + * data (L), then we need 3L bytes. We can reduce the size of the + * nursery to bring the required memory down near 2L bytes. + * + * A normal 2-space collector would need 4L bytes to give the same + * performance we get from 3L bytes, reducing to the same + * performance at 2L bytes. + */ + blocks = g0s0->n_blocks; + + if ( RtsFlags.GcFlags.maxHeapSize != 0 && + blocks * RtsFlags.GcFlags.oldGenFactor * 2 > + RtsFlags.GcFlags.maxHeapSize ) + { + long adjusted_blocks; // signed on purpose + int pc_free; + + adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks); + + debugTrace(DEBUG_gc, "near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %ld", + RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks); + + pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize; + if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even * be < 0 */ + { + heapOverflow(); + } + blocks = adjusted_blocks; + } + else + { + blocks *= RtsFlags.GcFlags.oldGenFactor; + if (blocks < RtsFlags.GcFlags.minAllocAreaSize) + { + blocks = RtsFlags.GcFlags.minAllocAreaSize; + } + } + resizeNurseries(blocks); + } + else // Generational collector { - evac(&c->indirectee); + /* + * If the user has given us a suggested heap size, adjust our + * allocation area to make best use of the memory available. 
+ */ + if (RtsFlags.GcFlags.heapSizeSuggestion) + { + long blocks; + nat needed = calcNeeded(); // approx blocks needed at next GC + + /* Guess how much will be live in generation 0 step 0 next time. + * A good approximation is obtained by finding the + * percentage of g0s0 that was live at the last minor GC. + * + * We have an accurate figure for the amount of copied data in + * 'copied', but we must convert this to a number of blocks, with + * a small adjustment for estimated slop at the end of a block + * (- 10 words). + */ + if (N == 0) + { + g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100) + / countNurseryBlocks(); + } + + /* Estimate a size for the allocation area based on the + * information available. We might end up going slightly under + * or over the suggested heap size, but we should be pretty + * close on average. + * + * Formula: suggested - needed + * ---------------------------- + * 1 + g0s0_pcnt_kept/100 + * + * where 'needed' is the amount of memory needed at the next + * collection for collecting all steps except g0s0. + */ + blocks = + (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) / + (100 + (long)g0s0_pcnt_kept); + + if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) { + blocks = RtsFlags.GcFlags.minAllocAreaSize; + } + + resizeNurseries((nat)blocks); + } + else + { + // we might have added extra large blocks to the nursery, so + // resize back to minAllocAreaSize again. + resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize); + } } } @@ -1211,25 +1574,3 @@ gcCAFs(void) debugTrace(DEBUG_gccafs, "%d CAFs live", i); } #endif - -/* ----------------------------------------------------------------------------- - * Debugging - * -------------------------------------------------------------------------- */ - -#if DEBUG -void -printMutableList(generation *gen) -{ - bdescr *bd; - StgPtr p; - - debugBelch("mutable list %p: ", gen->mut_list); - - for (bd = gen->mut_list; bd != NULL; bd = bd->link) { - for (p = bd->start; p < bd->free; p++) { - debugBelch("%p (%s), ", (void *)*p, info_type((StgClosure *)*p)); - } - } - debugBelch("\n"); -} -#endif /* DEBUG */
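/* Worked example for the nursery-sizing formula in resize_nursery() above.
 * This is illustrative only: the 4 KB block size and the -H100m figure are
 * assumptions, and suggested_nursery_blocks() is an invented helper, not an
 * RTS function. The formula solves
 *
 *     nursery * (1 + g0s0_pcnt_kept/100) + needed = suggested
 *
 * for `nursery`: the nursery itself, plus the fraction of it expected to be
 * copied out at the next minor GC, plus the blocks needed to collect the
 * other steps, should add up to the suggested heap size.
 */
#include <assert.h>

static long suggested_nursery_blocks (long suggested, long needed, long pcnt_kept)
{
    return ((suggested - needed) * 100) / (100 + pcnt_kept);
}

static void nursery_sizing_example (void)
{
    /* -H100m with 4 KB blocks is 25600 blocks; assume the other steps need
     * 5600 blocks at the next GC and 25% of the nursery survived the last
     * minor GC. */
    long blocks = suggested_nursery_blocks(25600, 5600, 25);
    assert(blocks == 16000);                               /* (25600-5600)*100/125 */
    assert(blocks + (blocks * 25) / 100 + 5600 == 25600);  /* adds back up */
}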