X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=079ef83b4d6a86e84fb2994e089d503fd4d74c5b;hb=78956377551e1433b0a87128c5f88c254ec46b40;hp=a07086e65f8d2b881e5b8cdedd3332404f31c8b7;hpb=8db56c8606e6c0e89a87d34c3f67124f1e8b988e;p=ghc-hetmet.git diff --git a/rts/sm/GC.c b/rts/sm/GC.c index a07086e..079ef83 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- * - * (c) The GHC Team 1998-2006 + * (c) The GHC Team 1998-2008 * * Generational garbage collector * @@ -11,7 +11,7 @@ * * ---------------------------------------------------------------------------*/ -#include "PosixSource.h" +// #include "PosixSource.h" #include "Rts.h" #include "RtsFlags.h" #include "RtsUtils.h" @@ -39,10 +39,10 @@ #include "Trace.h" #include "RetainerProfile.h" #include "RaiseAsync.h" -#include "Sparks.h" #include "Papi.h" #include "GC.h" +#include "GCThread.h" #include "Compact.h" #include "Evac.h" #include "Scav.h" @@ -51,6 +51,7 @@ #include "Sparks.h" #include // for memset() +#include /* ----------------------------------------------------------------------------- Global variables @@ -90,11 +91,6 @@ * We build up a static object list while collecting generations 0..N, * which is then appended to the static object list of generation N+1. */ -StgClosure* static_objects; // live static objects -StgClosure* scavenged_static_objects; // static objects scavenged so far -#ifdef THREADED_RTS -SpinLock static_objects_sync; -#endif /* N is the oldest generation being collected, where the generations * are numbered starting at 0. A major GC (indicated by the major_gc @@ -118,7 +114,7 @@ nat mutlist_MUTVARS, /* Thread-local data for each GC thread */ -gc_thread *gc_threads = NULL; +gc_thread **gc_threads = NULL; // gc_thread *gct = NULL; // this thread's gct TODO: make thread-local // Number of threads running in *this* GC. 
Affects how many @@ -136,9 +132,9 @@ SpinLock recordMutableGen_sync; Static function declarations -------------------------------------------------------------------------- */ -static void mark_root (StgClosure **root); +static void mark_root (void *user, StgClosure **root); static void zero_static_object_list (StgClosure* first_static); -static void initialise_N (rtsBool force_major_gc); +static nat initialise_N (rtsBool force_major_gc); static void alloc_gc_threads (void); static void init_collected_gen (nat g, nat threads); static void init_uncollected_gen (nat g, nat threads); @@ -147,10 +143,11 @@ static void update_task_list (void); static void resize_generations (void); static void resize_nursery (void); static void start_gc_threads (void); -static void gc_thread_work (void); +static void scavenge_until_all_done (void); static nat inc_running (void); static nat dec_running (void); static void wakeup_gc_threads (nat n_threads); +static void shutdown_gc_threads (nat n_threads); #if 0 && defined(DEBUG) static void gcCAFs (void); @@ -184,10 +181,9 @@ GarbageCollect ( rtsBool force_major_gc ) { bdescr *bd; step *stp; - lnat live, allocated; - lnat oldgen_saved_blocks = 0; + lnat live, allocated, max_copied, avg_copied, slop; gc_thread *saved_gct; - nat g, s, t; + nat g, s, t, n; // necessary if we stole a callee-saves register for gct: saved_gct = gct; @@ -198,8 +194,6 @@ GarbageCollect ( rtsBool force_major_gc ) ACQUIRE_SM_LOCK; - debugTrace(DEBUG_gc, "starting GC"); - #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { // block signals @@ -207,16 +201,14 @@ GarbageCollect ( rtsBool force_major_gc ) } #endif - // tell the STM to discard any cached closures it's hoping to re-use - stmPreGCHook(); + ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord)); + // otherwise adjust the padding in step_workspace. // tell the stats department that we've started a GC stat_startGC(); -#ifdef DEBUG - // check for memory leaks if DEBUG is on - memInventory(); -#endif + // tell the STM to discard any cached closures it's hoping to re-use + stmPreGCHook(); #ifdef DEBUG mutlist_MUTVARS = 0; @@ -237,7 +229,7 @@ GarbageCollect ( rtsBool force_major_gc ) /* Figure out which generation to collect */ - initialise_N(force_major_gc); + n = initialise_N(force_major_gc); /* Allocate + initialise the gc_thread structures. */ @@ -251,7 +243,7 @@ GarbageCollect ( rtsBool force_major_gc ) * We don't try to parallelise minor GC. 
*/ #if defined(THREADED_RTS) - if (N == 0) { + if (n < (4*1024*1024 / BLOCK_SIZE)) { n_gc_threads = 1; } else { n_gc_threads = RtsFlags.ParFlags.gcThreads; @@ -259,6 +251,8 @@ GarbageCollect ( rtsBool force_major_gc ) #else n_gc_threads = 1; #endif + trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)", + N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads); #ifdef RTS_GTK_FRONTPANEL if (RtsFlags.GcFlags.frontpanel) { @@ -266,19 +260,18 @@ GarbageCollect ( rtsBool force_major_gc ) } #endif +#ifdef DEBUG + // check for memory leaks if DEBUG is on + memInventory(traceClass(DEBUG_gc)); +#endif + // check stack sanity *before* GC (ToDo: check all threads) IF_DEBUG(sanity, checkFreeListSanity()); - /* Initialise the static object lists - */ - static_objects = END_OF_STATIC_LIST; - scavenged_static_objects = END_OF_STATIC_LIST; - -#ifdef THREADED_RTS - initSpinLock(&static_objects_sync); - initSpinLock(&recordMutableGen_sync); - initSpinLock(&gc_alloc_block_sync); -#endif + // Initialise all our gc_thread structures + for (t = 0; t < n_gc_threads; t++) { + init_gc_thread(gc_threads[t]); + } // Initialise all the generations/steps that we're collecting. for (g = 0; g <= N; g++) { @@ -301,21 +294,8 @@ GarbageCollect ( rtsBool force_major_gc ) mark_stack_bdescr = NULL; } - // Initialise all our gc_thread structures - for (t = 0; t < n_gc_threads; t++) { - init_gc_thread(&gc_threads[t]); - } - - // the main thread is running: this prevents any other threads from - // exiting prematurely, so we can start them now. - inc_running(); - wakeup_gc_threads(n_gc_threads); - - // Initialise stats - copied = 0; - // this is the main thread - gct = &gc_threads[0]; + gct = gc_threads[0]; /* ----------------------------------------------------------------------- * follow all the roots that we know about: @@ -325,31 +305,42 @@ GarbageCollect ( rtsBool force_major_gc ) * Also do them in reverse generation order, for the usual reason: * namely to reduce the likelihood of spurious old->new pointers. */ - { - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { generations[g].saved_mut_list = generations[g].mut_list; generations[g].mut_list = allocBlock(); - // mut_list always has at least one block. - } - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + // mut_list always has at least one block. + } + + // the main thread is running: this prevents any other threads from + // exiting prematurely, so we can start them now. + // NB. do this after the mutable lists have been saved above, otherwise + // the other GC threads will be writing into the old mutable lists. + inc_running(); + wakeup_gc_threads(n_gc_threads); + + for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { scavenge_mutable_list(&generations[g]); - } } // follow roots from the CAF list (used by GHCi) gct->evac_step = 0; - markCAFs(mark_root); + markCAFs(mark_root, gct); // follow all the roots that the application knows about. gct->evac_step = 0; - GetRoots(mark_root); + markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads); + +#if defined(RTS_USER_SIGNALS) + // mark the signal handlers (signals should be already blocked) + markSignalHandlers(mark_root, gct); +#endif // Mark the weak pointer list, and prepare to detect dead weak pointers. markWeakPtrList(); initWeakForGC(); // Mark the stable pointer table. 
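[Editorial note: the signature change just below — mark_root now takes a void *user argument, and markCAFs/markSomeCapabilities/markStablePtrTable pass the gc_thread through it — is the usual trick of threading per-caller state through a root-marking callback. A minimal standalone sketch of the pattern follows; the Closure and GcState types and the mark_some_roots driver are hypothetical stand-ins for StgClosure, gc_thread, and the real traversal code.]

#include <stdio.h>

typedef struct Closure_ { int live; } Closure;          /* stand-in for StgClosure */
typedef void (*evac_fn)(void *user, Closure **root);

/* hypothetical per-thread GC state, standing in for gc_thread */
typedef struct { int thread_index; int roots_marked; } GcState;

/* walk a root set, handing the per-thread state to the callback,
 * in the style of markCAFs(mark_root, gct) after this patch */
static void mark_some_roots(evac_fn evac, void *user, Closure **roots, int n)
{
    int i;
    for (i = 0; i < n; i++)
        evac(user, &roots[i]);
}

static void mark_root(void *user, Closure **root)
{
    GcState *gct = (GcState *)user;   /* recover this thread's state */
    (*root)->live = 1;                /* stand-in for evacuate(root) */
    gct->roots_marked++;
}

int main(void)
{
    Closure a = {0}, b = {0};
    Closure *roots[] = { &a, &b };
    GcState gct = { 0, 0 };
    mark_some_roots(mark_root, &gct, roots, 2);
    printf("thread %d marked %d roots\n", gct.thread_index, gct.roots_marked);
    return 0;
}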
- markStablePtrTable(mark_root); + markStablePtrTable(mark_root, gct); /* ------------------------------------------------------------------------- * Repeatedly scavenge all the areas we know about until there's no @@ -357,7 +348,7 @@ GarbageCollect ( rtsBool force_major_gc ) */ for (;;) { - gc_thread_work(); + scavenge_until_all_done(); // The other threads are now stopped. We might recurse back to // here, but from now on this is the only thread. @@ -379,9 +370,14 @@ GarbageCollect ( rtsBool force_major_gc ) break; } + shutdown_gc_threads(n_gc_threads); + // Update pointers from the Task list update_task_list(); + // Update pointers from capabilities (probably just the spark queues) + updateCapabilitiesPostGC(); + // Now see which stable names are still alive. gcStablePtrTable(); @@ -394,14 +390,6 @@ GarbageCollect ( rtsBool force_major_gc ) #endif // NO MORE EVACUATION AFTER THIS POINT! - // Finally: compaction of the oldest generation. - if (major_gc && oldest_gen->steps[0].is_compacted) { - // save number of blocks for stats - oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks; - compact(); - } - - IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse)); // Two-space collector: free the old to-space. // g0s0->old_blocks is the old nursery @@ -413,64 +401,127 @@ GarbageCollect ( rtsBool force_major_gc ) } } - // For each workspace, in each thread: - // * clear the BF_EVACUATED flag from each copied block - // * move the copied blocks to the step + // For each workspace, in each thread, move the copied blocks to the step { gc_thread *thr; step_workspace *ws; - bdescr *prev; + bdescr *prev, *next; + + for (t = 0; t < n_gc_threads; t++) { + thr = gc_threads[t]; + + // not step 0 + if (RtsFlags.GcFlags.generations == 1) { + s = 0; + } else { + s = 1; + } + for (; s < total_steps; s++) { + ws = &thr->steps[s]; + + // Push the final block + if (ws->todo_bd) { + push_scanned_block(ws->todo_bd, ws); + } + + ASSERT(gct->scan_bd == NULL); + ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); + + prev = NULL; + for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { + ws->step->n_words += bd->free - bd->start; + prev = bd; + } + if (prev != NULL) { + prev->link = ws->step->blocks; + ws->step->blocks = ws->scavd_list; + } + ws->step->n_blocks += ws->n_scavd_blocks; + } + } + // Add all the partial blocks *after* we've added all the full + // blocks. This is so that we can grab the partial blocks back + // again and try to fill them up in the next GC. for (t = 0; t < n_gc_threads; t++) { - thr = &gc_threads[t]; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - ws = &thr->steps[g][s]; - if (g==0 && s==0) continue; - - // Not true? - // ASSERT( ws->scan_bd == ws->todo_bd ); - ASSERT( ws->scan_bd ? 
ws->scan == ws->scan_bd->free : 1 ); - - // Push the final block - if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); } - - ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); - - prev = ws->scavd_list; - for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; // now from-space - prev = bd; - } - prev->link = ws->stp->blocks; - ws->stp->blocks = ws->scavd_list; - ws->stp->n_blocks += ws->n_scavd_blocks; - ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks); - } + thr = gc_threads[t]; + + // not step 0 + if (RtsFlags.GcFlags.generations == 1) { + s = 0; + } else { + s = 1; + } + for (; s < total_steps; s++) { + ws = &thr->steps[s]; + + prev = NULL; + for (bd = ws->part_list; bd != NULL; bd = next) { + next = bd->link; + if (bd->free == bd->start) { + if (prev == NULL) { + ws->part_list = next; + } else { + prev->link = next; + } + freeGroup(bd); + ws->n_part_blocks--; + } else { + ws->step->n_words += bd->free - bd->start; + prev = bd; + } + } + if (prev != NULL) { + prev->link = ws->step->blocks; + ws->step->blocks = ws->part_list; + } + ws->step->n_blocks += ws->n_part_blocks; + + ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks); + ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words); } } } - // Two-space collector: swap the semi-spaces around. - // Currently: g0s0->old_blocks is the old nursery - // g0s0->blocks is to-space from this GC - // We want these the other way around. - if (RtsFlags.GcFlags.generations == 1) { - bdescr *nursery_blocks = g0s0->old_blocks; - nat n_nursery_blocks = g0s0->n_old_blocks; - g0s0->old_blocks = g0s0->blocks; - g0s0->n_old_blocks = g0s0->n_blocks; - g0s0->blocks = nursery_blocks; - g0s0->n_blocks = n_nursery_blocks; + // Finally: compaction of the oldest generation. + if (major_gc && oldest_gen->steps[0].is_compacted) { + compact(gct->scavenged_static_objects); } + IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse)); + /* run through all the generations/steps and tidy up */ + copied = 0; + max_copied = 0; + avg_copied = 0; + { + nat i; + for (i=0; i < n_gc_threads; i++) { + if (n_gc_threads > 1) { + trace(TRACE_gc,"thread %d:", i); + trace(TRACE_gc," copied %ld", gc_threads[i]->copied * sizeof(W_)); + trace(TRACE_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_)); + trace(TRACE_gc," any_work %ld", gc_threads[i]->any_work); + trace(TRACE_gc," no_work %ld", gc_threads[i]->no_work); + trace(TRACE_gc," scav_find_work %ld", gc_threads[i]->scav_find_work); + } + copied += gc_threads[i]->copied; + max_copied = stg_max(gc_threads[i]->copied, max_copied); + } + if (n_gc_threads == 1) { + max_copied = 0; + avg_copied = 0; + } else { + avg_copied = copied; + } + } + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - if (g <= N) { + if (g == N) { generations[g].collections++; // for stats + if (n_gc_threads > 1) generations[g].par_collections++; } // Count the mutable list as bytes "copied" for the purposes of @@ -502,19 +553,21 @@ GarbageCollect ( rtsBool force_major_gc ) if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { if (stp->is_compacted) { - // for a compacted step, just shift the new to-space - // onto the front of the now-compacted existing blocks. - for (bd = stp->blocks; bd != NULL; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; // now from-space - } // tack the new blocks on the end of the existing blocks if (stp->old_blocks != NULL) { for (bd = stp->old_blocks; bd != NULL; bd = next) { + stp->n_words += bd->free - bd->start; + // NB. 
this step might not be compacted next // time, so reset the BF_COMPACTED flags. // They are set before GC if we're going to // compact. (search for BF_COMPACTED above). bd->flags &= ~BF_COMPACTED; + + // between GCs, all blocks in the heap except + // for the nursery have the BF_EVACUATED flag set. + bd->flags |= BF_EVACUATED; + next = bd->link; if (next == NULL) { bd->link = stp->blocks; @@ -525,6 +578,7 @@ GarbageCollect ( rtsBool force_major_gc ) // add the new blocks to the block tally stp->n_blocks += stp->n_old_blocks; ASSERT(countBlocks(stp->blocks) == stp->n_blocks); + ASSERT(countOccupied(stp->blocks) == stp->n_words); } else // not copacted { @@ -545,10 +599,6 @@ GarbageCollect ( rtsBool force_major_gc ) bd = next; } - // update the count of blocks used by large objects - for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; - } stp->large_objects = stp->scavenged_large_objects; stp->n_large_blocks = stp->n_scavenged_large_blocks; @@ -561,7 +611,6 @@ GarbageCollect ( rtsBool force_major_gc ) */ for (bd = stp->scavenged_large_objects; bd; bd = next) { next = bd->link; - bd->flags &= ~BF_EVACUATED; dbl_link_onto(bd, &stp->large_objects); } @@ -574,8 +623,8 @@ GarbageCollect ( rtsBool force_major_gc ) // update the max size of older generations after a major GC resize_generations(); - // Guess the amount of live data for stats. - live = calcLive(); + // Calculate the amount of live data for stats. + live = calcLiveWords(); // Free the small objects allocated via allocate(), since this will // all have been copied into G0S1 now. @@ -585,6 +634,7 @@ GarbageCollect ( rtsBool force_major_gc ) g0s0->blocks = NULL; } g0s0->n_blocks = 0; + g0s0->n_words = 0; } alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; @@ -618,12 +668,19 @@ GarbageCollect ( rtsBool force_major_gc ) #ifdef PROFILING // resetStaticObjectForRetainerProfiling() must be called before // zeroing below. - resetStaticObjectForRetainerProfiling(); + if (n_gc_threads > 1) { + barf("profiling is currently broken with multi-threaded GC"); + // ToDo: fix the gct->scavenged_static_objects below + } + resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects); #endif // zero the scavenged static object list if (major_gc) { - zero_static_object_list(scavenged_static_objects); + nat i; + for (i = 0; i < n_gc_threads; i++) { + zero_static_object_list(gc_threads[i]->scavenged_static_objects); + } } // Reset the nursery @@ -637,6 +694,7 @@ GarbageCollect ( rtsBool force_major_gc ) // send exceptions to any threads which were about to die RELEASE_SM_LOCK; resurrectThreads(resurrected_threads); + performPendingThrowTos(exception_threads); ACQUIRE_SM_LOCK; // Update the stable pointer hash table. @@ -646,7 +704,7 @@ GarbageCollect ( rtsBool force_major_gc ) IF_DEBUG(sanity, checkSanity()); // extra GC trace info - IF_DEBUG(gc, statDescribeGens()); + if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens(); #ifdef DEBUG // symbol-table based profiling @@ -660,7 +718,7 @@ GarbageCollect ( rtsBool force_major_gc ) #ifdef DEBUG // check for memory leaks if DEBUG is on - memInventory(); + memInventory(traceClass(DEBUG_gc)); #endif #ifdef RTS_GTK_FRONTPANEL @@ -670,7 +728,8 @@ GarbageCollect ( rtsBool force_major_gc ) #endif // ok, GC over: tell the stats department what happened. 
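[Editorial note: a standalone sketch of the per-thread stats aggregation and the "slop" figure this patch introduces. All the numbers below are made up; in the real code copied is counted in words, slop is the gap between the whole-block footprint of live data (calcLiveBlocks() * BLOCK_SIZE_W) and the live word count, and avg_copied is handed to stat_endGC as a running total, presumably divided by the thread count there.]

#include <stdio.h>

#define BLOCK_SIZE_W 1024          /* assumed words per block, illustration only */
#define stg_max(a,b) ((a) > (b) ? (a) : (b))

int main(void)
{
    unsigned long per_thread_copied[] = { 5000, 7200, 4100 };  /* hypothetical */
    int n_gc_threads = 3, i;

    unsigned long copied = 0, max_copied = 0, avg_copied;
    for (i = 0; i < n_gc_threads; i++) {
        copied += per_thread_copied[i];
        max_copied = stg_max(per_thread_copied[i], max_copied);
    }
    /* with a single thread the max/avg figures carry no information,
     * so the patch zeroes them; otherwise the total is passed on */
    if (n_gc_threads == 1) { max_copied = 0; avg_copied = 0; }
    else                   { avg_copied = copied; }

    /* "slop" is the dead space inside live blocks: whole-block
     * footprint minus the words actually occupied */
    unsigned long live_blocks = 12, live_words = 11000;        /* hypothetical */
    unsigned long slop = live_blocks * BLOCK_SIZE_W - live_words;

    printf("copied=%lu max=%lu avg-total=%lu slop=%lu words\n",
           copied, max_copied, avg_copied, slop);
    return 0;
}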
- stat_endGC(allocated, live, copied, N); + slop = calcLiveBlocks() * BLOCK_SIZE_W - live; + stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { @@ -684,190 +743,68 @@ GarbageCollect ( rtsBool force_major_gc ) gct = saved_gct; } -/* --------------------------------------------------------------------------- - Where are the roots that we know about? - - - all the threads on the runnable queue - - all the threads on the blocked queue - - all the threads on the sleeping queue - - all the thread currently executing a _ccall_GC - - all the "main threads" - - ------------------------------------------------------------------------ */ - -/* This has to be protected either by the scheduler monitor, or by the - garbage collection monitor (probably the latter). - KH @ 25/10/99 -*/ - -void -GetRoots( evac_fn evac ) -{ - nat i; - Capability *cap; - Task *task; - - for (i = 0; i < n_capabilities; i++) { - cap = &capabilities[i]; - evac((StgClosure **)(void *)&cap->run_queue_hd); - evac((StgClosure **)(void *)&cap->run_queue_tl); -#if defined(THREADED_RTS) - evac((StgClosure **)(void *)&cap->wakeup_queue_hd); - evac((StgClosure **)(void *)&cap->wakeup_queue_tl); -#endif - for (task = cap->suspended_ccalling_tasks; task != NULL; - task=task->next) { - debugTrace(DEBUG_sched, - "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id); - evac((StgClosure **)(void *)&task->suspended_tso); - } - - } - -#if !defined(THREADED_RTS) - evac((StgClosure **)(void *)&blocked_queue_hd); - evac((StgClosure **)(void *)&blocked_queue_tl); - evac((StgClosure **)(void *)&sleeping_queue); -#endif - - // evac((StgClosure **)&blackhole_queue); - -#if defined(THREADED_RTS) - markSparkQueue(evac); -#endif - -#if defined(RTS_USER_SIGNALS) - // mark the signal handlers (signals should be already blocked) - markSignalHandlers(evac); -#endif -} - /* ----------------------------------------------------------------------------- - isAlive determines whether the given closure is still alive (after - a garbage collection) or not. It returns the new address of the - closure if it is alive, or NULL otherwise. + Figure out which generation to collect, initialise N and major_gc. - NOTE: Use it before compaction only! - It untags and (if needed) retags pointers to closures. + Also returns the total number of blocks in generations that will be + collected. -------------------------------------------------------------------------- */ - -StgClosure * -isAlive(StgClosure *p) +static nat +initialise_N (rtsBool force_major_gc) { - const StgInfoTable *info; - bdescr *bd; - StgWord tag; - StgClosure *q; - - while (1) { - /* The tag and the pointer are split, to be merged later when needed. */ - tag = GET_CLOSURE_TAG(p); - q = UNTAG_CLOSURE(p); - - ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); - info = get_itbl(q); - - // ignore static closures - // - // ToDo: for static closures, check the static link field. - // Problem here is that we sometimes don't set the link field, eg. - // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. - // - if (!HEAP_ALLOCED(q)) { - return p; - } - - // ignore closures in generations that we're not collecting. 
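[Editorial note: isAlive's preamble above splits a closure pointer into its tag and its address ("the tag and the pointer are split, to be merged later when needed"). The standalone model below illustrates that tagging scheme; the 3-bit mask assumes 8-byte alignment on a 64-bit platform, and cell/untag/retag are illustrative names, not the RTS's GET_CLOSURE_TAG/UNTAG_CLOSURE macros.]

#include <stdio.h>
#include <stdint.h>

#define TAG_MASK 7   /* low 3 bits are free when objects are 8-byte aligned */

static uintptr_t get_tag(uintptr_t p) { return p & TAG_MASK; }
static uintptr_t untag(uintptr_t p)   { return p & ~(uintptr_t)TAG_MASK; }
static uintptr_t retag(uintptr_t p, uintptr_t tag) { return p | tag; }

int main(void)
{
    static long long cell;                 /* 8-byte aligned on typical LP64 */
    uintptr_t p = (uintptr_t)&cell | 2;    /* pointer carrying tag 2 */

    uintptr_t tag = get_tag(p);            /* split off the tag ...          */
    long long *q = (long long *)untag(p);  /* ... so the address is usable   */
    *q = 42;

    /* merge the tag back later when needed */
    printf("tag=%lu value=%lld restored=%d\n",
           (unsigned long)tag, *q, retag((uintptr_t)q, tag) == p);
    return 0;
}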
- bd = Bdescr((P_)q); - if (bd->gen_no > N) { - return p; - } - - // if it's a pointer into to-space, then we're done - if (bd->flags & BF_EVACUATED) { - return p; - } + int g; + nat s, blocks, blocks_total; - // large objects use the evacuated flag - if (bd->flags & BF_LARGE) { - return NULL; - } + blocks = 0; + blocks_total = 0; - // check the mark bit for compacted steps - if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) { - return p; + if (force_major_gc) { + N = RtsFlags.GcFlags.generations - 1; + } else { + N = 0; } - switch (info->type) { - - case IND: - case IND_STATIC: - case IND_PERM: - case IND_OLDGEN: // rely on compatible layout with StgInd - case IND_OLDGEN_PERM: - // follow indirections - p = ((StgInd *)q)->indirectee; - continue; - - case EVACUATED: - // alive! - return ((StgEvacuated *)q)->evacuee; - - case TSO: - if (((StgTSO *)q)->what_next == ThreadRelocated) { - p = (StgClosure *)((StgTSO *)q)->link; - continue; - } - return NULL; - - default: - // dead. - return NULL; + for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) { + blocks = 0; + for (s = 0; s < generations[g].n_steps; s++) { + blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W; + blocks += generations[g].steps[s].n_large_blocks; + } + if (blocks >= generations[g].max_blocks) { + N = stg_max(N,g); + } + if ((nat)g <= N) { + blocks_total += blocks; + } } - } -} -/* ----------------------------------------------------------------------------- - Figure out which generation to collect, initialise N and major_gc. - -------------------------------------------------------------------------- */ + blocks_total += countNurseryBlocks(); -static void -initialise_N (rtsBool force_major_gc) -{ - nat g; - - if (force_major_gc) { - N = RtsFlags.GcFlags.generations - 1; - major_gc = rtsTrue; - } else { - N = 0; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - if (generations[g].steps[0].n_blocks + - generations[g].steps[0].n_large_blocks - >= generations[g].max_blocks) { - N = g; - } - } - major_gc = (N == RtsFlags.GcFlags.generations-1); - } + major_gc = (N == RtsFlags.GcFlags.generations-1); + return blocks_total; } /* ----------------------------------------------------------------------------- Initialise the gc_thread structures. 
-------------------------------------------------------------------------- */ -static void -alloc_gc_thread (gc_thread *t, int n) +static gc_thread * +alloc_gc_thread (int n) { - nat g, s; + nat s; step_workspace *ws; + gc_thread *t; + + t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace), + "alloc_gc_thread"); #ifdef THREADED_RTS t->id = 0; initCondition(&t->wake_cond); initMutex(&t->wake_mutex); - t->wakeup = rtsFalse; + t->wakeup = rtsTrue; // starts true, so we can wait for the + // thread to start up, see wakeup_gc_threads t->exit = rtsFalse; #endif @@ -881,32 +818,24 @@ alloc_gc_thread (gc_thread *t, int n) t->papi_events = -1; #endif - t->steps = stgMallocBytes(RtsFlags.GcFlags.generations * - sizeof(step_workspace *), - "initialise_gc_thread"); - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) + for (s = 0; s < total_steps; s++) { - t->steps[g] = stgMallocBytes(generations[g].n_steps * - sizeof(step_workspace), - "initialise_gc_thread/2"); - - for (s = 0; s < generations[g].n_steps; s++) - { - ws = &t->steps[g][s]; - ws->stp = &generations[g].steps[s]; - ws->gct = t; - - ws->scan_bd = NULL; - ws->scan = NULL; - - ws->todo_bd = NULL; - ws->buffer_todo_bd = NULL; - - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - } + ws = &t->steps[s]; + ws->step = &all_steps[s]; + ASSERT(s == ws->step->abs_no); + ws->gct = t; + + ws->todo_bd = NULL; + ws->buffer_todo_bd = NULL; + + ws->part_list = NULL; + ws->n_part_blocks = 0; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; } + + return t; } @@ -917,17 +846,17 @@ alloc_gc_threads (void) #if defined(THREADED_RTS) nat i; gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads * - sizeof(gc_thread), + sizeof(gc_thread*), "alloc_gc_threads"); for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) { - alloc_gc_thread(&gc_threads[i], i); + gc_threads[i] = alloc_gc_thread(i); } #else - gc_threads = stgMallocBytes (sizeof(gc_thread), + gc_threads = stgMallocBytes (sizeof(gc_thread*), "alloc_gc_threads"); - alloc_gc_thread(gc_threads, 0); + gc_threads[0] = alloc_gc_thread(0); #endif } } @@ -949,6 +878,7 @@ inc_running (void) ACQUIRE_LOCK(&gc_running_mutex); n_running = ++gc_running_threads; RELEASE_LOCK(&gc_running_mutex); + ASSERT(n_running <= n_gc_threads); return n_running; } @@ -957,27 +887,19 @@ dec_running (void) { nat n_running; ACQUIRE_LOCK(&gc_running_mutex); + ASSERT(n_gc_threads != 0); n_running = --gc_running_threads; RELEASE_LOCK(&gc_running_mutex); return n_running; } -// -// gc_thread_work(): Scavenge until there's no work left to do and all -// the running threads are idle. -// static void -gc_thread_work (void) +scavenge_until_all_done (void) { nat r; debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index); - // gc_running_threads has already been incremented for us; either - // this is the main thread and we incremented it inside - // GarbageCollect(), or this is a worker thread and the main - // thread bumped gc_running_threads before waking us up. - loop: scavenge_loop(); // scavenge_loop() only exits when there's no work to do @@ -987,6 +909,7 @@ loop: gct->thread_index, r); while (gc_running_threads != 0) { + // usleep(1); if (any_work()) { inc_running(); goto loop; @@ -1001,8 +924,26 @@ loop: debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index); } - #if defined(THREADED_RTS) +// +// gc_thread_work(): Scavenge until there's no work left to do and all +// the running threads are idle. 
+// +static void +gc_thread_work (void) +{ + // gc_running_threads has already been incremented for us; this is + // a worker thread and the main thread bumped gc_running_threads + // before waking us up. + + // Every thread evacuates some roots. + gct->evac_step = 0; + markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads); + + scavenge_until_all_done(); +} + + static void gc_thread_mainloop (void) { @@ -1010,13 +951,13 @@ gc_thread_mainloop (void) // Wait until we're told to wake up ACQUIRE_LOCK(&gct->wake_mutex); + gct->wakeup = rtsFalse; while (!gct->wakeup) { debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index); waitCondition(&gct->wake_cond, &gct->wake_mutex); } RELEASE_LOCK(&gct->wake_mutex); - gct->wakeup = rtsFalse; if (gct->exit) break; #ifdef USE_PAPI @@ -1063,7 +1004,7 @@ start_gc_threads (void) // Start from 1: the main thread is 0 for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) { createOSThread(&id, (OSThreadProc*)&gc_thread_entry, - &gc_threads[i]); + gc_threads[i]); } done = rtsTrue; } @@ -1077,10 +1018,39 @@ wakeup_gc_threads (nat n_threads USED_IF_THREADS) nat i; for (i=1; i < n_threads; i++) { inc_running(); - ACQUIRE_LOCK(&gc_threads[i].wake_mutex); - gc_threads[i].wakeup = rtsTrue; - signalCondition(&gc_threads[i].wake_cond); - RELEASE_LOCK(&gc_threads[i].wake_mutex); + debugTrace(DEBUG_gc, "waking up gc thread %d", i); + do { + ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + if (gc_threads[i]->wakeup) { + RELEASE_LOCK(&gc_threads[i]->wake_mutex); + continue; + } else { + break; + } + } while (1); + gc_threads[i]->wakeup = rtsTrue; + signalCondition(&gc_threads[i]->wake_cond); + RELEASE_LOCK(&gc_threads[i]->wake_mutex); + } +#endif +} + +// After GC is complete, we must wait for all GC threads to enter the +// standby state, otherwise they may still be executing inside +// any_work(), and may even remain awake until the next GC starts. +static void +shutdown_gc_threads (nat n_threads USED_IF_THREADS) +{ +#if defined(THREADED_RTS) + nat i; + rtsBool wakeup; + for (i=1; i < n_threads; i++) { + do { + ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + wakeup = gc_threads[i]->wakeup; + // wakeup is false while the thread is waiting + RELEASE_LOCK(&gc_threads[i]->wake_mutex); + } while (wakeup); } #endif } @@ -1111,29 +1081,41 @@ init_collected_gen (nat g, nat n_threads) for (s = 0; s < generations[g].n_steps; s++) { + stp = &generations[g].steps[s]; + ASSERT(stp->gen_no == g); + + // we'll construct a new list of threads in this step + // during GC, throw away the current list. + stp->old_threads = stp->threads; + stp->threads = END_TSO_QUEUE; + // generation 0, step 0 doesn't need to-space if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { continue; } - stp = &generations[g].steps[s]; - ASSERT(stp->gen_no == g); - // deprecate the existing blocks stp->old_blocks = stp->blocks; stp->n_old_blocks = stp->n_blocks; stp->blocks = NULL; stp->n_blocks = 0; + stp->n_words = 0; // we don't have any to-be-scavenged blocks yet stp->todos = NULL; + stp->todos_last = NULL; stp->n_todos = 0; // initialise the large object queues. 
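[Editorial note: the wakeup/shutdown protocol in the hunks above — wakeup starts true so the coordinator can wait for a worker to reach its loop; wakeup_gc_threads spins until the worker is really waiting before signalling; shutdown_gc_threads polls until the flag drops again — can be modelled with plain POSIX threads. A single-worker sketch under those assumptions, using nothing from the RTS:]

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t wake_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wake_cond  = PTHREAD_COND_INITIALIZER;
static bool wakeup = true;   /* starts true, as in alloc_gc_thread */
static bool exit_flag = false;

static void *worker(void *arg)
{
    bool done;
    (void)arg;
    for (;;) {
        pthread_mutex_lock(&wake_mutex);
        wakeup = false;                      /* now "standing by" */
        while (!wakeup)
            pthread_cond_wait(&wake_cond, &wake_mutex);
        done = exit_flag;                    /* read under the lock */
        pthread_mutex_unlock(&wake_mutex);
        if (done) return NULL;
        puts("worker: one round of scavenging");
    }
}

static void wake_worker(bool should_exit)
{
    /* wait until the worker has really gone to sleep (wakeup == false),
     * then raise the flag and signal -- the same dance as the patch's
     * wakeup_gc_threads; shutdown_gc_threads polls the flag similarly */
    for (;;) {
        pthread_mutex_lock(&wake_mutex);
        if (!wakeup) break;                  /* worker is waiting; keep lock */
        pthread_mutex_unlock(&wake_mutex);
    }
    exit_flag = should_exit;
    wakeup = true;
    pthread_cond_signal(&wake_cond);
    pthread_mutex_unlock(&wake_mutex);
}

int main(void)
{
    pthread_t id;
    pthread_create(&id, NULL, worker, NULL);
    wake_worker(false);                      /* run one GC round */
    wake_worker(true);                       /* wake again so it can exit */
    pthread_join(id, NULL);
    return 0;
}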
stp->scavenged_large_objects = NULL; stp->n_scavenged_large_blocks = 0; - // mark the large objects as not evacuated yet + // mark the small objects as from-space + for (bd = stp->old_blocks; bd; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; + } + + // mark the large objects as from-space for (bd = stp->large_objects; bd; bd = bd->link) { bd->flags &= ~BF_EVACUATED; } @@ -1184,18 +1166,18 @@ init_collected_gen (nat g, nat n_threads) // we don't copy objects into g0s0, unless -G0 if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue; - ws = &gc_threads[t].steps[g][s]; - - ws->scan_bd = NULL; - ws->scan = NULL; + ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; ws->todo_large_objects = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + // allocate the first to-space block; extra blocks will be // chained on as necessary. ws->todo_bd = NULL; ws->buffer_todo_bd = NULL; - gc_alloc_todo_block(ws); + alloc_todo_block(ws,0); ws->scavd_list = NULL; ws->n_scavd_blocks = 0; @@ -1222,46 +1204,62 @@ init_uncollected_gen (nat g, nat threads) stp->n_scavenged_large_blocks = 0; } - for (t = 0; t < threads; t++) { - for (s = 0; s < generations[g].n_steps; s++) { + for (s = 0; s < generations[g].n_steps; s++) { - ws = &gc_threads[t].steps[g][s]; - stp = ws->stp; + stp = &generations[g].steps[s]; + + for (t = 0; t < threads; t++) { + ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; ws->buffer_todo_bd = NULL; ws->todo_large_objects = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + ws->scavd_list = NULL; ws->n_scavd_blocks = 0; // If the block at the head of the list in this generation // is less than 3/4 full, then use it as a todo block. - if (isPartiallyFull(stp->blocks)) + if (stp->blocks && isPartiallyFull(stp->blocks)) { ws->todo_bd = stp->blocks; + ws->todo_free = ws->todo_bd->free; + ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; stp->blocks = stp->blocks->link; stp->n_blocks -= 1; + stp->n_words -= ws->todo_bd->free - ws->todo_bd->start; ws->todo_bd->link = NULL; - - // this block is also the scan block; we must scan - // from the current end point. - ws->scan_bd = ws->todo_bd; - ws->scan = ws->scan_bd->free; - - // subtract the contents of this block from the stats, - // because we'll count the whole block later. - copied -= ws->scan_bd->free - ws->scan_bd->start; + // we must scan from the current end point. + ws->todo_bd->u.scan = ws->todo_bd->free; } else { - ws->scan_bd = NULL; - ws->scan = NULL; ws->todo_bd = NULL; - gc_alloc_todo_block(ws); + alloc_todo_block(ws,0); } } + + // deal out any more partial blocks to the threads' part_lists + t = 0; + while (stp->blocks && isPartiallyFull(stp->blocks)) + { + bd = stp->blocks; + stp->blocks = bd->link; + ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; + bd->link = ws->part_list; + ws->part_list = bd; + ws->n_part_blocks += 1; + bd->u.scan = bd->free; + stp->n_blocks -= 1; + stp->n_words -= bd->free - bd->start; + t++; + if (t == n_gc_threads) t = 0; + } } + // Move the private mutable lists from each capability onto the // main mutable list for the generation. 
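[Editorial note: init_uncollected_gen, a little further below, hands each thread one partly-filled block as its todo block and then deals any remaining partial blocks round-robin onto the threads' part_lists. A standalone sketch of that dealing loop, with made-up block sizes and the patch's "less than 3/4 full" threshold:]

#include <stdio.h>
#include <stdlib.h>

typedef struct Block_ {
    int used, capacity;
    struct Block_ *link;
} Block;

typedef struct { Block *part_list; int n_part_blocks; } Workspace;

static int is_partially_full(Block *bd)
{
    return bd->used < bd->capacity * 3 / 4;   /* "less than 3/4 full" */
}

int main(void)
{
    enum { N_THREADS = 2, N_BLOCKS = 5 };
    Workspace ws[N_THREADS] = {{0}};
    int fills[N_BLOCKS] = { 10, 30, 40, 90, 95 };   /* hypothetical fill levels */
    Block *blocks = NULL;
    int i, t;

    /* build a list of blocks with assorted fill levels (cleanup elided) */
    for (i = N_BLOCKS - 1; i >= 0; i--) {
        Block *bd = malloc(sizeof *bd);
        bd->used = fills[i]; bd->capacity = 100;
        bd->link = blocks; blocks = bd;
    }

    /* deal partly-full blocks from the head of the list to each
     * thread's part_list in turn, stopping at the first full one,
     * as init_uncollected_gen does */
    t = 0;
    while (blocks && is_partially_full(blocks)) {
        Block *bd = blocks;
        blocks = bd->link;
        bd->link = ws[t].part_list;
        ws[t].part_list = bd;
        ws[t].n_part_blocks++;
        t = (t + 1) % N_THREADS;
    }

    for (t = 0; t < N_THREADS; t++)
        printf("thread %d: %d partial block(s)\n", t, ws[t].n_part_blocks);
    return 0;
}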
for (i = 0; i < n_capabilities; i++) { @@ -1282,20 +1280,39 @@ init_uncollected_gen (nat g, nat threads) static void init_gc_thread (gc_thread *t) { + t->static_objects = END_OF_STATIC_LIST; + t->scavenged_static_objects = END_OF_STATIC_LIST; + t->scan_bd = NULL; t->evac_step = 0; t->failed_to_evac = rtsFalse; t->eager_promotion = rtsTrue; t->thunk_selector_depth = 0; + t->copied = 0; + t->scanned = 0; + t->any_work = 0; + t->no_work = 0; + t->scav_find_work = 0; } /* ----------------------------------------------------------------------------- - Function we pass to GetRoots to evacuate roots. + Function we pass to evacuate roots. -------------------------------------------------------------------------- */ static void -mark_root(StgClosure **root) +mark_root(void *user, StgClosure **root) { - evacuate(root); + // we stole a register for gct, but this function is called from + // *outside* the GC where the register variable is not in effect, + // so we need to save and restore it here. NB. only call + // mark_root() from the main GC thread, otherwise gct will be + // incorrect. + gc_thread *saved_gct; + saved_gct = gct; + gct = user; + + evacuate(root); + + gct = saved_gct; } /* ----------------------------------------------------------------------------- @@ -1316,42 +1333,6 @@ zero_static_object_list(StgClosure* first_static) } } -/* ----------------------------------------------------------------------------- - Reverting CAFs - -------------------------------------------------------------------------- */ - -void -revertCAFs( void ) -{ - StgIndStatic *c; - - for (c = (StgIndStatic *)revertible_caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - SET_INFO(c, c->saved_info); - c->saved_info = NULL; - // could, but not necessary: c->static_link = NULL; - } - revertible_caf_list = NULL; -} - -void -markCAFs( evac_fn evac ) -{ - StgIndStatic *c; - - for (c = (StgIndStatic *)caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - evac(&c->indirectee); - } - for (c = (StgIndStatic *)revertible_caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - evac(&c->indirectee); - } -} - /* ---------------------------------------------------------------------------- Update the pointers from the task list @@ -1405,7 +1386,7 @@ resize_generations (void) nat gens = RtsFlags.GcFlags.generations; // live in the oldest generations - live = oldest_gen->steps[0].n_blocks + + live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+ oldest_gen->steps[0].n_large_blocks; // default max size for all generations except zero @@ -1494,7 +1475,7 @@ resize_nursery (void) * performance we get from 3L bytes, reducing to the same * performance at 2L bytes. */ - blocks = g0s0->n_old_blocks; + blocks = g0s0->n_blocks; if ( RtsFlags.GcFlags.maxHeapSize != 0 && blocks * RtsFlags.GcFlags.oldGenFactor * 2 >