X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=d0dd44dd8aa0d0976a243c344330f34b955af1cb;hp=2eabdabee38b5a2bc7aa1d68f8bb4d9d24162ffa;hb=1fb38442d3a55ac92795aa6c5ed4df82011df724;hpb=65ac2f4cefcea7ca78a65ca22889b51b5a27d1f0

diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 2eabdab..d0dd44d 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -40,6 +40,7 @@
 #include "GC.h"
 #include "GCThread.h"
+#include "GCTDecl.h"
 #include "Compact.h"
 #include "Evac.h"
 #include "Scav.h"
@@ -137,8 +138,8 @@ DECLARE_GCT
 static void mark_root               (void *user, StgClosure **root);
 static void zero_static_object_list (StgClosure* first_static);
 static nat  initialise_N            (rtsBool force_major_gc);
-static void init_collected_gen      (nat g, nat threads);
-static void init_uncollected_gen    (nat g, nat threads);
+static void prepare_collected_gen   (generation *gen);
+static void prepare_uncollected_gen (generation *gen);
 static void init_gc_thread          (gc_thread *t);
 static void resize_generations      (void);
 static void resize_nursery          (void);
@@ -146,8 +147,9 @@ static void start_gc_threads        (void);
 static void scavenge_until_all_done (void);
 static StgWord inc_running          (void);
 static StgWord dec_running          (void);
-static void wakeup_gc_threads       (nat n_threads, nat me);
-static void shutdown_gc_threads     (nat n_threads, nat me);
+static void wakeup_gc_threads       (nat me);
+static void shutdown_gc_threads     (nat me);
+static void collect_gct_blocks      (void);
 
 #if 0 && defined(DEBUG)
 static void gcCAFs                  (void);
@@ -174,9 +176,9 @@ GarbageCollect (rtsBool force_major_gc,
 {
   bdescr *bd;
   generation *gen;
-  lnat live, allocated, max_copied, avg_copied, slop;
+  lnat live_blocks, live_words, allocated, max_copied, avg_copied;
   gc_thread *saved_gct;
-  nat g, t, n;
+  nat g, n;
 
   // necessary if we stole a callee-saves register for gct:
   saved_gct = gct;
@@ -197,11 +199,11 @@
   ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord));
   // otherwise adjust the padding in gen_workspace.
 
-  // tell the stats department that we've started a GC
-  stat_startGC();
+  // this is the main thread
+  SET_GCT(gc_threads[cap->no]);
 
-  // tell the STM to discard any cached closures it's hoping to re-use
-  stmPreGCHook();
+  // tell the stats department that we've started a GC
+  stat_startGC(gct);
 
   // lock the StablePtr table
   stablePtrPreGC();
@@ -221,7 +223,7 @@
   /* Approximate how much we allocated.
   * Todo: only when generating stats?
   */
-  allocated = calcAllocated();
+  allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
 
   /* Figure out which generation to collect
   */
@@ -274,23 +276,20 @@
 #endif
 
   // check sanity *before* GC
-  IF_DEBUG(sanity, checkSanity(rtsTrue));
-
-  // Initialise all our gc_thread structures
-  for (t = 0; t < n_gc_threads; t++) {
-      init_gc_thread(gc_threads[t]);
-  }
+  IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc));
 
   // Initialise all the generations/steps that we're collecting.
   for (g = 0; g <= N; g++) {
-      init_collected_gen(g,n_gc_threads);
+      prepare_collected_gen(&generations[g]);
   }
-
   // Initialise all the generations/steps that we're *not* collecting.
   for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
-      init_uncollected_gen(g,n_gc_threads);
+      prepare_uncollected_gen(&generations[g]);
   }
 
+  // Prepare this gc_thread
+  init_gc_thread(gct);
+
   /* Allocate a mark stack if we're doing a major collection.
   */
   if (major_gc && oldest_gen->mark) {
@@ -305,17 +304,6 @@ GarbageCollect (rtsBool force_major_gc,
       mark_sp = NULL;
   }
 
-  // this is the main thread
-#ifdef THREADED_RTS
-  if (n_gc_threads == 1) {
-      SET_GCT(gc_threads[0]);
-  } else {
-      SET_GCT(gc_threads[cap->no]);
-  }
-#else
-SET_GCT(gc_threads[0]);
-#endif
-
   /* -----------------------------------------------------------------------
   * follow all the roots that we know about:
   */
@@ -325,28 +313,9 @@ SET_GCT(gc_threads[0]);
   // NB. do this after the mutable lists have been saved above, otherwise
   // the other GC threads will be writing into the old mutable lists.
   inc_running();
-  wakeup_gc_threads(n_gc_threads, gct->thread_index);
+  wakeup_gc_threads(gct->thread_index);
 
-  // Mutable lists from each generation > N
-  // we want to *scavenge* these roots, not evacuate them: they're not
-  // going to move in this GC.
-  // Also do them in reverse generation order, for the usual reason:
-  // namely to reduce the likelihood of spurious old->new pointers.
-  //
-  for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
-#if defined(THREADED_RTS)
-      if (n_gc_threads > 1) {
-          scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
-      } else {
-          scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]);
-      }
-#else
-      scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
-#endif
-      freeChain_sync(generations[g].saved_mut_list);
-      generations[g].saved_mut_list = NULL;
-
-  }
+  traceEventGcWork(gct->cap);
 
   // scavenge the capability-private mutable lists.  This isn't part
   // of markSomeCapabilities() because markSomeCapabilities() can only
@@ -361,17 +330,25 @@ SET_GCT(gc_threads[0]);
 #endif
       }
   } else {
-      scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
+      scavenge_capability_mut_lists(gct->cap);
   }
 
   // follow roots from the CAF list (used by GHCi)
-  gct->evac_gen = 0;
+  gct->evac_gen_no = 0;
   markCAFs(mark_root, gct);
 
   // follow all the roots that the application knows about.
-  gct->evac_gen = 0;
-  markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
-                       rtsTrue/*prune sparks*/);
+  gct->evac_gen_no = 0;
+  if (n_gc_threads == 1) {
+      for (n = 0; n < n_capabilities; n++) {
+          markCapability(mark_root, gct, &capabilities[n],
+                         rtsTrue/*don't mark sparks*/);
+      }
+  } else {
+      markCapability(mark_root, gct, cap, rtsTrue/*don't mark sparks*/);
+  }
+
+  markScheduler(mark_root, gct);
 
 #if defined(RTS_USER_SIGNALS)
   // mark the signal handlers (signals should be already blocked)
@@ -395,13 +372,6 @@ SET_GCT(gc_threads[0]);
       // The other threads are now stopped.  We might recurse back to
       // here, but from now on this is the only thread.
 
-      // if any blackholes are alive, make the threads that wait on
-      // them alive too.
-      if (traverseBlackholeQueue()) {
-          inc_running();
-          continue;
-      }
-
       // must be last...  invariant is that everything is fully
       // scavenged at this point.
       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
          inc_running();
          continue;
       }
 
      break;
   }
 
-  shutdown_gc_threads(n_gc_threads, gct->thread_index);
+  shutdown_gc_threads(gct->thread_index);
 
   // Now see which stable names are still alive.
   gcStablePtrTable();
 
+#ifdef THREADED_RTS
+  if (n_gc_threads == 1) {
+      for (n = 0; n < n_capabilities; n++) {
+          pruneSparkQueue(&capabilities[n]);
+      }
+  } else {
+      pruneSparkQueue(gct->cap);
+  }
+#endif
+
 #ifdef PROFILING
   // We call processHeapClosureForDead() on every closure destroyed during
   // the current garbage collection, so we invoke LdvCensusForDead().
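
Aside (illustration only, not part of the patch): the weak-pointer handling a few lines above keeps re-entering the scavenge phase until traverseWeakPtrList() reports that nothing new was evacuated. Below is a minimal standalone sketch of that fixed-point structure; drain_all_work() and walk_weak_ptr_list() are hypothetical stand-ins for scavenge_until_all_done() and traverseWeakPtrList(), not real RTS functions.

#include <stdbool.h>

extern void drain_all_work(void);      /* stand-in for scavenge_until_all_done() */
extern bool walk_weak_ptr_list(void);  /* stand-in: true if it evacuated something */

void collect_to_fixed_point(void)
{
    for (;;) {
        drain_all_work();              /* everything reachable so far is scavenged */
        if (walk_weak_ptr_list()) {
            continue;                  /* new objects were evacuated: scavenge again */
        }
        break;                         /* nothing changed: a fixed point was reached */
    }
}

The loop terminates because each pass over the weak-pointer list can only move closures from "dead" to "live", so eventually a pass finds no new work.
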
@@ -438,76 +418,6 @@ SET_GCT(gc_threads[0]);
       }
   }
 
-  // For each workspace, in each thread, move the copied blocks to the step
-  {
-      gc_thread *thr;
-      gen_workspace *ws;
-      bdescr *prev, *next;
-
-      for (t = 0; t < n_gc_threads; t++) {
-          thr = gc_threads[t];
-
-          for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-              ws = &thr->gens[g];
-
-              // Push the final block
-              if (ws->todo_bd) {
-                  push_scanned_block(ws->todo_bd, ws);
-              }
-
-              ASSERT(gct->scan_bd == NULL);
-              ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
-
-              prev = NULL;
-              for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
-                  ws->gen->n_words += bd->free - bd->start;
-                  prev = bd;
-              }
-              if (prev != NULL) {
-                  prev->link = ws->gen->blocks;
-                  ws->gen->blocks = ws->scavd_list;
-              }
-              ws->gen->n_blocks += ws->n_scavd_blocks;
-          }
-      }
-
-      // Add all the partial blocks *after* we've added all the full
-      // blocks.  This is so that we can grab the partial blocks back
-      // again and try to fill them up in the next GC.
-      for (t = 0; t < n_gc_threads; t++) {
-          thr = gc_threads[t];
-
-          for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
-              ws = &thr->gens[g];
-
-              prev = NULL;
-              for (bd = ws->part_list; bd != NULL; bd = next) {
-                  next = bd->link;
-                  if (bd->free == bd->start) {
-                      if (prev == NULL) {
-                          ws->part_list = next;
-                      } else {
-                          prev->link = next;
-                      }
-                      freeGroup(bd);
-                      ws->n_part_blocks--;
-                  } else {
-                      ws->gen->n_words += bd->free - bd->start;
-                      prev = bd;
-                  }
-              }
-              if (prev != NULL) {
-                  prev->link = ws->gen->blocks;
-                  ws->gen->blocks = ws->part_list;
-              }
-              ws->gen->n_blocks += ws->n_part_blocks;
-
-              ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks);
-              ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words);
-          }
-      }
-  }
-
   // Finally: compact or sweep the oldest generation.
   if (major_gc && oldest_gen->mark) {
       if (oldest_gen->compact)
@@ -516,8 +426,6 @@ SET_GCT(gc_threads[0]);
           sweep(oldest_gen);
   }
 
-  /* run through all the generations/steps and tidy up
-   */
   copied = 0;
   max_copied = 0;
   avg_copied = 0;
@@ -543,6 +451,16 @@ SET_GCT(gc_threads[0]);
       }
   }
 
+  // Run through all the generations/steps and tidy up.
+  // We're going to:
+  //  - count the amount of "live" data (live_words, live_blocks)
+  //  - count the amount of "copied" data in this GC (copied)
+  //  - free from-space
+  //  - make to-space the new from-space (set BF_EVACUATED on all blocks)
+  //
+  live_words = 0;
+  live_blocks = 0;
+
   for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
     if (g == N) {
@@ -554,14 +472,8 @@ SET_GCT(gc_threads[0]);
       // stats.  Every mutable list is copied during every GC.
       if (g > 0) {
           nat mut_list_size = 0;
-          for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
-              mut_list_size += bd->free - bd->start;
-          }
           for (n = 0; n < n_capabilities; n++) {
-              for (bd = capabilities[n].mut_lists[g];
-                   bd != NULL; bd = bd->link) {
-                  mut_list_size += bd->free - bd->start;
-              }
+              mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
           }
           copied += mut_list_size;
@@ -645,8 +557,7 @@ SET_GCT(gc_threads[0]);
           freeChain(gen->large_objects);
           gen->large_objects = gen->scavenged_large_objects;
           gen->n_large_blocks = gen->n_scavenged_large_blocks;
-          gen->n_new_large_blocks = 0;
-          ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+          gen->n_new_large_words = 0;
       }
       else // for generations > N
       {
@@ -661,20 +572,31 @@ SET_GCT(gc_threads[0]);
          // add the new blocks we promoted during this GC
          gen->n_large_blocks += gen->n_scavenged_large_blocks;
-         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+      }
+
+      ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
+
+      gen->scavenged_large_objects = NULL;
+      gen->n_scavenged_large_blocks = 0;
+
+      // Count "live" data
+      live_words  += genLiveWords(gen);
+      live_blocks += genLiveBlocks(gen);
+
+      // add in the partial blocks in the gen_workspaces, but ignore gen 0
+      // if this is a local GC (we can't count another capability's part_list)
+      {
+          nat i;
+          for (i = 0; i < n_capabilities; i++) {
+              live_words  += gcThreadLiveWords(i, gen->no);
+              live_blocks += gcThreadLiveBlocks(i, gen->no);
+          }
       }
   } // for all generations
 
   // update the max size of older generations after a major GC
   resize_generations();
 
-  // Calculate the amount of live data for stats.
-  live = calcLiveWords();
-
-  // Free the small objects allocated via allocate(), since this will
-  // all have been copied into G0S1 now.
-  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
-
   // Start a new pinned_object_block
   for (n = 0; n < n_capabilities; n++) {
       capabilities[n].pinned_object_block = NULL;
   }
@@ -696,9 +618,14 @@ SET_GCT(gc_threads[0]);
       }
   }
 
+  // Reset the nursery: make the blocks empty
+  allocated += clearNurseries();
+
   resize_nursery();
 
-  // mark the garbage collected CAFs as dead
+  resetNurseries();
+
+ // mark the garbage collected CAFs as dead
 #if 0 && defined(DEBUG) // doesn't work at the moment
   if (major_gc) { gcCAFs(); }
 #endif
@@ -721,27 +648,45 @@ SET_GCT(gc_threads[0]);
       }
   }
 
-  // Reset the nursery
-  resetNurseries();
+  // Update the stable pointer hash table.
+  updateStablePtrTable(major_gc);
+
+  // unlock the StablePtr table.  Must be before scheduleFinalizers(),
+  // because a finalizer may call hs_free_fun_ptr() or
+  // hs_free_stable_ptr(), both of which access the StablePtr table.
+  stablePtrPostGC();
 
-  // start any pending finalizers
+  // Start any pending finalizers.  Must be after
+  // updateStablePtrTable() and stablePtrPostGC() (see #4221).
   RELEASE_SM_LOCK;
   scheduleFinalizers(cap, old_weak_ptr_list);
   ACQUIRE_SM_LOCK;
-
-  // send exceptions to any threads which were about to die
+
+  // check sanity after GC
+  // before resurrectThreads(), because that might overwrite some
+  // closures, which will cause problems with THREADED where we don't
+  // fill slop.
+  IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc));
+
+  // send exceptions to any threads which were about to die
   RELEASE_SM_LOCK;
   resurrectThreads(resurrected_threads);
-  performPendingThrowTos(exception_threads);
   ACQUIRE_SM_LOCK;
 
-  // Update the stable pointer hash table.
-  updateStablePtrTable(major_gc);
-
-  // check sanity after GC
-  IF_DEBUG(sanity, checkSanity(rtsTrue));
+  if (major_gc) {
+      nat need, got;
+      need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
+      got = mblocks_allocated;
+      /* If the amount of data remains constant, next major GC we'll
+         require (F+1)*need. We leave (F+2)*need in order to reduce
+         repeated deallocation and reallocation. */
+      need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
+      if (got > need) {
+          returnMemoryToOS(got - need);
+      }
+  }
 
-  // extra GC trace info
+  // extra GC trace info
   IF_DEBUG(gc, statDescribeGens());
 
 #ifdef DEBUG
@@ -766,11 +711,9 @@ SET_GCT(gc_threads[0]);
 #endif
 
   // ok, GC over: tell the stats department what happened.
-  slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
-  stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
-
-  // unlock the StablePtr table
-  stablePtrPostGC();
+  stat_endGC(gct, allocated, live_words,
+             copied, N, max_copied, avg_copied,
+             live_blocks * BLOCK_SIZE_W - live_words /* slop */);
 
   // Guess which generation we'll collect *next* time
   initialise_N(force_major_gc);
@@ -843,6 +786,8 @@ new_gc_thread (nat n, gc_thread *t)
     nat g;
     gen_workspace *ws;
 
+    t->cap = &capabilities[n];
+
 #ifdef THREADED_RTS
     t->id = 0;
     initSpinLock(&t->gc_spin);
@@ -869,11 +814,26 @@ new_gc_thread (nat n, gc_thread *t)
         ASSERT(g == ws->gen->no);
         ws->my_gct = t;
 
-        ws->todo_bd = NULL;
+        // We want to call
+        //   alloc_todo_block(ws,0);
+        // but can't, because it uses gct which isn't set up at this point.
+        // Hence, allocate a block for todo_bd manually:
+        {
+            bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet
+            initBdescr(bd, ws->gen, ws->gen->to);
+            bd->flags = BF_EVACUATED;
+            bd->u.scan = bd->free = bd->start;
+
+            ws->todo_bd = bd;
+            ws->todo_free = bd->free;
+            ws->todo_lim = bd->start + BLOCK_SIZE_W;
+        }
+
         ws->todo_q = newWSDeque(128);
         ws->todo_overflow = NULL;
         ws->n_todo_overflow = 0;
-
+        ws->todo_large_objects = NULL;
+
         ws->part_list = NULL;
         ws->n_part_blocks = 0;
 
@@ -1011,8 +971,6 @@ scavenge_until_all_done (void)
 
 loop:
-    traceEventGcWork(&capabilities[gct->thread_index]);
-
 #if defined(THREADED_RTS)
     if (n_gc_threads > 1) {
         scavenge_loop();
@@ -1023,10 +981,12 @@ loop:
     scavenge_loop();
 #endif
 
+    collect_gct_blocks();
+
     // scavenge_loop() only exits when there's no work to do
     r = dec_running();
 
-    traceEventGcIdle(&capabilities[gct->thread_index]);
+    traceEventGcIdle(gct->cap);
 
     debugTrace(DEBUG_gc, "%d GC threads still running", r);
 
@@ -1034,6 +994,7 @@ loop:
         // usleep(1);
         if (any_work()) {
             inc_running();
+            traceEventGcWork(gct->cap);
             goto loop;
         }
         // any_work() does not remove the work from the queue, it
@@ -1042,7 +1003,7 @@ loop:
         // scavenge_loop() to perform any pending work.
     }
 
-    traceEventGcDone(&capabilities[gct->thread_index]);
+    traceEventGcDone(gct->cap);
 }
 
 #if defined(THREADED_RTS)
@@ -1058,6 +1019,8 @@ gcWorkerThread (Capability *cap)
     gct = gc_threads[cap->no];
     gct->id = osThreadId();
 
+    stat_gcWorkerThreadStart(gct);
+
     // Wait until we're told to wake up
     RELEASE_SPIN_LOCK(&gct->mut_spin);
     gct->wakeup = GC_THREAD_STANDING_BY;
@@ -1071,15 +1034,28 @@ gcWorkerThread (Capability *cap)
     }
     papi_thread_start_gc1_count(gct->papi_events);
 #endif
-
+
+    init_gc_thread(gct);
+
+    traceEventGcWork(gct->cap);
+
     // Every thread evacuates some roots.
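
Aside (illustration only, not part of the patch): the `if (major_gc)` block added earlier in this patch returns memory to the OS when far more megablocks are mapped than the next major GC is expected to need; it retains (F+2)*need, where F corresponds to RtsFlags.GcFlags.oldGenFactor. A small standalone sketch of the same arithmetic follows; the numbers and the retained_target() name are made up for the example.

#include <stdio.h>

/* Keep (F+2) * need megablocks, where "factor" plays the role of
   RtsFlags.GcFlags.oldGenFactor, to damp repeated unmap/remap cycles. */
static unsigned long retained_target(unsigned long need_mblocks,
                                     unsigned long factor)
{
    return (factor + 2) * need_mblocks;
}

int main(void)
{
    unsigned long need = 100;                       /* mblocks needed by live data (example) */
    unsigned long got  = 700;                       /* mblocks currently mapped (example) */
    unsigned long keep = retained_target(need, 2);  /* 400 with a factor of 2 */

    if (got > keep)
        printf("return %lu mblocks to the OS\n", got - keep);  /* prints 300 */
    else
        printf("keep everything\n");
    return 0;
}
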
-    gct->evac_gen = 0;
-    markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
-                         rtsTrue/*prune sparks*/);
-    scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
+    gct->evac_gen_no = 0;
+    markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
+    scavenge_capability_mut_lists(cap);
 
     scavenge_until_all_done();
 
+#ifdef THREADED_RTS
+    // Now that the whole heap is marked, we discard any sparks that
+    // were found to be unreachable.  The main GC thread is currently
+    // marking heap reachable via weak pointers, so it is
+    // non-deterministic whether a spark will be retained if it is
+    // only reachable via weak pointers.  To fix this problem would
+    // require another GC barrier, which is too high a price.
+    pruneSparkQueue(cap);
+#endif
+
 #ifdef USE_PAPI
     // count events in this thread towards the GC totals
     papi_thread_stop_gc1_count(gct->papi_events);
@@ -1093,6 +1069,9 @@ gcWorkerThread (Capability *cap)
     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
 
+    // record the time spent doing GC in the Task structure
+    stat_gcWorkerThreadDone(gct);
+
     SET_GCT(saved_gct);
 }
 
@@ -1103,8 +1082,8 @@ gcWorkerThread (Capability *cap)
 void
 waitForGcThreads (Capability *cap USED_IF_THREADS)
 {
-    nat n_threads = RtsFlags.ParFlags.nNodes;
-    nat me = cap->no;
+    const nat n_threads = RtsFlags.ParFlags.nNodes;
+    const nat me = cap->no;
     nat i, j;
     rtsBool retry = rtsTrue;
 
@@ -1142,11 +1121,14 @@ start_gc_threads (void)
 }
 
 static void
-wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
+wakeup_gc_threads (nat me USED_IF_THREADS)
 {
 #if defined(THREADED_RTS)
     nat i;
-    for (i=0; i < n_threads; i++) {
+
+    if (n_gc_threads == 1) return;
+
+    for (i=0; i < n_gc_threads; i++) {
         if (i == me) continue;
         inc_running();
         debugTrace(DEBUG_gc, "waking up gc thread %d", i);
@@ -1163,11 +1145,14 @@ wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
 // standby state, otherwise they may still be executing inside
 // any_work(), and may even remain awake until the next GC starts.
 static void
-shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
+shutdown_gc_threads (nat me USED_IF_THREADS)
 {
 #if defined(THREADED_RTS)
     nat i;
-    for (i=0; i < n_threads; i++) {
+
+    if (n_gc_threads == 1) return;
+
+    for (i=0; i < n_gc_threads; i++) {
         if (i == me) continue;
         while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
     }
@@ -1178,8 +1163,8 @@ shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
 void
 releaseGCThreads (Capability *cap USED_IF_THREADS)
 {
-    nat n_threads = RtsFlags.ParFlags.nNodes;
-    nat me = cap->no;
+    const nat n_threads = RtsFlags.ParFlags.nNodes;
+    const nat me = cap->no;
     nat i;
     for (i=0; i < n_threads; i++) {
         if (i == me) continue;
@@ -1198,20 +1183,18 @@ releaseGCThreads (Capability *cap USED_IF_THREADS)
    ------------------------------------------------------------------------- */
 
 static void
-init_collected_gen (nat g, nat n_threads)
+prepare_collected_gen (generation *gen)
 {
-    nat t, i;
+    nat i, g, n;
     gen_workspace *ws;
-    generation *gen;
-    bdescr *bd;
+    bdescr *bd, *next;
 
     // Throw away the current mutable list.  Invariant: the mutable
     // list always has at least one block; this means we can avoid a
     // check for NULL in recordMutable().
+    g = gen->no;
     if (g != 0) {
-        freeChain(generations[g].mut_list);
-        generations[g].mut_list = allocBlock();
-        for (i = 0; i < n_capabilities; i++) {
+        for (i = 0; i < n_capabilities; i++) {
             freeChain(capabilities[i].mut_lists[g]);
             capabilities[i].mut_lists[g] = allocBlock();
         }
@@ -1234,9 +1217,35 @@ init_collected_gen (nat g, nat n_threads)
     gen->live_estimate = 0;
 
     // initialise the large object queues.
-    gen->scavenged_large_objects = NULL;
-    gen->n_scavenged_large_blocks = 0;
-
+    ASSERT(gen->scavenged_large_objects == NULL);
+    ASSERT(gen->n_scavenged_large_blocks == 0);
+
+    // grab all the partial blocks stashed in the gc_thread workspaces and
+    // move them to the old_blocks list of this gen.
+    for (n = 0; n < n_capabilities; n++) {
+        ws = &gc_threads[n]->gens[gen->no];
+
+        for (bd = ws->part_list; bd != NULL; bd = next) {
+            next = bd->link;
+            bd->link = gen->old_blocks;
+            gen->old_blocks = bd;
+            gen->n_old_blocks += bd->blocks;
+        }
+        ws->part_list = NULL;
+        ws->n_part_blocks = 0;
+
+        ASSERT(ws->scavd_list == NULL);
+        ASSERT(ws->n_scavd_blocks == 0);
+
+        if (ws->todo_free != ws->todo_bd->start) {
+            ws->todo_bd->free = ws->todo_free;
+            ws->todo_bd->link = gen->old_blocks;
+            gen->old_blocks = ws->todo_bd;
+            gen->n_old_blocks += ws->todo_bd->blocks;
+            alloc_todo_block(ws,0); // always has one block.
+        }
+    }
+
     // mark the small objects as from-space
     for (bd = gen->old_blocks; bd; bd = bd->link) {
         bd->flags &= ~BF_EVACUATED;
@@ -1281,111 +1290,90 @@ init_collected_gen (nat g, nat n_threads)
                 if (!(bd->flags & BF_FRAGMENTED)) {
                     bd->flags |= BF_MARKED;
                 }
+
+                // BF_SWEPT should be marked only for blocks that are being
+                // collected in sweep()
+                bd->flags &= ~BF_SWEPT;
             }
         }
     }
-
-    // For each GC thread, for each step, allocate a "todo" block to
-    // store evacuated objects to be scavenged, and a block to store
-    // evacuated objects that do not need to be scavenged.
-    for (t = 0; t < n_threads; t++) {
-        ws = &gc_threads[t]->gens[g];
-
-        ws->todo_large_objects = NULL;
-
-        ws->part_list = NULL;
-        ws->n_part_blocks = 0;
-
-        // allocate the first to-space block; extra blocks will be
-        // chained on as necessary.
-        ws->todo_bd = NULL;
-        ASSERT(looksEmptyWSDeque(ws->todo_q));
-        alloc_todo_block(ws,0);
-
-        ws->todo_overflow = NULL;
-        ws->n_todo_overflow = 0;
-
-        ws->scavd_list = NULL;
-        ws->n_scavd_blocks = 0;
-    }
 }
 
 /* ----------------------------------------------------------------------------
+   Save the mutable lists in saved_mut_lists
+   ------------------------------------------------------------------------- */
+
+static void
+stash_mut_list (Capability *cap, nat gen_no)
+{
+    cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
+    cap->mut_lists[gen_no] = allocBlock_sync();
+}
+
+/* ----------------------------------------------------------------------------
    Initialise a generation that is *not* to be collected
    ------------------------------------------------------------------------- */
 
 static void
-init_uncollected_gen (nat g, nat threads)
+prepare_uncollected_gen (generation *gen)
 {
-    nat t, n;
-    gen_workspace *ws;
-    generation *gen;
-    bdescr *bd;
+    nat i;
+
+
+    ASSERT(gen->no > 0);
 
     // save the current mutable lists for this generation, and
     // allocate a fresh block for each one.  We'll traverse these
     // mutable lists as roots early on in the GC.
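
Aside (illustration only, not part of the patch): stash_mut_list(), defined just above, snapshots a capability's mutable list for an uncollected generation so the GC can scavenge it as roots while mutator writes go to a fresh block. A toy version of that swap, with made-up Block/ToyCap types and calloc standing in for allocBlock_sync(), looks like this:

#include <stdlib.h>

typedef struct Block_ { struct Block_ *link; } Block;

typedef struct {
    Block *mut_list;        /* remembered set the mutator keeps appending to */
    Block *saved_mut_list;  /* snapshot the GC will scavenge as roots */
} ToyCap;

static Block *toy_alloc_block(void)
{
    return calloc(1, sizeof(Block));   /* stand-in for allocBlock_sync() */
}

void toy_stash_mut_list(ToyCap *cap)
{
    cap->saved_mut_list = cap->mut_list;   /* old entries become GC roots */
    cap->mut_list = toy_alloc_block();     /* new entries go to a fresh block */
}
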
-    generations[g].saved_mut_list = generations[g].mut_list;
-    generations[g].mut_list = allocBlock();
-    for (n = 0; n < n_capabilities; n++) {
-        capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
-        capabilities[n].mut_lists[g] = allocBlock();
+    for (i = 0; i < n_capabilities; i++) {
+        stash_mut_list(&capabilities[i], gen->no);
     }
 
-    gen = &generations[g];
+    ASSERT(gen->scavenged_large_objects == NULL);
+    ASSERT(gen->n_scavenged_large_blocks == 0);
+}
 
-    gen->scavenged_large_objects = NULL;
-    gen->n_scavenged_large_blocks = 0;
+/* -----------------------------------------------------------------------------
+   Collect the completed blocks from a GC thread and attach them to
+   the generation.
+   -------------------------------------------------------------------------- */
 
-    for (t = 0; t < threads; t++) {
-        ws = &gc_threads[t]->gens[g];
-
-        ASSERT(looksEmptyWSDeque(ws->todo_q));
-        ws->todo_large_objects = NULL;
-
-        ws->part_list = NULL;
-        ws->n_part_blocks = 0;
+static void
+collect_gct_blocks (void)
+{
+    nat g;
+    gen_workspace *ws;
+    bdescr *bd, *prev;
+
+    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+        ws = &gct->gens[g];
 
-        ws->scavd_list = NULL;
-        ws->n_scavd_blocks = 0;
+        // there may still be a block attached to ws->todo_bd;
+        // leave it there to use next time.
+
+        if (ws->scavd_list != NULL) {
+            ACQUIRE_SPIN_LOCK(&ws->gen->sync);
+
+            ASSERT(gct->scan_bd == NULL);
+            ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 
-        // If the block at the head of the list in this generation
-        // is less than 3/4 full, then use it as a todo block.
-        if (gen->blocks && isPartiallyFull(gen->blocks))
-        {
-            ws->todo_bd = gen->blocks;
-            ws->todo_free = ws->todo_bd->free;
-            ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
-            gen->blocks = gen->blocks->link;
-            gen->n_blocks -= 1;
-            gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
-            ws->todo_bd->link = NULL;
-            // we must scan from the current end point.
-            ws->todo_bd->u.scan = ws->todo_bd->free;
-        }
-        else
-        {
-            ws->todo_bd = NULL;
-            alloc_todo_block(ws,0);
-        }
-    }
+            prev = NULL;
+            for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
+                ws->gen->n_words += bd->free - bd->start;
+                prev = bd;
+            }
+            if (prev != NULL) {
+                prev->link = ws->gen->blocks;
+                ws->gen->blocks = ws->scavd_list;
+            }
+            ws->gen->n_blocks += ws->n_scavd_blocks;
 
-    // deal out any more partial blocks to the threads' part_lists
-    t = 0;
-    while (gen->blocks && isPartiallyFull(gen->blocks))
-    {
-        bd = gen->blocks;
-        gen->blocks = bd->link;
-        ws = &gc_threads[t]->gens[g];
-        bd->link = ws->part_list;
-        ws->part_list = bd;
-        ws->n_part_blocks += 1;
-        bd->u.scan = bd->free;
-        gen->n_blocks -= 1;
-        gen->n_words -= bd->free - bd->start;
-        t++;
-        if (t == n_gc_threads) t = 0;
+            ws->scavd_list = NULL;
+            ws->n_scavd_blocks = 0;
+
+            RELEASE_SPIN_LOCK(&ws->gen->sync);
+        }
     }
 }
 
@@ -1399,8 +1387,8 @@ init_gc_thread (gc_thread *t)
     t->static_objects = END_OF_STATIC_LIST;
     t->scavenged_static_objects = END_OF_STATIC_LIST;
     t->scan_bd = NULL;
-    t->mut_lists = capabilities[t->thread_index].mut_lists;
-    t->evac_gen = 0;
+    t->mut_lists = t->cap->mut_lists;
+    t->evac_gen_no = 0;
     t->failed_to_evac = rtsFalse;
     t->eager_promotion = rtsTrue;
     t->thunk_selector_depth = 0;
@@ -1466,8 +1454,8 @@ resize_generations (void)
     if (major_gc && RtsFlags.GcFlags.generations > 1) {
         nat live, size, min_alloc, words;
-        nat max  = RtsFlags.GcFlags.maxHeapSize;
-        nat gens = RtsFlags.GcFlags.generations;
+        const nat max  = RtsFlags.GcFlags.maxHeapSize;
+        const nat gens = RtsFlags.GcFlags.generations;
 
         // live in the oldest generations
         if (oldest_gen->live_estimate != 0) {
@@ -1492,11 +1480,10 @@ resize_generations (void)
 
         // Auto-enable compaction when the residency reaches a
        // certain percentage of the maximum heap size (default: 30%).
-        if (RtsFlags.GcFlags.generations > 1 &&
-            (RtsFlags.GcFlags.compact ||
-             (max > 0 &&
-              oldest_gen->n_blocks >
-              (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
+        if (RtsFlags.GcFlags.compact ||
+            (max > 0 &&
+             oldest_gen->n_blocks >
+             (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
             oldest_gen->mark = 1;
             oldest_gen->compact = 1;
//          debugBelch("compaction: on\n", live);
@@ -1556,7 +1543,7 @@ resize_generations (void)
 static void
 resize_nursery (void)
 {
-    lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
+    const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
 
     if (RtsFlags.GcFlags.generations == 1)
     {   // Two-space collector:
@@ -1616,7 +1603,7 @@ resize_nursery (void)
         if (RtsFlags.GcFlags.heapSizeSuggestion) {
             long blocks;
-            nat needed = calcNeeded(); // approx blocks needed at next GC
+            const nat needed = calcNeeded(); // approx blocks needed at next GC
 
            /* Guess how much will be live in generation 0 step 0 next time.
            * A good approximation is obtained by finding the
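
Aside (illustration only, not part of the patch): collect_gct_blocks(), whose body ends at the top of this section, hands a GC thread's finished "scavd" blocks back to their generation while holding that generation's sync lock, folding the block and word counts in as it splices the lists. A self-contained sketch of the same splice, using made-up Blk/ToyGen types and a pthread mutex in place of the RTS spin lock, is:

#include <stddef.h>
#include <pthread.h>

typedef struct Blk_ { struct Blk_ *link; size_t words; } Blk;

typedef struct {
    pthread_mutex_t sync;   /* stand-in for the generation's spin lock */
    Blk    *blocks;
    size_t  n_blocks;
    size_t  n_words;
} ToyGen;

void splice_scavd_list(ToyGen *gen, Blk *scavd_list, size_t n_scavd_blocks)
{
    Blk *bd, *prev;

    if (scavd_list == NULL) return;    /* nothing to hand back this round */

    pthread_mutex_lock(&gen->sync);

    prev = NULL;
    for (bd = scavd_list; bd != NULL; bd = bd->link) {
        gen->n_words += bd->words;     /* account for the data in each block */
        prev = bd;
    }
    prev->link  = gen->blocks;         /* splice: old list hangs off the new tail */
    gen->blocks = scavd_list;
    gen->n_blocks += n_scavd_blocks;

    pthread_mutex_unlock(&gen->sync);
}

Doing the accounting under the lock keeps the generation's n_blocks/n_words consistent even when several GC threads hand blocks back concurrently, which is the point of moving this work out of the old single-threaded tidy-up loop.
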