X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=4d63724ba01009b744db70397be869d17d3edd98;hp=02fd6d91610b363f3c09018c13b2ef0972b7c73f;hb=5d52d9b64c21dcf77849866584744722f8121389;hpb=a2a67cd520b9841114d69a87a423dabcb3b4368e diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 02fd6d9..4d63724 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -33,6 +33,7 @@ #endif #include "Trace.h" #include "RetainerProfile.h" +#include "LdvProfile.h" #include "RaiseAsync.h" #include "Papi.h" #include "Stable.h" @@ -43,6 +44,7 @@ #include "Evac.h" #include "Scav.h" #include "GCUtils.h" +#include "MarkStack.h" #include "MarkWeak.h" #include "Sparks.h" #include "Sweep.h" @@ -99,7 +101,7 @@ rtsBool major_gc; /* Data used for allocation area sizing. */ -static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC +static lnat g0_pcnt_kept = 30; // percentage of g0 live at last minor GC /* Mut-list stats */ #ifdef DEBUG @@ -114,7 +116,7 @@ nat mutlist_MUTVARS, gc_thread **gc_threads = NULL; #if !defined(THREADED_RTS) -StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)]; +StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(gen_workspace)]; #endif // Number of threads running in *this* GC. Affects how many @@ -138,7 +140,6 @@ static nat initialise_N (rtsBool force_major_gc); static void init_collected_gen (nat g, nat threads); static void init_uncollected_gen (nat g, nat threads); static void init_gc_thread (gc_thread *t); -static void update_task_list (void); static void resize_generations (void); static void resize_nursery (void); static void start_gc_threads (void); @@ -153,21 +154,12 @@ static void gcCAFs (void); #endif /* ----------------------------------------------------------------------------- - The mark bitmap & stack. + The mark stack. -------------------------------------------------------------------------- */ -#define MARK_STACK_BLOCKS 4 - -bdescr *mark_stack_bdescr; -StgPtr *mark_stack; -StgPtr *mark_sp; -StgPtr *mark_splim; - -// Flag and pointers used for falling back to a linear scan when the -// mark stack overflows. -rtsBool mark_stack_overflowed; -bdescr *oldgen_scan_bd; -StgPtr oldgen_scan; +bdescr *mark_stack_top_bd; // topmost block in the mark stack +bdescr *mark_stack_bd; // current block in the mark stack +StgPtr mark_sp; // pointer to the next unallocated mark stack entry /* ----------------------------------------------------------------------------- GarbageCollect: the main entry point to the garbage collector. @@ -181,10 +173,10 @@ GarbageCollect (rtsBool force_major_gc, Capability *cap) { bdescr *bd; - step *stp; + generation *gen; lnat live, allocated, max_copied, avg_copied, slop; gc_thread *saved_gct; - nat g, s, t, n; + nat g, t, n; // necessary if we stole a callee-saves register for gct: saved_gct = gct; @@ -202,8 +194,8 @@ GarbageCollect (rtsBool force_major_gc, } #endif - ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord)); - // otherwise adjust the padding in step_workspace. + ASSERT(sizeof(gen_workspace) == 16 * sizeof(StgWord)); + // otherwise adjust the padding in gen_workspace. // tell the stats department that we've started a GC stat_startGC(); @@ -236,7 +228,8 @@ GarbageCollect (rtsBool force_major_gc, n = initialise_N(force_major_gc); #if defined(THREADED_RTS) - work_stealing = RtsFlags.ParFlags.parGcLoadBalancing; + work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled && + N >= RtsFlags.ParFlags.parGcLoadBalancingGen; // It's not always a good idea to do load balancing in parallel // GC. 
In particular, for a parallel program we don't want to // lose locality by moving cached data into another CPU's cache @@ -277,12 +270,11 @@ GarbageCollect (rtsBool force_major_gc, #ifdef DEBUG // check for memory leaks if DEBUG is on - memInventory(traceClass(DEBUG_gc)); + memInventory(DEBUG_gc); #endif - // check stack sanity *before* GC - IF_DEBUG(sanity, checkFreeListSanity()); - IF_DEBUG(sanity, checkMutableLists(rtsTrue)); + // check sanity *before* GC + IF_DEBUG(sanity, checkSanity(rtsTrue)); // Initialise all our gc_thread structures for (t = 0; t < n_gc_threads; t++) { @@ -301,16 +293,16 @@ GarbageCollect (rtsBool force_major_gc, /* Allocate a mark stack if we're doing a major collection. */ - if (major_gc && oldest_gen->steps[0].mark) { - nat mark_stack_blocks; - mark_stack_blocks = stg_max(MARK_STACK_BLOCKS, - oldest_gen->steps[0].n_old_blocks / 100); - mark_stack_bdescr = allocGroup(mark_stack_blocks); - mark_stack = (StgPtr *)mark_stack_bdescr->start; - mark_sp = mark_stack; - mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W); + if (major_gc && oldest_gen->mark) { + mark_stack_bd = allocBlock(); + mark_stack_top_bd = mark_stack_bd; + mark_stack_bd->link = NULL; + mark_stack_bd->u.back = NULL; + mark_sp = mark_stack_bd->start; } else { - mark_stack_bdescr = NULL; + mark_stack_bd = NULL; + mark_stack_top_bd = NULL; + mark_sp = NULL; } // this is the main thread @@ -342,7 +334,15 @@ SET_GCT(gc_threads[0]); // namely to reduce the likelihood of spurious old->new pointers. // for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { +#if defined(THREADED_RTS) + if (n_gc_threads > 1) { + scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]); + } else { + scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]); + } +#else scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]); +#endif freeChain_sync(generations[g].saved_mut_list); generations[g].saved_mut_list = NULL; @@ -354,18 +354,22 @@ SET_GCT(gc_threads[0]); // variable). if (n_gc_threads == 1) { for (n = 0; n < n_capabilities; n++) { +#if defined(THREADED_RTS) + scavenge_capability_mut_Lists1(&capabilities[n]); +#else scavenge_capability_mut_lists(&capabilities[n]); +#endif } } else { scavenge_capability_mut_lists(&capabilities[gct->thread_index]); } // follow roots from the CAF list (used by GHCi) - gct->evac_step = 0; + gct->evac_gen = 0; markCAFs(mark_root, gct); // follow all the roots that the application knows about. - gct->evac_step = 0; + gct->evac_gen = 0; markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads, rtsTrue/*prune sparks*/); @@ -391,13 +395,6 @@ SET_GCT(gc_threads[0]); // The other threads are now stopped. We might recurse back to // here, but from now on this is the only thread. - // if any blackholes are alive, make the threads that wait on - // them alive too. - if (traverseBlackholeQueue()) { - inc_running(); - continue; - } - // must be last... invariant is that everything is fully // scavenged at this point. if (traverseWeakPtrList()) { // returns rtsTrue if evaced something @@ -411,9 +408,6 @@ SET_GCT(gc_threads[0]); shutdown_gc_threads(n_gc_threads, gct->thread_index); - // Update pointers from the Task list - update_task_list(); - // Now see which stable names are still alive. gcStablePtrTable(); @@ -428,32 +422,26 @@ SET_GCT(gc_threads[0]); // NO MORE EVACUATION AFTER THIS POINT! // Two-space collector: free the old to-space. 
- // g0s0->old_blocks is the old nursery - // g0s0->blocks is to-space from the previous GC + // g0->old_blocks is the old nursery + // g0->blocks is to-space from the previous GC if (RtsFlags.GcFlags.generations == 1) { - if (g0s0->blocks != NULL) { - freeChain(g0s0->blocks); - g0s0->blocks = NULL; + if (g0->blocks != NULL) { + freeChain(g0->blocks); + g0->blocks = NULL; } } // For each workspace, in each thread, move the copied blocks to the step { gc_thread *thr; - step_workspace *ws; + gen_workspace *ws; bdescr *prev, *next; for (t = 0; t < n_gc_threads; t++) { thr = gc_threads[t]; - // not step 0 - if (RtsFlags.GcFlags.generations == 1) { - s = 0; - } else { - s = 1; - } - for (; s < total_steps; s++) { - ws = &thr->steps[s]; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + ws = &thr->gens[g]; // Push the final block if (ws->todo_bd) { @@ -465,14 +453,14 @@ SET_GCT(gc_threads[0]); prev = NULL; for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { - ws->step->n_words += bd->free - bd->start; + ws->gen->n_words += bd->free - bd->start; prev = bd; } if (prev != NULL) { - prev->link = ws->step->blocks; - ws->step->blocks = ws->scavd_list; + prev->link = ws->gen->blocks; + ws->gen->blocks = ws->scavd_list; } - ws->step->n_blocks += ws->n_scavd_blocks; + ws->gen->n_blocks += ws->n_scavd_blocks; } } @@ -482,14 +470,8 @@ SET_GCT(gc_threads[0]); for (t = 0; t < n_gc_threads; t++) { thr = gc_threads[t]; - // not step 0 - if (RtsFlags.GcFlags.generations == 1) { - s = 0; - } else { - s = 1; - } - for (; s < total_steps; s++) { - ws = &thr->steps[s]; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + ws = &thr->gens[g]; prev = NULL; for (bd = ws->part_list; bd != NULL; bd = next) { @@ -503,28 +485,28 @@ SET_GCT(gc_threads[0]); freeGroup(bd); ws->n_part_blocks--; } else { - ws->step->n_words += bd->free - bd->start; + ws->gen->n_words += bd->free - bd->start; prev = bd; } } if (prev != NULL) { - prev->link = ws->step->blocks; - ws->step->blocks = ws->part_list; + prev->link = ws->gen->blocks; + ws->gen->blocks = ws->part_list; } - ws->step->n_blocks += ws->n_part_blocks; + ws->gen->n_blocks += ws->n_part_blocks; - ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks); - ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words); + ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks); + ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words); } } } // Finally: compact or sweep the oldest generation. - if (major_gc && oldest_gen->steps[0].mark) { - if (oldest_gen->steps[0].compact) + if (major_gc && oldest_gen->mark) { + if (oldest_gen->compact) compact(gct->scavenged_static_objects); else - sweep(&oldest_gen->steps[0]); + sweep(oldest_gen); } /* run through all the generations/steps and tidy up @@ -582,105 +564,99 @@ SET_GCT(gc_threads[0]); mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS); } - for (s = 0; s < generations[g].n_steps; s++) { - bdescr *next, *prev; - stp = &generations[g].steps[s]; + bdescr *next, *prev; + gen = &generations[g]; - // for generations we collected... - if (g <= N) { + // for generations we collected... + if (g <= N) { /* free old memory and shift to-space into from-space for all * the collected steps (except the allocation area). These * freed blocks will probaby be quickly recycled. 
*/ - if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { - if (stp->mark) - { - // tack the new blocks on the end of the existing blocks - if (stp->old_blocks != NULL) { - - prev = NULL; - for (bd = stp->old_blocks; bd != NULL; bd = next) { - - next = bd->link; - - if (!(bd->flags & BF_MARKED)) - { - if (prev == NULL) { - stp->old_blocks = next; - } else { - prev->link = next; - } - freeGroup(bd); - stp->n_old_blocks--; - } - else - { - stp->n_words += bd->free - bd->start; - - // NB. this step might not be compacted next - // time, so reset the BF_MARKED flags. - // They are set before GC if we're going to - // compact. (search for BF_MARKED above). - bd->flags &= ~BF_MARKED; - - // between GCs, all blocks in the heap except - // for the nursery have the BF_EVACUATED flag set. - bd->flags |= BF_EVACUATED; - - prev = bd; + if (gen->mark) + { + // tack the new blocks on the end of the existing blocks + if (gen->old_blocks != NULL) { + + prev = NULL; + for (bd = gen->old_blocks; bd != NULL; bd = next) { + + next = bd->link; + + if (!(bd->flags & BF_MARKED)) + { + if (prev == NULL) { + gen->old_blocks = next; + } else { + prev->link = next; } - } - - if (prev != NULL) { - prev->link = stp->blocks; - stp->blocks = stp->old_blocks; + freeGroup(bd); + gen->n_old_blocks--; } - } - // add the new blocks to the block tally - stp->n_blocks += stp->n_old_blocks; - ASSERT(countBlocks(stp->blocks) == stp->n_blocks); - ASSERT(countOccupied(stp->blocks) == stp->n_words); - } - else // not copacted - { - freeChain(stp->old_blocks); - } - stp->old_blocks = NULL; - stp->n_old_blocks = 0; - } - - /* LARGE OBJECTS. The current live large objects are chained on - * scavenged_large, having been moved during garbage - * collection from large_objects. Any objects left on - * large_objects list are therefore dead, so we free them here. - */ - for (bd = stp->large_objects; bd != NULL; bd = next) { - next = bd->link; - freeGroup(bd); - bd = next; - } + else + { + gen->n_words += bd->free - bd->start; + + // NB. this step might not be compacted next + // time, so reset the BF_MARKED flags. + // They are set before GC if we're going to + // compact. (search for BF_MARKED above). + bd->flags &= ~BF_MARKED; + + // between GCs, all blocks in the heap except + // for the nursery have the BF_EVACUATED flag set. + bd->flags |= BF_EVACUATED; + + prev = bd; + } + } - stp->large_objects = stp->scavenged_large_objects; - stp->n_large_blocks = stp->n_scavenged_large_blocks; + if (prev != NULL) { + prev->link = gen->blocks; + gen->blocks = gen->old_blocks; + } + } + // add the new blocks to the block tally + gen->n_blocks += gen->n_old_blocks; + ASSERT(countBlocks(gen->blocks) == gen->n_blocks); + ASSERT(countOccupied(gen->blocks) == gen->n_words); + } + else // not copacted + { + freeChain(gen->old_blocks); + } - } - else // for older generations... - { + gen->old_blocks = NULL; + gen->n_old_blocks = 0; + + /* LARGE OBJECTS. The current live large objects are chained on + * scavenged_large, having been moved during garbage + * collection from large_objects. Any objects left on the + * large_objects list are therefore dead, so we free them here. + */ + freeChain(gen->large_objects); + gen->large_objects = gen->scavenged_large_objects; + gen->n_large_blocks = gen->n_scavenged_large_blocks; + gen->n_new_large_blocks = 0; + ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); + } + else // for generations > N + { /* For older generations, we need to append the * scavenged_large_object list (i.e. 
large objects that have been * promoted during this GC) to the large_object list for that step. */ - for (bd = stp->scavenged_large_objects; bd; bd = next) { - next = bd->link; - dbl_link_onto(bd, &stp->large_objects); + for (bd = gen->scavenged_large_objects; bd; bd = next) { + next = bd->link; + dbl_link_onto(bd, &gen->large_objects); } - + // add the new blocks we promoted during this GC - stp->n_large_blocks += stp->n_scavenged_large_blocks; - } + gen->n_large_blocks += gen->n_scavenged_large_blocks; + ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); } - } + } // for all generations // update the max size of older generations after a major GC resize_generations(); @@ -690,33 +666,26 @@ SET_GCT(gc_threads[0]); // Free the small objects allocated via allocate(), since this will // all have been copied into G0S1 now. - if (RtsFlags.GcFlags.generations > 1) { - if (g0s0->blocks != NULL) { - freeChain(g0s0->blocks); - g0s0->blocks = NULL; - } - g0s0->n_blocks = 0; - g0s0->n_words = 0; - } - alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; // Start a new pinned_object_block - pinned_object_block = NULL; + for (n = 0; n < n_capabilities; n++) { + capabilities[n].pinned_object_block = NULL; + } // Free the mark stack. - if (mark_stack_bdescr != NULL) { - freeGroup(mark_stack_bdescr); + if (mark_stack_top_bd != NULL) { + debugTrace(DEBUG_gc, "mark stack: %d blocks", + countBlocks(mark_stack_top_bd)); + freeChain(mark_stack_top_bd); } // Free any bitmaps. for (g = 0; g <= N; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - stp = &generations[g].steps[s]; - if (stp->bitmap != NULL) { - freeGroup(stp->bitmap); - stp->bitmap = NULL; - } + gen = &generations[g]; + if (gen->bitmap != NULL) { + freeGroup(gen->bitmap); + gen->bitmap = NULL; } } @@ -756,14 +725,13 @@ SET_GCT(gc_threads[0]); // send exceptions to any threads which were about to die RELEASE_SM_LOCK; resurrectThreads(resurrected_threads); - performPendingThrowTos(exception_threads); ACQUIRE_SM_LOCK; // Update the stable pointer hash table. 
updateStablePtrTable(major_gc); - // check sanity after GC - IF_DEBUG(sanity, checkSanity()); + // check sanity after GC + IF_DEBUG(sanity, checkSanity(rtsTrue)); // extra GC trace info IF_DEBUG(gc, statDescribeGens()); @@ -780,7 +748,7 @@ SET_GCT(gc_threads[0]); #ifdef DEBUG // check for memory leaks if DEBUG is on - memInventory(traceClass(DEBUG_gc)); + memInventory(DEBUG_gc); #endif #ifdef RTS_GTK_FRONTPANEL @@ -822,7 +790,7 @@ static nat initialise_N (rtsBool force_major_gc) { int g; - nat s, blocks, blocks_total; + nat blocks, blocks_total; blocks = 0; blocks_total = 0; @@ -834,11 +802,10 @@ initialise_N (rtsBool force_major_gc) } for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) { - blocks = 0; - for (s = 0; s < generations[g].n_steps; s++) { - blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W; - blocks += generations[g].steps[s].n_large_blocks; - } + + blocks = generations[g].n_words / BLOCK_SIZE_W + + generations[g].n_large_blocks; + if (blocks >= generations[g].max_blocks) { N = stg_max(N,g); } @@ -865,8 +832,8 @@ initialise_N (rtsBool force_major_gc) static void new_gc_thread (nat n, gc_thread *t) { - nat s; - step_workspace *ws; + nat g; + gen_workspace *ws; #ifdef THREADED_RTS t->id = 0; @@ -887,11 +854,11 @@ new_gc_thread (nat n, gc_thread *t) t->papi_events = -1; #endif - for (s = 0; s < total_steps; s++) + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - ws = &t->steps[s]; - ws->step = &all_steps[s]; - ASSERT(s == ws->step->abs_no); + ws = &t->gens[g]; + ws->gen = &generations[g]; + ASSERT(g == ws->gen->no); ws->my_gct = t; ws->todo_bd = NULL; @@ -920,7 +887,8 @@ initGcThreads (void) for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { gc_threads[i] = - stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace), + stgMallocBytes(sizeof(gc_thread) + + RtsFlags.GcFlags.generations * sizeof(gen_workspace), "alloc_gc_threads"); new_gc_thread(i, gc_threads[i]); @@ -936,14 +904,23 @@ initGcThreads (void) void freeGcThreads (void) { + nat g; if (gc_threads != NULL) { #if defined(THREADED_RTS) nat i; - for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { + for (i = 0; i < n_capabilities; i++) { + for (g = 0; g < RtsFlags.GcFlags.generations; g++) + { + freeWSDeque(gc_threads[i]->gens[g].todo_q); + } stgFree (gc_threads[i]); } stgFree (gc_threads); #else + for (g = 0; g < RtsFlags.GcFlags.generations; g++) + { + freeWSDeque(gc_threads[0]->gens[g].todo_q); + } stgFree (gc_threads); #endif gc_threads = NULL; @@ -975,27 +952,23 @@ dec_running (void) static rtsBool any_work (void) { - int s; - step_workspace *ws; + int g; + gen_workspace *ws; gct->any_work++; write_barrier(); // scavenge objects in compacted generation - if (mark_stack_overflowed || oldgen_scan_bd != NULL || - (mark_stack_bdescr != NULL && !mark_stack_empty())) { + if (mark_stack_bd != NULL && !mark_stack_empty()) { return rtsTrue; } // Check for global work in any step. We don't need to check for // local work, because we have already exited scavenge_loop(), // which means there is no local work for this thread. 
- for (s = total_steps-1; s >= 0; s--) { - if (s == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } - ws = &gct->steps[s]; + for (g = 0; g < (int)RtsFlags.GcFlags.generations; g++) { + ws = &gct->gens[g]; if (ws->todo_large_objects) return rtsTrue; if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue; if (ws->todo_overflow) return rtsTrue; @@ -1007,8 +980,8 @@ any_work (void) // look for work to steal for (n = 0; n < n_gc_threads; n++) { if (n == gct->thread_index) continue; - for (s = total_steps-1; s >= 0; s--) { - ws = &gc_threads[n]->steps[s]; + for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) { + ws = &gc_threads[n]->gens[g]; if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue; } } @@ -1016,6 +989,9 @@ any_work (void) #endif gct->no_work++; +#if defined(THREADED_RTS) + yieldThread(); +#endif return rtsFalse; } @@ -1025,9 +1001,10 @@ scavenge_until_all_done (void) { nat r; - debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index); loop: + traceEventGcWork(&capabilities[gct->thread_index]); + #if defined(THREADED_RTS) if (n_gc_threads > 1) { scavenge_loop(); @@ -1041,8 +1018,9 @@ loop: // scavenge_loop() only exits when there's no work to do r = dec_running(); - debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)", - gct->thread_index, r); + traceEventGcIdle(&capabilities[gct->thread_index]); + + debugTrace(DEBUG_gc, "%d GC threads still running", r); while (gc_running_threads != 0) { // usleep(1); @@ -1056,8 +1034,7 @@ loop: // scavenge_loop() to perform any pending work. } - // All threads are now stopped - debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index); + traceEventGcDone(&capabilities[gct->thread_index]); } #if defined(THREADED_RTS) @@ -1065,7 +1042,10 @@ loop: void gcWorkerThread (Capability *cap) { - cap->in_gc = rtsTrue; + gc_thread *saved_gct; + + // necessary if we stole a callee-saves register for gct: + saved_gct = gct; gct = gc_threads[cap->no]; gct->id = osThreadId(); @@ -1085,7 +1065,7 @@ gcWorkerThread (Capability *cap) #endif // Every thread evacuates some roots. - gct->evac_step = 0; + gct->evac_gen = 0; markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads, rtsTrue/*prune sparks*/); scavenge_capability_mut_lists(&capabilities[gct->thread_index]); @@ -1104,6 +1084,8 @@ gcWorkerThread (Capability *cap) gct->thread_index); ACQUIRE_SPIN_LOCK(&gct->mut_spin); debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index); + + SET_GCT(saved_gct); } #endif @@ -1125,7 +1107,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS) prodCapability(&capabilities[i], cap->running_task); } } - for (j=0; j < 10000000; j++) { + for (j=0; j < 10; j++) { retry = rtsFalse; for (i=0; i < n_threads; i++) { if (i == me) continue; @@ -1136,6 +1118,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS) } } if (!retry) break; + yieldThread(); } } } @@ -1209,9 +1192,9 @@ releaseGCThreads (Capability *cap USED_IF_THREADS) static void init_collected_gen (nat g, nat n_threads) { - nat s, t, i; - step_workspace *ws; - step *stp; + nat t, i; + gen_workspace *ws; + generation *gen; bdescr *bd; // Throw away the current mutable list. Invariant: the mutable @@ -1226,110 +1209,96 @@ init_collected_gen (nat g, nat n_threads) } } - for (s = 0; s < generations[g].n_steps; s++) { - - stp = &generations[g].steps[s]; - ASSERT(stp->gen_no == g); - - // we'll construct a new list of threads in this step - // during GC, throw away the current list. 
- stp->old_threads = stp->threads; - stp->threads = END_TSO_QUEUE; + gen = &generations[g]; + ASSERT(gen->no == g); + + // we'll construct a new list of threads in this step + // during GC, throw away the current list. + gen->old_threads = gen->threads; + gen->threads = END_TSO_QUEUE; + + // deprecate the existing blocks + gen->old_blocks = gen->blocks; + gen->n_old_blocks = gen->n_blocks; + gen->blocks = NULL; + gen->n_blocks = 0; + gen->n_words = 0; + gen->live_estimate = 0; + + // initialise the large object queues. + gen->scavenged_large_objects = NULL; + gen->n_scavenged_large_blocks = 0; + + // mark the small objects as from-space + for (bd = gen->old_blocks; bd; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; + } + + // mark the large objects as from-space + for (bd = gen->large_objects; bd; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; + } - // generation 0, step 0 doesn't need to-space - if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) { - continue; - } + // for a compacted generation, we need to allocate the bitmap + if (gen->mark) { + nat bitmap_size; // in bytes + bdescr *bitmap_bdescr; + StgWord *bitmap; - // deprecate the existing blocks - stp->old_blocks = stp->blocks; - stp->n_old_blocks = stp->n_blocks; - stp->blocks = NULL; - stp->n_blocks = 0; - stp->n_words = 0; - stp->live_estimate = 0; - - // initialise the large object queues. - stp->scavenged_large_objects = NULL; - stp->n_scavenged_large_blocks = 0; - - // mark the small objects as from-space - for (bd = stp->old_blocks; bd; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; - } - - // mark the large objects as from-space - for (bd = stp->large_objects; bd; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; - } - - // for a compacted step, we need to allocate the bitmap - if (stp->mark) { - nat bitmap_size; // in bytes - bdescr *bitmap_bdescr; - StgWord *bitmap; - - bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); - - if (bitmap_size > 0) { - bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size) - / BLOCK_SIZE); - stp->bitmap = bitmap_bdescr; - bitmap = bitmap_bdescr->start; - - debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p", - bitmap_size, bitmap); - - // don't forget to fill it with zeros! - memset(bitmap, 0, bitmap_size); + bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE); + + if (bitmap_size > 0) { + bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size) + / BLOCK_SIZE); + gen->bitmap = bitmap_bdescr; + bitmap = bitmap_bdescr->start; + + debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p", + bitmap_size, bitmap); + + // don't forget to fill it with zeros! + memset(bitmap, 0, bitmap_size); + + // For each block in this step, point to its bitmap from the + // block descriptor. + for (bd=gen->old_blocks; bd != NULL; bd = bd->link) { + bd->u.bitmap = bitmap; + bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE); - // For each block in this step, point to its bitmap from the - // block descriptor. - for (bd=stp->old_blocks; bd != NULL; bd = bd->link) { - bd->u.bitmap = bitmap; - bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE); - - // Also at this point we set the BF_MARKED flag - // for this block. The invariant is that - // BF_MARKED is always unset, except during GC - // when it is set on those blocks which will be - // compacted. - if (!(bd->flags & BF_FRAGMENTED)) { - bd->flags |= BF_MARKED; - } - } - } - } + // Also at this point we set the BF_MARKED flag + // for this block. 
The invariant is that + // BF_MARKED is always unset, except during GC + // when it is set on those blocks which will be + // compacted. + if (!(bd->flags & BF_FRAGMENTED)) { + bd->flags |= BF_MARKED; + } + } + } } // For each GC thread, for each step, allocate a "todo" block to // store evacuated objects to be scavenged, and a block to store // evacuated objects that do not need to be scavenged. for (t = 0; t < n_threads; t++) { - for (s = 0; s < generations[g].n_steps; s++) { - - // we don't copy objects into g0s0, unless -G0 - if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue; - - ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; - - ws->todo_large_objects = NULL; - - ws->part_list = NULL; - ws->n_part_blocks = 0; - - // allocate the first to-space block; extra blocks will be - // chained on as necessary. - ws->todo_bd = NULL; - ASSERT(looksEmptyWSDeque(ws->todo_q)); - alloc_todo_block(ws,0); - - ws->todo_overflow = NULL; - ws->n_todo_overflow = 0; - - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - } + ws = &gc_threads[t]->gens[g]; + + ws->todo_large_objects = NULL; + + ws->part_list = NULL; + ws->n_part_blocks = 0; + + // allocate the first to-space block; extra blocks will be + // chained on as necessary. + ws->todo_bd = NULL; + ASSERT(looksEmptyWSDeque(ws->todo_q)); + alloc_todo_block(ws,0); + + ws->todo_overflow = NULL; + ws->n_todo_overflow = 0; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; } } @@ -1341,9 +1310,9 @@ init_collected_gen (nat g, nat n_threads) static void init_uncollected_gen (nat g, nat threads) { - nat s, t, n; - step_workspace *ws; - step *stp; + nat t, n; + gen_workspace *ws; + generation *gen; bdescr *bd; // save the current mutable lists for this generation, and @@ -1356,66 +1325,60 @@ init_uncollected_gen (nat g, nat threads) capabilities[n].mut_lists[g] = allocBlock(); } - for (s = 0; s < generations[g].n_steps; s++) { - stp = &generations[g].steps[s]; - stp->scavenged_large_objects = NULL; - stp->n_scavenged_large_blocks = 0; - } - - for (s = 0; s < generations[g].n_steps; s++) { - - stp = &generations[g].steps[s]; + gen = &generations[g]; - for (t = 0; t < threads; t++) { - ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; - - ASSERT(looksEmptyWSDeque(ws->todo_q)); - ws->todo_large_objects = NULL; - - ws->part_list = NULL; - ws->n_part_blocks = 0; - - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - - // If the block at the head of the list in this generation - // is less than 3/4 full, then use it as a todo block. - if (stp->blocks && isPartiallyFull(stp->blocks)) - { - ws->todo_bd = stp->blocks; - ws->todo_free = ws->todo_bd->free; - ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; - stp->blocks = stp->blocks->link; - stp->n_blocks -= 1; - stp->n_words -= ws->todo_bd->free - ws->todo_bd->start; - ws->todo_bd->link = NULL; - // we must scan from the current end point. 
- ws->todo_bd->u.scan = ws->todo_bd->free; - } - else - { - ws->todo_bd = NULL; - alloc_todo_block(ws,0); - } - } + gen->scavenged_large_objects = NULL; + gen->n_scavenged_large_blocks = 0; - // deal out any more partial blocks to the threads' part_lists - t = 0; - while (stp->blocks && isPartiallyFull(stp->blocks)) + for (t = 0; t < threads; t++) { + ws = &gc_threads[t]->gens[g]; + + ASSERT(looksEmptyWSDeque(ws->todo_q)); + ws->todo_large_objects = NULL; + + ws->part_list = NULL; + ws->n_part_blocks = 0; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; + + // If the block at the head of the list in this generation + // is less than 3/4 full, then use it as a todo block. + if (gen->blocks && isPartiallyFull(gen->blocks)) + { + ws->todo_bd = gen->blocks; + ws->todo_free = ws->todo_bd->free; + ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; + gen->blocks = gen->blocks->link; + gen->n_blocks -= 1; + gen->n_words -= ws->todo_bd->free - ws->todo_bd->start; + ws->todo_bd->link = NULL; + // we must scan from the current end point. + ws->todo_bd->u.scan = ws->todo_bd->free; + } + else { - bd = stp->blocks; - stp->blocks = bd->link; - ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; - bd->link = ws->part_list; - ws->part_list = bd; - ws->n_part_blocks += 1; - bd->u.scan = bd->free; - stp->n_blocks -= 1; - stp->n_words -= bd->free - bd->start; - t++; - if (t == n_gc_threads) t = 0; + ws->todo_bd = NULL; + alloc_todo_block(ws,0); } } + + // deal out any more partial blocks to the threads' part_lists + t = 0; + while (gen->blocks && isPartiallyFull(gen->blocks)) + { + bd = gen->blocks; + gen->blocks = bd->link; + ws = &gc_threads[t]->gens[g]; + bd->link = ws->part_list; + ws->part_list = bd; + ws->n_part_blocks += 1; + bd->u.scan = bd->free; + gen->n_blocks -= 1; + gen->n_words -= bd->free - bd->start; + t++; + if (t == n_gc_threads) t = 0; + } } /* ----------------------------------------------------------------------------- @@ -1429,7 +1392,7 @@ init_gc_thread (gc_thread *t) t->scavenged_static_objects = END_OF_STATIC_LIST; t->scan_bd = NULL; t->mut_lists = capabilities[t->thread_index].mut_lists; - t->evac_step = 0; + t->evac_gen = 0; t->failed_to_evac = rtsFalse; t->eager_promotion = rtsTrue; t->thunk_selector_depth = 0; @@ -1480,39 +1443,6 @@ zero_static_object_list(StgClosure* first_static) } /* ---------------------------------------------------------------------------- - Update the pointers from the task list - - These are treated as weak pointers because we want to allow a main - thread to get a BlockedOnDeadMVar exception in the same way as any - other thread. Note that the threads should all have been retained - by GC by virtue of being on the all_threads list, we're just - updating pointers here. - ------------------------------------------------------------------------- */ - -static void -update_task_list (void) -{ - Task *task; - StgTSO *tso; - for (task = all_tasks; task != NULL; task = task->all_link) { - if (!task->stopped && task->tso) { - ASSERT(task->tso->bound == task); - tso = (StgTSO *) isAlive((StgClosure *)task->tso); - if (tso == NULL) { - barf("task %p: main thread %d has been GC'd", -#ifdef THREADED_RTS - (void *)task->id, -#else - (void *)task, -#endif - task->tso->id); - } - task->tso = tso; - } - } -} - -/* ---------------------------------------------------------------------------- Reset the sizes of the older generations when we do a major collection. 
@@ -1532,18 +1462,22 @@ resize_generations (void) nat gens = RtsFlags.GcFlags.generations; // live in the oldest generations - if (oldest_gen->steps[0].live_estimate != 0) { - words = oldest_gen->steps[0].live_estimate; + if (oldest_gen->live_estimate != 0) { + words = oldest_gen->live_estimate; } else { - words = oldest_gen->steps[0].n_words; + words = oldest_gen->n_words; } live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W + - oldest_gen->steps[0].n_large_blocks; + oldest_gen->n_large_blocks; // default max size for all generations except zero size = stg_max(live * RtsFlags.GcFlags.oldGenFactor, RtsFlags.GcFlags.minOldGenSize); + if (RtsFlags.GcFlags.heapSizeSuggestionAuto) { + RtsFlags.GcFlags.heapSizeSuggestion = size; + } + // minimum size for generation zero min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200, RtsFlags.GcFlags.minAllocAreaSize); @@ -1553,19 +1487,19 @@ resize_generations (void) if (RtsFlags.GcFlags.generations > 1 && (RtsFlags.GcFlags.compact || (max > 0 && - oldest_gen->steps[0].n_blocks > + oldest_gen->n_blocks > (RtsFlags.GcFlags.compactThreshold * max) / 100))) { - oldest_gen->steps[0].mark = 1; - oldest_gen->steps[0].compact = 1; + oldest_gen->mark = 1; + oldest_gen->compact = 1; // debugBelch("compaction: on\n", live); } else { - oldest_gen->steps[0].mark = 0; - oldest_gen->steps[0].compact = 0; + oldest_gen->mark = 0; + oldest_gen->compact = 0; // debugBelch("compaction: off\n", live); } if (RtsFlags.GcFlags.sweep) { - oldest_gen->steps[0].mark = 1; + oldest_gen->mark = 1; } // if we're going to go over the maximum heap size, reduce the @@ -1581,7 +1515,7 @@ resize_generations (void) heapOverflow(); } - if (oldest_gen->steps[0].compact) { + if (oldest_gen->compact) { if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) { size = (max - min_alloc) / ((gens - 1) * 2 - 1); } @@ -1614,6 +1548,8 @@ resize_generations (void) static void resize_nursery (void) { + lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities; + if (RtsFlags.GcFlags.generations == 1) { // Two-space collector: nat blocks; @@ -1632,7 +1568,7 @@ resize_nursery (void) * performance we get from 3L bytes, reducing to the same * performance at 2L bytes. */ - blocks = g0s0->n_blocks; + blocks = generations[0].n_blocks; if ( RtsFlags.GcFlags.maxHeapSize != 0 && blocks * RtsFlags.GcFlags.oldGenFactor * 2 > @@ -1656,9 +1592,9 @@ resize_nursery (void) else { blocks *= RtsFlags.GcFlags.oldGenFactor; - if (blocks < RtsFlags.GcFlags.minAllocAreaSize) + if (blocks < min_nursery) { - blocks = RtsFlags.GcFlags.minAllocAreaSize; + blocks = min_nursery; } } resizeNurseries(blocks); @@ -1676,7 +1612,7 @@ resize_nursery (void) /* Guess how much will be live in generation 0 step 0 next time. * A good approximation is obtained by finding the - * percentage of g0s0 that was live at the last minor GC. + * percentage of g0 that was live at the last minor GC. * * We have an accurate figure for the amount of copied data in * 'copied', but we must convert this to a number of blocks, with @@ -1685,7 +1621,7 @@ resize_nursery (void) */ if (N == 0) { - g0s0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100) + g0_pcnt_kept = ((copied / (BLOCK_SIZE_W - 10)) * 100) / countNurseryBlocks(); } @@ -1696,17 +1632,17 @@ resize_nursery (void) * * Formula: suggested - needed * ---------------------------- - * 1 + g0s0_pcnt_kept/100 + * 1 + g0_pcnt_kept/100 * * where 'needed' is the amount of memory needed at the next - * collection for collecting all steps except g0s0. 
+ * collection for collecting all gens except g0. */ blocks = (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) / - (100 + (long)g0s0_pcnt_kept); + (100 + (long)g0_pcnt_kept); - if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) { - blocks = RtsFlags.GcFlags.minAllocAreaSize; + if (blocks < (long)min_nursery) { + blocks = min_nursery; } resizeNurseries((nat)blocks);