From: Simon Marlow Date: Wed, 2 Feb 2011 15:49:55 +0000 (+0000) Subject: GC refactoring and cleanup X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=18896fa2b06844407fd1e0d3f85cd3db97a96ff4 GC refactoring and cleanup Now we keep any partially-full blocks in the gc_thread[] structs after each GC, rather than moving them to the generation. This should give us slightly better locality (though I wasn't able to measure any difference). Also in this patch: better sanity checking with THREADED. --- diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index 7cee670..bbed216 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -87,7 +87,7 @@ typedef struct generation_ { #if defined(THREADED_RTS) char pad[128]; // make sure the following is // on a separate cache line. - SpinLock sync_large_objects; // lock for large_objects + SpinLock sync; // lock for large_objects // and scavenged_large_objects #endif @@ -101,9 +101,6 @@ typedef struct generation_ { unsigned int n_old_blocks; // number of blocks in from-space unsigned int live_estimate; // for sweeping: estimate of live data - bdescr * part_blocks; // partially-full scanned blocks - unsigned int n_part_blocks; // count of above - bdescr * scavenged_large_objects; // live large objs after GC (d-link) unsigned int n_scavenged_large_blocks; // size (not count) of above diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index 550f703..775505f 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -592,7 +592,7 @@ removeFromMVarBlockedQueue (StgTSO *tso) if (mvar->head == q) { mvar->head = q->link; - q->header.info = &stg_IND_info; + OVERWRITE_INFO(q, &stg_IND_info); if (mvar->tail == q) { mvar->tail = (StgMVarTSOQueue*)END_TSO_QUEUE; } @@ -602,10 +602,10 @@ removeFromMVarBlockedQueue (StgTSO *tso) // we lose the tail pointer when the GC shorts out the IND. // So we use MSG_NULL as a kind of non-dupable indirection; // these are ignored by takeMVar/putMVar. - q->header.info = &stg_MSG_NULL_info; + OVERWRITE_INFO(q, &stg_MSG_NULL_info); } else { - q->header.info = &stg_IND_info; + OVERWRITE_INFO(q, &stg_IND_info); } // revoke the MVar operation diff --git a/rts/Stats.c b/rts/Stats.c index cbd02cd..4b9f6d8 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -710,7 +710,7 @@ stat_exit(int alloc) statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin); statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin); for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - statsPrintf("gen[%d].sync_large_objects: %"FMT_Word64"\n", g, generations[g].sync_large_objects.spin); + statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin); } } #endif @@ -772,8 +772,9 @@ void statDescribeGens(void) { nat g, mut, lge, i; - lnat live, slop; + lnat gen_slop; lnat tot_live, tot_slop; + lnat gen_live, gen_blocks; bdescr *bd; generation *gen; @@ -785,25 +786,32 @@ statDescribeGens(void) tot_live = 0; tot_slop = 0; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + gen = &generations[g]; + + for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) { + lge++; + } + + gen_live = genLiveWords(gen); + gen_blocks = genLiveBlocks(gen); + mut = 0; for (i = 0; i < n_capabilities; i++) { mut += countOccupied(capabilities[i].mut_lists[g]); + gen_live += gcThreadLiveWords(i,g); + gen_blocks += gcThreadLiveBlocks(i,g); } - gen = &generations[g]; - debugBelch("%5d %7d %9d", g, gen->max_blocks, mut); - for (bd = gen->large_objects, lge = 0; bd; bd = bd->link) { - lge++; - } - live = gen->n_words + countOccupied(gen->large_objects); - slop = (gen->n_blocks + gen->n_large_blocks) * BLOCK_SIZE_W - live; - debugBelch("%8d %8d %8ld %8ld\n", gen->n_blocks, lge, - live*sizeof(W_), slop*sizeof(W_)); - tot_live += live; - tot_slop += slop; + gen_slop = gen_blocks * BLOCK_SIZE_W - gen_live; + + debugBelch("%8ld %8d %8ld %8ld\n", gen_blocks, lge, + gen_live*sizeof(W_), gen_slop*sizeof(W_)); + tot_live += gen_live; + tot_slop += gen_slop; } debugBelch("----------------------------------------------------------\n"); debugBelch("%41s%8ld %8ld\n","",tot_live*sizeof(W_),tot_slop*sizeof(W_)); diff --git a/rts/Threads.c b/rts/Threads.c index dcb916a..3e1c5cf 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -628,7 +628,7 @@ threadStackOverflow (Capability *cap, StgTSO *tso) // will be discarded after the first overflow, being replaced by a // non-moving 32k chunk. if (old_stack->sp == old_stack->stack + old_stack->stack_size) { - frame->next_chunk = new_stack; + frame->next_chunk = (StgStack*)END_TSO_QUEUE; // dummy } tso->stackobj = new_stack; diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index 18ace21..d049f98 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -248,7 +248,7 @@ evacuate_large(StgPtr p) bd = Bdescr(p); gen = bd->gen; gen_no = bd->gen_no; - ACQUIRE_SPIN_LOCK(&gen->sync_large_objects); + ACQUIRE_SPIN_LOCK(&gen->sync); // already evacuated? if (bd->flags & BF_EVACUATED) { @@ -259,7 +259,7 @@ evacuate_large(StgPtr p) gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } - RELEASE_SPIN_LOCK(&gen->sync_large_objects); + RELEASE_SPIN_LOCK(&gen->sync); return; } @@ -297,16 +297,16 @@ evacuate_large(StgPtr p) // them straight on the scavenged_large_objects list. if (bd->flags & BF_PINNED) { ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS); - if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync_large_objects); } + if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync); } dbl_link_onto(bd, &new_gen->scavenged_large_objects); new_gen->n_scavenged_large_blocks += bd->blocks; - if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync_large_objects); } + if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync); } } else { bd->link = ws->todo_large_objects; ws->todo_large_objects = bd; } - RELEASE_SPIN_LOCK(&gen->sync_large_objects); + RELEASE_SPIN_LOCK(&gen->sync); } /* ---------------------------------------------------------------------------- diff --git a/rts/sm/GC.c b/rts/sm/GC.c index c990835..221f24a 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -137,8 +137,8 @@ DECLARE_GCT static void mark_root (void *user, StgClosure **root); static void zero_static_object_list (StgClosure* first_static); static nat initialise_N (rtsBool force_major_gc); -static void init_collected_gen (nat g, nat threads); -static void init_uncollected_gen (nat g, nat threads); +static void prepare_collected_gen (generation *gen); +static void prepare_uncollected_gen (generation *gen); static void init_gc_thread (gc_thread *t); static void resize_generations (void); static void resize_nursery (void); @@ -148,6 +148,7 @@ static StgWord inc_running (void); static StgWord dec_running (void); static void wakeup_gc_threads (nat n_threads, nat me); static void shutdown_gc_threads (nat n_threads, nat me); +static void collect_gct_blocks (void); #if 0 && defined(DEBUG) static void gcCAFs (void); @@ -174,7 +175,7 @@ GarbageCollect (rtsBool force_major_gc, { bdescr *bd; generation *gen; - lnat live, allocated, max_copied, avg_copied, slop; + lnat live_blocks, live_words, allocated, max_copied, avg_copied; gc_thread *saved_gct; nat g, t, n; @@ -274,7 +275,7 @@ GarbageCollect (rtsBool force_major_gc, #endif // check sanity *before* GC - IF_DEBUG(sanity, checkSanity(rtsTrue)); + IF_DEBUG(sanity, checkSanity(rtsFalse /* before GC */, major_gc)); // Initialise all our gc_thread structures for (t = 0; t < n_gc_threads; t++) { @@ -283,12 +284,11 @@ GarbageCollect (rtsBool force_major_gc, // Initialise all the generations/steps that we're collecting. for (g = 0; g <= N; g++) { - init_collected_gen(g,n_gc_threads); + prepare_collected_gen(&generations[g]); } - // Initialise all the generations/steps that we're *not* collecting. for (g = N+1; g < RtsFlags.GcFlags.generations; g++) { - init_uncollected_gen(g,n_gc_threads); + prepare_uncollected_gen(&generations[g]); } /* Allocate a mark stack if we're doing a major collection. @@ -420,76 +420,6 @@ SET_GCT(gc_threads[0]); } } - // For each workspace, in each thread, move the copied blocks to the step - { - gc_thread *thr; - gen_workspace *ws; - bdescr *prev, *next; - - for (t = 0; t < n_gc_threads; t++) { - thr = gc_threads[t]; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - ws = &thr->gens[g]; - - // Push the final block - if (ws->todo_bd) { - push_scanned_block(ws->todo_bd, ws); - } - - ASSERT(gct->scan_bd == NULL); - ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); - - prev = NULL; - for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { - ws->gen->n_words += bd->free - bd->start; - prev = bd; - } - if (prev != NULL) { - prev->link = ws->gen->blocks; - ws->gen->blocks = ws->scavd_list; - } - ws->gen->n_blocks += ws->n_scavd_blocks; - } - } - - // Add all the partial blocks *after* we've added all the full - // blocks. This is so that we can grab the partial blocks back - // again and try to fill them up in the next GC. - for (t = 0; t < n_gc_threads; t++) { - thr = gc_threads[t]; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - ws = &thr->gens[g]; - - prev = NULL; - for (bd = ws->part_list; bd != NULL; bd = next) { - next = bd->link; - if (bd->free == bd->start) { - if (prev == NULL) { - ws->part_list = next; - } else { - prev->link = next; - } - freeGroup(bd); - ws->n_part_blocks--; - } else { - ws->gen->n_words += bd->free - bd->start; - prev = bd; - } - } - if (prev != NULL) { - prev->link = ws->gen->blocks; - ws->gen->blocks = ws->part_list; - } - ws->gen->n_blocks += ws->n_part_blocks; - - ASSERT(countBlocks(ws->gen->blocks) == ws->gen->n_blocks); - ASSERT(countOccupied(ws->gen->blocks) == ws->gen->n_words); - } - } - } - // Finally: compact or sweep the oldest generation. if (major_gc && oldest_gen->mark) { if (oldest_gen->compact) @@ -498,8 +428,6 @@ SET_GCT(gc_threads[0]); sweep(oldest_gen); } - /* run through all the generations/steps and tidy up - */ copied = 0; max_copied = 0; avg_copied = 0; @@ -525,6 +453,16 @@ SET_GCT(gc_threads[0]); } } + // Run through all the generations/steps and tidy up. + // We're going to: + // - count the amount of "live" data (live_words, live_blocks) + // - count the amount of "copied" data in this GC (copied) + // - free from-space + // - make to-space the new from-space (set BF_EVACUATED on all blocks) + // + live_words = 0; + live_blocks = 0; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { if (g == N) { @@ -622,7 +560,6 @@ SET_GCT(gc_threads[0]); gen->large_objects = gen->scavenged_large_objects; gen->n_large_blocks = gen->n_scavenged_large_blocks; gen->n_new_large_words = 0; - ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); } else // for generations > N { @@ -637,16 +574,31 @@ SET_GCT(gc_threads[0]); // add the new blocks we promoted during this GC gen->n_large_blocks += gen->n_scavenged_large_blocks; - ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); + } + + ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); + + gen->scavenged_large_objects = NULL; + gen->n_scavenged_large_blocks = 0; + + // Count "live" data + live_words += genLiveWords(gen); + live_blocks += genLiveBlocks(gen); + + // add in the partial blocks in the gen_workspaces, but ignore gen 0 + // if this is a local GC (we can't count another capability's part_list) + { + nat i; + for (i = 0; i < n_capabilities; i++) { + live_words += gcThreadLiveWords(i, gen->no); + live_blocks += gcThreadLiveBlocks(i, gen->no); + } } } // for all generations // update the max size of older generations after a major GC resize_generations(); - // Calculate the amount of live data for stats. - live = calcLiveWords(); - // Start a new pinned_object_block for (n = 0; n < n_capabilities; n++) { capabilities[n].pinned_object_block = NULL; @@ -698,11 +650,6 @@ SET_GCT(gc_threads[0]); } } - // send exceptions to any threads which were about to die - RELEASE_SM_LOCK; - resurrectThreads(resurrected_threads); - ACQUIRE_SM_LOCK; - // Update the stable pointer hash table. updateStablePtrTable(major_gc); @@ -717,6 +664,17 @@ SET_GCT(gc_threads[0]); scheduleFinalizers(cap, old_weak_ptr_list); ACQUIRE_SM_LOCK; + // check sanity after GC + // before resurrectThreads(), because that might overwrite some + // closures, which will cause problems with THREADED where we don't + // fill slop. + IF_DEBUG(sanity, checkSanity(rtsTrue /* after GC */, major_gc)); + + // send exceptions to any threads which were about to die + RELEASE_SM_LOCK; + resurrectThreads(resurrected_threads); + ACQUIRE_SM_LOCK; + if (major_gc) { nat need, got; need = BLOCKS_TO_MBLOCKS(n_alloc_blocks); @@ -730,10 +688,7 @@ SET_GCT(gc_threads[0]); } } - // check sanity after GC - IF_DEBUG(sanity, checkSanity(rtsTrue)); - - // extra GC trace info + // extra GC trace info IF_DEBUG(gc, statDescribeGens()); #ifdef DEBUG @@ -758,8 +713,8 @@ SET_GCT(gc_threads[0]); #endif // ok, GC over: tell the stats department what happened. - slop = calcLiveBlocks() * BLOCK_SIZE_W - live; - stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop); + stat_endGC(allocated, live_words, copied, N, max_copied, avg_copied, + live_blocks * BLOCK_SIZE_W - live_words /* slop */); // Guess which generation we'll collect *next* time initialise_N(force_major_gc); @@ -858,7 +813,21 @@ new_gc_thread (nat n, gc_thread *t) ASSERT(g == ws->gen->no); ws->my_gct = t; - ws->todo_bd = NULL; + // We want to call + // alloc_todo_block(ws,0); + // but can't, because it uses gct which isn't set up at this point. + // Hence, allocate a block for todo_bd manually: + { + bdescr *bd = allocBlock(); // no lock, locks aren't initialised yet + initBdescr(bd, ws->gen, ws->gen->to); + bd->flags = BF_EVACUATED; + bd->u.scan = bd->free = bd->start; + + ws->todo_bd = bd; + ws->todo_free = bd->free; + ws->todo_lim = bd->start + BLOCK_SIZE_W; + } + ws->todo_q = newWSDeque(128); ws->todo_overflow = NULL; ws->n_todo_overflow = 0; @@ -1012,6 +981,8 @@ loop: scavenge_loop(); #endif + collect_gct_blocks(); + // scavenge_loop() only exits when there's no work to do r = dec_running(); @@ -1197,16 +1168,16 @@ releaseGCThreads (Capability *cap USED_IF_THREADS) ------------------------------------------------------------------------- */ static void -init_collected_gen (nat g, nat n_threads) +prepare_collected_gen (generation *gen) { - nat t, i; + nat i, g, n; gen_workspace *ws; - generation *gen; - bdescr *bd; + bdescr *bd, *next; // Throw away the current mutable list. Invariant: the mutable // list always has at least one block; this means we can avoid a // check for NULL in recordMutable(). + g = gen->no; if (g != 0) { for (i = 0; i < n_capabilities; i++) { freeChain(capabilities[i].mut_lists[g]); @@ -1231,9 +1202,35 @@ init_collected_gen (nat g, nat n_threads) gen->live_estimate = 0; // initialise the large object queues. - gen->scavenged_large_objects = NULL; - gen->n_scavenged_large_blocks = 0; - + ASSERT(gen->scavenged_large_objects == NULL); + ASSERT(gen->n_scavenged_large_blocks == 0); + + // grab all the partial blocks stashed in the gc_thread workspaces and + // move them to the old_blocks list of this gen. + for (n = 0; n < n_capabilities; n++) { + ws = &gc_threads[n]->gens[gen->no]; + + for (bd = ws->part_list; bd != NULL; bd = next) { + next = bd->link; + bd->link = gen->old_blocks; + gen->old_blocks = bd; + gen->n_old_blocks += bd->blocks; + } + ws->part_list = NULL; + ws->n_part_blocks = 0; + + ASSERT(ws->scavd_list == NULL); + ASSERT(ws->n_scavd_blocks == 0); + + if (ws->todo_free != ws->todo_bd->start) { + ws->todo_bd->free = ws->todo_free; + ws->todo_bd->link = gen->old_blocks; + gen->old_blocks = ws->todo_bd; + gen->n_old_blocks += ws->todo_bd->blocks; + alloc_todo_block(ws,0); // always has one block. + } + } + // mark the small objects as from-space for (bd = gen->old_blocks; bd; bd = bd->link) { bd->flags &= ~BF_EVACUATED; @@ -1285,106 +1282,83 @@ init_collected_gen (nat g, nat n_threads) } } } - - // For each GC thread, for each step, allocate a "todo" block to - // store evacuated objects to be scavenged, and a block to store - // evacuated objects that do not need to be scavenged. - for (t = 0; t < n_threads; t++) { - ws = &gc_threads[t]->gens[g]; - - ws->todo_large_objects = NULL; - - ws->part_list = NULL; - ws->n_part_blocks = 0; - - // allocate the first to-space block; extra blocks will be - // chained on as necessary. - ws->todo_bd = NULL; - ASSERT(looksEmptyWSDeque(ws->todo_q)); - alloc_todo_block(ws,0); - - ws->todo_overflow = NULL; - ws->n_todo_overflow = 0; - - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - } } /* ---------------------------------------------------------------------------- + Save the mutable lists in saved_mut_lists + ------------------------------------------------------------------------- */ + +static void +stash_mut_list (Capability *cap, nat gen_no) +{ + cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no]; + cap->mut_lists[gen_no] = allocBlock_sync(); +} + +/* ---------------------------------------------------------------------------- Initialise a generation that is *not* to be collected ------------------------------------------------------------------------- */ static void -init_uncollected_gen (nat g, nat threads) +prepare_uncollected_gen (generation *gen) { - nat t, n; - gen_workspace *ws; - generation *gen; - bdescr *bd; + nat i; + + + ASSERT(gen->no > 0); // save the current mutable lists for this generation, and // allocate a fresh block for each one. We'll traverse these // mutable lists as roots early on in the GC. - for (n = 0; n < n_capabilities; n++) { - capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g]; - capabilities[n].mut_lists[g] = allocBlock(); + for (i = 0; i < n_capabilities; i++) { + stash_mut_list(&capabilities[i], gen->no); } - gen = &generations[g]; + ASSERT(gen->scavenged_large_objects == NULL); + ASSERT(gen->n_scavenged_large_blocks == 0); +} - gen->scavenged_large_objects = NULL; - gen->n_scavenged_large_blocks = 0; +/* ----------------------------------------------------------------------------- + Collect the completed blocks from a GC thread and attach them to + the generation. + -------------------------------------------------------------------------- */ - for (t = 0; t < threads; t++) { - ws = &gc_threads[t]->gens[g]; - - ASSERT(looksEmptyWSDeque(ws->todo_q)); - ws->todo_large_objects = NULL; - - ws->part_list = NULL; - ws->n_part_blocks = 0; +static void +collect_gct_blocks (void) +{ + nat g; + gen_workspace *ws; + bdescr *bd, *prev; + + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + ws = &gct->gens[g]; - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; + // there may still be a block attached to ws->todo_bd; + // leave it there to use next time. + + if (ws->scavd_list != NULL) { + ACQUIRE_SPIN_LOCK(&ws->gen->sync); + + ASSERT(gct->scan_bd == NULL); + ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); - // If the block at the head of the list in this generation - // is less than 3/4 full, then use it as a todo block. - if (gen->blocks && isPartiallyFull(gen->blocks)) - { - ws->todo_bd = gen->blocks; - ws->todo_free = ws->todo_bd->free; - ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; - gen->blocks = gen->blocks->link; - gen->n_blocks -= 1; - gen->n_words -= ws->todo_bd->free - ws->todo_bd->start; - ws->todo_bd->link = NULL; - // we must scan from the current end point. - ws->todo_bd->u.scan = ws->todo_bd->free; - } - else - { - ws->todo_bd = NULL; - alloc_todo_block(ws,0); - } - } + prev = NULL; + for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { + ws->gen->n_words += bd->free - bd->start; + prev = bd; + } + if (prev != NULL) { + prev->link = ws->gen->blocks; + ws->gen->blocks = ws->scavd_list; + } + ws->gen->n_blocks += ws->n_scavd_blocks; - // deal out any more partial blocks to the threads' part_lists - t = 0; - while (gen->blocks && isPartiallyFull(gen->blocks)) - { - bd = gen->blocks; - gen->blocks = bd->link; - ws = &gc_threads[t]->gens[g]; - bd->link = ws->part_list; - ws->part_list = bd; - ws->n_part_blocks += 1; - bd->u.scan = bd->free; - gen->n_blocks -= 1; - gen->n_words -= bd->free - bd->start; - t++; - if (t == n_gc_threads) t = 0; + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; + + RELEASE_SPIN_LOCK(&ws->gen->sync); + } } } diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index 65a70fa..8ebb9a2 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -21,6 +21,7 @@ #include "RtsUtils.h" #include "sm/Storage.h" #include "sm/BlockAlloc.h" +#include "GCThread.h" #include "Sanity.h" #include "Schedule.h" #include "Apply.h" @@ -468,17 +469,10 @@ checkClosure( StgClosure* p ) all the objects in the remainder of the chain. -------------------------------------------------------------------------- */ -void -checkHeap(bdescr *bd) +void checkHeapChain (bdescr *bd) { StgPtr p; -#if defined(THREADED_RTS) - // heap sanity checking doesn't work with SMP, because we can't - // zero the slop (see Updates.h). - return; -#endif - for (; bd != NULL; bd = bd->link) { if(!(bd->flags & BF_SWEPT)) { p = bd->start; @@ -496,7 +490,7 @@ checkHeap(bdescr *bd) } } -void +void checkHeapChunk(StgPtr start, StgPtr end) { StgPtr p; @@ -587,6 +581,24 @@ checkGlobalTSOList (rtsBool checkTSOs) ASSERT(Bdescr((P_)tso)->gen_no == 0 || (tso->flags & TSO_MARKED)); tso->flags &= ~TSO_MARKED; } + + { + StgStack *stack; + StgUnderflowFrame *frame; + + stack = tso->stackobj; + while (1) { + if (stack->dirty & 1) { + ASSERT(Bdescr((P_)stack)->gen_no == 0 || (stack->dirty & TSO_MARKED)); + stack->dirty &= ~TSO_MARKED; + } + frame = (StgUnderflowFrame*) (stack->stack + stack->stack_size + - sizeofW(StgUnderflowFrame)); + if (frame->info != &stg_stack_underflow_frame_info + || frame->next_chunk == (StgStack*)END_TSO_QUEUE) break; + stack = frame->next_chunk; + } + } } } } @@ -595,7 +607,7 @@ checkGlobalTSOList (rtsBool checkTSOs) Check mutable list sanity. -------------------------------------------------------------------------- */ -void +static void checkMutableList( bdescr *mut_bd, nat gen ) { bdescr *bd; @@ -605,25 +617,37 @@ checkMutableList( bdescr *mut_bd, nat gen ) for (bd = mut_bd; bd != NULL; bd = bd->link) { for (q = bd->start; q < bd->free; q++) { p = (StgClosure *)*q; - ASSERT(!HEAP_ALLOCED(p) || Bdescr((P_)p)->gen_no == gen); - if (get_itbl(p)->type == TSO) { + ASSERT(!HEAP_ALLOCED(p) || Bdescr((P_)p)->gen_no == gen); + checkClosure(p); + + switch (get_itbl(p)->type) { + case TSO: ((StgTSO *)p)->flags |= TSO_MARKED; + break; + case STACK: + ((StgStack *)p)->dirty |= TSO_MARKED; + break; } - } + } } } -void -checkMutableLists (rtsBool checkTSOs) +static void +checkLocalMutableLists (nat cap_no) { - nat g, i; + nat g; + for (g = 1; g < RtsFlags.GcFlags.generations; g++) { + checkMutableList(capabilities[cap_no].mut_lists[g], g); + } +} - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (i = 0; i < n_capabilities; i++) { - checkMutableList(capabilities[i].mut_lists[g], g); - } +static void +checkMutableLists (void) +{ + nat i; + for (i = 0; i < n_capabilities; i++) { + checkLocalMutableLists(i); } - checkGlobalTSOList(checkTSOs); } /* @@ -677,7 +701,8 @@ checkNurserySanity (nursery *nursery) prev = NULL; for (bd = nursery->blocks; bd != NULL; bd = bd->link) { - ASSERT(bd->u.back == prev); + ASSERT(bd->gen == g0); + ASSERT(bd->u.back == prev); prev = bd; blocks += bd->blocks; } @@ -685,41 +710,59 @@ checkNurserySanity (nursery *nursery) ASSERT(blocks == nursery->n_blocks); } +static void checkGeneration (generation *gen, + rtsBool after_major_gc USED_IF_THREADS) +{ + nat n; + gen_workspace *ws; + + ASSERT(countBlocks(gen->blocks) == gen->n_blocks); + ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); + +#if defined(THREADED_RTS) + // heap sanity checking doesn't work with SMP, because we can't + // zero the slop (see Updates.h). However, we can sanity-check + // the heap after a major gc, because there is no slop. + if (!after_major_gc) return; +#endif + + checkHeapChain(gen->blocks); + + for (n = 0; n < n_capabilities; n++) { + ws = &gc_threads[n]->gens[gen->no]; + checkHeapChain(ws->todo_bd); + checkHeapChain(ws->part_list); + checkHeapChain(ws->scavd_list); + } + + checkLargeObjects(gen->large_objects); +} /* Full heap sanity check. */ -void -checkSanity( rtsBool check_heap ) +static void checkFullHeap (rtsBool after_major_gc) { nat g, n; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - ASSERT(countBlocks(generations[g].blocks) - == generations[g].n_blocks); - ASSERT(countBlocks(generations[g].large_objects) - == generations[g].n_large_blocks); - if (check_heap) { - checkHeap(generations[g].blocks); - } - checkLargeObjects(generations[g].large_objects); + checkGeneration(&generations[g], after_major_gc); } - for (n = 0; n < n_capabilities; n++) { checkNurserySanity(&nurseries[n]); } - +} + +void checkSanity (rtsBool after_gc, rtsBool major_gc) +{ + checkFullHeap(after_gc && major_gc); + checkFreeListSanity(); -#if defined(THREADED_RTS) // always check the stacks in threaded mode, because checkHeap() // does nothing in this case. - checkMutableLists(rtsTrue); -#else - if (check_heap) { - checkMutableLists(rtsFalse); - } else { - checkMutableLists(rtsTrue); + if (after_gc) { + checkMutableLists(); + checkGlobalTSOList(rtsTrue); } -#endif } // If memInventory() calculates that we have a memory leak, this @@ -732,18 +775,21 @@ checkSanity( rtsBool check_heap ) static void findMemoryLeak (void) { - nat g, i; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (i = 0; i < n_capabilities; i++) { - markBlocks(capabilities[i].mut_lists[g]); - } - markBlocks(generations[g].blocks); - markBlocks(generations[g].large_objects); - } + nat g, i; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (i = 0; i < n_capabilities; i++) { + markBlocks(capabilities[i].mut_lists[g]); + markBlocks(gc_threads[i]->gens[g].part_list); + markBlocks(gc_threads[i]->gens[g].scavd_list); + markBlocks(gc_threads[i]->gens[g].todo_bd); + } + markBlocks(generations[g].blocks); + markBlocks(generations[g].large_objects); + } - for (i = 0; i < n_capabilities; i++) { - markBlocks(nurseries[i].blocks); - } + for (i = 0; i < n_capabilities; i++) { + markBlocks(nurseries[i].blocks); + } #ifdef PROFILING // TODO: @@ -823,6 +869,9 @@ memInventory (rtsBool show) gen_blocks[g] = 0; for (i = 0; i < n_capabilities; i++) { gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]); + gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].part_list); + gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].scavd_list); + gen_blocks[g] += countBlocks(gc_threads[i]->gens[g].todo_bd); } gen_blocks[g] += genBlocks(&generations[g]); } diff --git a/rts/sm/Sanity.h b/rts/sm/Sanity.h index 602be54..f302bc2 100644 --- a/rts/sm/Sanity.h +++ b/rts/sm/Sanity.h @@ -21,9 +21,9 @@ # endif /* debugging routines */ -void checkSanity ( rtsBool check_heap ); +void checkSanity ( rtsBool after_gc, rtsBool major_gc ); void checkNurserySanity ( nursery *nursery ); -void checkHeap ( bdescr *bd ); +void checkHeapChain ( bdescr *bd ); void checkHeapChunk ( StgPtr start, StgPtr end ); void checkLargeObjects ( bdescr *bd ); void checkTSO ( StgTSO* tso ); @@ -33,9 +33,6 @@ void checkStackChunk ( StgPtr sp, StgPtr stack_end ); StgOffset checkStackFrame ( StgPtr sp ); StgOffset checkClosure ( StgClosure* p ); -void checkMutableList ( bdescr *bd, nat gen ); -void checkMutableLists ( rtsBool checkTSOs ); - void checkRunQueue (Capability *cap); void memInventory (rtsBool show); diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index 9ac152a..d77734f 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -1755,10 +1755,10 @@ scavenge_large (gen_workspace *ws) // the front when evacuating. ws->todo_large_objects = bd->link; - ACQUIRE_SPIN_LOCK(&ws->gen->sync_large_objects); + ACQUIRE_SPIN_LOCK(&ws->gen->sync); dbl_link_onto(bd, &ws->gen->scavenged_large_objects); ws->gen->n_scavenged_large_blocks += bd->blocks; - RELEASE_SPIN_LOCK(&ws->gen->sync_large_objects); + RELEASE_SPIN_LOCK(&ws->gen->sync); p = bd->start; if (scavenge_one(p)) { diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 1b8a720..6c11065 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -15,6 +15,7 @@ #include "Rts.h" #include "Storage.h" +#include "GCThread.h" #include "RtsUtils.h" #include "Stats.h" #include "BlockAlloc.h" @@ -84,7 +85,7 @@ initGeneration (generation *gen, int g) gen->compact = 0; gen->bitmap = NULL; #ifdef THREADED_RTS - initSpinLock(&gen->sync_large_objects); + initSpinLock(&gen->sync); #endif gen->threads = END_TSO_QUEUE; gen->old_threads = END_TSO_QUEUE; @@ -766,7 +767,6 @@ lnat calcAllocated (rtsBool include_nurseries) { nat allocated = 0; - bdescr *bd; nat i; // When called from GC.c, we already have the allocation count for @@ -775,9 +775,7 @@ calcAllocated (rtsBool include_nurseries) if (include_nurseries) { for (i = 0; i < n_capabilities; i++) { - for (bd = nurseries[i].blocks; bd; bd = bd->link) { - allocated += (lnat)(bd->free - bd->start); - } + allocated += countOccupied(nurseries[i].blocks); } } @@ -787,25 +785,6 @@ calcAllocated (rtsBool include_nurseries) return allocated; } -/* Approximate the amount of live data in the heap. To be called just - * after garbage collection (see GarbageCollect()). - */ -lnat calcLiveBlocks (void) -{ - nat g; - lnat live = 0; - generation *gen; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - /* approximate amount of live data (doesn't take into account slop - * at end of each block). - */ - gen = &generations[g]; - live += gen->n_large_blocks + gen->n_blocks; - } - return live; -} - lnat countOccupied (bdescr *bd) { lnat words; @@ -818,6 +797,38 @@ lnat countOccupied (bdescr *bd) return words; } +lnat genLiveWords (generation *gen) +{ + return gen->n_words + countOccupied(gen->large_objects); +} + +lnat genLiveBlocks (generation *gen) +{ + return gen->n_blocks + gen->n_large_blocks; +} + +lnat gcThreadLiveWords (nat i, nat g) +{ + lnat words; + + words = countOccupied(gc_threads[i]->gens[g].todo_bd); + words += countOccupied(gc_threads[i]->gens[g].part_list); + words += countOccupied(gc_threads[i]->gens[g].scavd_list); + + return words; +} + +lnat gcThreadLiveBlocks (nat i, nat g) +{ + lnat blocks; + + blocks = countBlocks(gc_threads[i]->gens[g].todo_bd); + blocks += gc_threads[i]->gens[g].n_part_blocks; + blocks += gc_threads[i]->gens[g].n_scavd_blocks; + + return blocks; +} + // Return an accurate count of the live data in the heap, excluding // generation 0. lnat calcLiveWords (void) @@ -828,8 +839,19 @@ lnat calcLiveWords (void) live = 0; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen = &generations[g]; - live += gen->n_words + countOccupied(gen->large_objects); + live += genLiveWords(&generations[g]); + } + return live; +} + +lnat calcLiveBlocks (void) +{ + nat g; + lnat live; + + live = 0; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + live += genLiveBlocks(&generations[g]); } return live; } diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index 8927ed6..d463d1a 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -87,11 +87,18 @@ lnat countNurseryBlocks ( void ); -------------------------------------------------------------------------- */ lnat calcAllocated (rtsBool count_nurseries); -lnat calcLiveBlocks (void); -lnat calcLiveWords (void); lnat countOccupied (bdescr *bd); lnat calcNeeded (void); +lnat gcThreadLiveWords (nat i, nat g); +lnat gcThreadLiveBlocks (nat i, nat g); + +lnat genLiveWords (generation *gen); +lnat genLiveBlocks (generation *gen); + +lnat calcLiveBlocks (void); +lnat calcLiveWords (void); + /* ---------------------------------------------------------------------------- Storage manager internal APIs and globals ------------------------------------------------------------------------- */