X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=7a6889cd72e4859d306dcb95fb9767a128915045;hb=1663532f26ae2e68f04d067b11bd177d307637b1;hp=934c91f2ebc9311573862e2bbaa7c94d260a307e;hpb=0b0842005c6c68f5f0d7ab428eb153e5a47334d1;p=ghc-hetmet.git diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 934c91f..7a6889c 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -11,7 +11,7 @@ * * ---------------------------------------------------------------------------*/ -#include "PosixSource.h" +// #include "PosixSource.h" #include "Rts.h" #include "RtsFlags.h" #include "RtsUtils.h" @@ -51,6 +51,7 @@ #include "Sparks.h" #include // for memset() +#include /* ----------------------------------------------------------------------------- Global variables @@ -133,7 +134,7 @@ SpinLock recordMutableGen_sync; static void mark_root (StgClosure **root); static void zero_static_object_list (StgClosure* first_static); -static void initialise_N (rtsBool force_major_gc); +static nat initialise_N (rtsBool force_major_gc); static void alloc_gc_threads (void); static void init_collected_gen (nat g, nat threads); static void init_uncollected_gen (nat g, nat threads); @@ -146,6 +147,7 @@ static void gc_thread_work (void); static nat inc_running (void); static nat dec_running (void); static void wakeup_gc_threads (nat n_threads); +static void shutdown_gc_threads (nat n_threads); #if 0 && defined(DEBUG) static void gcCAFs (void); @@ -179,10 +181,10 @@ GarbageCollect ( rtsBool force_major_gc ) { bdescr *bd; step *stp; - lnat live, allocated; + lnat live, allocated, max_copied, avg_copied, slop; lnat oldgen_saved_blocks = 0; gc_thread *saved_gct; - nat g, s, t; + nat g, s, t, n; // necessary if we stole a callee-saves register for gct: saved_gct = gct; @@ -193,8 +195,6 @@ GarbageCollect ( rtsBool force_major_gc ) ACQUIRE_SM_LOCK; - debugTrace(DEBUG_gc, "starting GC"); - #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { // block signals @@ -227,7 +227,7 @@ GarbageCollect ( rtsBool force_major_gc ) /* Figure out which generation to collect */ - initialise_N(force_major_gc); + n = initialise_N(force_major_gc); /* Allocate + initialise the gc_thread structures. */ @@ -241,7 +241,7 @@ GarbageCollect ( rtsBool force_major_gc ) * We don't try to parallelise minor GC. */ #if defined(THREADED_RTS) - if (N == 0) { + if (n < (4*1024*1024 / BLOCK_SIZE)) { n_gc_threads = 1; } else { n_gc_threads = RtsFlags.ParFlags.gcThreads; @@ -249,6 +249,8 @@ GarbageCollect ( rtsBool force_major_gc ) #else n_gc_threads = 1; #endif + trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %dKB to collect, using %d thread(s)", + N, n * (BLOCK_SIZE / 1024), n_gc_threads); #ifdef RTS_GTK_FRONTPANEL if (RtsFlags.GcFlags.frontpanel) { @@ -264,6 +266,11 @@ GarbageCollect ( rtsBool force_major_gc ) // check stack sanity *before* GC (ToDo: check all threads) IF_DEBUG(sanity, checkFreeListSanity()); + // Initialise all our gc_thread structures + for (t = 0; t < n_gc_threads; t++) { + init_gc_thread(gc_threads[t]); + } + // Initialise all the generations/steps that we're collecting. for (g = 0; g <= N; g++) { init_collected_gen(g,n_gc_threads); @@ -285,16 +292,6 @@ GarbageCollect ( rtsBool force_major_gc ) mark_stack_bdescr = NULL; } - // Initialise all our gc_thread structures - for (t = 0; t < n_gc_threads; t++) { - init_gc_thread(gc_threads[t]); - } - - // the main thread is running: this prevents any other threads from - // exiting prematurely, so we can start them now. - inc_running(); - wakeup_gc_threads(n_gc_threads); - // this is the main thread gct = gc_threads[0]; @@ -306,15 +303,21 @@ GarbageCollect ( rtsBool force_major_gc ) * Also do them in reverse generation order, for the usual reason: * namely to reduce the likelihood of spurious old->new pointers. */ - { - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { generations[g].saved_mut_list = generations[g].mut_list; generations[g].mut_list = allocBlock(); - // mut_list always has at least one block. - } - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + // mut_list always has at least one block. + } + + // the main thread is running: this prevents any other threads from + // exiting prematurely, so we can start them now. + // NB. do this after the mutable lists have been saved above, otherwise + // the other GC threads will be writing into the old mutable lists. + inc_running(); + wakeup_gc_threads(n_gc_threads); + + for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { scavenge_mutable_list(&generations[g]); - } } // follow roots from the CAF list (used by GHCi) @@ -365,6 +368,8 @@ GarbageCollect ( rtsBool force_major_gc ) break; } + shutdown_gc_threads(n_gc_threads); + // Update pointers from the Task list update_task_list(); @@ -405,7 +410,7 @@ GarbageCollect ( rtsBool force_major_gc ) { gc_thread *thr; step_workspace *ws; - bdescr *prev; + bdescr *prev, *next; for (t = 0; t < n_gc_threads; t++) { thr = gc_threads[t]; @@ -413,24 +418,52 @@ GarbageCollect ( rtsBool force_major_gc ) // not step 0 for (s = 1; s < total_steps; s++) { ws = &thr->steps[s]; - // Not true? - // ASSERT( ws->scan_bd == ws->todo_bd ); - ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 ); // Push the final block - if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); } - + if (ws->todo_bd) { + push_scanned_block(ws->todo_bd, ws); + } + + ASSERT(gct->scan_bd == NULL); ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); - prev = ws->scavd_list; + prev = NULL; for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { bd->flags &= ~BF_EVACUATED; // now from-space + ws->step->n_words += bd->free - bd->start; prev = bd; } - prev->link = ws->stp->blocks; - ws->stp->blocks = ws->scavd_list; - ws->stp->n_blocks += ws->n_scavd_blocks; - ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks); + if (prev != NULL) { + prev->link = ws->step->blocks; + ws->step->blocks = ws->scavd_list; + } + ws->step->n_blocks += ws->n_scavd_blocks; + + prev = NULL; + for (bd = ws->part_list; bd != NULL; bd = next) { + next = bd->link; + if (bd->free == bd->start) { + if (prev == NULL) { + ws->part_list = next; + } else { + prev->link = next; + } + freeGroup(bd); + ws->n_part_blocks--; + } else { + bd->flags &= ~BF_EVACUATED; // now from-space + ws->step->n_words += bd->free - bd->start; + prev = bd; + } + } + if (prev != NULL) { + prev->link = ws->step->blocks; + ws->step->blocks = ws->part_list; + } + ws->step->n_blocks += ws->n_part_blocks; + + ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks); + ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words); } } } @@ -451,25 +484,35 @@ GarbageCollect ( rtsBool force_major_gc ) /* run through all the generations/steps and tidy up */ copied = 0; + max_copied = 0; + avg_copied = 0; { nat i; for (i=0; i < n_gc_threads; i++) { - if (major_gc) { + if (n_gc_threads > 1) { trace(TRACE_gc,"thread %d:", i); trace(TRACE_gc," copied %ld", gc_threads[i]->copied * sizeof(W_)); + trace(TRACE_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_)); trace(TRACE_gc," any_work %ld", gc_threads[i]->any_work); trace(TRACE_gc," no_work %ld", gc_threads[i]->no_work); - trace(TRACE_gc," scav_global_work %ld", gc_threads[i]->scav_global_work); - trace(TRACE_gc," scav_local_work %ld", gc_threads[i]->scav_local_work); + trace(TRACE_gc," scav_find_work %ld", gc_threads[i]->scav_find_work); } copied += gc_threads[i]->copied; + max_copied = stg_max(gc_threads[i]->copied, max_copied); + } + if (n_gc_threads == 1) { + max_copied = 0; + avg_copied = 0; + } else { + avg_copied = copied; } } for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - if (g <= N) { + if (g == N) { generations[g].collections++; // for stats + if (n_gc_threads > 1) generations[g].par_collections++; } // Count the mutable list as bytes "copied" for the purposes of @@ -505,6 +548,7 @@ GarbageCollect ( rtsBool force_major_gc ) // onto the front of the now-compacted existing blocks. for (bd = stp->blocks; bd != NULL; bd = bd->link) { bd->flags &= ~BF_EVACUATED; // now from-space + stp->n_words += bd->free - bd->start; } // tack the new blocks on the end of the existing blocks if (stp->old_blocks != NULL) { @@ -524,6 +568,7 @@ GarbageCollect ( rtsBool force_major_gc ) // add the new blocks to the block tally stp->n_blocks += stp->n_old_blocks; ASSERT(countBlocks(stp->blocks) == stp->n_blocks); + ASSERT(countOccupied(stp->blocks) == stp->n_words); } else // not copacted { @@ -573,10 +618,8 @@ GarbageCollect ( rtsBool force_major_gc ) // update the max size of older generations after a major GC resize_generations(); - // Guess the amount of live data for stats. - live = calcLiveBlocks() * BLOCK_SIZE_W; - debugTrace(DEBUG_gc, "Slop: %ldKB", - (live - calcLiveWords()) / (1024/sizeof(W_))); + // Calculate the amount of live data for stats. + live = calcLiveWords(); // Free the small objects allocated via allocate(), since this will // all have been copied into G0S1 now. @@ -586,6 +629,7 @@ GarbageCollect ( rtsBool force_major_gc ) g0s0->blocks = NULL; } g0s0->n_blocks = 0; + g0s0->n_words = 0; } alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; @@ -654,7 +698,7 @@ GarbageCollect ( rtsBool force_major_gc ) IF_DEBUG(sanity, checkSanity()); // extra GC trace info - if (traceClass(TRACE_gc)) statDescribeGens(); + if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens(); #ifdef DEBUG // symbol-table based profiling @@ -678,7 +722,8 @@ GarbageCollect ( rtsBool force_major_gc ) #endif // ok, GC over: tell the stats department what happened. - stat_endGC(allocated, live, copied, N); + slop = calcLiveBlocks() * BLOCK_SIZE_W - live; + stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { @@ -900,27 +945,44 @@ isAlive(StgClosure *p) /* ----------------------------------------------------------------------------- Figure out which generation to collect, initialise N and major_gc. + + Also returns the total number of blocks in generations that will be + collected. -------------------------------------------------------------------------- */ -static void +static nat initialise_N (rtsBool force_major_gc) { - nat g; + int g; + nat s, blocks, blocks_total; + + blocks = 0; + blocks_total = 0; if (force_major_gc) { - N = RtsFlags.GcFlags.generations - 1; - major_gc = rtsTrue; + N = RtsFlags.GcFlags.generations - 1; } else { - N = 0; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - if (generations[g].steps[0].n_blocks + - generations[g].steps[0].n_large_blocks - >= generations[g].max_blocks) { - N = g; - } - } - major_gc = (N == RtsFlags.GcFlags.generations-1); + N = 0; } + + for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) { + blocks = 0; + for (s = 0; s < generations[g].n_steps; s++) { + blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W; + blocks += generations[g].steps[s].n_large_blocks; + } + if (blocks >= generations[g].max_blocks) { + N = stg_max(N,g); + } + if ((nat)g <= N) { + blocks_total += blocks; + } + } + + blocks_total += countNurseryBlocks(); + + major_gc = (N == RtsFlags.GcFlags.generations-1); + return blocks_total; } /* ----------------------------------------------------------------------------- @@ -941,7 +1003,8 @@ alloc_gc_thread (int n) t->id = 0; initCondition(&t->wake_cond); initMutex(&t->wake_mutex); - t->wakeup = rtsFalse; + t->wakeup = rtsTrue; // starts true, so we can wait for the + // thread to start up, see wakeup_gc_threads t->exit = rtsFalse; #endif @@ -958,16 +1021,16 @@ alloc_gc_thread (int n) for (s = 0; s < total_steps; s++) { ws = &t->steps[s]; - ws->stp = &all_steps[s]; - ASSERT(s == ws->stp->abs_no); + ws->step = &all_steps[s]; + ASSERT(s == ws->step->abs_no); ws->gct = t; - ws->scan_bd = NULL; - ws->scan = NULL; - ws->todo_bd = NULL; ws->buffer_todo_bd = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + ws->scavd_list = NULL; ws->n_scavd_blocks = 0; } @@ -1015,6 +1078,7 @@ inc_running (void) ACQUIRE_LOCK(&gc_running_mutex); n_running = ++gc_running_threads; RELEASE_LOCK(&gc_running_mutex); + ASSERT(n_running <= n_gc_threads); return n_running; } @@ -1023,6 +1087,7 @@ dec_running (void) { nat n_running; ACQUIRE_LOCK(&gc_running_mutex); + ASSERT(n_gc_threads != 0); n_running = --gc_running_threads; RELEASE_LOCK(&gc_running_mutex); return n_running; @@ -1057,6 +1122,7 @@ loop: gct->thread_index, r); while (gc_running_threads != 0) { + usleep(1); if (any_work()) { inc_running(); goto loop; @@ -1080,13 +1146,13 @@ gc_thread_mainloop (void) // Wait until we're told to wake up ACQUIRE_LOCK(&gct->wake_mutex); + gct->wakeup = rtsFalse; while (!gct->wakeup) { debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index); waitCondition(&gct->wake_cond, &gct->wake_mutex); } RELEASE_LOCK(&gct->wake_mutex); - gct->wakeup = rtsFalse; if (gct->exit) break; #ifdef USE_PAPI @@ -1147,7 +1213,16 @@ wakeup_gc_threads (nat n_threads USED_IF_THREADS) nat i; for (i=1; i < n_threads; i++) { inc_running(); - ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + debugTrace(DEBUG_gc, "waking up gc thread %d", i); + do { + ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + if (gc_threads[i]->wakeup) { + RELEASE_LOCK(&gc_threads[i]->wake_mutex); + continue; + } else { + break; + } + } while (1); gc_threads[i]->wakeup = rtsTrue; signalCondition(&gc_threads[i]->wake_cond); RELEASE_LOCK(&gc_threads[i]->wake_mutex); @@ -1155,6 +1230,26 @@ wakeup_gc_threads (nat n_threads USED_IF_THREADS) #endif } +// After GC is complete, we must wait for all GC threads to enter the +// standby state, otherwise they may still be executing inside +// any_work(), and may even remain awake until the next GC starts. +static void +shutdown_gc_threads (nat n_threads USED_IF_THREADS) +{ +#if defined(THREADED_RTS) + nat i; + rtsBool wakeup; + for (i=1; i < n_threads; i++) { + do { + ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + wakeup = gc_threads[i]->wakeup; + // wakeup is false while the thread is waiting + RELEASE_LOCK(&gc_threads[i]->wake_mutex); + } while (wakeup); + } +#endif +} + /* ---------------------------------------------------------------------------- Initialise a generation that is to be collected ------------------------------------------------------------------------- */ @@ -1194,6 +1289,7 @@ init_collected_gen (nat g, nat n_threads) stp->n_old_blocks = stp->n_blocks; stp->blocks = NULL; stp->n_blocks = 0; + stp->n_words = 0; // we don't have any to-be-scavenged blocks yet stp->todos = NULL; @@ -1257,16 +1353,16 @@ init_collected_gen (nat g, nat n_threads) ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; - ws->scan_bd = NULL; - ws->scan = NULL; - ws->todo_large_objects = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + // allocate the first to-space block; extra blocks will be // chained on as necessary. ws->todo_bd = NULL; ws->buffer_todo_bd = NULL; - gc_alloc_todo_block(ws); + alloc_todo_block(ws,0); ws->scavd_list = NULL; ws->n_scavd_blocks = 0; @@ -1297,11 +1393,14 @@ init_uncollected_gen (nat g, nat threads) for (s = 0; s < generations[g].n_steps; s++) { ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; - stp = ws->stp; + stp = ws->step; ws->buffer_todo_bd = NULL; ws->todo_large_objects = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + ws->scavd_list = NULL; ws->n_scavd_blocks = 0; @@ -1314,23 +1413,15 @@ init_uncollected_gen (nat g, nat threads) ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; stp->blocks = stp->blocks->link; stp->n_blocks -= 1; + stp->n_words -= ws->todo_bd->free - ws->todo_bd->start; ws->todo_bd->link = NULL; - - // this block is also the scan block; we must scan - // from the current end point. - ws->scan_bd = ws->todo_bd; - ws->scan = ws->scan_bd->free; - - // subtract the contents of this block from the stats, - // because we'll count the whole block later. - copied -= ws->scan_bd->free - ws->scan_bd->start; + // we must scan from the current end point. + ws->todo_bd->u.scan = ws->todo_bd->free; } else { - ws->scan_bd = NULL; - ws->scan = NULL; ws->todo_bd = NULL; - gc_alloc_todo_block(ws); + alloc_todo_block(ws,0); } } } @@ -1357,16 +1448,16 @@ init_gc_thread (gc_thread *t) { t->static_objects = END_OF_STATIC_LIST; t->scavenged_static_objects = END_OF_STATIC_LIST; + t->scan_bd = NULL; t->evac_step = 0; t->failed_to_evac = rtsFalse; t->eager_promotion = rtsTrue; t->thunk_selector_depth = 0; t->copied = 0; + t->scanned = 0; t->any_work = 0; t->no_work = 0; - t->scav_global_work = 0; - t->scav_local_work = 0; - + t->scav_find_work = 0; } /* ----------------------------------------------------------------------------- @@ -1486,7 +1577,7 @@ resize_generations (void) nat gens = RtsFlags.GcFlags.generations; // live in the oldest generations - live = oldest_gen->steps[0].n_blocks + + live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+ oldest_gen->steps[0].n_large_blocks; // default max size for all generations except zero