X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=b381e56cce20b683774677e69df3ac7baa22f917;hb=4ba3cb054c3f069520f8db26591eae27560638c9;hp=4d0c4ef61334f2aa579be0e9e05c13c0693d1bd4;hpb=902be381a29af801582c93366528f7901a5da544;p=ghc-hetmet.git diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 4d0c4ef..b381e56 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- * - * (c) The GHC Team 1998-2006 + * (c) The GHC Team 1998-2008 * * Generational garbage collector * @@ -43,6 +43,7 @@ #include "Papi.h" #include "GC.h" +#include "GCThread.h" #include "Compact.h" #include "Evac.h" #include "Scav.h" @@ -132,7 +133,7 @@ SpinLock recordMutableGen_sync; Static function declarations -------------------------------------------------------------------------- */ -static void mark_root (StgClosure **root); +static void mark_root (void *user, StgClosure **root); static void zero_static_object_list (StgClosure* first_static); static nat initialise_N (rtsBool force_major_gc); static void alloc_gc_threads (void); @@ -181,7 +182,7 @@ GarbageCollect ( rtsBool force_major_gc ) { bdescr *bd; step *stp; - lnat live, allocated; + lnat live, allocated, max_copied, avg_copied, slop; lnat oldgen_saved_blocks = 0; gc_thread *saved_gct; nat g, s, t, n; @@ -195,8 +196,6 @@ GarbageCollect ( rtsBool force_major_gc ) ACQUIRE_SM_LOCK; - debugTrace(DEBUG_gc, "starting GC"); - #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { // block signals @@ -204,6 +203,9 @@ GarbageCollect ( rtsBool force_major_gc ) } #endif + ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord)); + // otherwise adjust the padding in step_workspace. + // tell the stats department that we've started a GC stat_startGC(); @@ -248,11 +250,11 @@ GarbageCollect ( rtsBool force_major_gc ) } else { n_gc_threads = RtsFlags.ParFlags.gcThreads; } - trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %dKB to collect, using %d thread(s)", - N, n * (BLOCK_SIZE / 1024), n_gc_threads); #else n_gc_threads = 1; #endif + trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)", + N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads); #ifdef RTS_GTK_FRONTPANEL if (RtsFlags.GcFlags.frontpanel) { @@ -324,15 +326,15 @@ GarbageCollect ( rtsBool force_major_gc ) // follow roots from the CAF list (used by GHCi) gct->evac_step = 0; - markCAFs(mark_root); + markCAFs(mark_root, gct); // follow all the roots that the application knows about. gct->evac_step = 0; - GetRoots(mark_root); + markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads); #if defined(RTS_USER_SIGNALS) // mark the signal handlers (signals should be already blocked) - markSignalHandlers(mark_root); + markSignalHandlers(mark_root, gct); #endif // Mark the weak pointer list, and prepare to detect dead weak pointers. @@ -340,7 +342,7 @@ GarbageCollect ( rtsBool force_major_gc ) initWeakForGC(); // Mark the stable pointer table. - markStablePtrTable(mark_root); + markStablePtrTable(mark_root, gct); /* ------------------------------------------------------------------------- * Repeatedly scavenge all the areas we know about until there's no @@ -391,7 +393,7 @@ GarbageCollect ( rtsBool force_major_gc ) if (major_gc && oldest_gen->steps[0].is_compacted) { // save number of blocks for stats oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks; - compact(); + compact(gct->scavenged_static_objects); } IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse)); @@ -412,70 +414,101 @@ GarbageCollect ( rtsBool force_major_gc ) { gc_thread *thr; step_workspace *ws; - bdescr *prev; + bdescr *prev, *next; for (t = 0; t < n_gc_threads; t++) { thr = gc_threads[t]; // not step 0 - for (s = 1; s < total_steps; s++) { + if (RtsFlags.GcFlags.generations == 1) { + s = 0; + } else { + s = 1; + } + for (; s < total_steps; s++) { ws = &thr->steps[s]; - // Not true? - // ASSERT( ws->scan_bd == ws->todo_bd ); - ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 ); // Push the final block - if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); } - + if (ws->todo_bd) { + push_scanned_block(ws->todo_bd, ws); + } + + ASSERT(gct->scan_bd == NULL); ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); - prev = ws->scavd_list; + prev = NULL; for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { bd->flags &= ~BF_EVACUATED; // now from-space + ws->step->n_words += bd->free - bd->start; prev = bd; } - prev->link = ws->stp->blocks; - ws->stp->blocks = ws->scavd_list; - ws->stp->n_blocks += ws->n_scavd_blocks; - ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks); + if (prev != NULL) { + prev->link = ws->step->blocks; + ws->step->blocks = ws->scavd_list; + } + ws->step->n_blocks += ws->n_scavd_blocks; + + prev = NULL; + for (bd = ws->part_list; bd != NULL; bd = next) { + next = bd->link; + if (bd->free == bd->start) { + if (prev == NULL) { + ws->part_list = next; + } else { + prev->link = next; + } + freeGroup(bd); + ws->n_part_blocks--; + } else { + bd->flags &= ~BF_EVACUATED; // now from-space + ws->step->n_words += bd->free - bd->start; + prev = bd; + } + } + if (prev != NULL) { + prev->link = ws->step->blocks; + ws->step->blocks = ws->part_list; + } + ws->step->n_blocks += ws->n_part_blocks; + + ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks); + ASSERT(countOccupied(ws->step->blocks) == ws->step->n_words); } } } - // Two-space collector: swap the semi-spaces around. - // Currently: g0s0->old_blocks is the old nursery - // g0s0->blocks is to-space from this GC - // We want these the other way around. - if (RtsFlags.GcFlags.generations == 1) { - bdescr *nursery_blocks = g0s0->old_blocks; - nat n_nursery_blocks = g0s0->n_old_blocks; - g0s0->old_blocks = g0s0->blocks; - g0s0->n_old_blocks = g0s0->n_blocks; - g0s0->blocks = nursery_blocks; - g0s0->n_blocks = n_nursery_blocks; - } - /* run through all the generations/steps and tidy up */ copied = 0; + max_copied = 0; + avg_copied = 0; { nat i; for (i=0; i < n_gc_threads; i++) { if (n_gc_threads > 1) { trace(TRACE_gc,"thread %d:", i); trace(TRACE_gc," copied %ld", gc_threads[i]->copied * sizeof(W_)); + trace(TRACE_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_)); trace(TRACE_gc," any_work %ld", gc_threads[i]->any_work); trace(TRACE_gc," no_work %ld", gc_threads[i]->no_work); trace(TRACE_gc," scav_find_work %ld", gc_threads[i]->scav_find_work); } copied += gc_threads[i]->copied; + max_copied = stg_max(gc_threads[i]->copied, max_copied); + } + if (n_gc_threads == 1) { + max_copied = 0; + avg_copied = 0; + } else { + avg_copied = copied; } } for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - if (g <= N) { + if (g == N) { generations[g].collections++; // for stats + if (n_gc_threads > 1) generations[g].par_collections++; } // Count the mutable list as bytes "copied" for the purposes of @@ -511,6 +544,7 @@ GarbageCollect ( rtsBool force_major_gc ) // onto the front of the now-compacted existing blocks. for (bd = stp->blocks; bd != NULL; bd = bd->link) { bd->flags &= ~BF_EVACUATED; // now from-space + stp->n_words += bd->free - bd->start; } // tack the new blocks on the end of the existing blocks if (stp->old_blocks != NULL) { @@ -530,6 +564,7 @@ GarbageCollect ( rtsBool force_major_gc ) // add the new blocks to the block tally stp->n_blocks += stp->n_old_blocks; ASSERT(countBlocks(stp->blocks) == stp->n_blocks); + ASSERT(countOccupied(stp->blocks) == stp->n_words); } else // not copacted { @@ -579,10 +614,8 @@ GarbageCollect ( rtsBool force_major_gc ) // update the max size of older generations after a major GC resize_generations(); - // Guess the amount of live data for stats. - live = calcLiveBlocks() * BLOCK_SIZE_W; - debugTrace(DEBUG_gc, "Slop: %ldKB", - (live - calcLiveWords()) / (1024/sizeof(W_))); + // Calculate the amount of live data for stats. + live = calcLiveWords(); // Free the small objects allocated via allocate(), since this will // all have been copied into G0S1 now. @@ -592,6 +625,7 @@ GarbageCollect ( rtsBool force_major_gc ) g0s0->blocks = NULL; } g0s0->n_blocks = 0; + g0s0->n_words = 0; } alloc_blocks = 0; alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; @@ -684,7 +718,8 @@ GarbageCollect ( rtsBool force_major_gc ) #endif // ok, GC over: tell the stats department what happened. - stat_endGC(allocated, live, copied, N); + slop = calcLiveBlocks() * BLOCK_SIZE_W - live; + stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { @@ -699,212 +734,6 @@ GarbageCollect ( rtsBool force_major_gc ) } /* ----------------------------------------------------------------------------- - * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an - * implicit slide i.e. after marking all sparks are at the beginning of the - * spark pool and the spark pool only contains sparkable closures - * -------------------------------------------------------------------------- */ - -#ifdef THREADED_RTS -static void -markSparkQueue (evac_fn evac, Capability *cap) -{ - StgClosure **sparkp, **to_sparkp; - nat n, pruned_sparks; // stats only - StgSparkPool *pool; - - PAR_TICKY_MARK_SPARK_QUEUE_START(); - - n = 0; - pruned_sparks = 0; - - pool = &(cap->r.rSparks); - - ASSERT_SPARK_POOL_INVARIANTS(pool); - -#if defined(PARALLEL_HASKELL) - // stats only - n = 0; - pruned_sparks = 0; -#endif - - sparkp = pool->hd; - to_sparkp = pool->hd; - while (sparkp != pool->tl) { - ASSERT(*sparkp!=NULL); - ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp))); - // ToDo?: statistics gathering here (also for GUM!) - if (closure_SHOULD_SPARK(*sparkp)) { - evac(sparkp); - *to_sparkp++ = *sparkp; - if (to_sparkp == pool->lim) { - to_sparkp = pool->base; - } - n++; - } else { - pruned_sparks++; - } - sparkp++; - if (sparkp == pool->lim) { - sparkp = pool->base; - } - } - pool->tl = to_sparkp; - - PAR_TICKY_MARK_SPARK_QUEUE_END(n); - -#if defined(PARALLEL_HASKELL) - debugTrace(DEBUG_sched, - "marked %d sparks and pruned %d sparks on [%x]", - n, pruned_sparks, mytid); -#else - debugTrace(DEBUG_sched, - "marked %d sparks and pruned %d sparks", - n, pruned_sparks); -#endif - - debugTrace(DEBUG_sched, - "new spark queue len=%d; (hd=%p; tl=%p)\n", - sparkPoolSize(pool), pool->hd, pool->tl); -} -#endif - -/* --------------------------------------------------------------------------- - Where are the roots that we know about? - - - all the threads on the runnable queue - - all the threads on the blocked queue - - all the threads on the sleeping queue - - all the thread currently executing a _ccall_GC - - all the "main threads" - - ------------------------------------------------------------------------ */ - -void -GetRoots( evac_fn evac ) -{ - nat i; - Capability *cap; - Task *task; - - // Each GC thread is responsible for following roots from the - // Capability of the same number. There will usually be the same - // or fewer Capabilities as GC threads, but just in case there - // are more, we mark every Capability whose number is the GC - // thread's index plus a multiple of the number of GC threads. - for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) { - cap = &capabilities[i]; - evac((StgClosure **)(void *)&cap->run_queue_hd); - evac((StgClosure **)(void *)&cap->run_queue_tl); -#if defined(THREADED_RTS) - evac((StgClosure **)(void *)&cap->wakeup_queue_hd); - evac((StgClosure **)(void *)&cap->wakeup_queue_tl); -#endif - for (task = cap->suspended_ccalling_tasks; task != NULL; - task=task->next) { - debugTrace(DEBUG_sched, - "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id); - evac((StgClosure **)(void *)&task->suspended_tso); - } - -#if defined(THREADED_RTS) - markSparkQueue(evac,cap); -#endif - } - -#if !defined(THREADED_RTS) - evac((StgClosure **)(void *)&blocked_queue_hd); - evac((StgClosure **)(void *)&blocked_queue_tl); - evac((StgClosure **)(void *)&sleeping_queue); -#endif -} - -/* ----------------------------------------------------------------------------- - isAlive determines whether the given closure is still alive (after - a garbage collection) or not. It returns the new address of the - closure if it is alive, or NULL otherwise. - - NOTE: Use it before compaction only! - It untags and (if needed) retags pointers to closures. - -------------------------------------------------------------------------- */ - - -StgClosure * -isAlive(StgClosure *p) -{ - const StgInfoTable *info; - bdescr *bd; - StgWord tag; - StgClosure *q; - - while (1) { - /* The tag and the pointer are split, to be merged later when needed. */ - tag = GET_CLOSURE_TAG(p); - q = UNTAG_CLOSURE(p); - - ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); - info = get_itbl(q); - - // ignore static closures - // - // ToDo: for static closures, check the static link field. - // Problem here is that we sometimes don't set the link field, eg. - // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. - // - if (!HEAP_ALLOCED(q)) { - return p; - } - - // ignore closures in generations that we're not collecting. - bd = Bdescr((P_)q); - if (bd->gen_no > N) { - return p; - } - - // if it's a pointer into to-space, then we're done - if (bd->flags & BF_EVACUATED) { - return p; - } - - // large objects use the evacuated flag - if (bd->flags & BF_LARGE) { - return NULL; - } - - // check the mark bit for compacted steps - if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) { - return p; - } - - switch (info->type) { - - case IND: - case IND_STATIC: - case IND_PERM: - case IND_OLDGEN: // rely on compatible layout with StgInd - case IND_OLDGEN_PERM: - // follow indirections - p = ((StgInd *)q)->indirectee; - continue; - - case EVACUATED: - // alive! - return ((StgEvacuated *)q)->evacuee; - - case TSO: - if (((StgTSO *)q)->what_next == ThreadRelocated) { - p = (StgClosure *)((StgTSO *)q)->link; - continue; - } - return NULL; - - default: - // dead. - return NULL; - } - } -} - -/* ----------------------------------------------------------------------------- Figure out which generation to collect, initialise N and major_gc. Also returns the total number of blocks in generations that will be @@ -929,7 +758,7 @@ initialise_N (rtsBool force_major_gc) for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) { blocks = 0; for (s = 0; s < generations[g].n_steps; s++) { - blocks += generations[g].steps[s].n_blocks; + blocks += generations[g].steps[s].n_words / BLOCK_SIZE_W; blocks += generations[g].steps[s].n_large_blocks; } if (blocks >= generations[g].max_blocks) { @@ -964,7 +793,8 @@ alloc_gc_thread (int n) t->id = 0; initCondition(&t->wake_cond); initMutex(&t->wake_mutex); - t->wakeup = rtsFalse; + t->wakeup = rtsTrue; // starts true, so we can wait for the + // thread to start up, see wakeup_gc_threads t->exit = rtsFalse; #endif @@ -981,16 +811,16 @@ alloc_gc_thread (int n) for (s = 0; s < total_steps; s++) { ws = &t->steps[s]; - ws->stp = &all_steps[s]; - ASSERT(s == ws->stp->abs_no); + ws->step = &all_steps[s]; + ASSERT(s == ws->step->abs_no); ws->gct = t; - ws->scan_bd = NULL; - ws->scan = NULL; - ws->todo_bd = NULL; ws->buffer_todo_bd = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + ws->scavd_list = NULL; ws->n_scavd_blocks = 0; } @@ -1071,7 +901,7 @@ gc_thread_work (void) // Every thread evacuates some roots. gct->evac_step = 0; - GetRoots(mark_root); + markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads); loop: scavenge_loop(); @@ -1173,7 +1003,16 @@ wakeup_gc_threads (nat n_threads USED_IF_THREADS) nat i; for (i=1; i < n_threads; i++) { inc_running(); - ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + debugTrace(DEBUG_gc, "waking up gc thread %d", i); + do { + ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + if (gc_threads[i]->wakeup) { + RELEASE_LOCK(&gc_threads[i]->wake_mutex); + continue; + } else { + break; + } + } while (1); gc_threads[i]->wakeup = rtsTrue; signalCondition(&gc_threads[i]->wake_cond); RELEASE_LOCK(&gc_threads[i]->wake_mutex); @@ -1240,6 +1079,7 @@ init_collected_gen (nat g, nat n_threads) stp->n_old_blocks = stp->n_blocks; stp->blocks = NULL; stp->n_blocks = 0; + stp->n_words = 0; // we don't have any to-be-scavenged blocks yet stp->todos = NULL; @@ -1303,16 +1143,16 @@ init_collected_gen (nat g, nat n_threads) ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; - ws->scan_bd = NULL; - ws->scan = NULL; - ws->todo_large_objects = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + // allocate the first to-space block; extra blocks will be // chained on as necessary. ws->todo_bd = NULL; ws->buffer_todo_bd = NULL; - gc_alloc_todo_block(ws); + alloc_todo_block(ws,0); ws->scavd_list = NULL; ws->n_scavd_blocks = 0; @@ -1343,11 +1183,14 @@ init_uncollected_gen (nat g, nat threads) for (s = 0; s < generations[g].n_steps; s++) { ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; - stp = ws->stp; + stp = ws->step; ws->buffer_todo_bd = NULL; ws->todo_large_objects = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + ws->scavd_list = NULL; ws->n_scavd_blocks = 0; @@ -1360,23 +1203,15 @@ init_uncollected_gen (nat g, nat threads) ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; stp->blocks = stp->blocks->link; stp->n_blocks -= 1; + stp->n_words -= ws->todo_bd->free - ws->todo_bd->start; ws->todo_bd->link = NULL; - - // this block is also the scan block; we must scan - // from the current end point. - ws->scan_bd = ws->todo_bd; - ws->scan = ws->scan_bd->free; - - // subtract the contents of this block from the stats, - // because we'll count the whole block later. - copied -= ws->scan_bd->free - ws->scan_bd->start; + // we must scan from the current end point. + ws->todo_bd->u.scan = ws->todo_bd->free; } else { - ws->scan_bd = NULL; - ws->scan = NULL; ws->todo_bd = NULL; - gc_alloc_todo_block(ws); + alloc_todo_block(ws,0); } } } @@ -1403,25 +1238,37 @@ init_gc_thread (gc_thread *t) { t->static_objects = END_OF_STATIC_LIST; t->scavenged_static_objects = END_OF_STATIC_LIST; + t->scan_bd = NULL; t->evac_step = 0; t->failed_to_evac = rtsFalse; t->eager_promotion = rtsTrue; t->thunk_selector_depth = 0; t->copied = 0; + t->scanned = 0; t->any_work = 0; t->no_work = 0; t->scav_find_work = 0; - } /* ----------------------------------------------------------------------------- - Function we pass to GetRoots to evacuate roots. + Function we pass to evacuate roots. -------------------------------------------------------------------------- */ static void -mark_root(StgClosure **root) +mark_root(void *user, StgClosure **root) { - evacuate(root); + // we stole a register for gct, but this function is called from + // *outside* the GC where the register variable is not in effect, + // so we need to save and restore it here. NB. only call + // mark_root() from the main GC thread, otherwise gct will be + // incorrect. + gc_thread *saved_gct; + saved_gct = gct; + gct = user; + + evacuate(root); + + gct = saved_gct; } /* ----------------------------------------------------------------------------- @@ -1442,42 +1289,6 @@ zero_static_object_list(StgClosure* first_static) } } -/* ----------------------------------------------------------------------------- - Reverting CAFs - -------------------------------------------------------------------------- */ - -void -revertCAFs( void ) -{ - StgIndStatic *c; - - for (c = (StgIndStatic *)revertible_caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - SET_INFO(c, c->saved_info); - c->saved_info = NULL; - // could, but not necessary: c->static_link = NULL; - } - revertible_caf_list = NULL; -} - -void -markCAFs( evac_fn evac ) -{ - StgIndStatic *c; - - for (c = (StgIndStatic *)caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - evac(&c->indirectee); - } - for (c = (StgIndStatic *)revertible_caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - evac(&c->indirectee); - } -} - /* ---------------------------------------------------------------------------- Update the pointers from the task list @@ -1531,7 +1342,7 @@ resize_generations (void) nat gens = RtsFlags.GcFlags.generations; // live in the oldest generations - live = oldest_gen->steps[0].n_blocks + + live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+ oldest_gen->steps[0].n_large_blocks; // default max size for all generations except zero @@ -1620,7 +1431,7 @@ resize_nursery (void) * performance we get from 3L bytes, reducing to the same * performance at 2L bytes. */ - blocks = g0s0->n_old_blocks; + blocks = g0s0->n_blocks; if ( RtsFlags.GcFlags.maxHeapSize != 0 && blocks * RtsFlags.GcFlags.oldGenFactor * 2 >