X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=c9908359b84216401f2ed1b6ce54622099431257;hb=bef3da1e26639303fccbf26c312d2833eedb486e;hp=ae6fc998dac6191f08a895cd59e3f640703e003e;hpb=7408b39235bccdcde48df2a73337ff976fbc09b7;p=ghc-hetmet.git

diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index ae6fc99..c990835 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -221,7 +221,7 @@ GarbageCollect (rtsBool force_major_gc,
   /* Approximate how much we allocated.
    * Todo: only when generating stats? */
-  allocated = calcAllocated();
+  allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
 
   /* Figure out which generation to collect */
 
@@ -327,27 +327,6 @@ SET_GCT(gc_threads[0]);
   inc_running();
   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 
-  // Mutable lists from each generation > N
-  // we want to *scavenge* these roots, not evacuate them: they're not
-  // going to move in this GC.
-  // Also do them in reverse generation order, for the usual reason:
-  // namely to reduce the likelihood of spurious old->new pointers.
-  //
-  for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
-#if defined(THREADED_RTS)
-      if (n_gc_threads > 1) {
-          scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
-      } else {
-          scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]);
-      }
-#else
-      scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
-#endif
-      freeChain_sync(generations[g].saved_mut_list);
-      generations[g].saved_mut_list = NULL;
-
-  }
-
   // scavenge the capability-private mutable lists.  This isn't part
   // of markSomeCapabilities() because markSomeCapabilities() can only
   // call back into the GC via mark_root() (due to the gct register
   // variable).
@@ -365,11 +344,11 @@ SET_GCT(gc_threads[0]);
   }
 
   // follow roots from the CAF list (used by GHCi)
-  gct->evac_gen = 0;
+  gct->evac_gen_no = 0;
   markCAFs(mark_root, gct);
 
   // follow all the roots that the application knows about.
-  gct->evac_gen = 0;
+  gct->evac_gen_no = 0;
   markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
                        rtsTrue/*prune sparks*/);
 
@@ -395,13 +374,6 @@ SET_GCT(gc_threads[0]);
       // The other threads are now stopped.  We might recurse back to
       // here, but from now on this is the only thread.
 
-      // if any blackholes are alive, make the threads that wait on
-      // them alive too.
-      if (traverseBlackholeQueue()) {
-          inc_running();
-          continue;
-      }
-
       // must be last...  invariant is that everything is fully
       // scavenged at this point.
       if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
@@ -418,6 +390,16 @@ SET_GCT(gc_threads[0]);
   // Now see which stable names are still alive.
   gcStablePtrTable();
 
+#ifdef THREADED_RTS
+  if (n_gc_threads == 1) {
+      for (n = 0; n < n_capabilities; n++) {
+          pruneSparkQueue(&capabilities[n]);
+      }
+  } else {
+      pruneSparkQueue(&capabilities[gct->thread_index]);
+  }
+#endif
+
 #ifdef PROFILING
   // We call processHeapClosureForDead() on every closure destroyed during
   // the current garbage collection, so we invoke LdvCensusForDead().
@@ -554,14 +536,8 @@ SET_GCT(gc_threads[0]);
       // stats. Every mutable list is copied during every GC.
      if (g > 0) {
          nat mut_list_size = 0;
-         for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
-             mut_list_size += bd->free - bd->start;
-         }
          for (n = 0; n < n_capabilities; n++) {
-             for (bd = capabilities[n].mut_lists[g];
-                  bd != NULL; bd = bd->link) {
-                 mut_list_size += bd->free - bd->start;
-             }
+             mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
          }
          copied += mut_list_size;
@@ -645,7 +621,7 @@ SET_GCT(gc_threads[0]);
         freeChain(gen->large_objects);
         gen->large_objects = gen->scavenged_large_objects;
         gen->n_large_blocks = gen->n_scavenged_large_blocks;
-        gen->n_new_large_blocks = 0;
+        gen->n_new_large_words = 0;
         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
     }
     else // for generations > N
@@ -671,10 +647,6 @@ SET_GCT(gc_threads[0]);
   // Calculate the amount of live data for stats.
   live = calcLiveWords();
-  // Free the small objects allocated via allocate(), since this will
-  // all have been copied into G0S1 now.
-  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
-
   // Start a new pinned_object_block
   for (n = 0; n < n_capabilities; n++) {
       capabilities[n].pinned_object_block = NULL;
   }
@@ -696,9 +668,14 @@ SET_GCT(gc_threads[0]);
       }
   }
 
+  // Reset the nursery: make the blocks empty
+  allocated += clearNurseries();
+
   resize_nursery();
 
-  // mark the garbage collected CAFs as dead
+  resetNurseries();
+
+  // mark the garbage collected CAFs as dead
 #if 0 && defined(DEBUG) // doesn't work at the moment
   if (major_gc) { gcCAFs(); }
 #endif
@@ -721,15 +698,7 @@ SET_GCT(gc_threads[0]);
       }
   }
 
-  // Reset the nursery
-  resetNurseries();
-
-  // start any pending finalizers
-  RELEASE_SM_LOCK;
-  scheduleFinalizers(cap, old_weak_ptr_list);
-  ACQUIRE_SM_LOCK;
-
-  // send exceptions to any threads which were about to die
+  // send exceptions to any threads which were about to die
   RELEASE_SM_LOCK;
   resurrectThreads(resurrected_threads);
   ACQUIRE_SM_LOCK;
@@ -737,6 +706,30 @@ SET_GCT(gc_threads[0]);
   // Update the stable pointer hash table.
   updateStablePtrTable(major_gc);
 
+  // unlock the StablePtr table.  Must be before scheduleFinalizers(),
+  // because a finalizer may call hs_free_fun_ptr() or
+  // hs_free_stable_ptr(), both of which access the StablePtr table.
+  stablePtrPostGC();
+
+  // Start any pending finalizers.  Must be after
+  // updateStablePtrTable() and stablePtrPostGC() (see #4221).
+  RELEASE_SM_LOCK;
+  scheduleFinalizers(cap, old_weak_ptr_list);
+  ACQUIRE_SM_LOCK;
+
+  if (major_gc) {
+      nat need, got;
+      need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
+      got = mblocks_allocated;
+      /* If the amount of data remains constant, next major GC we'll
+         require (F+1)*need. We leave (F+2)*need in order to reduce
+         repeated deallocation and reallocation. */
+      need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
+      if (got > need) {
+          returnMemoryToOS(got - need);
+      }
+  }
+
   // check sanity after GC
   IF_DEBUG(sanity, checkSanity(rtsTrue));
 
@@ -768,9 +761,6 @@ SET_GCT(gc_threads[0]);
   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 
-  // unlock the StablePtr table
-  stablePtrPostGC();
-
   // Guess which generation we'll collect *next* time
   initialise_N(force_major_gc);
 
@@ -1072,13 +1062,23 @@ gcWorkerThread (Capability *cap)
 #endif
 
     // Every thread evacuates some roots.
-    gct->evac_gen = 0;
+    gct->evac_gen_no = 0;
     markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
                          rtsTrue/*prune sparks*/);
     scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
     scavenge_until_all_done();
 
+#ifdef THREADED_RTS
+    // Now that the whole heap is marked, we discard any sparks that
+    // were found to be unreachable.  The main GC thread is currently
+    // marking heap reachable via weak pointers, so it is
+    // non-deterministic whether a spark will be retained if it is
+    // only reachable via weak pointers.  To fix this problem would
+    // require another GC barrier, which is too high a price.
+    pruneSparkQueue(cap);
+#endif
+
 #ifdef USE_PAPI
     // count events in this thread towards the GC totals
     papi_thread_stop_gc1_count(gct->papi_events);
@@ -1102,8 +1102,8 @@ gcWorkerThread (Capability *cap)
 void
 waitForGcThreads (Capability *cap USED_IF_THREADS)
 {
-    nat n_threads = RtsFlags.ParFlags.nNodes;
-    nat me = cap->no;
+    const nat n_threads = RtsFlags.ParFlags.nNodes;
+    const nat me = cap->no;
     nat i, j;
     rtsBool retry = rtsTrue;
 
@@ -1177,8 +1177,8 @@ shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
 void
 releaseGCThreads (Capability *cap USED_IF_THREADS)
 {
-    nat n_threads = RtsFlags.ParFlags.nNodes;
-    nat me = cap->no;
+    const nat n_threads = RtsFlags.ParFlags.nNodes;
+    const nat me = cap->no;
     nat i;
     for (i=0; i < n_threads; i++) {
         if (i == me) continue;
@@ -1208,9 +1208,7 @@ init_collected_gen (nat g, nat n_threads)
     // list always has at least one block; this means we can avoid a
     // check for NULL in recordMutable().
     if (g != 0) {
-        freeChain(generations[g].mut_list);
-        generations[g].mut_list = allocBlock();
-        for (i = 0; i < n_capabilities; i++) {
+        for (i = 0; i < n_capabilities; i++) {
             freeChain(capabilities[i].mut_lists[g]);
             capabilities[i].mut_lists[g] = allocBlock();
         }
@@ -1280,6 +1278,10 @@ init_collected_gen (nat g, nat n_threads)
                 if (!(bd->flags & BF_FRAGMENTED)) {
                     bd->flags |= BF_MARKED;
                 }
+
+                // BF_SWEPT should be marked only for blocks that are being
+                // collected in sweep()
+                bd->flags &= ~BF_SWEPT;
             }
         }
     }
@@ -1325,8 +1327,6 @@ init_uncollected_gen (nat g, nat threads)
     // save the current mutable lists for this generation, and
     // allocate a fresh block for each one.  We'll traverse these
     // mutable lists as roots early on in the GC.
-    generations[g].saved_mut_list = generations[g].mut_list;
-    generations[g].mut_list = allocBlock();
     for (n = 0; n < n_capabilities; n++) {
         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
         capabilities[n].mut_lists[g] = allocBlock();
     }
@@ -1399,7 +1399,7 @@ init_gc_thread (gc_thread *t)
     t->scavenged_static_objects = END_OF_STATIC_LIST;
     t->scan_bd = NULL;
     t->mut_lists = capabilities[t->thread_index].mut_lists;
-    t->evac_gen = 0;
+    t->evac_gen_no = 0;
     t->failed_to_evac = rtsFalse;
     t->eager_promotion = rtsTrue;
     t->thunk_selector_depth = 0;
@@ -1465,8 +1465,8 @@ resize_generations (void)
     if (major_gc && RtsFlags.GcFlags.generations > 1) {
         nat live, size, min_alloc, words;
-        nat max = RtsFlags.GcFlags.maxHeapSize;
-        nat gens = RtsFlags.GcFlags.generations;
+        const nat max = RtsFlags.GcFlags.maxHeapSize;
+        const nat gens = RtsFlags.GcFlags.generations;
 
         // live in the oldest generations
         if (oldest_gen->live_estimate != 0) {
@@ -1491,11 +1491,10 @@ resize_generations (void)
         // Auto-enable compaction when the residency reaches a
        // certain percentage of the maximum heap size (default: 30%).
-        if (RtsFlags.GcFlags.generations > 1 &&
-            (RtsFlags.GcFlags.compact ||
-             (max > 0 &&
-              oldest_gen->n_blocks >
-              (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
+        if (RtsFlags.GcFlags.compact ||
+            (max > 0 &&
+             oldest_gen->n_blocks >
+             (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
             oldest_gen->mark = 1;
             oldest_gen->compact = 1;
             // debugBelch("compaction: on\n", live);
@@ -1555,7 +1554,7 @@ resize_generations (void)
 static void
 resize_nursery (void)
 {
-    lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
+    const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
 
     if (RtsFlags.GcFlags.generations == 1) {
        // Two-space collector:
@@ -1615,7 +1614,7 @@ resize_nursery (void)
         if (RtsFlags.GcFlags.heapSizeSuggestion) {
             long blocks;
-            nat needed = calcNeeded(); // approx blocks needed at next GC
+            const nat needed = calcNeeded(); // approx blocks needed at next GC
 
             /* Guess how much will be live in generation 0 step 0 next time.
              * A good approximation is obtained by finding the
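The hunk at +706 above adds a simple retention policy after a major GC: keep (F+2) times the number of megablocks currently needed, where F is RtsFlags.GcFlags.oldGenFactor (the -F RTS flag), and return anything beyond that to the OS via returnMemoryToOS(). The following is a minimal standalone sketch of just that arithmetic, not GHC code; the example values stand in for BLOCKS_TO_MBLOCKS(n_alloc_blocks), mblocks_allocated, and the -F setting.

    #include <stdio.h>

    int main(void)
    {
        unsigned int need = 100;          /* megablocks needed right now (assumed) */
        unsigned int got  = 600;          /* megablocks currently allocated (assumed) */
        unsigned int old_gen_factor = 2;  /* default -F value */

        /* Keep (F+2)*need megablocks to avoid repeatedly freeing and
         * re-allocating memory; anything beyond that could be returned. */
        need = (old_gen_factor + 2) * need;
        if (got > need) {
            printf("would return %u megablocks to the OS\n", got - need);
        } else {
            printf("would keep everything\n");
        }
        return 0;
    }

With these example numbers the target is 400 megablocks, so 200 of the 600 allocated would be handed back; a smaller surplus would simply be retained for the next major GC.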