X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=38ae1a7996b94b812d38df9f51aa7074a8449731;hb=26f4bfc82f2b2359259578e9c54d476fc2de650f;hp=4d63724ba01009b744db70397be869d17d3edd98;hpb=5d52d9b64c21dcf77849866584744722f8121389;p=ghc-hetmet.git

diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 4d63724..38ae1a7 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -221,7 +221,7 @@ GarbageCollect (rtsBool force_major_gc,
   /* Approximate how much we allocated.
    * Todo: only when generating stats?
    */
-  allocated = calcAllocated();
+  allocated = calcAllocated(rtsFalse/* don't count the nursery yet */);
 
   /* Figure out which generation to collect
   */
@@ -327,27 +327,6 @@ SET_GCT(gc_threads[0]);
   inc_running();
   wakeup_gc_threads(n_gc_threads, gct->thread_index);
 
-  // Mutable lists from each generation > N
-  // we want to *scavenge* these roots, not evacuate them: they're not
-  // going to move in this GC.
-  // Also do them in reverse generation order, for the usual reason:
-  // namely to reduce the likelihood of spurious old->new pointers.
-  //
-  for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
-#if defined(THREADED_RTS)
-      if (n_gc_threads > 1) {
-          scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
-      } else {
-          scavenge_mutable_list1(generations[g].saved_mut_list, &generations[g]);
-      }
-#else
-      scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
-#endif
-      freeChain_sync(generations[g].saved_mut_list);
-      generations[g].saved_mut_list = NULL;
-
-  }
-
   // scavenge the capability-private mutable lists.  This isn't part
   // of markSomeCapabilities() because markSomeCapabilities() can only
   // call back into the GC via mark_root() (due to the gct register
@@ -411,6 +390,16 @@ SET_GCT(gc_threads[0]);
   // Now see which stable names are still alive.
   gcStablePtrTable();
 
+#ifdef THREADED_RTS
+  if (n_gc_threads == 1) {
+      for (n = 0; n < n_capabilities; n++) {
+          pruneSparkQueue(&capabilities[n]);
+      }
+  } else {
+      pruneSparkQueue(&capabilities[gct->thread_index]);
+  }
+#endif
+
 #ifdef PROFILING
   // We call processHeapClosureForDead() on every closure destroyed during
   // the current garbage collection, so we invoke LdvCensusForDead().
@@ -547,14 +536,8 @@ SET_GCT(gc_threads[0]);
       // stats.  Every mutable list is copied during every GC.
       if (g > 0) {
           nat mut_list_size = 0;
-          for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
-              mut_list_size += bd->free - bd->start;
-          }
           for (n = 0; n < n_capabilities; n++) {
-              for (bd = capabilities[n].mut_lists[g];
-                   bd != NULL; bd = bd->link) {
-                  mut_list_size += bd->free - bd->start;
-              }
+              mut_list_size += countOccupied(capabilities[n].mut_lists[g]);
           }
           copied += mut_list_size;
 
@@ -638,7 +621,7 @@ SET_GCT(gc_threads[0]);
         freeChain(gen->large_objects);
         gen->large_objects  = gen->scavenged_large_objects;
         gen->n_large_blocks = gen->n_scavenged_large_blocks;
-        gen->n_new_large_blocks = 0;
+        gen->n_new_large_words = 0;
         ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);
     }
     else // for generations > N
@@ -664,10 +647,6 @@ SET_GCT(gc_threads[0]);
   // Calculate the amount of live data for stats.
   live = calcLiveWords();
 
-  // Free the small objects allocated via allocate(), since this will
-  // all have been copied into G0S1 now.
-  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
-
   // Start a new pinned_object_block
   for (n = 0; n < n_capabilities; n++) {
       capabilities[n].pinned_object_block = NULL;
   }
 
@@ -689,9 +668,14 @@ SET_GCT(gc_threads[0]);
         }
     }
 
+    // Reset the nursery: make the blocks empty
+    allocated += clearNurseries();
+
     resize_nursery();
 
-  // mark the garbage collected CAFs as dead
+    resetNurseries();
+
+  // mark the garbage collected CAFs as dead
 #if 0 && defined(DEBUG) // doesn't work at the moment
   if (major_gc) { gcCAFs(); }
 #endif
@@ -714,15 +698,7 @@ SET_GCT(gc_threads[0]);
       }
   }
 
-  // Reset the nursery
-  resetNurseries();
-
-  // start any pending finalizers
-  RELEASE_SM_LOCK;
-  scheduleFinalizers(cap, old_weak_ptr_list);
-  ACQUIRE_SM_LOCK;
-
-  // send exceptions to any threads which were about to die
+  // send exceptions to any threads which were about to die
   RELEASE_SM_LOCK;
   resurrectThreads(resurrected_threads);
   ACQUIRE_SM_LOCK;
@@ -730,6 +706,30 @@ SET_GCT(gc_threads[0]);
   // Update the stable pointer hash table.
   updateStablePtrTable(major_gc);
 
+  // unlock the StablePtr table.  Must be before scheduleFinalizers(),
+  // because a finalizer may call hs_free_fun_ptr() or
+  // hs_free_stable_ptr(), both of which access the StablePtr table.
+  stablePtrPostGC();
+
+  // Start any pending finalizers.  Must be after
+  // updateStablePtrTable() and stablePtrPostGC() (see #4221).
+  RELEASE_SM_LOCK;
+  scheduleFinalizers(cap, old_weak_ptr_list);
+  ACQUIRE_SM_LOCK;
+
+  if (major_gc) {
+      nat need, got;
+      need = BLOCKS_TO_MBLOCKS(n_alloc_blocks);
+      got = mblocks_allocated;
+      /* If the amount of data remains constant, next major GC we'll
+         require (F+1)*need. We leave (F+2)*need in order to reduce
+         repeated deallocation and reallocation. */
+      need = (RtsFlags.GcFlags.oldGenFactor + 2) * need;
+      if (got > need) {
+          returnMemoryToOS(got - need);
+      }
+  }
+
   // check sanity after GC
   IF_DEBUG(sanity, checkSanity(rtsTrue));
 
@@ -761,9 +761,6 @@ SET_GCT(gc_threads[0]);
   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 
-  // unlock the StablePtr table
-  stablePtrPostGC();
-
   // Guess which generation we'll collect *next* time
   initialise_N(force_major_gc);
 
@@ -1072,6 +1069,16 @@ gcWorkerThread (Capability *cap)
     scavenge_until_all_done();
 
+#ifdef THREADED_RTS
+    // Now that the whole heap is marked, we discard any sparks that
+    // were found to be unreachable.  The main GC thread is currently
+    // marking heap reachable via weak pointers, so it is
+    // non-deterministic whether a spark will be retained if it is
+    // only reachable via weak pointers.  To fix this problem would
+    // require another GC barrier, which is too high a price.
+    pruneSparkQueue(cap);
+#endif
+
 #ifdef USE_PAPI
     // count events in this thread towards the GC totals
     papi_thread_stop_gc1_count(gct->papi_events);
 
@@ -1095,8 +1102,8 @@ gcWorkerThread (Capability *cap)
 void
 waitForGcThreads (Capability *cap USED_IF_THREADS)
 {
-    nat n_threads = RtsFlags.ParFlags.nNodes;
-    nat me = cap->no;
+    const nat n_threads = RtsFlags.ParFlags.nNodes;
+    const nat me = cap->no;
     nat i, j;
     rtsBool retry = rtsTrue;
 
@@ -1170,8 +1177,8 @@ shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
 void
 releaseGCThreads (Capability *cap USED_IF_THREADS)
 {
-    nat n_threads = RtsFlags.ParFlags.nNodes;
-    nat me = cap->no;
+    const nat n_threads = RtsFlags.ParFlags.nNodes;
+    const nat me = cap->no;
     nat i;
     for (i=0; i < n_threads; i++) {
         if (i == me) continue;
@@ -1201,9 +1208,7 @@ init_collected_gen (nat g, nat n_threads)
     // list always has at least one block; this means we can avoid a
     // check for NULL in recordMutable().
     if (g != 0) {
-        freeChain(generations[g].mut_list);
-        generations[g].mut_list = allocBlock();
-        for (i = 0; i < n_capabilities; i++) {
+        for (i = 0; i < n_capabilities; i++) {
             freeChain(capabilities[i].mut_lists[g]);
             capabilities[i].mut_lists[g] = allocBlock();
         }
@@ -1273,6 +1278,10 @@ init_collected_gen (nat g, nat n_threads)
                 if (!(bd->flags & BF_FRAGMENTED)) {
                     bd->flags |= BF_MARKED;
                 }
+
+                // BF_SWEPT should be marked only for blocks that are being
+                // collected in sweep()
+                bd->flags &= ~BF_SWEPT;
             }
         }
     }
@@ -1318,8 +1327,6 @@ init_uncollected_gen (nat g, nat threads)
     // save the current mutable lists for this generation, and
     // allocate a fresh block for each one.  We'll traverse these
    // mutable lists as roots early on in the GC.
-    generations[g].saved_mut_list = generations[g].mut_list;
-    generations[g].mut_list = allocBlock();
     for (n = 0; n < n_capabilities; n++) {
         capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
         capabilities[n].mut_lists[g] = allocBlock();
     }
@@ -1458,8 +1465,8 @@ resize_generations (void)
 
     if (major_gc && RtsFlags.GcFlags.generations > 1) {
         nat live, size, min_alloc, words;
-        nat max  = RtsFlags.GcFlags.maxHeapSize;
-        nat gens = RtsFlags.GcFlags.generations;
+        const nat max  = RtsFlags.GcFlags.maxHeapSize;
+        const nat gens = RtsFlags.GcFlags.generations;
 
         // live in the oldest generations
         if (oldest_gen->live_estimate != 0) {
@@ -1484,11 +1491,10 @@ resize_generations (void)
 
         // Auto-enable compaction when the residency reaches a
        // certain percentage of the maximum heap size (default: 30%).
-        if (RtsFlags.GcFlags.generations > 1 &&
-            (RtsFlags.GcFlags.compact ||
-             (max > 0 &&
-              oldest_gen->n_blocks >
-              (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
+        if (RtsFlags.GcFlags.compact ||
+            (max > 0 &&
+             oldest_gen->n_blocks >
+             (RtsFlags.GcFlags.compactThreshold * max) / 100)) {
             oldest_gen->mark = 1;
             oldest_gen->compact = 1;
 //          debugBelch("compaction: on\n", live);
@@ -1548,7 +1554,7 @@ resize_generations (void)
 static void
 resize_nursery (void)
 {
-    lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
+    const lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
 
     if (RtsFlags.GcFlags.generations == 1)
     {   // Two-space collector:
@@ -1608,7 +1614,7 @@ resize_nursery (void)
 
        if (RtsFlags.GcFlags.heapSizeSuggestion) {
            long blocks;
-           nat needed = calcNeeded(); // approx blocks needed at next GC
+           const nat needed = calcNeeded(); // approx blocks needed at next GC
 
            /* Guess how much will be live in generation 0 step 0 next time.
            * A good approximation is obtained by finding the
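
A note on the memory-return heuristic added in the hunk at old line 730: with the
default old-generation factor F = 2 (RtsFlags.GcFlags.oldGenFactor), the collector
keeps (F+2)*need megablocks and hands anything above that back to the OS. The
following is only a minimal standalone sketch of that arithmetic, with hypothetical
values standing in for BLOCKS_TO_MBLOCKS(n_alloc_blocks) and mblocks_allocated:

    #include <stdio.h>

    int main(void)
    {
        unsigned int oldGenFactor = 2;  /* default -F factor (assumed) */
        unsigned int need = 100;        /* megablocks needed by live data (hypothetical) */
        unsigned int got  = 600;        /* megablocks currently allocated (hypothetical) */

        /* Next major GC will want roughly (F+1)*need; keeping (F+2)*need
         * reduces repeated deallocation and reallocation. */
        need = (oldGenFactor + 2) * need;

        if (got > need)
            printf("return %u megablocks to the OS\n", got - need); /* 600 - 400 = 200 */
        else
            printf("keep everything (got=%u <= need=%u)\n", got, need);
        return 0;
    }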
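
Likewise, the accounting hunk at old line 547 replaces two hand-rolled loops over
block descriptors with countOccupied(). Assuming countOccupied() simply walks a
block chain and totals the words in use (bd->free - bd->start), the old and new
forms compute the same quantity. A simplified sketch of such a helper, using a
stand-in bdescr rather than the RTS definition:

    #include <stddef.h>

    typedef unsigned long StgWord;

    /* Stand-in for the RTS block descriptor; only the fields used here. */
    typedef struct bdescr_ {
        StgWord *start;          /* first word of the block */
        StgWord *free;           /* next unused word */
        struct bdescr_ *link;    /* next block in the chain */
    } bdescr;

    /* Words in use across a chain of blocks -- the sum the old code built
     * inline with `mut_list_size += bd->free - bd->start;`. */
    size_t count_occupied(const bdescr *bd)
    {
        size_t words = 0;
        for (; bd != NULL; bd = bd->link)
            words += (size_t)(bd->free - bd->start);
        return words;
    }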