X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=6b7dc29f554c4a6260e4208158c09e28c11ef0ad;hb=c17eee4d8d25dafa1f8207e6c04ad2f9192b7a01;hp=e44a310f452c46caf026c7f686b281b69a6f6086;hpb=1aaac3473d8fce705610e4a6437283f7401a5583;p=ghc-hetmet.git

diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index e44a310..6b7dc29 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -11,35 +11,32 @@
  *
  * ---------------------------------------------------------------------------*/
 
-// #include "PosixSource.h"
+#include "PosixSource.h"
 #include "Rts.h"
-#include "RtsFlags.h"
+#include "HsFFI.h"
+
+#include "Storage.h"
 #include "RtsUtils.h"
 #include "Apply.h"
-#include "OSThreads.h"
-#include "LdvProfile.h"
 #include "Updates.h"
 #include "Stats.h"
 #include "Schedule.h"
 #include "Sanity.h"
 #include "BlockAlloc.h"
-#include "MBlock.h"
 #include "ProfHeap.h"
-#include "SchedAPI.h"
 #include "Weak.h"
 #include "Prelude.h"
-#include "ParTicky.h"  // ToDo: move into Rts.h
 #include "RtsSignals.h"
 #include "STM.h"
-#include "HsFFI.h"
-#include "Linker.h"
 #if defined(RTS_GTK_FRONTPANEL)
 #include "FrontPanel.h"
 #endif
 #include "Trace.h"
 #include "RetainerProfile.h"
+#include "LdvProfile.h"
 #include "RaiseAsync.h"
 #include "Papi.h"
+#include "Stable.h"
 
 #include "GC.h"
 #include "GCThread.h"
@@ -47,6 +44,7 @@
 #include "Evac.h"
 #include "Scav.h"
 #include "GCUtils.h"
+#include "MarkStack.h"
 #include "MarkWeak.h"
 #include "Sparks.h"
 #include "Sweep.h"
@@ -116,7 +114,10 @@ nat mutlist_MUTVARS,
 
 /* Thread-local data for each GC thread
  */
 gc_thread **gc_threads = NULL;
-// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
+
+#if !defined(THREADED_RTS)
+StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)];
+#endif
 
 // Number of threads running in *this* GC.  Affects how many
 // step->todos[] lists we have to look in to find work.
@@ -125,6 +126,8 @@ nat n_gc_threads;
 
 // For stats:
 long copied;        // *words* copied & scavenged during this GC
 
+rtsBool work_stealing;
+
 DECLARE_GCT
 
 /* -----------------------------------------------------------------------------
@@ -142,32 +145,22 @@ static void resize_generations (void);
 static void resize_nursery (void);
 static void start_gc_threads (void);
 static void scavenge_until_all_done (void);
-static nat inc_running (void);
-static nat dec_running (void);
+static StgWord inc_running (void);
+static StgWord dec_running (void);
 static void wakeup_gc_threads (nat n_threads, nat me);
 static void shutdown_gc_threads (nat n_threads, nat me);
-static void continue_gc_threads (nat n_threads, nat me);
 
 #if 0 && defined(DEBUG)
 static void gcCAFs (void);
 #endif
 
 /* -----------------------------------------------------------------------------
-   The mark bitmap & stack.
+   The mark stack.
   -------------------------------------------------------------------------- */
 
-#define MARK_STACK_BLOCKS 4
-
-bdescr *mark_stack_bdescr;
-StgPtr *mark_stack;
-StgPtr *mark_sp;
-StgPtr *mark_splim;
-
-// Flag and pointers used for falling back to a linear scan when the
-// mark stack overflows.
-rtsBool mark_stack_overflowed;
-bdescr *oldgen_scan_bd;
-StgPtr oldgen_scan;
+bdescr *mark_stack_top_bd; // topmost block in the mark stack
+bdescr *mark_stack_bd;     // current block in the mark stack
+StgPtr mark_sp;            // pointer to the next unallocated mark stack entry
 
 /* -----------------------------------------------------------------------------
    GarbageCollect: the main entry point to the garbage collector.
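
The last hunk above replaces the fixed-size mark stack (a single block group sized once at GC start, with an overflow flag that forced a fallback linear scan of the old generation) with an unbounded chain of blocks threaded through bd->link and bd->u.back. The sketch below shows how push and pop can work on such a block-chained stack; it reuses allocBlock(), BLOCK_SIZE_W and the bdescr fields named in the diff, but the helper names push_mark_stack/pop_mark_stack and the NULL-on-empty convention are illustrative assumptions, not necessarily the exact code in MarkStack.h.

/* Illustrative sketch only: push/pop for a block-chained mark stack.
 * Globals follow the diff (mark_stack_bd, mark_sp); the helper names
 * and the empty-stack convention are assumptions. */
static void
push_mark_stack (StgPtr p)
{
    bdescr *bd;

    if (mark_sp == mark_stack_bd->start + BLOCK_SIZE_W) {
        // current block is full: move to the next block, allocating
        // one and chaining it on if we have not already done so
        if (mark_stack_bd->link == NULL) {
            bd = allocBlock();
            bd->link = NULL;
            bd->u.back = mark_stack_bd;
            mark_stack_bd->link = bd;
        }
        mark_stack_bd = mark_stack_bd->link;
        mark_sp = mark_stack_bd->start;
    }
    *mark_sp++ = (StgWord)p;
}

static StgPtr
pop_mark_stack (void)
{
    if (mark_sp == mark_stack_bd->start) {
        if (mark_stack_bd->u.back == NULL) {
            return NULL;  // whole stack is empty
        }
        // current block is exhausted: step back to the previous
        // (full) block in the chain
        mark_stack_bd = mark_stack_bd->u.back;
        mark_sp = mark_stack_bd->start + BLOCK_SIZE_W;
    }
    return (StgPtr)*--mark_sp;
}

Because the chain is never shrunk during a collection, the blocks are recovered in one go at the end of GC with freeChain(mark_stack_top_bd), as a later hunk in GarbageCollect shows.
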
@@ -178,7 +171,7 @@ StgPtr oldgen_scan;
 void
 GarbageCollect (rtsBool force_major_gc, 
                 nat gc_type USED_IF_THREADS,
-                Capability *cap USED_IF_THREADS)
+                Capability *cap)
 {
   bdescr *bd;
   step *stp;
@@ -211,6 +204,9 @@ GarbageCollect (rtsBool force_major_gc,
   // tell the STM to discard any cached closures it's hoping to re-use
   stmPreGCHook();
 
+  // lock the StablePtr table
+  stablePtrPreGC();
+
 #ifdef DEBUG
   mutlist_MUTVARS = 0;
   mutlist_MUTARRS = 0;
@@ -232,6 +228,20 @@ GarbageCollect (rtsBool force_major_gc,
    */
   n = initialise_N(force_major_gc);
 
+#if defined(THREADED_RTS)
+  work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
+      N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
+  // It's not always a good idea to do load balancing in parallel
+  // GC.  In particular, for a parallel program we don't want to
+  // lose locality by moving cached data into another CPU's cache
+  // (this effect can be quite significant).
+  //
+  // We could have a more complex way to determine whether to do
+  // work stealing or not, e.g. it might be a good idea to do it
+  // if the heap is big.  For now, we just turn it on or off with
+  // a flag.
+#endif
+
   /* Start threads, so they can be spinning up while we finish initialisation.
    */
   start_gc_threads();
@@ -250,7 +260,7 @@ GarbageCollect (rtsBool force_major_gc,
   n_gc_threads = 1;
 #endif
 
-  trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
+  debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
         N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
 
 #ifdef RTS_GTK_FRONTPANEL
@@ -261,12 +271,11 @@ GarbageCollect (rtsBool force_major_gc,
 
 #ifdef DEBUG
   // check for memory leaks if DEBUG is on
-  memInventory(traceClass(DEBUG_gc));
+  memInventory(DEBUG_gc);
 #endif
 
-  // check stack sanity *before* GC
-  IF_DEBUG(sanity, checkFreeListSanity());
-  IF_DEBUG(sanity, checkMutableLists(rtsTrue));
+  // check sanity *before* GC
+  IF_DEBUG(sanity, checkSanity(rtsTrue));
 
   // Initialise all our gc_thread structures
   for (t = 0; t < n_gc_threads; t++) {
@@ -286,26 +295,26 @@ GarbageCollect (rtsBool force_major_gc,
 
   /* Allocate a mark stack if we're doing a major collection.
   */
  if (major_gc && oldest_gen->steps[0].mark) {
-      nat mark_stack_blocks;
-      mark_stack_blocks = stg_max(MARK_STACK_BLOCKS,
-                                  oldest_gen->steps[0].n_old_blocks / 100);
-      mark_stack_bdescr = allocGroup(mark_stack_blocks);
-      mark_stack = (StgPtr *)mark_stack_bdescr->start;
-      mark_sp = mark_stack;
-      mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W);
+      mark_stack_bd = allocBlock();
+      mark_stack_top_bd = mark_stack_bd;
+      mark_stack_bd->link = NULL;
+      mark_stack_bd->u.back = NULL;
+      mark_sp = mark_stack_bd->start;
  } else {
-      mark_stack_bdescr = NULL;
+      mark_stack_bd = NULL;
+      mark_stack_top_bd = NULL;
+      mark_sp = NULL;
  }
 
  // this is the main thread
 #ifdef THREADED_RTS
  if (n_gc_threads == 1) {
-      gct = gc_threads[0];
+      SET_GCT(gc_threads[0]);
  } else {
-      gct = gc_threads[cap->no];
+      SET_GCT(gc_threads[cap->no]);
  }
 #else
-  gct = gc_threads[0];
+  SET_GCT(gc_threads[0]);
 #endif
 
 /* -----------------------------------------------------------------------
@@ -415,9 +424,9 @@ GarbageCollect (rtsBool force_major_gc,
   // g0s0->old_blocks is the old nursery
   // g0s0->blocks is to-space from the previous GC
   if (RtsFlags.GcFlags.generations == 1) {
-      if (g0s0->blocks != NULL) {
-          freeChain(g0s0->blocks);
-          g0s0->blocks = NULL;
+      if (g0->steps[0].blocks != NULL) {
+          freeChain(g0->steps[0].blocks);
+          g0->steps[0].blocks = NULL;
       }
   }
 
@@ -520,12 +529,12 @@ GarbageCollect (rtsBool force_major_gc,
       nat i;
       for (i=0; i < n_gc_threads; i++) {
           if (n_gc_threads > 1) {
-              trace(TRACE_gc,"thread %d:", i);
-              trace(TRACE_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
-              trace(TRACE_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
-              trace(TRACE_gc," any_work %ld", gc_threads[i]->any_work);
-              trace(TRACE_gc," no_work %ld", gc_threads[i]->no_work);
-              trace(TRACE_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
+              debugTrace(DEBUG_gc,"thread %d:", i);
+              debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
+              debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
+              debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
+              debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
+              debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
           }
           copied += gc_threads[i]->copied;
           max_copied = stg_max(gc_threads[i]->copied, max_copied);
@@ -636,18 +645,13 @@ GarbageCollect (rtsBool force_major_gc,
 
         /* LARGE OBJECTS. The current live large objects are chained on
          * scavenged_large, having been moved during garbage
-         * collection from large_objects. Any objects left on
+         * collection from large_objects. Any objects left on the
          * large_objects list are therefore dead, so we free them here.
          */
-        for (bd = stp->large_objects; bd != NULL; bd = next) {
-          next = bd->link;
-          freeGroup(bd);
-          bd = next;
-        }
-
+        freeChain(stp->large_objects);
         stp->large_objects = stp->scavenged_large_objects;
         stp->n_large_blocks = stp->n_scavenged_large_blocks;
-
+        ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
       }
       else // for older generations...
       {
@@ -662,6 +666,7 @@ GarbageCollect (rtsBool force_major_gc,
 
         // add the new blocks we promoted during this GC
         stp->n_large_blocks += stp->n_scavenged_large_blocks;
+        ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
       }
     }
   }
@@ -675,22 +680,25 @@ GarbageCollect (rtsBool force_major_gc,
 
   // Free the small objects allocated via allocate(), since this will
   // all have been copied into G0S1 now.
   if (RtsFlags.GcFlags.generations > 1) {
-      if (g0s0->blocks != NULL) {
-          freeChain(g0s0->blocks);
-          g0s0->blocks = NULL;
+      if (g0->steps[0].blocks != NULL) {
+          freeChain(g0->steps[0].blocks);
+          g0->steps[0].blocks = NULL;
       }
-      g0s0->n_blocks = 0;
-      g0s0->n_words = 0;
+      g0->steps[0].n_blocks = 0;
+      g0->steps[0].n_words = 0;
   }
 
-  alloc_blocks = 0;
   alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
 
   // Start a new pinned_object_block
-  pinned_object_block = NULL;
+  for (n = 0; n < n_capabilities; n++) {
+      capabilities[n].pinned_object_block = NULL;
+  }
 
   // Free the mark stack.
-  if (mark_stack_bdescr != NULL) {
-      freeGroup(mark_stack_bdescr);
+  if (mark_stack_top_bd != NULL) {
+      debugTrace(DEBUG_gc, "mark stack: %d blocks",
+                 countBlocks(mark_stack_top_bd));
+      freeChain(mark_stack_top_bd);
   }
 
   // Free any bitmaps.
@@ -734,7 +742,7 @@ GarbageCollect (rtsBool force_major_gc,
 
   // start any pending finalizers
   RELEASE_SM_LOCK;
-  scheduleFinalizers(last_free_capability, old_weak_ptr_list);
+  scheduleFinalizers(cap, old_weak_ptr_list);
   ACQUIRE_SM_LOCK;
 
   // send exceptions to any threads which were about to die
@@ -746,11 +754,11 @@ GarbageCollect (rtsBool force_major_gc,
   // Update the stable pointer hash table.
   updateStablePtrTable(major_gc);
 
-  // check sanity after GC
-  IF_DEBUG(sanity, checkSanity());
+  // check sanity after GC
+  IF_DEBUG(sanity, checkSanity(rtsTrue));
 
   // extra GC trace info
-  if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
+  IF_DEBUG(gc, statDescribeGens());
 
 #ifdef DEBUG
   // symbol-table based profiling
@@ -764,7 +772,7 @@ GarbageCollect (rtsBool force_major_gc,
 
 #ifdef DEBUG
   // check for memory leaks if DEBUG is on
-  memInventory(traceClass(DEBUG_gc));
+  memInventory(DEBUG_gc);
 #endif
 
 #ifdef RTS_GTK_FRONTPANEL
@@ -777,6 +785,9 @@ GarbageCollect (rtsBool force_major_gc,
   slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
   stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
 
+  // unlock the StablePtr table
+  stablePtrPostGC();
+
   // Guess which generation we'll collect *next* time
   initialise_N(force_major_gc);
 
@@ -787,11 +798,9 @@ GarbageCollect (rtsBool force_major_gc,
   }
 #endif
 
-  continue_gc_threads(n_gc_threads, gct->thread_index);
-
   RELEASE_SM_LOCK;
 
-  gct = saved_gct;
+  SET_GCT(saved_gct);
 }
 
 /* -----------------------------------------------------------------------------
@@ -845,15 +854,11 @@ initialise_N (rtsBool force_major_gc)
 #define GC_THREAD_RUNNING 2
 #define GC_THREAD_WAITING_TO_CONTINUE 3
 
-static gc_thread *
-alloc_gc_thread (int n)
+static void
+new_gc_thread (nat n, gc_thread *t)
 {
     nat s;
     step_workspace *ws;
-    gc_thread *t;
-
-    t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
-                       "alloc_gc_thread");
 
 #ifdef THREADED_RTS
     t->id = 0;
@@ -879,10 +884,12 @@ alloc_gc_thread (int n)
         ws = &t->steps[s];
         ws->step = &all_steps[s];
         ASSERT(s == ws->step->abs_no);
-        ws->gct = t;
+        ws->my_gct = t;
 
         ws->todo_bd = NULL;
-        ws->buffer_todo_bd = NULL;
+        ws->todo_q = newWSDeque(128);
+        ws->todo_overflow = NULL;
+        ws->n_todo_overflow = 0;
 
         ws->part_list = NULL;
         ws->n_part_blocks = 0;
 
         ws->scavd_list = NULL;
         ws->n_scavd_blocks = 0;
     }
-
-    return t;
 }
 
 
@@ -906,14 +911,43 @@ initGcThreads (void)
                                      "alloc_gc_threads");
 
         for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
-            gc_threads[i] = alloc_gc_thread(i);
+            gc_threads[i] =
+                stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
+                               "alloc_gc_threads");
+
+            new_gc_thread(i, gc_threads[i]);
        }
 #else
-        gc_threads = stgMallocBytes (sizeof(gc_thread*),
"alloc_gc_threads"); + gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads"); + gc_threads[0] = gct; + new_gc_thread(0,gc_threads[0]); +#endif + } +} - gc_threads[0] = alloc_gc_thread(0); +void +freeGcThreads (void) +{ + nat s; + if (gc_threads != NULL) { +#if defined(THREADED_RTS) + nat i; + for (i = 0; i < n_capabilities; i++) { + for (s = 0; s < total_steps; s++) + { + freeWSDeque(gc_threads[i]->steps[s].todo_q); + } + stgFree (gc_threads[i]); + } + stgFree (gc_threads); +#else + for (s = 0; s < total_steps; s++) + { + freeWSDeque(gc_threads[0]->steps[s].todo_q); + } + stgFree (gc_threads); #endif + gc_threads = NULL; } } @@ -921,32 +955,22 @@ initGcThreads (void) Start GC threads ------------------------------------------------------------------------- */ -static nat gc_running_threads; +static volatile StgWord gc_running_threads; -#if defined(THREADED_RTS) -static Mutex gc_running_mutex; -#endif - -static nat +static StgWord inc_running (void) { - nat n_running; - ACQUIRE_LOCK(&gc_running_mutex); - n_running = ++gc_running_threads; - RELEASE_LOCK(&gc_running_mutex); - ASSERT(n_running <= n_gc_threads); - return n_running; + StgWord new; + new = atomic_inc(&gc_running_threads); + ASSERT(new <= n_gc_threads); + return new; } -static nat +static StgWord dec_running (void) { - nat n_running; - ACQUIRE_LOCK(&gc_running_mutex); - ASSERT(n_gc_threads != 0); - n_running = --gc_running_threads; - RELEASE_LOCK(&gc_running_mutex); - return n_running; + ASSERT(gc_running_threads != 0); + return atomic_dec(&gc_running_threads); } static rtsBool @@ -960,8 +984,7 @@ any_work (void) write_barrier(); // scavenge objects in compacted generation - if (mark_stack_overflowed || oldgen_scan_bd != NULL || - (mark_stack_bdescr != NULL && !mark_stack_empty())) { + if (mark_stack_bd != NULL && !mark_stack_empty()) { return rtsTrue; } @@ -974,9 +997,24 @@ any_work (void) } ws = &gct->steps[s]; if (ws->todo_large_objects) return rtsTrue; - if (ws->step->todos) return rtsTrue; + if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue; + if (ws->todo_overflow) return rtsTrue; } +#if defined(THREADED_RTS) + if (work_stealing) { + nat n; + // look for work to steal + for (n = 0; n < n_gc_threads; n++) { + if (n == gct->thread_index) continue; + for (s = total_steps-1; s >= 0; s--) { + ws = &gc_threads[n]->steps[s]; + if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue; + } + } + } +#endif + gct->no_work++; return rtsFalse; @@ -987,9 +1025,10 @@ scavenge_until_all_done (void) { nat r; - debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index); loop: + traceEvent(&capabilities[gct->thread_index], EVENT_GC_WORK); + #if defined(THREADED_RTS) if (n_gc_threads > 1) { scavenge_loop(); @@ -1003,23 +1042,23 @@ loop: // scavenge_loop() only exits when there's no work to do r = dec_running(); - debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)", - gct->thread_index, r); + traceEvent(&capabilities[gct->thread_index], EVENT_GC_IDLE); + debugTrace(DEBUG_gc, "%d GC threads still running", r); + while (gc_running_threads != 0) { // usleep(1); - if (any_work()) { - inc_running(); - goto loop; - } - // any_work() does not remove the work from the queue, it - // just checks for the presence of work. If we find any, - // then we increment gc_running_threads and go back to - // scavenge_loop() to perform any pending work. + if (any_work()) { + inc_running(); + goto loop; + } + // any_work() does not remove the work from the queue, it + // just checks for the presence of work. 
+        // then we increment gc_running_threads and go back to
+        // scavenge_loop() to perform any pending work.
     }
 
-    // All threads are now stopped
-    debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
+    traceEvent(&capabilities[gct->thread_index], EVENT_GC_DONE);
 }
 
 #if defined(THREADED_RTS)
@@ -1027,7 +1066,10 @@ loop:
 void
 gcWorkerThread (Capability *cap)
 {
-    cap->in_gc = rtsTrue;
+    gc_thread *saved_gct;
+
+    // necessary if we stole a callee-saves register for gct:
+    saved_gct = gct;
 
     gct = gc_threads[cap->no];
     gct->id = osThreadId();
@@ -1066,14 +1108,17 @@ gcWorkerThread (Capability *cap)
                gct->thread_index);
     ACQUIRE_SPIN_LOCK(&gct->mut_spin);
     debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
+
+    SET_GCT(saved_gct);
 }
 
 #endif
 
+#if defined(THREADED_RTS)
+
 void
 waitForGcThreads (Capability *cap USED_IF_THREADS)
 {
-#if defined(THREADED_RTS)
     nat n_threads = RtsFlags.ParFlags.nNodes;
     nat me = cap->no;
     nat i, j;
@@ -1086,7 +1131,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS)
                 prodCapability(&capabilities[i], cap->running_task);
             }
         }
-        for (j=0; j < 10000000; j++) {
+        for (j=0; j < 10; j++) {
             retry = rtsFalse;
             for (i=0; i < n_threads; i++) {
                 if (i == me) continue;
                 if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
                     prodCapability(&capabilities[i], cap->running_task);
                     retry = rtsTrue;
                }
            }
             if (!retry) break;
+            yieldThread();
        }
     }
-#endif
 }
 
+#endif // THREADED_RTS
+
 static void
 start_gc_threads (void)
 {
 #if defined(THREADED_RTS)
     gc_running_threads = 0;
-    initMutex(&gc_running_mutex);
 #endif
 }
 
@@ -1144,21 +1190,24 @@ shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
 #endif
 }
 
-static void
-continue_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
-{
 #if defined(THREADED_RTS)
+void
+releaseGCThreads (Capability *cap USED_IF_THREADS)
+{
+    nat n_threads = RtsFlags.ParFlags.nNodes;
+    nat me = cap->no;
     nat i;
     for (i=0; i < n_threads; i++) {
         if (i == me) continue;
-        if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) barf("continue_gc_threads");
+        if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
+            barf("releaseGCThreads");
         gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
         ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
         RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
     }
-#endif
 }
+#endif
 
 /* ----------------------------------------------------------------------------
    Initialise a generation that is to be collected
@@ -1184,8 +1233,21 @@ init_collected_gen (nat g, nat n_threads)
        }
     }
 
+    if (g == 0) {
+        for (i = 0; i < n_capabilities; i++) {
+            stp = &nurseries[i];
+            stp->old_threads = stp->threads;
+            stp->threads = END_TSO_QUEUE;
+        }
+    }
+
     for (s = 0; s < generations[g].n_steps; s++) {
 
+        // generation 0, step 0 doesn't need to-space, unless -G1
+        if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
+            continue;
+        }
+
         stp = &generations[g].steps[s];
         ASSERT(stp->gen_no == g);
 
@@ -1194,11 +1256,6 @@ init_collected_gen (nat g, nat n_threads)
         stp->old_threads = stp->threads;
         stp->threads = END_TSO_QUEUE;
 
-        // generation 0, step 0 doesn't need to-space
-        if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
-            continue;
-        }
-
         // deprecate the existing blocks
         stp->old_blocks = stp->blocks;
         stp->n_old_blocks = stp->n_blocks;
@@ -1207,11 +1264,6 @@ init_collected_gen (nat g, nat n_threads)
         stp->n_words = 0;
         stp->live_estimate = 0;
 
-        // we don't have any to-be-scavenged blocks yet
-        stp->todos = NULL;
-        stp->todos_last = NULL;
-        stp->n_todos = 0;
-
         // initialise the large object queues.
         stp->scavenged_large_objects = NULL;
         stp->n_scavenged_large_blocks = 0;
@@ -1284,9 +1336,12 @@ init_collected_gen (nat g, nat n_threads)
             // allocate the first to-space block; extra blocks will be
             // chained on as necessary.
             ws->todo_bd = NULL;
-            ws->buffer_todo_bd = NULL;
+            ASSERT(looksEmptyWSDeque(ws->todo_q));
             alloc_todo_block(ws,0);
 
+            ws->todo_overflow = NULL;
+            ws->n_todo_overflow = 0;
+
             ws->scavd_list = NULL;
             ws->n_scavd_blocks = 0;
        }
@@ -1329,7 +1384,7 @@ init_uncollected_gen (nat g, nat threads)
        for (t = 0; t < threads; t++) {
            ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
 
-           ws->buffer_todo_bd = NULL;
+           ASSERT(looksEmptyWSDeque(ws->todo_q));
            ws->todo_large_objects = NULL;
 
            ws->part_list = NULL;
@@ -1405,7 +1460,7 @@ init_gc_thread (gc_thread *t)
    -------------------------------------------------------------------------- */
 
 static void
-mark_root(void *user, StgClosure **root)
+mark_root(void *user USED_IF_THREADS, StgClosure **root)
 {
     // we stole a register for gct, but this function is called from
     // *outside* the GC where the register variable is not in effect,
     // so we need to save and restore it here.  NB. only call this from
     // a cold path, it is
     // incorrect.
     gc_thread *saved_gct;
     saved_gct = gct;
-    gct = user;
+    SET_GCT(user);
 
     evacuate(root);
 
-    gct = saved_gct;
+    SET_GCT(saved_gct);
 }
 
 /* -----------------------------------------------------------------------------
@@ -1504,6 +1559,10 @@ resize_generations (void)
        size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
                       RtsFlags.GcFlags.minOldGenSize);
 
+       if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
+           RtsFlags.GcFlags.heapSizeSuggestion = size;
+       }
+
        // minimum size for generation zero
        min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
                            RtsFlags.GcFlags.minAllocAreaSize);
@@ -1574,6 +1633,8 @@ resize_generations (void)
 static void
 resize_nursery (void)
 {
+    lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
+
     if (RtsFlags.GcFlags.generations == 1)
     {   // Two-space collector:
        nat blocks;
@@ -1592,7 +1653,7 @@ resize_nursery (void)
         * performance we get from 3L bytes, reducing to the same
         * performance at 2L bytes.
         */
-       blocks = g0s0->n_blocks;
+       blocks = generations[0].steps[0].n_blocks;
 
        if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
             blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
@@ -1616,9 +1677,9 @@ resize_nursery (void)
        else
        {
            blocks *= RtsFlags.GcFlags.oldGenFactor;
-           if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
+           if (blocks < min_nursery)
            {
-               blocks = RtsFlags.GcFlags.minAllocAreaSize;
+               blocks = min_nursery;
            }
        }
        resizeNurseries(blocks);
@@ -1665,8 +1726,8 @@ resize_nursery (void)
                (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
                (100 + (long)g0s0_pcnt_kept);
 
-           if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
-               blocks = RtsFlags.GcFlags.minAllocAreaSize;
+           if (blocks < (long)min_nursery) {
+               blocks = min_nursery;
            }
 
            resizeNurseries((nat)blocks);
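
The inc_running/dec_running hunk earlier in the diff trades the gc_running_mutex for a lock-free counter maintained with the RTS's atomic_inc/atomic_dec: each GC thread bumps the counter when it picks up work and drops it when it goes idle, and idle threads keep polling any_work() until the counter reaches zero. The self-contained sketch below demonstrates the same counter pattern using the GCC/Clang __sync builtins in place of the RTS wrappers (an assumed substitution, so the example compiles outside the RTS); the demo in main() is illustrative only.

/* Standalone sketch of the lock-free running-threads counter; the
 * __sync builtins stand in for the RTS's own atomic_inc/atomic_dec. */
#include <assert.h>
#include <stdio.h>

static volatile unsigned long gc_running_threads;
static const unsigned long n_gc_threads = 4;

static unsigned long
inc_running (void)
{
    unsigned long n = __sync_add_and_fetch(&gc_running_threads, 1);
    assert(n <= n_gc_threads);
    return n;
}

static unsigned long
dec_running (void)
{
    assert(gc_running_threads != 0);
    return __sync_sub_and_fetch(&gc_running_threads, 1);
}

int main (void)
{
    // two workers pick up work, then finish one after the other
    inc_running();
    inc_running();
    printf("running: %lu\n", (unsigned long)gc_running_threads);  // 2
    dec_running();
    printf("running: %lu\n", dec_running());                      // 0
    return 0;
}

The only ordering the counter itself has to provide is atomicity of the increment and decrement; the heavier synchronisation between mutator and GC threads is still supplied by the spin locks and the wakeup protocol elsewhere in this file.
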