/* -----------------------------------------------------------------------------
*
- * (c) The GHC Team 1998-2006
+ * (c) The GHC Team 1998-2008
*
* Generational garbage collector
*
*
* ---------------------------------------------------------------------------*/
-// #include "PosixSource.h"
+#include "PosixSource.h"
#include "Rts.h"
-#include "RtsFlags.h"
+#include "HsFFI.h"
+
+#include "Storage.h"
#include "RtsUtils.h"
#include "Apply.h"
-#include "OSThreads.h"
-#include "LdvProfile.h"
#include "Updates.h"
#include "Stats.h"
#include "Schedule.h"
#include "Sanity.h"
#include "BlockAlloc.h"
-#include "MBlock.h"
#include "ProfHeap.h"
-#include "SchedAPI.h"
#include "Weak.h"
#include "Prelude.h"
-#include "ParTicky.h" // ToDo: move into Rts.h
#include "RtsSignals.h"
#include "STM.h"
-#include "HsFFI.h"
-#include "Linker.h"
#if defined(RTS_GTK_FRONTPANEL)
#include "FrontPanel.h"
#endif
#include "Trace.h"
#include "RetainerProfile.h"
+#include "LdvProfile.h"
#include "RaiseAsync.h"
-#include "Sparks.h"
#include "Papi.h"
+#include "Stable.h"
#include "GC.h"
+#include "GCThread.h"
#include "Compact.h"
#include "Evac.h"
#include "Scav.h"
#include "GCUtils.h"
+#include "MarkStack.h"
#include "MarkWeak.h"
#include "Sparks.h"
+#include "Sweep.h"
#include <string.h> // for memset()
#include <unistd.h>
/* Thread-local data for each GC thread
*/
gc_thread **gc_threads = NULL;
-// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
+
+#if !defined(THREADED_RTS)
+StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)];
+#endif
// Number of threads running in *this* GC. Affects how many
// step->todos[] lists we have to look in to find work.
// For stats:
long copied; // *words* copied & scavenged during this GC
-#ifdef THREADED_RTS
-SpinLock recordMutableGen_sync;
-#endif
+rtsBool work_stealing;
+
+DECLARE_GCT
/* -----------------------------------------------------------------------------
Static function declarations
-------------------------------------------------------------------------- */
-static void mark_root (StgClosure **root);
+static void mark_root (void *user, StgClosure **root);
static void zero_static_object_list (StgClosure* first_static);
static nat initialise_N (rtsBool force_major_gc);
-static void alloc_gc_threads (void);
static void init_collected_gen (nat g, nat threads);
static void init_uncollected_gen (nat g, nat threads);
static void init_gc_thread (gc_thread *t);
static void resize_generations (void);
static void resize_nursery (void);
static void start_gc_threads (void);
-static void gc_thread_work (void);
-static nat inc_running (void);
-static nat dec_running (void);
-static void wakeup_gc_threads (nat n_threads);
-static void shutdown_gc_threads (nat n_threads);
+static void scavenge_until_all_done (void);
+static StgWord inc_running (void);
+static StgWord dec_running (void);
+static void wakeup_gc_threads (nat n_threads, nat me);
+static void shutdown_gc_threads (nat n_threads, nat me);
#if 0 && defined(DEBUG)
static void gcCAFs (void);
#endif
/* -----------------------------------------------------------------------------
- The mark bitmap & stack.
+ The mark stack.
-------------------------------------------------------------------------- */
-#define MARK_STACK_BLOCKS 4
-
-bdescr *mark_stack_bdescr;
-StgPtr *mark_stack;
-StgPtr *mark_sp;
-StgPtr *mark_splim;
-
-// Flag and pointers used for falling back to a linear scan when the
-// mark stack overflows.
-rtsBool mark_stack_overflowed;
-bdescr *oldgen_scan_bd;
-StgPtr oldgen_scan;
+bdescr *mark_stack_top_bd; // topmost block in the mark stack
+bdescr *mark_stack_bd; // current block in the mark stack
+StgPtr mark_sp; // pointer to the next unallocated mark stack entry
/* -----------------------------------------------------------------------------
GarbageCollect: the main entry point to the garbage collector.
-------------------------------------------------------------------------- */
void
-GarbageCollect ( rtsBool force_major_gc )
+GarbageCollect (rtsBool force_major_gc,
+ nat gc_type USED_IF_THREADS,
+ Capability *cap)
{
bdescr *bd;
step *stp;
- lnat live, allocated, max_copied, avg_copied;
- lnat oldgen_saved_blocks = 0;
+ lnat live, allocated, max_copied, avg_copied, slop;
gc_thread *saved_gct;
nat g, s, t, n;
}
#endif
+ ASSERT(sizeof(step_workspace) == 16 * sizeof(StgWord));
+ // otherwise adjust the padding in step_workspace.
+
// tell the stats department that we've started a GC
stat_startGC();
// tell the STM to discard any cached closures it's hoping to re-use
stmPreGCHook();
+ // lock the StablePtr table
+ stablePtrPreGC();
+
#ifdef DEBUG
mutlist_MUTVARS = 0;
mutlist_MUTARRS = 0;
*/
n = initialise_N(force_major_gc);
- /* Allocate + initialise the gc_thread structures.
- */
- alloc_gc_threads();
+#if defined(THREADED_RTS)
+ work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
+ N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
+ // It's not always a good idea to do load balancing in parallel
+ // GC. In particular, for a parallel program we don't want to
+ // lose locality by moving cached data into another CPU's cache
+ // (this effect can be quite significant).
+ //
+ // We could have a more complex way to deterimine whether to do
+ // work stealing or not, e.g. it might be a good idea to do it
+ // if the heap is big. For now, we just turn it on or off with
+ // a flag.
+#endif
/* Start threads, so they can be spinning up while we finish initialisation.
*/
start_gc_threads();
+#if defined(THREADED_RTS)
/* How many threads will be participating in this GC?
- * We don't try to parallelise minor GC.
+ * We don't try to parallelise minor GCs (unless the user asks for
+ * it with +RTS -gn0), or mark/compact/sweep GC.
*/
-#if defined(THREADED_RTS)
- if (n < (4*1024*1024 / BLOCK_SIZE)) {
- n_gc_threads = 1;
+ if (gc_type == PENDING_GC_PAR) {
+ n_gc_threads = RtsFlags.ParFlags.nNodes;
} else {
- n_gc_threads = RtsFlags.ParFlags.gcThreads;
+ n_gc_threads = 1;
}
#else
n_gc_threads = 1;
#endif
- trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %dKB to collect, using %d thread(s)",
- N, n * (BLOCK_SIZE / 1024), n_gc_threads);
+
+ debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
+ N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
#ifdef RTS_GTK_FRONTPANEL
if (RtsFlags.GcFlags.frontpanel) {
#ifdef DEBUG
// check for memory leaks if DEBUG is on
- memInventory(traceClass(DEBUG_gc));
+ memInventory(DEBUG_gc);
#endif
- // check stack sanity *before* GC (ToDo: check all threads)
+ // check stack sanity *before* GC
IF_DEBUG(sanity, checkFreeListSanity());
+ IF_DEBUG(sanity, checkMutableLists(rtsTrue));
// Initialise all our gc_thread structures
for (t = 0; t < n_gc_threads; t++) {
/* Allocate a mark stack if we're doing a major collection.
*/
- if (major_gc) {
- mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
- mark_stack = (StgPtr *)mark_stack_bdescr->start;
- mark_sp = mark_stack;
- mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
+ if (major_gc && oldest_gen->steps[0].mark) {
+ mark_stack_bd = allocBlock();
+ mark_stack_top_bd = mark_stack_bd;
+ mark_stack_bd->link = NULL;
+ mark_stack_bd->u.back = NULL;
+ mark_sp = mark_stack_bd->start;
} else {
- mark_stack_bdescr = NULL;
+ mark_stack_bd = NULL;
+ mark_stack_top_bd = NULL;
+ mark_sp = NULL;
}
// this is the main thread
- gct = gc_threads[0];
+#ifdef THREADED_RTS
+ if (n_gc_threads == 1) {
+ SET_GCT(gc_threads[0]);
+ } else {
+ SET_GCT(gc_threads[cap->no]);
+ }
+#else
+SET_GCT(gc_threads[0]);
+#endif
/* -----------------------------------------------------------------------
* follow all the roots that we know about:
- * - mutable lists from each generation > N
- * we want to *scavenge* these roots, not evacuate them: they're not
- * going to move in this GC.
- * Also do them in reverse generation order, for the usual reason:
- * namely to reduce the likelihood of spurious old->new pointers.
*/
- for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
- generations[g].saved_mut_list = generations[g].mut_list;
- generations[g].mut_list = allocBlock();
- // mut_list always has at least one block.
- }
// the main thread is running: this prevents any other threads from
// exiting prematurely, so we can start them now.
// NB. do this after the mutable lists have been saved above, otherwise
// the other GC threads will be writing into the old mutable lists.
inc_running();
- wakeup_gc_threads(n_gc_threads);
-
+ wakeup_gc_threads(n_gc_threads, gct->thread_index);
+
+ // Mutable lists from each generation > N
+ // we want to *scavenge* these roots, not evacuate them: they're not
+ // going to move in this GC.
+ // Also do them in reverse generation order, for the usual reason:
+ // namely to reduce the likelihood of spurious old->new pointers.
+ //
for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
- scavenge_mutable_list(&generations[g]);
+ scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
+ freeChain_sync(generations[g].saved_mut_list);
+ generations[g].saved_mut_list = NULL;
+
+ }
+
+ // scavenge the capability-private mutable lists. This isn't part
+ // of markSomeCapabilities() because markSomeCapabilities() can only
+ // call back into the GC via mark_root() (due to the gct register
+ // variable).
+ if (n_gc_threads == 1) {
+ for (n = 0; n < n_capabilities; n++) {
+ scavenge_capability_mut_lists(&capabilities[n]);
+ }
+ } else {
+ scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
}
// follow roots from the CAF list (used by GHCi)
gct->evac_step = 0;
- markCAFs(mark_root);
+ markCAFs(mark_root, gct);
// follow all the roots that the application knows about.
gct->evac_step = 0;
- GetRoots(mark_root);
+ markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
+ rtsTrue/*prune sparks*/);
#if defined(RTS_USER_SIGNALS)
// mark the signal handlers (signals should be already blocked)
- markSignalHandlers(mark_root);
+ markSignalHandlers(mark_root, gct);
#endif
// Mark the weak pointer list, and prepare to detect dead weak pointers.
initWeakForGC();
// Mark the stable pointer table.
- markStablePtrTable(mark_root);
+ markStablePtrTable(mark_root, gct);
/* -------------------------------------------------------------------------
* Repeatedly scavenge all the areas we know about until there's no
*/
for (;;)
{
- gc_thread_work();
+ scavenge_until_all_done();
// The other threads are now stopped. We might recurse back to
// here, but from now on this is the only thread.
break;
}
- shutdown_gc_threads(n_gc_threads);
+ shutdown_gc_threads(n_gc_threads, gct->thread_index);
// Update pointers from the Task list
update_task_list();
#endif
// NO MORE EVACUATION AFTER THIS POINT!
- // Finally: compaction of the oldest generation.
- if (major_gc && oldest_gen->steps[0].is_compacted) {
- // save number of blocks for stats
- oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks;
- compact();
- }
-
- IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
// Two-space collector: free the old to-space.
// g0s0->old_blocks is the old nursery
}
}
- // For each workspace, in each thread:
- // * clear the BF_EVACUATED flag from each copied block
- // * move the copied blocks to the step
+ // For each workspace, in each thread, move the copied blocks to the step
{
gc_thread *thr;
step_workspace *ws;
thr = gc_threads[t];
// not step 0
- for (s = 1; s < total_steps; s++) {
+ if (RtsFlags.GcFlags.generations == 1) {
+ s = 0;
+ } else {
+ s = 1;
+ }
+ for (; s < total_steps; s++) {
ws = &thr->steps[s];
// Push the final block
prev = NULL;
for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
- bd->flags &= ~BF_EVACUATED; // now from-space
ws->step->n_words += bd->free - bd->start;
prev = bd;
}
ws->step->blocks = ws->scavd_list;
}
ws->step->n_blocks += ws->n_scavd_blocks;
+ }
+ }
+
+ // Add all the partial blocks *after* we've added all the full
+ // blocks. This is so that we can grab the partial blocks back
+ // again and try to fill them up in the next GC.
+ for (t = 0; t < n_gc_threads; t++) {
+ thr = gc_threads[t];
+
+ // not step 0
+ if (RtsFlags.GcFlags.generations == 1) {
+ s = 0;
+ } else {
+ s = 1;
+ }
+ for (; s < total_steps; s++) {
+ ws = &thr->steps[s];
prev = NULL;
for (bd = ws->part_list; bd != NULL; bd = next) {
freeGroup(bd);
ws->n_part_blocks--;
} else {
- bd->flags &= ~BF_EVACUATED; // now from-space
ws->step->n_words += bd->free - bd->start;
prev = bd;
}
}
}
- // Two-space collector: swap the semi-spaces around.
- // Currently: g0s0->old_blocks is the old nursery
- // g0s0->blocks is to-space from this GC
- // We want these the other way around.
- if (RtsFlags.GcFlags.generations == 1) {
- bdescr *nursery_blocks = g0s0->old_blocks;
- nat n_nursery_blocks = g0s0->n_old_blocks;
- g0s0->old_blocks = g0s0->blocks;
- g0s0->n_old_blocks = g0s0->n_blocks;
- g0s0->blocks = nursery_blocks;
- g0s0->n_blocks = n_nursery_blocks;
+ // Finally: compact or sweep the oldest generation.
+ if (major_gc && oldest_gen->steps[0].mark) {
+ if (oldest_gen->steps[0].compact)
+ compact(gct->scavenged_static_objects);
+ else
+ sweep(&oldest_gen->steps[0]);
}
/* run through all the generations/steps and tidy up
nat i;
for (i=0; i < n_gc_threads; i++) {
if (n_gc_threads > 1) {
- trace(TRACE_gc,"thread %d:", i);
- trace(TRACE_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
- trace(TRACE_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
- trace(TRACE_gc," any_work %ld", gc_threads[i]->any_work);
- trace(TRACE_gc," no_work %ld", gc_threads[i]->no_work);
- trace(TRACE_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
+ debugTrace(DEBUG_gc,"thread %d:", i);
+ debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
+ debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
+ debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
+ debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
+ debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
}
copied += gc_threads[i]->copied;
max_copied = stg_max(gc_threads[i]->copied, max_copied);
for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
mut_list_size += bd->free - bd->start;
}
+ for (n = 0; n < n_capabilities; n++) {
+ for (bd = capabilities[n].mut_lists[g];
+ bd != NULL; bd = bd->link) {
+ mut_list_size += bd->free - bd->start;
+ }
+ }
copied += mut_list_size;
debugTrace(DEBUG_gc,
}
for (s = 0; s < generations[g].n_steps; s++) {
- bdescr *next;
+ bdescr *next, *prev;
stp = &generations[g].steps[s];
// for generations we collected...
* freed blocks will probaby be quickly recycled.
*/
if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
- if (stp->is_compacted)
+ if (stp->mark)
{
- // for a compacted step, just shift the new to-space
- // onto the front of the now-compacted existing blocks.
- for (bd = stp->blocks; bd != NULL; bd = bd->link) {
- bd->flags &= ~BF_EVACUATED; // now from-space
- stp->n_words += bd->free - bd->start;
- }
// tack the new blocks on the end of the existing blocks
if (stp->old_blocks != NULL) {
+
+ prev = NULL;
for (bd = stp->old_blocks; bd != NULL; bd = next) {
- // NB. this step might not be compacted next
- // time, so reset the BF_COMPACTED flags.
- // They are set before GC if we're going to
- // compact. (search for BF_COMPACTED above).
- bd->flags &= ~BF_COMPACTED;
- next = bd->link;
- if (next == NULL) {
- bd->link = stp->blocks;
- }
+
+ next = bd->link;
+
+ if (!(bd->flags & BF_MARKED))
+ {
+ if (prev == NULL) {
+ stp->old_blocks = next;
+ } else {
+ prev->link = next;
+ }
+ freeGroup(bd);
+ stp->n_old_blocks--;
+ }
+ else
+ {
+ stp->n_words += bd->free - bd->start;
+
+ // NB. this step might not be compacted next
+ // time, so reset the BF_MARKED flags.
+ // They are set before GC if we're going to
+ // compact. (search for BF_MARKED above).
+ bd->flags &= ~BF_MARKED;
+
+ // between GCs, all blocks in the heap except
+ // for the nursery have the BF_EVACUATED flag set.
+ bd->flags |= BF_EVACUATED;
+
+ prev = bd;
+ }
}
- stp->blocks = stp->old_blocks;
+
+ if (prev != NULL) {
+ prev->link = stp->blocks;
+ stp->blocks = stp->old_blocks;
+ }
}
// add the new blocks to the block tally
stp->n_blocks += stp->n_old_blocks;
bd = next;
}
- // update the count of blocks used by large objects
- for (bd = stp->scavenged_large_objects; bd != NULL; bd = bd->link) {
- bd->flags &= ~BF_EVACUATED;
- }
stp->large_objects = stp->scavenged_large_objects;
stp->n_large_blocks = stp->n_scavenged_large_blocks;
*/
for (bd = stp->scavenged_large_objects; bd; bd = next) {
next = bd->link;
- bd->flags &= ~BF_EVACUATED;
dbl_link_onto(bd, &stp->large_objects);
}
pinned_object_block = NULL;
// Free the mark stack.
- if (mark_stack_bdescr != NULL) {
- freeGroup(mark_stack_bdescr);
+ if (mark_stack_top_bd != NULL) {
+ debugTrace(DEBUG_gc, "mark stack: %d blocks",
+ countBlocks(mark_stack_top_bd));
+ freeChain(mark_stack_top_bd);
}
// Free any bitmaps.
// start any pending finalizers
RELEASE_SM_LOCK;
- scheduleFinalizers(last_free_capability, old_weak_ptr_list);
+ scheduleFinalizers(cap, old_weak_ptr_list);
ACQUIRE_SM_LOCK;
// send exceptions to any threads which were about to die
RELEASE_SM_LOCK;
resurrectThreads(resurrected_threads);
+ performPendingThrowTos(exception_threads);
ACQUIRE_SM_LOCK;
// Update the stable pointer hash table.
IF_DEBUG(sanity, checkSanity());
// extra GC trace info
- if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
+ IF_DEBUG(gc, statDescribeGens());
#ifdef DEBUG
// symbol-table based profiling
#ifdef DEBUG
// check for memory leaks if DEBUG is on
- memInventory(traceClass(DEBUG_gc));
+ memInventory(DEBUG_gc);
#endif
#ifdef RTS_GTK_FRONTPANEL
#endif
// ok, GC over: tell the stats department what happened.
- stat_endGC(allocated, live, copied, N, max_copied, avg_copied);
+ slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
+ stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
+
+ // unlock the StablePtr table
+ stablePtrPostGC();
+
+ // Guess which generation we'll collect *next* time
+ initialise_N(force_major_gc);
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
RELEASE_SM_LOCK;
- gct = saved_gct;
-}
-
-/* -----------------------------------------------------------------------------
- * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an
- * implicit slide i.e. after marking all sparks are at the beginning of the
- * spark pool and the spark pool only contains sparkable closures
- * -------------------------------------------------------------------------- */
-
-#ifdef THREADED_RTS
-static void
-markSparkQueue (evac_fn evac, Capability *cap)
-{
- StgClosure **sparkp, **to_sparkp;
- nat n, pruned_sparks; // stats only
- StgSparkPool *pool;
-
- PAR_TICKY_MARK_SPARK_QUEUE_START();
-
- n = 0;
- pruned_sparks = 0;
-
- pool = &(cap->r.rSparks);
-
- ASSERT_SPARK_POOL_INVARIANTS(pool);
-
-#if defined(PARALLEL_HASKELL)
- // stats only
- n = 0;
- pruned_sparks = 0;
-#endif
-
- sparkp = pool->hd;
- to_sparkp = pool->hd;
- while (sparkp != pool->tl) {
- ASSERT(*sparkp!=NULL);
- ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp)));
- // ToDo?: statistics gathering here (also for GUM!)
- if (closure_SHOULD_SPARK(*sparkp)) {
- evac(sparkp);
- *to_sparkp++ = *sparkp;
- if (to_sparkp == pool->lim) {
- to_sparkp = pool->base;
- }
- n++;
- } else {
- pruned_sparks++;
- }
- sparkp++;
- if (sparkp == pool->lim) {
- sparkp = pool->base;
- }
- }
- pool->tl = to_sparkp;
-
- PAR_TICKY_MARK_SPARK_QUEUE_END(n);
-
-#if defined(PARALLEL_HASKELL)
- debugTrace(DEBUG_sched,
- "marked %d sparks and pruned %d sparks on [%x]",
- n, pruned_sparks, mytid);
-#else
- debugTrace(DEBUG_sched,
- "marked %d sparks and pruned %d sparks",
- n, pruned_sparks);
-#endif
-
- debugTrace(DEBUG_sched,
- "new spark queue len=%d; (hd=%p; tl=%p)\n",
- sparkPoolSize(pool), pool->hd, pool->tl);
-}
-#endif
-
-/* ---------------------------------------------------------------------------
- Where are the roots that we know about?
-
- - all the threads on the runnable queue
- - all the threads on the blocked queue
- - all the threads on the sleeping queue
- - all the thread currently executing a _ccall_GC
- - all the "main threads"
-
- ------------------------------------------------------------------------ */
-
-void
-GetRoots( evac_fn evac )
-{
- nat i;
- Capability *cap;
- Task *task;
-
- // Each GC thread is responsible for following roots from the
- // Capability of the same number. There will usually be the same
- // or fewer Capabilities as GC threads, but just in case there
- // are more, we mark every Capability whose number is the GC
- // thread's index plus a multiple of the number of GC threads.
- for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) {
- cap = &capabilities[i];
- evac((StgClosure **)(void *)&cap->run_queue_hd);
- evac((StgClosure **)(void *)&cap->run_queue_tl);
-#if defined(THREADED_RTS)
- evac((StgClosure **)(void *)&cap->wakeup_queue_hd);
- evac((StgClosure **)(void *)&cap->wakeup_queue_tl);
-#endif
- for (task = cap->suspended_ccalling_tasks; task != NULL;
- task=task->next) {
- debugTrace(DEBUG_sched,
- "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id);
- evac((StgClosure **)(void *)&task->suspended_tso);
- }
-
-#if defined(THREADED_RTS)
- markSparkQueue(evac,cap);
-#endif
- }
-
-#if !defined(THREADED_RTS)
- evac((StgClosure **)(void *)&blocked_queue_hd);
- evac((StgClosure **)(void *)&blocked_queue_tl);
- evac((StgClosure **)(void *)&sleeping_queue);
-#endif
-}
-
-/* -----------------------------------------------------------------------------
- isAlive determines whether the given closure is still alive (after
- a garbage collection) or not. It returns the new address of the
- closure if it is alive, or NULL otherwise.
-
- NOTE: Use it before compaction only!
- It untags and (if needed) retags pointers to closures.
- -------------------------------------------------------------------------- */
-
-
-StgClosure *
-isAlive(StgClosure *p)
-{
- const StgInfoTable *info;
- bdescr *bd;
- StgWord tag;
- StgClosure *q;
-
- while (1) {
- /* The tag and the pointer are split, to be merged later when needed. */
- tag = GET_CLOSURE_TAG(p);
- q = UNTAG_CLOSURE(p);
-
- ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
- info = get_itbl(q);
-
- // ignore static closures
- //
- // ToDo: for static closures, check the static link field.
- // Problem here is that we sometimes don't set the link field, eg.
- // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.
- //
- if (!HEAP_ALLOCED(q)) {
- return p;
- }
-
- // ignore closures in generations that we're not collecting.
- bd = Bdescr((P_)q);
- if (bd->gen_no > N) {
- return p;
- }
-
- // if it's a pointer into to-space, then we're done
- if (bd->flags & BF_EVACUATED) {
- return p;
- }
-
- // large objects use the evacuated flag
- if (bd->flags & BF_LARGE) {
- return NULL;
- }
-
- // check the mark bit for compacted steps
- if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) {
- return p;
- }
-
- switch (info->type) {
-
- case IND:
- case IND_STATIC:
- case IND_PERM:
- case IND_OLDGEN: // rely on compatible layout with StgInd
- case IND_OLDGEN_PERM:
- // follow indirections
- p = ((StgInd *)q)->indirectee;
- continue;
-
- case EVACUATED:
- // alive!
- return ((StgEvacuated *)q)->evacuee;
-
- case TSO:
- if (((StgTSO *)q)->what_next == ThreadRelocated) {
- p = (StgClosure *)((StgTSO *)q)->link;
- continue;
- }
- return NULL;
-
- default:
- // dead.
- return NULL;
- }
- }
+ SET_GCT(saved_gct);
}
/* -----------------------------------------------------------------------------
Initialise the gc_thread structures.
-------------------------------------------------------------------------- */
-static gc_thread *
-alloc_gc_thread (int n)
+#define GC_THREAD_INACTIVE 0
+#define GC_THREAD_STANDING_BY 1
+#define GC_THREAD_RUNNING 2
+#define GC_THREAD_WAITING_TO_CONTINUE 3
+
+static void
+new_gc_thread (nat n, gc_thread *t)
{
nat s;
step_workspace *ws;
- gc_thread *t;
-
- t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
- "alloc_gc_thread");
#ifdef THREADED_RTS
t->id = 0;
- initCondition(&t->wake_cond);
- initMutex(&t->wake_mutex);
- t->wakeup = rtsTrue; // starts true, so we can wait for the
+ initSpinLock(&t->gc_spin);
+ initSpinLock(&t->mut_spin);
+ ACQUIRE_SPIN_LOCK(&t->gc_spin);
+ t->wakeup = GC_THREAD_INACTIVE; // starts true, so we can wait for the
// thread to start up, see wakeup_gc_threads
- t->exit = rtsFalse;
#endif
t->thread_index = n;
ws = &t->steps[s];
ws->step = &all_steps[s];
ASSERT(s == ws->step->abs_no);
- ws->gct = t;
+ ws->my_gct = t;
ws->todo_bd = NULL;
- ws->buffer_todo_bd = NULL;
+ ws->todo_q = newWSDeque(128);
+ ws->todo_overflow = NULL;
+ ws->n_todo_overflow = 0;
ws->part_list = NULL;
ws->n_part_blocks = 0;
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
}
-
- return t;
}
+/* Allocate and initialise the per-thread gc_thread structures.
+ * Idempotent: does nothing if gc_threads has already been allocated.
+ * THREADED_RTS: one gc_thread per GC node (RtsFlags.ParFlags.nNodes),
+ * each malloc'd with trailing space for total_steps step_workspaces.
+ * Non-threaded: a single entry aliasing the static gct.
+ */
-static void
-alloc_gc_threads (void)
+void
+initGcThreads (void)
{
if (gc_threads == NULL) {
#if defined(THREADED_RTS)
nat i;
- gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
+ gc_threads = stgMallocBytes (RtsFlags.ParFlags.nNodes *
sizeof(gc_thread*),
"alloc_gc_threads");
- for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
- gc_threads[i] = alloc_gc_thread(i);
+ for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
+ gc_threads[i] =
+ stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
+ "alloc_gc_threads");
+
+ new_gc_thread(i, gc_threads[i]);
}
#else
- gc_threads = stgMallocBytes (sizeof(gc_thread*),
- "alloc_gc_threads");
+ gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
+ gc_threads[0] = gct;
+ new_gc_thread(0,gc_threads[0]);
+#endif
+ }
+}
- gc_threads[0] = alloc_gc_thread(0);
+/* Free the gc_thread structures allocated by initGcThreads().
+ * THREADED_RTS: each per-thread structure was malloc'd individually,
+ * so free each one, then the array. Non-threaded: only the array was
+ * heap-allocated (gc_threads[0] aliases gct — see initGcThreads), so
+ * only the array is freed. Resetting gc_threads to NULL makes a second
+ * call a no-op and allows re-initialisation.
+ */
+void
+freeGcThreads (void)
+{
+ if (gc_threads != NULL) {
+#if defined(THREADED_RTS)
+ nat i;
+ for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
+ stgFree (gc_threads[i]);
+ }
+ stgFree (gc_threads);
+#else
+ stgFree (gc_threads);
+#endif
+ gc_threads = NULL;
+ }
+}
Start GC threads
------------------------------------------------------------------------- */
-static nat gc_running_threads;
+static volatile StgWord gc_running_threads;
-#if defined(THREADED_RTS)
-static Mutex gc_running_mutex;
-#endif
-
+// Atomically increment the count of GC threads currently working and
+// return the new value. Lock-free: replaces the old mutex-protected
+// counter with atomic_inc on the volatile gc_running_threads.
-static nat
+static StgWord
inc_running (void)
{
- nat n_running;
- ACQUIRE_LOCK(&gc_running_mutex);
- n_running = ++gc_running_threads;
- RELEASE_LOCK(&gc_running_mutex);
- ASSERT(n_running <= n_gc_threads);
- return n_running;
+ StgWord new;
+ new = atomic_inc(&gc_running_threads);
+ ASSERT(new <= n_gc_threads);
+ return new;
}
+// Atomically decrement the count of running GC threads and return the
+// new value; a result of 0 means all threads are idle (see the spin
+// loop in scavenge_until_all_done). Lock-free counterpart of inc_running.
-static nat
+static StgWord
dec_running (void)
{
- nat n_running;
- ACQUIRE_LOCK(&gc_running_mutex);
- ASSERT(n_gc_threads != 0);
- n_running = --gc_running_threads;
- RELEASE_LOCK(&gc_running_mutex);
- return n_running;
+ ASSERT(gc_running_threads != 0);
+ return atomic_dec(&gc_running_threads);
}
-//
-// gc_thread_work(): Scavenge until there's no work left to do and all
-// the running threads are idle.
-//
+// Returns rtsTrue if there appears to be GC work available anywhere:
+// a non-empty mark stack, a non-empty global todo queue / overflow list
+// / large-object list in any step, or (when work_stealing is enabled)
+// work in another thread's deque that could be stolen. Purely a probe:
+// it does NOT claim any work — the caller calls inc_running() and
+// re-enters scavenge_loop() to actually take it. Also bumps the
+// any_work/no_work statistics counters on gct.
+static rtsBool
+any_work (void)
+{
+ int s;
+ step_workspace *ws;
+
+ gct->any_work++;
+
+ // barrier before reading other threads' queue state;
+ // presumably pairs with the writes that publish todo blocks — confirm
+ write_barrier();
+
+ // scavenge objects in compacted generation
+ if (mark_stack_bd != NULL && !mark_stack_empty()) {
+ return rtsTrue;
+ }
+
+ // Check for global work in any step. We don't need to check for
+ // local work, because we have already exited scavenge_loop(),
+ // which means there is no local work for this thread.
+ for (s = total_steps-1; s >= 0; s--) {
+ if (s == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+ ws = &gct->steps[s];
+ if (ws->todo_large_objects) return rtsTrue;
+ if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
+ if (ws->todo_overflow) return rtsTrue;
+ }
+
+#if defined(THREADED_RTS)
+ if (work_stealing) {
+ nat n;
+ // look for work to steal
+ for (n = 0; n < n_gc_threads; n++) {
+ if (n == gct->thread_index) continue;
+ for (s = total_steps-1; s >= 0; s--) {
+ ws = &gc_threads[n]->steps[s];
+ if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
+ }
+ }
+ }
+#endif
+
+ gct->no_work++;
+
+ return rtsFalse;
+}
+
+// Scavenge until there is no work left anywhere and all GC threads are
+// idle (gc_running_threads == 0). Each pass runs scavenge_loop() (the
+// single-threaded variant scavenge_loop1() when only one thread is in
+// this GC), then decrements the running count and spins polling
+// any_work(); if work reappears the thread re-registers as running and
+// loops. Emits GC_WORK/GC_IDLE/GC_DONE trace events around each phase.
static void
-gc_thread_work (void)
+scavenge_until_all_done (void)
{
nat r;
- debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
-
- // gc_running_threads has already been incremented for us; either
- // this is the main thread and we incremented it inside
- // GarbageCollect(), or this is a worker thread and the main
- // thread bumped gc_running_threads before waking us up.
-
- // Every thread evacuates some roots.
- gct->evac_step = 0;
- GetRoots(mark_root);
loop:
+ traceEvent(&capabilities[gct->thread_index], EVENT_GC_WORK);
+
+#if defined(THREADED_RTS)
+ if (n_gc_threads > 1) {
+ scavenge_loop();
+ } else {
+ scavenge_loop1();
+ }
+#else
scavenge_loop();
+#endif
+
// scavenge_loop() only exits when there's no work to do
r = dec_running();
- debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
- gct->thread_index, r);
+ traceEvent(&capabilities[gct->thread_index], EVENT_GC_IDLE);
+ debugTrace(DEBUG_gc, "%d GC threads still running", r);
+
while (gc_running_threads != 0) {
- usleep(1);
- if (any_work()) {
- inc_running();
- goto loop;
- }
- // any_work() does not remove the work from the queue, it
- // just checks for the presence of work. If we find any,
- // then we increment gc_running_threads and go back to
- // scavenge_loop() to perform any pending work.
+ // usleep(1);
+ if (any_work()) {
+ inc_running();
+ goto loop;
+ }
+ // any_work() does not remove the work from the queue, it
+ // just checks for the presence of work. If we find any,
+ // then we increment gc_running_threads and go back to
+ // scavenge_loop() to perform any pending work.
}
- // All threads are now stopped
- debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
+ traceEvent(&capabilities[gct->thread_index], EVENT_GC_DONE);
}
-
#if defined(THREADED_RTS)
-static void
-gc_thread_mainloop (void)
+
+// Body of one parallel-GC worker, run on a mutator Capability's thread
+// (replaces the old dedicated gc_thread_mainloop). Protocol, using the
+// two spinlocks in gc_thread: release mut_spin and set wakeup to
+// STANDING_BY so the initiating thread (waitForGcThreads) can see us;
+// block on gc_spin until woken; mark roots and scavenge mutable lists
+// for our capability; scavenge until the whole GC is done; then set
+// wakeup to WAITING_TO_CONTINUE and block on mut_spin until released.
+// gct is saved/restored around the GC in case it lives in a stolen
+// callee-saves register.
+void
+gcWorkerThread (Capability *cap)
{
- while (!gct->exit) {
-
- // Wait until we're told to wake up
- ACQUIRE_LOCK(&gct->wake_mutex);
- gct->wakeup = rtsFalse;
- while (!gct->wakeup) {
- debugTrace(DEBUG_gc, "GC thread %d standing by...",
- gct->thread_index);
- waitCondition(&gct->wake_cond, &gct->wake_mutex);
- }
- RELEASE_LOCK(&gct->wake_mutex);
- if (gct->exit) break;
+ gc_thread *saved_gct;
+
+ // necessary if we stole a callee-saves register for gct:
+ saved_gct = gct;
+
+ cap->in_gc = rtsTrue;
+
+ gct = gc_threads[cap->no];
+ gct->id = osThreadId();
+ // Wait until we're told to wake up
+ RELEASE_SPIN_LOCK(&gct->mut_spin);
+ gct->wakeup = GC_THREAD_STANDING_BY;
+ debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
+ ACQUIRE_SPIN_LOCK(&gct->gc_spin);
+
#ifdef USE_PAPI
- // start performance counters in this thread...
- if (gct->papi_events == -1) {
- papi_init_eventset(&gct->papi_events);
- }
- papi_thread_start_gc1_count(gct->papi_events);
+ // start performance counters in this thread...
+ if (gct->papi_events == -1) {
+ papi_init_eventset(&gct->papi_events);
+ }
+ papi_thread_start_gc1_count(gct->papi_events);
#endif
+
+ // Every thread evacuates some roots.
+ gct->evac_step = 0;
+ markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
+ rtsTrue/*prune sparks*/);
+ scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
- gc_thread_work();
-
+ scavenge_until_all_done();
+
#ifdef USE_PAPI
- // count events in this thread towards the GC totals
- papi_thread_stop_gc1_count(gct->papi_events);
+ // count events in this thread towards the GC totals
+ papi_thread_stop_gc1_count(gct->papi_events);
#endif
- }
-}
+
+ // Wait until we're told to continue
+ RELEASE_SPIN_LOCK(&gct->gc_spin);
+ gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
+ debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
+ gct->thread_index);
+ ACQUIRE_SPIN_LOCK(&gct->mut_spin);
+ debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
+
+ SET_GCT(saved_gct);
+}
+
#endif
#if defined(THREADED_RTS)
-static void
-gc_thread_entry (gc_thread *my_gct)
+
+void
+waitForGcThreads (Capability *cap USED_IF_THREADS)
{
- gct = my_gct;
- debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
- gct->id = osThreadId();
- gc_thread_mainloop();
+ nat n_threads = RtsFlags.ParFlags.nNodes;
+ nat me = cap->no;
+ nat i, j;
+ rtsBool retry = rtsTrue;
+
+ while(retry) {
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
+ if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+ prodCapability(&capabilities[i], cap->running_task);
+ }
+ }
+ for (j=0; j < 10; j++) {
+ retry = rtsFalse;
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
+ write_barrier();
+ setContextSwitches();
+ if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+ retry = rtsTrue;
+ }
+ }
+ if (!retry) break;
+ yieldThread();
+ }
+ }
}
-#endif
+
+#endif // THREADED_RTS
static void
start_gc_threads (void)
{
#if defined(THREADED_RTS)
- nat i;
- OSThreadId id;
- static rtsBool done = rtsFalse;
-
gc_running_threads = 0;
- initMutex(&gc_running_mutex);
-
- if (!done) {
- // Start from 1: the main thread is 0
- for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
- createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
- gc_threads[i]);
- }
- done = rtsTrue;
- }
#endif
}
static void
-wakeup_gc_threads (nat n_threads USED_IF_THREADS)
+wakeup_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
{
#if defined(THREADED_RTS)
nat i;
- for (i=1; i < n_threads; i++) {
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
inc_running();
debugTrace(DEBUG_gc, "waking up gc thread %d", i);
- do {
- ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
- if (gc_threads[i]->wakeup) {
- RELEASE_LOCK(&gc_threads[i]->wake_mutex);
- continue;
- } else {
- break;
- }
- } while (1);
- gc_threads[i]->wakeup = rtsTrue;
- signalCondition(&gc_threads[i]->wake_cond);
- RELEASE_LOCK(&gc_threads[i]->wake_mutex);
+ if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
+
+ gc_threads[i]->wakeup = GC_THREAD_RUNNING;
+ ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
+ RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
}
#endif
}
// standby state, otherwise they may still be executing inside
// any_work(), and may even remain awake until the next GC starts.
static void
-shutdown_gc_threads (nat n_threads USED_IF_THREADS)
+shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
{
#if defined(THREADED_RTS)
nat i;
- rtsBool wakeup;
- for (i=1; i < n_threads; i++) {
- do {
- ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
- wakeup = gc_threads[i]->wakeup;
- // wakeup is false while the thread is waiting
- RELEASE_LOCK(&gc_threads[i]->wake_mutex);
- } while (wakeup);
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
+ while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
}
#endif
}
+#if defined(THREADED_RTS)
+void
+releaseGCThreads (Capability *cap USED_IF_THREADS)
+{
+ nat n_threads = RtsFlags.ParFlags.nNodes;
+ nat me = cap->no;
+ nat i;
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
+ if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
+ barf("releaseGCThreads");
+
+ gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
+ ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
+ RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
+ }
+}
+#endif
+
/* ----------------------------------------------------------------------------
Initialise a generation that is to be collected
------------------------------------------------------------------------- */
for (s = 0; s < generations[g].n_steps; s++) {
+ stp = &generations[g].steps[s];
+ ASSERT(stp->gen_no == g);
+
+ // we'll construct a new list of threads in this step
+ // during GC, throw away the current list.
+ stp->old_threads = stp->threads;
+ stp->threads = END_TSO_QUEUE;
+
// generation 0, step 0 doesn't need to-space
if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
continue;
}
- stp = &generations[g].steps[s];
- ASSERT(stp->gen_no == g);
-
// deprecate the existing blocks
stp->old_blocks = stp->blocks;
stp->n_old_blocks = stp->n_blocks;
stp->blocks = NULL;
stp->n_blocks = 0;
stp->n_words = 0;
-
- // we don't have any to-be-scavenged blocks yet
- stp->todos = NULL;
- stp->todos_last = NULL;
- stp->n_todos = 0;
+ stp->live_estimate = 0;
// initialise the large object queues.
stp->scavenged_large_objects = NULL;
stp->n_scavenged_large_blocks = 0;
- // mark the large objects as not evacuated yet
+ // mark the small objects as from-space
+ for (bd = stp->old_blocks; bd; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED;
+ }
+
+ // mark the large objects as from-space
for (bd = stp->large_objects; bd; bd = bd->link) {
bd->flags &= ~BF_EVACUATED;
}
// for a compacted step, we need to allocate the bitmap
- if (stp->is_compacted) {
+ if (stp->mark) {
nat bitmap_size; // in bytes
bdescr *bitmap_bdescr;
StgWord *bitmap;
bd->u.bitmap = bitmap;
bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
- // Also at this point we set the BF_COMPACTED flag
+ // Also at this point we set the BF_MARKED flag
// for this block. The invariant is that
- // BF_COMPACTED is always unset, except during GC
+ // BF_MARKED is always unset, except during GC
// when it is set on those blocks which will be
// compacted.
- bd->flags |= BF_COMPACTED;
+ if (!(bd->flags & BF_FRAGMENTED)) {
+ bd->flags |= BF_MARKED;
+ }
}
}
}
// allocate the first to-space block; extra blocks will be
// chained on as necessary.
ws->todo_bd = NULL;
- ws->buffer_todo_bd = NULL;
+ ASSERT(looksEmptyWSDeque(ws->todo_q));
alloc_todo_block(ws,0);
+ ws->todo_overflow = NULL;
+ ws->n_todo_overflow = 0;
+
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
}
static void
init_uncollected_gen (nat g, nat threads)
{
- nat s, t, i;
+ nat s, t, n;
step_workspace *ws;
step *stp;
bdescr *bd;
+ // save the current mutable lists for this generation, and
+ // allocate a fresh block for each one. We'll traverse these
+ // mutable lists as roots early on in the GC.
+ generations[g].saved_mut_list = generations[g].mut_list;
+ generations[g].mut_list = allocBlock();
+ for (n = 0; n < n_capabilities; n++) {
+ capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
+ capabilities[n].mut_lists[g] = allocBlock();
+ }
+
for (s = 0; s < generations[g].n_steps; s++) {
stp = &generations[g].steps[s];
stp->scavenged_large_objects = NULL;
stp->n_scavenged_large_blocks = 0;
}
- for (t = 0; t < threads; t++) {
- for (s = 0; s < generations[g].n_steps; s++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+ stp = &generations[g].steps[s];
+
+ for (t = 0; t < threads; t++) {
ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
- stp = ws->step;
- ws->buffer_todo_bd = NULL;
+ ASSERT(looksEmptyWSDeque(ws->todo_q));
ws->todo_large_objects = NULL;
ws->part_list = NULL;
alloc_todo_block(ws,0);
}
}
- }
- // Move the private mutable lists from each capability onto the
- // main mutable list for the generation.
- for (i = 0; i < n_capabilities; i++) {
- for (bd = capabilities[i].mut_lists[g];
- bd->link != NULL; bd = bd->link) {
- /* nothing */
- }
- bd->link = generations[g].mut_list;
- generations[g].mut_list = capabilities[i].mut_lists[g];
- capabilities[i].mut_lists[g] = allocBlock();
+ // deal out any more partial blocks to the threads' part_lists
+ t = 0;
+ while (stp->blocks && isPartiallyFull(stp->blocks))
+ {
+ bd = stp->blocks;
+ stp->blocks = bd->link;
+ ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
+ bd->link = ws->part_list;
+ ws->part_list = bd;
+ ws->n_part_blocks += 1;
+ bd->u.scan = bd->free;
+ stp->n_blocks -= 1;
+ stp->n_words -= bd->free - bd->start;
+ t++;
+ if (t == n_gc_threads) t = 0;
+ }
}
}
t->static_objects = END_OF_STATIC_LIST;
t->scavenged_static_objects = END_OF_STATIC_LIST;
t->scan_bd = NULL;
+ t->mut_lists = capabilities[t->thread_index].mut_lists;
t->evac_step = 0;
t->failed_to_evac = rtsFalse;
t->eager_promotion = rtsTrue;
}
/* -----------------------------------------------------------------------------
- Function we pass to GetRoots to evacuate roots.
+ Function we pass to evacuate roots.
-------------------------------------------------------------------------- */
static void
-mark_root(StgClosure **root)
+mark_root(void *user USED_IF_THREADS, StgClosure **root)
{
- evacuate(root);
+ // we stole a register for gct, but this function is called from
+ // *outside* the GC where the register variable is not in effect,
+ // so we need to save and restore it here. NB. only call
+ // mark_root() from the main GC thread, otherwise gct will be
+ // incorrect.
+ gc_thread *saved_gct;
+ saved_gct = gct;
+ SET_GCT(user);
+
+ evacuate(root);
+
+ SET_GCT(saved_gct);
}
/* -----------------------------------------------------------------------------
}
}
-/* -----------------------------------------------------------------------------
- Reverting CAFs
- -------------------------------------------------------------------------- */
-
-void
-revertCAFs( void )
-{
- StgIndStatic *c;
-
- for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
- c = (StgIndStatic *)c->static_link)
- {
- SET_INFO(c, c->saved_info);
- c->saved_info = NULL;
- // could, but not necessary: c->static_link = NULL;
- }
- revertible_caf_list = NULL;
-}
-
-void
-markCAFs( evac_fn evac )
-{
- StgIndStatic *c;
-
- for (c = (StgIndStatic *)caf_list; c != NULL;
- c = (StgIndStatic *)c->static_link)
- {
- evac(&c->indirectee);
- }
- for (c = (StgIndStatic *)revertible_caf_list; c != NULL;
- c = (StgIndStatic *)c->static_link)
- {
- evac(&c->indirectee);
- }
-}
-
/* ----------------------------------------------------------------------------
Update the pointers from the task list
nat g;
if (major_gc && RtsFlags.GcFlags.generations > 1) {
- nat live, size, min_alloc;
+ nat live, size, min_alloc, words;
nat max = RtsFlags.GcFlags.maxHeapSize;
nat gens = RtsFlags.GcFlags.generations;
// live in the oldest generations
- live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+
- oldest_gen->steps[0].n_large_blocks;
+ if (oldest_gen->steps[0].live_estimate != 0) {
+ words = oldest_gen->steps[0].live_estimate;
+ } else {
+ words = oldest_gen->steps[0].n_words;
+ }
+ live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
+ oldest_gen->steps[0].n_large_blocks;
// default max size for all generations except zero
size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
(max > 0 &&
oldest_gen->steps[0].n_blocks >
(RtsFlags.GcFlags.compactThreshold * max) / 100))) {
- oldest_gen->steps[0].is_compacted = 1;
+ oldest_gen->steps[0].mark = 1;
+ oldest_gen->steps[0].compact = 1;
// debugBelch("compaction: on\n", live);
} else {
- oldest_gen->steps[0].is_compacted = 0;
+ oldest_gen->steps[0].mark = 0;
+ oldest_gen->steps[0].compact = 0;
// debugBelch("compaction: off\n", live);
}
+ if (RtsFlags.GcFlags.sweep) {
+ oldest_gen->steps[0].mark = 1;
+ }
+
// if we're going to go over the maximum heap size, reduce the
// size of the generations accordingly. The calculation is
// different if compaction is turned on, because we don't need
heapOverflow();
}
- if (oldest_gen->steps[0].is_compacted) {
+ if (oldest_gen->steps[0].compact) {
if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
size = (max - min_alloc) / ((gens - 1) * 2 - 1);
}
static void
resize_nursery (void)
{
+ lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
+
if (RtsFlags.GcFlags.generations == 1)
{ // Two-space collector:
nat blocks;
* performance we get from 3L bytes, reducing to the same
* performance at 2L bytes.
*/
- blocks = g0s0->n_old_blocks;
+ blocks = g0s0->n_blocks;
if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
else
{
blocks *= RtsFlags.GcFlags.oldGenFactor;
- if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
+ if (blocks < min_nursery)
{
- blocks = RtsFlags.GcFlags.minAllocAreaSize;
+ blocks = min_nursery;
}
}
resizeNurseries(blocks);
(((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
(100 + (long)g0s0_pcnt_kept);
- if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
- blocks = RtsFlags.GcFlags.minAllocAreaSize;
+ if (blocks < (long)min_nursery) {
+ blocks = min_nursery;
}
resizeNurseries((nat)blocks);