*
* ---------------------------------------------------------------------------*/
-// #include "PosixSource.h"
+#include "PosixSource.h"
#include "Rts.h"
-#include "RtsFlags.h"
+#include "HsFFI.h"
+
+#include "Storage.h"
#include "RtsUtils.h"
#include "Apply.h"
-#include "OSThreads.h"
-#include "LdvProfile.h"
#include "Updates.h"
#include "Stats.h"
#include "Schedule.h"
#include "Sanity.h"
#include "BlockAlloc.h"
-#include "MBlock.h"
#include "ProfHeap.h"
-#include "SchedAPI.h"
#include "Weak.h"
#include "Prelude.h"
-#include "ParTicky.h" // ToDo: move into Rts.h
#include "RtsSignals.h"
#include "STM.h"
-#include "HsFFI.h"
-#include "Linker.h"
#if defined(RTS_GTK_FRONTPANEL)
#include "FrontPanel.h"
#endif
#include "Trace.h"
#include "RetainerProfile.h"
+#include "LdvProfile.h"
#include "RaiseAsync.h"
#include "Papi.h"
+#include "Stable.h"
#include "GC.h"
#include "GCThread.h"
/* Thread-local data for each GC thread
*/
gc_thread **gc_threads = NULL;
-// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
+
+#if !defined(THREADED_RTS)
+StgWord8 the_gc_thread[sizeof(gc_thread) + 64 * sizeof(step_workspace)];
+#endif
// Number of threads running in *this* GC. Affects how many
// step->todos[] lists we have to look in to find work.
// For stats:
long copied; // *words* copied & scavenged during this GC
-#ifdef THREADED_RTS
-SpinLock recordMutableGen_sync;
-#endif
+rtsBool work_stealing;
DECLARE_GCT
static void resize_nursery (void);
static void start_gc_threads (void);
static void scavenge_until_all_done (void);
-static nat inc_running (void);
-static nat dec_running (void);
+static StgWord inc_running (void);
+static StgWord dec_running (void);
static void wakeup_gc_threads (nat n_threads, nat me);
static void shutdown_gc_threads (nat n_threads, nat me);
-static void continue_gc_threads (nat n_threads, nat me);
#if 0 && defined(DEBUG)
static void gcCAFs (void);
void
GarbageCollect (rtsBool force_major_gc,
nat gc_type USED_IF_THREADS,
- Capability *cap USED_IF_THREADS)
+ Capability *cap)
{
bdescr *bd;
step *stp;
// tell the STM to discard any cached closures it's hoping to re-use
stmPreGCHook();
+ // lock the StablePtr table
+ stablePtrPreGC();
+
#ifdef DEBUG
mutlist_MUTVARS = 0;
mutlist_MUTARRS = 0;
*/
n = initialise_N(force_major_gc);
+#if defined(THREADED_RTS)
+ work_stealing = RtsFlags.ParFlags.parGcLoadBalancing;
+ // It's not always a good idea to do load balancing in parallel
+ // GC. In particular, for a parallel program we don't want to
+ // lose locality by moving cached data into another CPU's cache
+ // (this effect can be quite significant).
+ //
+ // We could have a more complex way to deterimine whether to do
+ // work stealing or not, e.g. it might be a good idea to do it
+ // if the heap is big. For now, we just turn it on or off with
+ // a flag.
+#endif
+
/* Start threads, so they can be spinning up while we finish initialisation.
*/
start_gc_threads();
n_gc_threads = 1;
#endif
- trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
+ debugTrace(DEBUG_gc, "GC (gen %d): %d KB to collect, %ld MB in use, using %d thread(s)",
N, n * (BLOCK_SIZE / 1024), mblocks_allocated, n_gc_threads);
#ifdef RTS_GTK_FRONTPANEL
// check stack sanity *before* GC
IF_DEBUG(sanity, checkFreeListSanity());
- IF_DEBUG(sanity, checkMutableLists());
+ IF_DEBUG(sanity, checkMutableLists(rtsTrue));
// Initialise all our gc_thread structures
for (t = 0; t < n_gc_threads; t++) {
// this is the main thread
#ifdef THREADED_RTS
if (n_gc_threads == 1) {
- gct = gc_threads[0];
+ SET_GCT(gc_threads[0]);
} else {
- gct = gc_threads[cap->no];
+ SET_GCT(gc_threads[cap->no]);
}
#else
- gct = gc_threads[0];
+SET_GCT(gc_threads[0]);
#endif
/* -----------------------------------------------------------------------
* follow all the roots that we know about:
- * - mutable lists from each generation > N
- * we want to *scavenge* these roots, not evacuate them: they're not
- * going to move in this GC.
- * Also do them in reverse generation order, for the usual reason:
- * namely to reduce the likelihood of spurious old->new pointers.
*/
- for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
- generations[g].saved_mut_list = generations[g].mut_list;
- generations[g].mut_list = allocBlock();
- // mut_list always has at least one block.
- }
// the main thread is running: this prevents any other threads from
// exiting prematurely, so we can start them now.
inc_running();
wakeup_gc_threads(n_gc_threads, gct->thread_index);
+ // Mutable lists from each generation > N
+ // we want to *scavenge* these roots, not evacuate them: they're not
+ // going to move in this GC.
+ // Also do them in reverse generation order, for the usual reason:
+ // namely to reduce the likelihood of spurious old->new pointers.
+ //
for (g = RtsFlags.GcFlags.generations-1; g > N; g--) {
- scavenge_mutable_list(&generations[g]);
+ scavenge_mutable_list(generations[g].saved_mut_list, &generations[g]);
+ freeChain_sync(generations[g].saved_mut_list);
+ generations[g].saved_mut_list = NULL;
+
+ }
+
+ // scavenge the capability-private mutable lists. This isn't part
+ // of markSomeCapabilities() because markSomeCapabilities() can only
+ // call back into the GC via mark_root() (due to the gct register
+ // variable).
+ if (n_gc_threads == 1) {
+ for (n = 0; n < n_capabilities; n++) {
+ scavenge_capability_mut_lists(&capabilities[n]);
+ }
+ } else {
+ scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
}
// follow roots from the CAF list (used by GHCi)
nat i;
for (i=0; i < n_gc_threads; i++) {
if (n_gc_threads > 1) {
- trace(TRACE_gc,"thread %d:", i);
- trace(TRACE_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
- trace(TRACE_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
- trace(TRACE_gc," any_work %ld", gc_threads[i]->any_work);
- trace(TRACE_gc," no_work %ld", gc_threads[i]->no_work);
- trace(TRACE_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
+ debugTrace(DEBUG_gc,"thread %d:", i);
+ debugTrace(DEBUG_gc," copied %ld", gc_threads[i]->copied * sizeof(W_));
+ debugTrace(DEBUG_gc," scanned %ld", gc_threads[i]->scanned * sizeof(W_));
+ debugTrace(DEBUG_gc," any_work %ld", gc_threads[i]->any_work);
+ debugTrace(DEBUG_gc," no_work %ld", gc_threads[i]->no_work);
+ debugTrace(DEBUG_gc," scav_find_work %ld", gc_threads[i]->scav_find_work);
}
copied += gc_threads[i]->copied;
max_copied = stg_max(gc_threads[i]->copied, max_copied);
for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
mut_list_size += bd->free - bd->start;
}
+ for (n = 0; n < n_capabilities; n++) {
+ for (bd = capabilities[n].mut_lists[g];
+ bd != NULL; bd = bd->link) {
+ mut_list_size += bd->free - bd->start;
+ }
+ }
copied += mut_list_size;
debugTrace(DEBUG_gc,
// start any pending finalizers
RELEASE_SM_LOCK;
- scheduleFinalizers(last_free_capability, old_weak_ptr_list);
+ scheduleFinalizers(cap, old_weak_ptr_list);
ACQUIRE_SM_LOCK;
// send exceptions to any threads which were about to die
IF_DEBUG(sanity, checkSanity());
// extra GC trace info
- if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens();
+ IF_DEBUG(gc, statDescribeGens());
#ifdef DEBUG
// symbol-table based profiling
slop = calcLiveBlocks() * BLOCK_SIZE_W - live;
stat_endGC(allocated, live, copied, N, max_copied, avg_copied, slop);
+ // unlock the StablePtr table
+ stablePtrPostGC();
+
// Guess which generation we'll collect *next* time
initialise_N(force_major_gc);
}
#endif
- continue_gc_threads(n_gc_threads, gct->thread_index);
-
RELEASE_SM_LOCK;
- gct = saved_gct;
+ SET_GCT(saved_gct);
}
/* -----------------------------------------------------------------------------
#define GC_THREAD_RUNNING 2
#define GC_THREAD_WAITING_TO_CONTINUE 3
-static gc_thread *
-alloc_gc_thread (int n)
+static void
+new_gc_thread (nat n, gc_thread *t)
{
nat s;
step_workspace *ws;
- gc_thread *t;
-
- t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
- "alloc_gc_thread");
#ifdef THREADED_RTS
t->id = 0;
ws = &t->steps[s];
ws->step = &all_steps[s];
ASSERT(s == ws->step->abs_no);
- ws->gct = t;
+ ws->my_gct = t;
ws->todo_bd = NULL;
- ws->buffer_todo_bd = NULL;
+ ws->todo_q = newWSDeque(128);
+ ws->todo_overflow = NULL;
+ ws->n_todo_overflow = 0;
ws->part_list = NULL;
ws->n_part_blocks = 0;
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
}
-
- return t;
}
"alloc_gc_threads");
for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
- gc_threads[i] = alloc_gc_thread(i);
+ gc_threads[i] =
+ stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
+ "alloc_gc_threads");
+
+ new_gc_thread(i, gc_threads[i]);
}
#else
- gc_threads = stgMallocBytes (sizeof(gc_thread*),
- "alloc_gc_threads");
+ gc_threads = stgMallocBytes (sizeof(gc_thread*),"alloc_gc_threads");
+ gc_threads[0] = gct;
+ new_gc_thread(0,gc_threads[0]);
+#endif
+ }
+}
- gc_threads[0] = alloc_gc_thread(0);
+void
+freeGcThreads (void)
+{
+ if (gc_threads != NULL) {
+#if defined(THREADED_RTS)
+ nat i;
+ for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
+ stgFree (gc_threads[i]);
+ }
+ stgFree (gc_threads);
+#else
+ stgFree (gc_threads);
#endif
+ gc_threads = NULL;
}
}
Start GC threads
------------------------------------------------------------------------- */
-static nat gc_running_threads;
+static volatile StgWord gc_running_threads;
-#if defined(THREADED_RTS)
-static Mutex gc_running_mutex;
-#endif
-
-static nat
+static StgWord
inc_running (void)
{
- nat n_running;
- ACQUIRE_LOCK(&gc_running_mutex);
- n_running = ++gc_running_threads;
- RELEASE_LOCK(&gc_running_mutex);
- ASSERT(n_running <= n_gc_threads);
- return n_running;
+ StgWord new;
+ new = atomic_inc(&gc_running_threads);
+ ASSERT(new <= n_gc_threads);
+ return new;
}
-static nat
+static StgWord
dec_running (void)
{
- nat n_running;
- ACQUIRE_LOCK(&gc_running_mutex);
- ASSERT(n_gc_threads != 0);
- n_running = --gc_running_threads;
- RELEASE_LOCK(&gc_running_mutex);
- return n_running;
+ ASSERT(gc_running_threads != 0);
+ return atomic_dec(&gc_running_threads);
}
static rtsBool
}
ws = &gct->steps[s];
if (ws->todo_large_objects) return rtsTrue;
- if (ws->step->todos) return rtsTrue;
+ if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
+ if (ws->todo_overflow) return rtsTrue;
}
+#if defined(THREADED_RTS)
+ if (work_stealing) {
+ nat n;
+ // look for work to steal
+ for (n = 0; n < n_gc_threads; n++) {
+ if (n == gct->thread_index) continue;
+ for (s = total_steps-1; s >= 0; s--) {
+ ws = &gc_threads[n]->steps[s];
+ if (!looksEmptyWSDeque(ws->todo_q)) return rtsTrue;
+ }
+ }
+ }
+#endif
+
gct->no_work++;
return rtsFalse;
r = dec_running();
debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
- gct->thread_index, r);
-
+ gct->thread_index, r);
+
while (gc_running_threads != 0) {
// usleep(1);
- if (any_work()) {
- inc_running();
- goto loop;
- }
- // any_work() does not remove the work from the queue, it
- // just checks for the presence of work. If we find any,
- // then we increment gc_running_threads and go back to
- // scavenge_loop() to perform any pending work.
+ if (any_work()) {
+ inc_running();
+ goto loop;
+ }
+ // any_work() does not remove the work from the queue, it
+ // just checks for the presence of work. If we find any,
+ // then we increment gc_running_threads and go back to
+ // scavenge_loop() to perform any pending work.
}
// All threads are now stopped
gct->evac_step = 0;
markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads,
rtsTrue/*prune sparks*/);
+ scavenge_capability_mut_lists(&capabilities[gct->thread_index]);
scavenge_until_all_done();
#endif
+#if defined(THREADED_RTS)
+
void
waitForGcThreads (Capability *cap USED_IF_THREADS)
{
-#if defined(THREADED_RTS)
nat n_threads = RtsFlags.ParFlags.nNodes;
nat me = cap->no;
nat i, j;
if (!retry) break;
}
}
-#endif
}
+#endif // THREADED_RTS
+
static void
start_gc_threads (void)
{
#if defined(THREADED_RTS)
gc_running_threads = 0;
- initMutex(&gc_running_mutex);
#endif
}
#endif
}
-static void
-continue_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
-{
#if defined(THREADED_RTS)
+void
+releaseGCThreads (Capability *cap USED_IF_THREADS)
+{
+ nat n_threads = RtsFlags.ParFlags.nNodes;
+ nat me = cap->no;
nat i;
for (i=0; i < n_threads; i++) {
if (i == me) continue;
- if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) barf("continue_gc_threads");
+ if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
+ barf("releaseGCThreads");
gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
}
-#endif
}
+#endif
/* ----------------------------------------------------------------------------
Initialise a generation that is to be collected
stp->n_words = 0;
stp->live_estimate = 0;
- // we don't have any to-be-scavenged blocks yet
- stp->todos = NULL;
- stp->todos_last = NULL;
- stp->n_todos = 0;
-
// initialise the large object queues.
stp->scavenged_large_objects = NULL;
stp->n_scavenged_large_blocks = 0;
// allocate the first to-space block; extra blocks will be
// chained on as necessary.
ws->todo_bd = NULL;
- ws->buffer_todo_bd = NULL;
+ ASSERT(looksEmptyWSDeque(ws->todo_q));
alloc_todo_block(ws,0);
+ ws->todo_overflow = NULL;
+ ws->n_todo_overflow = 0;
+
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
}
static void
init_uncollected_gen (nat g, nat threads)
{
- nat s, t, i;
+ nat s, t, n;
step_workspace *ws;
step *stp;
bdescr *bd;
+ // save the current mutable lists for this generation, and
+ // allocate a fresh block for each one. We'll traverse these
+ // mutable lists as roots early on in the GC.
+ generations[g].saved_mut_list = generations[g].mut_list;
+ generations[g].mut_list = allocBlock();
+ for (n = 0; n < n_capabilities; n++) {
+ capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
+ capabilities[n].mut_lists[g] = allocBlock();
+ }
+
for (s = 0; s < generations[g].n_steps; s++) {
stp = &generations[g].steps[s];
stp->scavenged_large_objects = NULL;
for (t = 0; t < threads; t++) {
ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
- ws->buffer_todo_bd = NULL;
+ ASSERT(looksEmptyWSDeque(ws->todo_q));
ws->todo_large_objects = NULL;
ws->part_list = NULL;
if (t == n_gc_threads) t = 0;
}
}
-
-
- // Move the private mutable lists from each capability onto the
- // main mutable list for the generation.
- for (i = 0; i < n_capabilities; i++) {
- for (bd = capabilities[i].mut_lists[g];
- bd->link != NULL; bd = bd->link) {
- /* nothing */
- }
- bd->link = generations[g].mut_list;
- generations[g].mut_list = capabilities[i].mut_lists[g];
- capabilities[i].mut_lists[g] = allocBlock();
- }
}
/* -----------------------------------------------------------------------------
t->static_objects = END_OF_STATIC_LIST;
t->scavenged_static_objects = END_OF_STATIC_LIST;
t->scan_bd = NULL;
+ t->mut_lists = capabilities[t->thread_index].mut_lists;
t->evac_step = 0;
t->failed_to_evac = rtsFalse;
t->eager_promotion = rtsTrue;
-------------------------------------------------------------------------- */
static void
-mark_root(void *user, StgClosure **root)
+mark_root(void *user USED_IF_THREADS, StgClosure **root)
{
// we stole a register for gct, but this function is called from
// *outside* the GC where the register variable is not in effect,
// incorrect.
gc_thread *saved_gct;
saved_gct = gct;
- gct = user;
+ SET_GCT(user);
evacuate(root);
- gct = saved_gct;
+ SET_GCT(saved_gct);
}
/* -----------------------------------------------------------------------------