#include "RetainerProfile.h"
#include "RaiseAsync.h"
#include "Sparks.h"
+#include "Papi.h"
#include "GC.h"
#include "Compact.h"
gc_thread *gc_threads = NULL;
// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
+// Number of threads running in *this* GC. Affects how many
+// step->todos[] lists we have to look in to find work.
+nat n_gc_threads;
+
// For stats:
long copied; // *words* copied & scavenged during this GC
-long scavd_copied; // *words* copied only during this GC
#ifdef THREADED_RTS
SpinLock recordMutableGen_sync;
step *stp;
lnat live, allocated;
lnat oldgen_saved_blocks = 0;
- nat n_threads; // number of threads participating in GC
-
+ gc_thread *saved_gct;
nat g, s, t;
+ // necessary if we stole a callee-saves register for gct:
+ saved_gct = gct;
+
#ifdef PROFILING
CostCentreStack *prev_CCS;
#endif
*/
#if defined(THREADED_RTS)
if (N == 0) {
- n_threads = 1;
+ n_gc_threads = 1;
} else {
- n_threads = RtsFlags.ParFlags.gcThreads;
+ n_gc_threads = RtsFlags.ParFlags.gcThreads;
}
#else
- n_threads = 1;
+ n_gc_threads = 1;
#endif
#ifdef RTS_GTK_FRONTPANEL
// Initialise all the generations/steps that we're collecting.
for (g = 0; g <= N; g++) {
- init_collected_gen(g,n_threads);
+ init_collected_gen(g,n_gc_threads);
}
// Initialise all the generations/steps that we're *not* collecting.
for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
- init_uncollected_gen(g,n_threads);
+ init_uncollected_gen(g,n_gc_threads);
}
/* Allocate a mark stack if we're doing a major collection.
}
// Initialise all our gc_thread structures
- for (t = 0; t < n_threads; t++) {
+ for (t = 0; t < n_gc_threads; t++) {
init_gc_thread(&gc_threads[t]);
}
// the main thread is running: this prevents any other threads from
// exiting prematurely, so we can start them now.
inc_running();
- wakeup_gc_threads(n_threads);
+ wakeup_gc_threads(n_gc_threads);
// Initialise stats
copied = 0;
- scavd_copied = 0;
// this is the main thread
gct = &gc_threads[0];
}
// follow roots from the CAF list (used by GHCi)
- gct->evac_gen = 0;
+ gct->evac_step = 0;
markCAFs(mark_root);
// follow all the roots that the application knows about.
- gct->evac_gen = 0;
+ gct->evac_step = 0;
GetRoots(mark_root);
+#if defined(RTS_USER_SIGNALS)
+ // mark the signal handlers (signals should be already blocked)
+ markSignalHandlers(mark_root);
+#endif
+
// Mark the weak pointer list, and prepare to detect dead weak pointers.
markWeakPtrList();
initWeakForGC();
step_workspace *ws;
bdescr *prev;
- for (t = 0; t < n_threads; t++) {
+ for (t = 0; t < n_gc_threads; t++) {
thr = &gc_threads[t];
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
// Push the final block
if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
- // update stats: we haven't counted the block at the
- // front of the scavd_list yet.
- scavd_copied += ws->scavd_list->free - ws->scavd_list->start;
-
ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
prev = ws->scavd_list;
resize_generations();
// Guess the amount of live data for stats.
- live = calcLive();
+ live = calcLiveBlocks() * BLOCK_SIZE_W;
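+	// "slop" is the space inside live blocks that is not occupied by live
+	// words: the block-level estimate minus the word-level estimate, in KB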
+ debugTrace(DEBUG_gc, "Slop: %ldKB",
+ (live - calcLiveWords()) / (1024/sizeof(W_)));
// Free the small objects allocated via allocate(), since this will
// all have been copied into G0S1 now.
#endif
// ok, GC over: tell the stats department what happened.
- stat_endGC(allocated, live, copied, scavd_copied, N);
+ stat_endGC(allocated, live, copied, N);
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
#endif
RELEASE_SM_LOCK;
+
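+    // restore gct to the value saved on entry, in case gct was kept in a
+    // stolen callee-saves register (see saved_gct above)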
+ gct = saved_gct;
}
+/* -----------------------------------------------------------------------------
+ * Mark all nodes pointed to by sparks in the spark queues (for GC).  Does an
+ * implicit slide, i.e. after marking all sparks are at the beginning of the
+ * spark pool and the spark pool contains only sparkable closures.
+ * -------------------------------------------------------------------------- */
+
+#ifdef THREADED_RTS
+static void
+markSparkQueue (evac_fn evac, Capability *cap)
+{
+ StgClosure **sparkp, **to_sparkp;
+ nat n, pruned_sparks; // stats only
+ StgSparkPool *pool;
+
+ PAR_TICKY_MARK_SPARK_QUEUE_START();
+
+ n = 0;
+ pruned_sparks = 0;
+
+ pool = &(cap->r.rSparks);
+
+ ASSERT_SPARK_POOL_INVARIANTS(pool);
+
+#if defined(PARALLEL_HASKELL)
+ // stats only
+ n = 0;
+ pruned_sparks = 0;
+#endif
+
+ sparkp = pool->hd;
+ to_sparkp = pool->hd;
+ while (sparkp != pool->tl) {
+ ASSERT(*sparkp!=NULL);
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp)));
+ // ToDo?: statistics gathering here (also for GUM!)
+ if (closure_SHOULD_SPARK(*sparkp)) {
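+	    // the spark is still useful: evacuate its closure and slide it
+	    // down towards the head of the pool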
+ evac(sparkp);
+ *to_sparkp++ = *sparkp;
+ if (to_sparkp == pool->lim) {
+ to_sparkp = pool->base;
+ }
+ n++;
+ } else {
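+	    // the spark's closure no longer SHOULD_SPARK (e.g. it has already
+	    // been evaluated), so prune it rather than keep it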
+ pruned_sparks++;
+ }
+ sparkp++;
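+	// the spark pool is a circular buffer: wrap around at pool->lim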
+ if (sparkp == pool->lim) {
+ sparkp = pool->base;
+ }
+ }
+ pool->tl = to_sparkp;
+
+ PAR_TICKY_MARK_SPARK_QUEUE_END(n);
+
+#if defined(PARALLEL_HASKELL)
+ debugTrace(DEBUG_sched,
+ "marked %d sparks and pruned %d sparks on [%x]",
+ n, pruned_sparks, mytid);
+#else
+ debugTrace(DEBUG_sched,
+ "marked %d sparks and pruned %d sparks",
+ n, pruned_sparks);
+#endif
+
+ debugTrace(DEBUG_sched,
+ "new spark queue len=%d; (hd=%p; tl=%p)\n",
+ sparkPoolSize(pool), pool->hd, pool->tl);
+}
+#endif
+
/* ---------------------------------------------------------------------------
Where are the roots that we know about?
------------------------------------------------------------------------ */
-/* This has to be protected either by the scheduler monitor, or by the
- garbage collection monitor (probably the latter).
- KH @ 25/10/99
-*/
-
void
GetRoots( evac_fn evac )
{
Capability *cap;
Task *task;
- for (i = 0; i < n_capabilities; i++) {
+    // Each GC thread is responsible for following roots from the
+    // Capability of the same number.  There will usually be the same
+    // number of Capabilities as GC threads, or fewer, but just in case
+    // there are more, we mark every Capability whose number is the GC
+    // thread's index plus a multiple of the number of GC threads.
+ for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) {
cap = &capabilities[i];
evac((StgClosure **)(void *)&cap->run_queue_hd);
evac((StgClosure **)(void *)&cap->run_queue_tl);
evac((StgClosure **)(void *)&task->suspended_tso);
}
+#if defined(THREADED_RTS)
+ markSparkQueue(evac,cap);
+#endif
}
#if !defined(THREADED_RTS)
evac((StgClosure **)(void *)&blocked_queue_tl);
evac((StgClosure **)(void *)&sleeping_queue);
#endif
-
- // evac((StgClosure **)&blackhole_queue);
-
-#if defined(THREADED_RTS)
- markSparkQueue(evac);
-#endif
-
-#if defined(RTS_USER_SIGNALS)
- // mark the signal handlers (signals should be already blocked)
- markSignalHandlers(evac);
-#endif
}
/* -----------------------------------------------------------------------------
init_gc_thread(t);
+#ifdef USE_PAPI
+ t->papi_events = -1;
+#endif
+
t->steps = stgMallocBytes(RtsFlags.GcFlags.generations *
sizeof(step_workspace *),
"initialise_gc_thread");
// GarbageCollect(), or this is a worker thread and the main
// thread bumped gc_running_threads before waking us up.
+ // Every thread evacuates some roots.
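+    // (evac_step == 0 means the roots may point into any step, so
+    // evacuation is not restricted to a particular destination step)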
+ gct->evac_step = 0;
+ GetRoots(mark_root);
+
loop:
scavenge_loop();
// scavenge_loop() only exits when there's no work to do
gct->wakeup = rtsFalse;
if (gct->exit) break;
+#ifdef USE_PAPI
+ // start performance counters in this thread...
+ if (gct->papi_events == -1) {
+ papi_init_eventset(&gct->papi_events);
+ }
+ papi_thread_start_gc1_count(gct->papi_events);
+#endif
+
gc_thread_work();
+
+#ifdef USE_PAPI
+ // count events in this thread towards the GC totals
+ papi_thread_stop_gc1_count(gct->papi_events);
+#endif
}
}
#endif
ws->buffer_todo_bd = NULL;
gc_alloc_todo_block(ws);
- // allocate a block for "already scavenged" objects. This goes
- // on the front of the stp->blocks list, so it won't be
- // traversed by the scavenging sweep.
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
- gc_alloc_scavd_block(ws);
}
}
}
ws->buffer_todo_bd = NULL;
ws->todo_large_objects = NULL;
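+	    // start with an empty list of already-scavenged blocks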
+ ws->scavd_list = NULL;
+ ws->n_scavd_blocks = 0;
+
// If the block at the head of the list in this generation
// is less than 3/4 full, then use it as a todo block.
- if (isPartiallyFull(stp->blocks))
+ if (stp->blocks && isPartiallyFull(stp->blocks))
{
ws->todo_bd = stp->blocks;
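+	    // allocate into this block starting from its current free
+	    // pointer, up to the end of the block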
+ ws->todo_free = ws->todo_bd->free;
+ ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
stp->blocks = stp->blocks->link;
stp->n_blocks -= 1;
ws->todo_bd->link = NULL;
ws->todo_bd = NULL;
gc_alloc_todo_block(ws);
}
-
- // Do the same trick for the scavd block
- if (isPartiallyFull(stp->blocks))
- {
- ws->scavd_list = stp->blocks;
- stp->blocks = stp->blocks->link;
- stp->n_blocks -= 1;
- ws->scavd_list->link = NULL;
- ws->n_scavd_blocks = 1;
- // subtract the contents of this block from the stats,
- // because we'll count the whole block later.
- scavd_copied -= ws->scavd_list->free - ws->scavd_list->start;
- }
- else
- {
- ws->scavd_list = NULL;
- ws->n_scavd_blocks = 0;
- gc_alloc_scavd_block(ws);
- }
}
}
static void
init_gc_thread (gc_thread *t)
{
- t->evac_gen = 0;
+ t->evac_step = 0;
t->failed_to_evac = rtsFalse;
t->eager_promotion = rtsTrue;
t->thunk_selector_depth = 0;
static void
mark_root(StgClosure **root)
{
- *root = evacuate(*root);
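+    // evacuate() now takes the address of the root and updates it in place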
+ evacuate(root);
}
/* -----------------------------------------------------------------------------