#include "RetainerProfile.h"
#include "RaiseAsync.h"
#include "Sparks.h"
+#include "Papi.h"
#include "GC.h"
#include "Compact.h"
#include "Scav.h"
#include "GCUtils.h"
#include "MarkWeak.h"
+#include "Sparks.h"
#include <string.h> // for memset()
/* Thread-local data for each GC thread
*/
gc_thread *gc_threads = NULL;
-gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
+// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
+
+// Number of threads running in *this* GC. Affects how many
+// step->todos[] lists we have to look in to find work.
+nat n_gc_threads;
// For stats:
long copied; // *words* copied & scavenged during this GC
-long scavd_copied; // *words* copied only during this GC
+
+#ifdef THREADED_RTS
+SpinLock recordMutableGen_sync;
+#endif
/* -----------------------------------------------------------------------------
Static function declarations
static void update_task_list (void);
static void resize_generations (void);
static void resize_nursery (void);
+static void start_gc_threads (void);
+static void gc_thread_work (void);
+static nat inc_running (void);
+static nat dec_running (void);
+static void wakeup_gc_threads (nat n_threads);
#if 0 && defined(DEBUG)
static void gcCAFs (void);
step *stp;
lnat live, allocated;
lnat oldgen_saved_blocks = 0;
- nat n_threads; // number of threads participating in GC
-
+ gc_thread *saved_gct;
nat g, s, t;
+ // necessary if we stole a callee-saves register for gct:
+ saved_gct = gct;
+
#ifdef PROFILING
CostCentreStack *prev_CCS;
#endif
*/
alloc_gc_threads();
+ /* Start threads, so they can be spinning up while we finish initialisation.
+ */
+ start_gc_threads();
+
/* How many threads will be participating in this GC?
* We don't try to parallelise minor GC.
*/
#if defined(THREADED_RTS)
if (N == 0) {
- n_threads = 1;
+ n_gc_threads = 1;
} else {
- n_threads = RtsFlags.ParFlags.gcThreads;
+ n_gc_threads = RtsFlags.ParFlags.gcThreads;
}
#else
- n_threads = 1;
+ n_gc_threads = 1;
#endif
#ifdef RTS_GTK_FRONTPANEL
*/
static_objects = END_OF_STATIC_LIST;
scavenged_static_objects = END_OF_STATIC_LIST;
+
#ifdef THREADED_RTS
initSpinLock(&static_objects_sync);
+ initSpinLock(&recordMutableGen_sync);
+ initSpinLock(&gc_alloc_block_sync);
#endif
// Initialise all the generations/steps that we're collecting.
for (g = 0; g <= N; g++) {
- init_collected_gen(g,n_threads);
+ init_collected_gen(g,n_gc_threads);
}
// Initialise all the generations/steps that we're *not* collecting.
for (g = N+1; g < RtsFlags.GcFlags.generations; g++) {
- init_uncollected_gen(g,n_threads);
+ init_uncollected_gen(g,n_gc_threads);
}
/* Allocate a mark stack if we're doing a major collection.
}
// Initialise all our gc_thread structures
- for (t = 0; t < n_threads; t++) {
+ for (t = 0; t < n_gc_threads; t++) {
init_gc_thread(&gc_threads[t]);
}
+ // the main thread is running: this prevents any other threads from
+ // exiting prematurely, so we can start them now.
+ inc_running();
+ wakeup_gc_threads(n_gc_threads);
+
// Initialise stats
copied = 0;
- scavd_copied = 0;
- // start threads etc.
- // For now, we just have one thread, and set gct to gc_threads[0]
+ // this is the main thread
gct = &gc_threads[0];
/* -----------------------------------------------------------------------
}
// follow roots from the CAF list (used by GHCi)
- gct->evac_gen = 0;
+ gct->evac_step = 0;
markCAFs(mark_root);
// follow all the roots that the application knows about.
- gct->evac_gen = 0;
+ gct->evac_step = 0;
GetRoots(mark_root);
// Mark the weak pointer list, and prepare to detect dead weak pointers.
* Repeatedly scavenge all the areas we know about until there's no
* more scavenging to be done.
*/
- {
- rtsBool flag;
- loop:
- flag = rtsFalse;
-
- scavenge_loop();
-
- // if any blackholes are alive, make the threads that wait on
- // them alive too.
- if (traverseBlackholeQueue())
- flag = rtsTrue;
-
- if (flag) { goto loop; }
+ for (;;)
+ {
+ gc_thread_work();
+ // The other threads are now stopped. We might recurse back to
+ // here, but from now on this is the only thread.
+
+ // if any blackholes are alive, make the threads that wait on
+ // them alive too.
+ if (traverseBlackholeQueue()) {
+ inc_running();
+ continue;
+ }
+
+ // must be last... invariant is that everything is fully
+ // scavenged at this point.
+ if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
+ inc_running();
+ continue;
+ }
- // must be last... invariant is that everything is fully
- // scavenged at this point.
- if (traverseWeakPtrList()) { // returns rtsTrue if evaced something
- goto loop;
- }
+ // If we get to here, there's really nothing left to do.
+ break;
}
// Update pointers from the Task list
step_workspace *ws;
bdescr *prev;
- for (t = 0; t < n_threads; t++) {
+ for (t = 0; t < n_gc_threads; t++) {
thr = &gc_threads[t];
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
ws = &thr->steps[g][s];
if (g==0 && s==0) continue;
- ASSERT( ws->scan_bd == ws->todo_bd );
+ // Not true?
+ // ASSERT( ws->scan_bd == ws->todo_bd );
ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
// Push the final block
if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
- // update stats: we haven't counted the block at the
- // front of the scavd_list yet.
- scavd_copied += ws->scavd_list->free - ws->scavd_list->start;
-
ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
prev = ws->scavd_list;
#endif
// ok, GC over: tell the stats department what happened.
- stat_endGC(allocated, live, copied, scavd_copied, N);
+ stat_endGC(allocated, live, copied, N);
#if defined(RTS_USER_SIGNALS)
if (RtsFlags.MiscFlags.install_signal_handlers) {
#endif
RELEASE_SM_LOCK;
+
+ gct = saved_gct;
}
/* ---------------------------------------------------------------------------
Capability *cap;
Task *task;
-#if defined(GRAN)
- for (i=0; i<=RtsFlags.GranFlags.proc; i++) {
- if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL)))
- evac((StgClosure **)&run_queue_hds[i]);
- if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL)))
- evac((StgClosure **)&run_queue_tls[i]);
-
- if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL)))
- evac((StgClosure **)&blocked_queue_hds[i]);
- if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL)))
- evac((StgClosure **)&blocked_queue_tls[i]);
- if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL)))
- evac((StgClosure **)&ccalling_threads[i]);
- }
-
- markEventQueue();
-
-#else /* !GRAN */
-
for (i = 0; i < n_capabilities; i++) {
cap = &capabilities[i];
evac((StgClosure **)(void *)&cap->run_queue_hd);
}
-
#if !defined(THREADED_RTS)
evac((StgClosure **)(void *)&blocked_queue_hd);
evac((StgClosure **)(void *)&blocked_queue_tl);
evac((StgClosure **)(void *)&sleeping_queue);
#endif
-#endif
// evac((StgClosure **)&blackhole_queue);
-#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) || defined(GRAN)
+#if defined(THREADED_RTS)
markSparkQueue(evac);
#endif
nat g, s;
step_workspace *ws;
+#ifdef THREADED_RTS
+ t->id = 0;
+ initCondition(&t->wake_cond);
+ initMutex(&t->wake_mutex);
+ t->wakeup = rtsFalse;
+ t->exit = rtsFalse;
+#endif
+
t->thread_index = n;
t->free_blocks = NULL;
t->gc_count = 0;
init_gc_thread(t);
+#ifdef USE_PAPI
+ t->papi_events = -1;
+#endif
+
t->steps = stgMallocBytes(RtsFlags.GcFlags.generations *
sizeof(step_workspace *),
"initialise_gc_thread");
if (gc_threads == NULL) {
#if defined(THREADED_RTS)
nat i;
-
gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
sizeof(gc_thread),
"alloc_gc_threads");
}
/* ----------------------------------------------------------------------------
+ Start GC threads
+ ------------------------------------------------------------------------- */
+
+static nat gc_running_threads;
+
+#if defined(THREADED_RTS)
+static Mutex gc_running_mutex;
+#endif
+
+// Atomically bump the count of GC threads currently doing work
+// (gc_running_threads) and return the new count.  The count is
+// protected by gc_running_mutex (presumably compiled away in a
+// non-THREADED_RTS build — confirm ACQUIRE_LOCK is a no-op there).
+static nat
+inc_running (void)
+{
+    nat n_running;
+    ACQUIRE_LOCK(&gc_running_mutex);
+    n_running = ++gc_running_threads;
+    RELEASE_LOCK(&gc_running_mutex);
+    return n_running;
+}
+
+// Atomically decrement the count of GC threads currently doing work
+// and return the new count.  A return value of 0 means this was the
+// last running thread; see gc_thread_work() for how that is used to
+// detect GC termination.
+static nat
+dec_running (void)
+{
+    nat n_running;
+    ACQUIRE_LOCK(&gc_running_mutex);
+    n_running = --gc_running_threads;
+    RELEASE_LOCK(&gc_running_mutex);
+    return n_running;
+}
+
+//
+// gc_thread_work(): Scavenge until there's no work left to do and all
+// the running threads are idle.  Called by both the main GC thread
+// (from GarbageCollect) and by worker threads (from
+// gc_thread_mainloop).  Returns only when gc_running_threads has
+// dropped to zero, i.e. every participating thread is idle.
+//
+static void
+gc_thread_work (void)
+{
+    nat r;
+
+    debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
+
+    // gc_running_threads has already been incremented for us; either
+    // this is the main thread and we incremented it inside
+    // GarbageCollect(), or this is a worker thread and the main
+    // thread bumped gc_running_threads before waking us up.
+
+loop:
+    scavenge_loop();
+    // scavenge_loop() only exits when there's no work to do
+    r = dec_running();
+
+    debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
+               gct->thread_index, r);
+
+    // Spin until either new work appears or every thread has gone
+    // idle.  any_work() does not remove the work from the queue, it
+    // just checks for the presence of work.  If we find any, we
+    // re-register as running and go back to scavenge_loop() to
+    // perform the pending work.
+    //
+    // NOTE(review): gc_running_threads is read here without holding
+    // gc_running_mutex — presumably safe for a word-sized read on the
+    // supported platforms, but worth confirming.
+    while (gc_running_threads != 0) {
+        if (any_work()) {
+            inc_running();
+            goto loop;
+        }
+    }
+
+    // All threads are now stopped
+    debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
+}
+
+
+#if defined(THREADED_RTS)
+// Top-level loop for a GC worker thread: sleep on the per-thread
+// condition variable until woken by wakeup_gc_threads(), do one round
+// of GC work, repeat until gct->exit is set.  Never returns work of
+// its own; all results are communicated through the shared GC
+// structures.
+static void
+gc_thread_mainloop (void)
+{
+    while (!gct->exit) {
+
+	// Wait until we're told to wake up
+	ACQUIRE_LOCK(&gct->wake_mutex);
+	while (!gct->wakeup) {
+	    debugTrace(DEBUG_gc, "GC thread %d standing by...",
+		       gct->thread_index);
+	    waitCondition(&gct->wake_cond, &gct->wake_mutex);
+	}
+	RELEASE_LOCK(&gct->wake_mutex);
+	// NOTE(review): wakeup is cleared after releasing wake_mutex;
+	// a signal arriving in that window could presumably be lost —
+	// confirm the main thread cannot signal again before this GC
+	// round completes.
+	gct->wakeup = rtsFalse;
+	if (gct->exit) break;
+
+#ifdef USE_PAPI
+	// start performance counters in this thread...
+	if (gct->papi_events == -1) {
+	    papi_init_eventset(&gct->papi_events);
+	}
+	papi_thread_start_gc_count(gct->papi_events);
+#endif
+
+	gc_thread_work();
+
+#ifdef USE_PAPI
+	// count events in this thread towards the GC totals
+	papi_thread_stop_gc_count(gct->papi_events);
+#endif
+    }
+}
+#endif
+
+#if defined(THREADED_RTS)
+// OS-thread entry point for a GC worker.  Installs this thread's
+// gc_thread structure into the (per-thread) gct variable, records the
+// OS thread id, then enters the worker main loop; does not return
+// until the worker is told to exit.
+static void
+gc_thread_entry (gc_thread *my_gct)
+{
+    gct = my_gct;
+    debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
+    gct->id = osThreadId();
+    gc_thread_mainloop();
+}
+#endif
+
+// Called at the start of every GC.  Resets the running-thread count
+// and, on the first call only, spawns the GC worker OS threads
+// (indices 1..gcThreads-1; index 0 is the main thread).  The workers
+// immediately block in gc_thread_mainloop() waiting to be woken.
+//
+// NOTE(review): gc_running_mutex is re-initialised on *every* call,
+// not just the first — confirm initMutex is idempotent / safe while
+// no thread can be holding the lock.
+static void
+start_gc_threads (void)
+{
+#if defined(THREADED_RTS)
+    nat i;
+    OSThreadId id;
+    static rtsBool done = rtsFalse;
+
+    gc_running_threads = 0;
+    initMutex(&gc_running_mutex);
+
+    if (!done) {
+	// Start from 1: the main thread is 0
+	for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
+	    createOSThread(&id, (OSThreadProc*)&gc_thread_entry, 
+			   &gc_threads[i]);
+	}
+	done = rtsTrue;
+    }
+#endif
+}
+
+// Wake worker threads 1..n_threads-1 to participate in this GC.
+// Each worker is counted via inc_running() *before* it is signalled,
+// so gc_running_threads cannot reach zero (ending the GC) before a
+// woken worker has had a chance to run.  Thread 0 is the main thread
+// and is not woken here.
+static void
+wakeup_gc_threads (nat n_threads USED_IF_THREADS)
+{
+#if defined(THREADED_RTS)
+    nat i;
+    for (i=1; i < n_threads; i++) {
+	inc_running();
+	ACQUIRE_LOCK(&gc_threads[i].wake_mutex);
+	gc_threads[i].wakeup = rtsTrue;
+	signalCondition(&gc_threads[i].wake_cond);
+	RELEASE_LOCK(&gc_threads[i].wake_mutex);
+    }
+#endif
+}
+
+/* ----------------------------------------------------------------------------
Initialise a generation that is to be collected
------------------------------------------------------------------------- */
ws->buffer_todo_bd = NULL;
gc_alloc_todo_block(ws);
- // allocate a block for "already scavenged" objects. This goes
- // on the front of the stp->blocks list, so it won't be
- // traversed by the scavenging sweep.
ws->scavd_list = NULL;
ws->n_scavd_blocks = 0;
- gc_alloc_scavd_block(ws);
}
}
}
ws->buffer_todo_bd = NULL;
ws->todo_large_objects = NULL;
+ ws->scavd_list = NULL;
+ ws->n_scavd_blocks = 0;
+
// If the block at the head of the list in this generation
// is less than 3/4 full, then use it as a todo block.
if (isPartiallyFull(stp->blocks))
ws->todo_bd = NULL;
gc_alloc_todo_block(ws);
}
-
- // Do the same trick for the scavd block
- if (isPartiallyFull(stp->blocks))
- {
- ws->scavd_list = stp->blocks;
- stp->blocks = stp->blocks->link;
- stp->n_blocks -= 1;
- ws->scavd_list->link = NULL;
- ws->n_scavd_blocks = 1;
- // subtract the contents of this block from the stats,
- // because we'll count the whole block later.
- scavd_copied -= ws->scavd_list->free - ws->scavd_list->start;
- }
- else
- {
- ws->scavd_list = NULL;
- ws->n_scavd_blocks = 0;
- gc_alloc_scavd_block(ws);
- }
}
}
static void
init_gc_thread (gc_thread *t)
{
- t->evac_gen = 0;
+ t->evac_step = 0;
t->failed_to_evac = rtsFalse;
t->eager_promotion = rtsTrue;
t->thunk_selector_depth = 0;
static void
mark_root(StgClosure **root)
{
-    *root = evacuate(*root);
+    // evacuate() now takes the address of the root pointer and
+    // updates it in place, rather than returning the new location.
+    evacuate(root);
}
/* -----------------------------------------------------------------------------