+ return rtsFalse;
+}
+
+static void
+scavenge_until_all_done (void)
+{
+ nat r;
+
+loop:
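+ // scavenge_loop1() is the variant of the scavenging code compiled
+ // without parallel-GC support; when only one GC thread is running
+ // we can use it to skip the atomic operations that parallel
+ // evacuation/scavenging would otherwise need.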
+#if defined(THREADED_RTS)
+ if (n_gc_threads > 1) {
+ scavenge_loop();
+ } else {
+ scavenge_loop1();
+ }
+#else
+ scavenge_loop();
+#endif
+
+ collect_gct_blocks();
+
+ // scavenge_loop() only exits when there's no work to do
+ r = dec_running();
+
+ traceEventGcIdle(gct->cap);
+
+ debugTrace(DEBUG_gc, "%d GC threads still running", r);
+
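+ // Termination protocol: a thread may only finish when
+ // gc_running_threads has dropped to zero, i.e. when every thread
+ // has run out of work. If work reappears while other threads are
+ // still running, we re-register with inc_running() and rejoin the
+ // scavenging loop above.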
+ while (gc_running_threads != 0) {
+ // usleep(1);
+ // any_work() does not remove the work from the queue, it
+ // just checks for the presence of work. If we find any,
+ // then we increment gc_running_threads and go back to
+ // scavenge_loop() to perform any pending work.
+ if (any_work()) {
+ inc_running();
+ traceEventGcWork(gct->cap);
+ goto loop;
+ }
+ }
+
+ traceEventGcDone(gct->cap);
+}
+
+#if defined(THREADED_RTS)
+
+void
+gcWorkerThread (Capability *cap)
+{
+ gc_thread *saved_gct;
+
+ // necessary if we stole a callee-saves register for gct:
+ saved_gct = gct;
+
+ gct = gc_threads[cap->no];
+ gct->id = osThreadId();
+
+ stat_gcWorkerThreadStart(gct);
+
+ // Wait until we're told to wake up
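+ // The main GC thread holds our gc_spin lock while we hold our
+ // mut_spin lock. Releasing mut_spin signals that we have reached
+ // the rendezvous; blocking on gc_spin then parks this thread until
+ // wakeup_gc_threads() performs the opposite lock exchange.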
+ RELEASE_SPIN_LOCK(&gct->mut_spin);
+ gct->wakeup = GC_THREAD_STANDING_BY;
+ debugTrace(DEBUG_gc, "GC thread %d standing by...", gct->thread_index);
+ ACQUIRE_SPIN_LOCK(&gct->gc_spin);
+
+#ifdef USE_PAPI
+ // start performance counters in this thread...
+ if (gct->papi_events == -1) {
+ papi_init_eventset(&gct->papi_events);
+ }
+ papi_thread_start_gc1_count(gct->papi_events);
+#endif
+
+ init_gc_thread(gct);
+
+ traceEventGcWork(gct->cap);
+
+ // Every thread evacuates some roots.
+ gct->evac_gen_no = 0;
+ markCapability(mark_root, gct, cap, rtsTrue/*prune sparks*/);
+ scavenge_capability_mut_lists(cap);
+
+ scavenge_until_all_done();
+
+ // Now that the whole heap is marked, we discard any sparks that
+ // were found to be unreachable. The main GC thread is currently
+ // marking the heap reachable via weak pointers, so it is
+ // non-deterministic whether a spark will be retained if it is
+ // only reachable via weak pointers. To fix this problem would
+ // require another GC barrier, which is too high a price.
+ pruneSparkQueue(cap);
+
+#ifdef USE_PAPI
+ // count events in this thread towards the GC totals
+ papi_thread_stop_gc1_count(gct->papi_events);
+#endif
+
+ // Wait until we're told to continue
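+ // Mirror of the startup handshake: releasing gc_spin signals that
+ // this thread is finished, and blocking on mut_spin (held by the
+ // main GC thread) parks us until releaseGCThreads() lets us go.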
+ RELEASE_SPIN_LOCK(&gct->gc_spin);
+ gct->wakeup = GC_THREAD_WAITING_TO_CONTINUE;
+ debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
+ gct->thread_index);
+ ACQUIRE_SPIN_LOCK(&gct->mut_spin);
+ debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
+
+ // record the time spent doing GC in the Task structure
+ stat_gcWorkerThreadDone(gct);
+
+ SET_GCT(saved_gct);
+}
+
+#endif
+
+#if defined(THREADED_RTS)
+
+void
+waitForGcThreads (Capability *cap USED_IF_THREADS)
+{
+ const nat n_threads = RtsFlags.ParFlags.nNodes;
+ const nat me = cap->no;
+ nat i, j;
+ rtsBool retry = rtsTrue;
+
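+ // Spin until every other GC thread has reached
+ // GC_THREAD_STANDING_BY: prodCapability() wakes up capabilities
+ // that have not yet responded, and setContextSwitches() asks
+ // running mutator threads to yield at their next safe point.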
+ while(retry) {
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
+ if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+ prodCapability(&capabilities[i], cap->running_task);
+ }
+ }
+ for (j=0; j < 10; j++) {
+ retry = rtsFalse;
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
+ write_barrier();
+ setContextSwitches();
+ if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) {
+ retry = rtsTrue;
+ }
+ }
+ if (!retry) break;
+ yieldThread();
+ }
+ }
+}
+
+#endif // THREADED_RTS
+
+static void
+start_gc_threads (void)
+{
+#if defined(THREADED_RTS)
+ gc_running_threads = 0;
+#endif
+}
+
+static void
+wakeup_gc_threads (nat me USED_IF_THREADS)
+{
+#if defined(THREADED_RTS)
+ nat i;
+
+ if (n_gc_threads == 1) return;
+
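+ // For each worker: account for it in gc_running_threads, then
+ // perform the lock exchange (acquire its mut_spin, release its
+ // gc_spin) that releases the worker from its wait in
+ // gcWorkerThread().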
+ for (i=0; i < n_gc_threads; i++) {
+ if (i == me) continue;
+ inc_running();
+ debugTrace(DEBUG_gc, "waking up gc thread %d", i);
+ if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY)
+ barf("wakeup_gc_threads");
+
+ gc_threads[i]->wakeup = GC_THREAD_RUNNING;
+ ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
+ RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
+ }
+#endif
+}
+
+// After GC is complete, we must wait for all GC threads to enter the
+// waiting-to-continue state, otherwise they may still be executing
+// inside any_work(), and may even remain awake until the next GC starts.
+static void
+shutdown_gc_threads (nat me USED_IF_THREADS)
+{
+#if defined(THREADED_RTS)
+ nat i;
+
+ if (n_gc_threads == 1) return;
+
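+ // Busy-wait until each worker has parked itself. write_barrier()
+ // serves as a compiler barrier here, forcing wakeup to be re-read
+ // from memory on each iteration.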
+ for (i=0; i < n_gc_threads; i++) {
+ if (i == me) continue;
+ while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
+ }
+#endif
+}
+
+#if defined(THREADED_RTS)
+void
+releaseGCThreads (Capability *cap USED_IF_THREADS)
+{
+ const nat n_threads = RtsFlags.ParFlags.nNodes;
+ const nat me = cap->no;
+ nat i;
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
+ if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
+ barf("releaseGCThreads");
+
+ gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
+ ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
+ RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
+ }
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ Initialise a generation that is to be collected
+ ------------------------------------------------------------------------- */
+
+static void
+prepare_collected_gen (generation *gen)
+{
+ nat i, g, n;
+ gen_workspace *ws;
+ bdescr *bd, *next;
+
+ // Throw away the current mutable list. Invariant: the mutable
+ // list always has at least one block; this means we can avoid a
+ // check for NULL in recordMutable().
+ g = gen->no;
+ if (g != 0) {
+ for (i = 0; i < n_capabilities; i++) {
+ freeChain(capabilities[i].mut_lists[g]);
+ capabilities[i].mut_lists[g] = allocBlock();
+ }
+ }
+
+ gen = &generations[g];
+ ASSERT(gen->no == g);
+
+ // We'll construct a new list of threads in this generation
+ // during GC; stash the current list in old_threads for now.
+ gen->old_threads = gen->threads;
+ gen->threads = END_TSO_QUEUE;
+
+ // deprecate the existing blocks
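+ // (these blocks become from-space: live objects are evacuated out
+ // of them during a copying collection, while a compacted generation
+ // is instead marked and then compacted or swept in place)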
+ gen->old_blocks = gen->blocks;
+ gen->n_old_blocks = gen->n_blocks;
+ gen->blocks = NULL;
+ gen->n_blocks = 0;
+ gen->n_words = 0;
+ gen->live_estimate = 0;
+
+ // initialise the large object queues.
+ ASSERT(gen->scavenged_large_objects == NULL);
+ ASSERT(gen->n_scavenged_large_blocks == 0);
+
+ // grab all the partial blocks stashed in the gc_thread workspaces and
+ // move them to the old_blocks list of this gen.
+ for (n = 0; n < n_capabilities; n++) {
+ ws = &gc_threads[n]->gens[gen->no];
+
+ for (bd = ws->part_list; bd != NULL; bd = next) {
+ next = bd->link;
+ bd->link = gen->old_blocks;
+ gen->old_blocks = bd;
+ gen->n_old_blocks += bd->blocks;
+ }
+ ws->part_list = NULL;
+ ws->n_part_blocks = 0;
+
+ ASSERT(ws->scavd_list == NULL);
+ ASSERT(ws->n_scavd_blocks == 0);
+
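+ // If the workspace's current todo block is non-empty, its contents
+ // belong to this generation too: retire it to old_blocks and start
+ // a fresh, empty todo block.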
+ if (ws->todo_free != ws->todo_bd->start) {
+ ws->todo_bd->free = ws->todo_free;
+ ws->todo_bd->link = gen->old_blocks;
+ gen->old_blocks = ws->todo_bd;
+ gen->n_old_blocks += ws->todo_bd->blocks;
+ alloc_todo_block(ws,0); // a workspace always has one todo block.
+ }
+ }
+
+ // mark the small objects as from-space
+ for (bd = gen->old_blocks; bd; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED;
+ }
+
+ // mark the large objects as from-space
+ for (bd = gen->large_objects; bd; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED;
+ }
+
+ // for a compacted generation, we need to allocate the bitmap
+ if (gen->mark) {
+ nat bitmap_size; // in bytes
+ bdescr *bitmap_bdescr;
+ StgWord *bitmap;
+
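+ // One mark bit per word of from-space: n_old_blocks * BLOCK_SIZE
+ // bytes of heap hold (n_old_blocks * BLOCK_SIZE) / sizeof(W_)
+ // words, and each byte of bitmap covers BITS_PER_BYTE of them.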
+ bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
+
+ if (bitmap_size > 0) {
+ bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
+ / BLOCK_SIZE);
+ gen->bitmap = bitmap_bdescr;
+ bitmap = bitmap_bdescr->start;
+
+ debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
+ bitmap_size, bitmap);
+
+ // don't forget to fill it with zeros!
+ memset(bitmap, 0, bitmap_size);
+
+ // For each block in this step, point to its bitmap from the
+ // block descriptor.
+ for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
+ bd->u.bitmap = bitmap;
+ bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
+
+ // Also at this point we set the BF_MARKED flag
+ // for this block. The invariant is that
+ // BF_MARKED is always unset, except during GC
+ // when it is set on those blocks which will be
+ // compacted.
+ if (!(bd->flags & BF_FRAGMENTED)) {
+ bd->flags |= BF_MARKED;
+ }
+
+ // BF_SWEPT should be marked only for blocks that are being
+ // collected in sweep()
+ bd->flags &= ~BF_SWEPT;
+ }
+ }
+ }
+}
+
+
+/* ----------------------------------------------------------------------------
+ Save the mutable lists in saved_mut_lists
+ ------------------------------------------------------------------------- */
+
+static void
+stash_mut_list (Capability *cap, nat gen_no)
+{
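+ // allocBlock_sync() takes the GC's block-allocation spin lock,
+ // since block allocation may be happening concurrently in other
+ // GC threads.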
+ cap->saved_mut_lists[gen_no] = cap->mut_lists[gen_no];
+ cap->mut_lists[gen_no] = allocBlock_sync();
+}
+
+/* ----------------------------------------------------------------------------
+ Initialise a generation that is *not* to be collected
+ ------------------------------------------------------------------------- */
+
+static void
+prepare_uncollected_gen (generation *gen)
+{
+ nat i;
+
+ ASSERT(gen->no > 0);
+
+ // save the current mutable lists for this generation, and
+ // allocate a fresh block for each one. We'll traverse these
+ // mutable lists as roots early on in the GC.
+ for (i = 0; i < n_capabilities; i++) {
+ stash_mut_list(&capabilities[i], gen->no);
+ }
+
+ ASSERT(gen->scavenged_large_objects == NULL);
+ ASSERT(gen->n_scavenged_large_blocks == 0);
+}
+
+/* -----------------------------------------------------------------------------
+ Collect the completed blocks from a GC thread and attach them to
+ the generation.
+ -------------------------------------------------------------------------- */
+
+static void
+collect_gct_blocks (void)
+{
+ nat g;
+ gen_workspace *ws;
+ bdescr *bd, *prev;
+
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ ws = &gct->gens[g];
+
+ // there may still be a block attached to ws->todo_bd;
+ // leave it there to use next time.
+
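+ // gen->blocks and gen->n_words are shared between all GC threads,
+ // so take the generation's sync lock while we prepend our
+ // scavenged blocks.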
+ if (ws->scavd_list != NULL) {
+ ACQUIRE_SPIN_LOCK(&ws->gen->sync);
+
+ ASSERT(gct->scan_bd == NULL);
+ ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
+
+ prev = NULL;
+ for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
+ ws->gen->n_words += bd->free - bd->start;
+ prev = bd;
+ }
+ if (prev != NULL) {
+ prev->link = ws->gen->blocks;
+ ws->gen->blocks = ws->scavd_list;
+ }
+ ws->gen->n_blocks += ws->n_scavd_blocks;
+
+ ws->scavd_list = NULL;
+ ws->n_scavd_blocks = 0;
+
+ RELEASE_SPIN_LOCK(&ws->gen->sync);
+ }