+/* -----------------------------------------------------------------------------
+ Figure out which generation to collect, initialise N and major_gc.
+
+ Also returns the total number of blocks in generations that will be
+ collected.
+ -------------------------------------------------------------------------- */
+
+static nat
+initialise_N (rtsBool force_major_gc)
+{
+ int g;
+ nat s, blocks, blocks_total;
+
+ blocks = 0;
+ blocks_total = 0;
+
+ // A forced major GC collects every generation; otherwise start by
+ // assuming we collect only generation 0 and raise N below as needed.
+ if (force_major_gc) {
+ N = RtsFlags.GcFlags.generations - 1;
+ } else {
+ N = 0;
+ }
+
+ // NB. this loop must run from the oldest generation downwards: when
+ // generation g is found to be full, N is raised to g, which makes the
+ // (g <= N) test below true for every *younger* generation visited
+ // afterwards.  Iterating upwards would fail to count younger
+ // generations lying below a full older one.
+ for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) {
+ blocks = 0;
+ for (s = 0; s < generations[g].n_steps; s++) {
+ blocks += generations[g].steps[s].n_blocks;
+ blocks += generations[g].steps[s].n_large_blocks;
+ }
+ // a generation that has grown past its limit must be collected
+ if (blocks >= generations[g].max_blocks) {
+ N = stg_max(N,g);
+ }
+ // only count blocks in the generations we will actually collect
+ if ((nat)g <= N) {
+ blocks_total += blocks;
+ }
+ }
+
+ blocks_total += countNurseryBlocks();
+
+ // it is a major GC iff the oldest generation is being collected
+ major_gc = (N == RtsFlags.GcFlags.generations-1);
+ return blocks_total;
+}
+
+/* -----------------------------------------------------------------------------
+ Initialise the gc_thread structures.
+ -------------------------------------------------------------------------- */
+
+// Allocate and initialise one gc_thread structure for GC thread number
+// 'n'.  The per-step workspaces (total_steps of them) are allocated in
+// the same chunk, immediately after the gc_thread itself, and each is
+// reset to an empty state.
+static gc_thread *
+alloc_gc_thread (int n)
+{
+ nat s;
+ step_workspace *ws;
+ gc_thread *t;
+
+ t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
+ "alloc_gc_thread");
+
+#ifdef THREADED_RTS
+ // synchronisation state used to put this worker to sleep between GCs
+ t->id = 0;
+ initCondition(&t->wake_cond);
+ initMutex(&t->wake_mutex);
+ t->wakeup = rtsFalse;
+ t->exit = rtsFalse;
+#endif
+
+ t->thread_index = n;
+ t->free_blocks = NULL;
+ t->gc_count = 0;
+
+ init_gc_thread(t);
+
+#ifdef USE_PAPI
+ // -1 marks the PAPI event set as not-yet-created; it is initialised
+ // lazily the first time this thread takes part in a GC
+ t->papi_events = -1;
+#endif
+
+ // one workspace per step, all starting out empty
+ for (s = 0; s < total_steps; s++)
+ {
+ ws = &t->steps[s];
+ ws->step = &all_steps[s];
+ ASSERT(s == ws->step->abs_no);
+ ws->gct = t;
+
+ ws->scan_bd = NULL;
+
+ ws->todo_bd = NULL;
+ ws->buffer_todo_bd = NULL;
+
+ ws->part_list = NULL;
+ ws->n_part_blocks = 0;
+
+ ws->scavd_list = NULL;
+ ws->n_scavd_blocks = 0;
+ }
+
+ return t;
+}
+
+
+// Allocate the global gc_threads[] array.  In the threaded RTS we get
+// one gc_thread per requested GC thread; otherwise there is only the
+// single thread 0.  Idempotent: does nothing if the array already
+// exists.
+static void
+alloc_gc_threads (void)
+{
+ if (gc_threads == NULL) {
+#if defined(THREADED_RTS)
+ nat i;
+ gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
+ sizeof(gc_thread*),
+ "alloc_gc_threads");
+
+ for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
+ gc_threads[i] = alloc_gc_thread(i);
+ }
+#else
+ gc_threads = stgMallocBytes (sizeof(gc_thread*),
+ "alloc_gc_threads");
+
+ gc_threads[0] = alloc_gc_thread(0);
+#endif
+ }
+}
+
+/* ----------------------------------------------------------------------------
+ Start GC threads
+ ------------------------------------------------------------------------- */
+
+// Count of GC threads currently running (i.e. not idle); protected by
+// gc_running_mutex in the threaded RTS.  GC termination is detected
+// when this count drops to zero.
+static nat gc_running_threads;
+
+#if defined(THREADED_RTS)
+static Mutex gc_running_mutex;
+#endif
+
+// Bump the count of running GC threads under gc_running_mutex and
+// return the new count; it can never exceed the number of GC threads.
+static nat
+inc_running (void)
+{
+    nat count;
+
+    ACQUIRE_LOCK(&gc_running_mutex);
+    count = ++gc_running_threads;
+    RELEASE_LOCK(&gc_running_mutex);
+
+    ASSERT(count <= n_gc_threads);
+    return count;
+}
+
+// Decrement the count of running GC threads under gc_running_mutex
+// and return the new count.
+static nat
+dec_running (void)
+{
+    nat count;
+
+    ACQUIRE_LOCK(&gc_running_mutex);
+    ASSERT(n_gc_threads != 0);
+    count = --gc_running_threads;
+    RELEASE_LOCK(&gc_running_mutex);
+
+    return count;
+}
+
+//
+// gc_thread_work(): Scavenge until there's no work left to do and all
+// the running threads are idle.
+//
+static void
+gc_thread_work (void)
+{
+ nat r;
+
+ debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
+
+ // gc_running_threads has already been incremented for us; either
+ // this is the main thread and we incremented it inside
+ // GarbageCollect(), or this is a worker thread and the main
+ // thread bumped gc_running_threads before waking us up.
+
+ // Every thread evacuates some roots.
+ gct->evac_step = 0;
+ GetRoots(mark_root);
+
+loop:
+ scavenge_loop();
+ // scavenge_loop() only exits when there's no work to do
+ r = dec_running();
+
+ debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
+ gct->thread_index, r);
+
+ // Termination protocol: spin until every GC thread is idle.  While
+ // another thread is still running it may push work our way, so keep
+ // polling any_work().  NOTE(review): this is a busy wait (usleep(1))
+ // rather than a condition variable -- presumably a latency
+ // trade-off; confirm before changing.
+ while (gc_running_threads != 0) {
+ usleep(1);
+ if (any_work()) {
+ inc_running();
+ goto loop;
+ }
+ // any_work() does not remove the work from the queue, it
+ // just checks for the presence of work. If we find any,
+ // then we increment gc_running_threads and go back to
+ // scavenge_loop() to perform any pending work.
+ }
+
+ // All threads are now stopped
+ debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
+}
+
+
+#if defined(THREADED_RTS)
+// Main loop of a GC worker OS thread: sleep on our condition variable
+// until woken by wakeup_gc_threads(), do one GC's worth of work, and
+// repeat until told to exit.
+static void
+gc_thread_mainloop (void)
+{
+ while (!gct->exit) {
+
+ // Wait until we're told to wake up
+ ACQUIRE_LOCK(&gct->wake_mutex);
+ // clearing the flag (under wake_mutex) is how we signal to
+ // shutdown_gc_threads() that we have reached the standby state
+ gct->wakeup = rtsFalse;
+ while (!gct->wakeup) {
+ debugTrace(DEBUG_gc, "GC thread %d standing by...",
+ gct->thread_index);
+ waitCondition(&gct->wake_cond, &gct->wake_mutex);
+ }
+ RELEASE_LOCK(&gct->wake_mutex);
+ if (gct->exit) break;
+
+#ifdef USE_PAPI
+ // start performance counters in this thread...
+ if (gct->papi_events == -1) {
+ papi_init_eventset(&gct->papi_events);
+ }
+ papi_thread_start_gc1_count(gct->papi_events);
+#endif
+
+ gc_thread_work();
+
+#ifdef USE_PAPI
+ // count events in this thread towards the GC totals
+ papi_thread_stop_gc1_count(gct->papi_events);
+#endif
+ }
+}
+#endif
+
+#if defined(THREADED_RTS)
+// OS-thread entry point for a GC worker: publish our gc_thread in the
+// thread-local gct, record our OS thread id, then enter the main loop.
+static void
+gc_thread_entry (gc_thread *self)
+{
+    gct = self;
+    gct->id = osThreadId();
+    debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index);
+    gc_thread_mainloop();
+}
+#endif
+
+// Create the GC worker OS threads (threaded RTS only).  Thread 0 is
+// the main thread and is not created here, so workers start at index
+// 1.  The workers are created only once (guarded by 'done');
+// gc_running_threads and its mutex are reset on every call.
+static void
+start_gc_threads (void)
+{
+#if defined(THREADED_RTS)
+ nat i;
+ OSThreadId id;
+ static rtsBool done = rtsFalse;
+
+ gc_running_threads = 0;
+ // NOTE(review): initMutex() runs on every call, not just the first;
+ // harmless only if re-initialising the mutex is safe -- confirm.
+ initMutex(&gc_running_mutex);
+
+ if (!done) {
+ // Start from 1: the main thread is 0
+ for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
+ createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
+ gc_threads[i]);
+ }
+ done = rtsTrue;
+ }
+#endif
+}
+
+// Wake up the worker GC threads (1..n_threads-1) at the start of a GC.
+// The main thread (0) is not woken here; it runs gc_thread_work()
+// itself.
+static void
+wakeup_gc_threads (nat n_threads USED_IF_THREADS)
+{
+#if defined(THREADED_RTS)
+ nat i;
+ for (i=1; i < n_threads; i++) {
+ // bump the running count *before* waking the thread, so that
+ // gc_running_threads cannot drop to zero while a woken thread is
+ // still on its way into gc_thread_work()
+ inc_running();
+ ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
+ gc_threads[i]->wakeup = rtsTrue;
+ signalCondition(&gc_threads[i]->wake_cond);
+ RELEASE_LOCK(&gc_threads[i]->wake_mutex);
+ }
+#endif
+}
+
+// After GC is complete, we must wait for all GC threads to enter the
+// standby state, otherwise they may still be executing inside
+// any_work(), and may even remain awake until the next GC starts.
+static void
+shutdown_gc_threads (nat n_threads USED_IF_THREADS)
+{
+#if defined(THREADED_RTS)
+ nat i;
+ rtsBool wakeup;
+ // spin until each worker has cleared its wakeup flag, which it does
+ // (under wake_mutex) only once it is back in gc_thread_mainloop()
+ // waiting on its condition variable
+ for (i=1; i < n_threads; i++) {
+ do {
+ ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
+ wakeup = gc_threads[i]->wakeup;
+ // wakeup is false while the thread is waiting
+ RELEASE_LOCK(&gc_threads[i]->wake_mutex);
+ } while (wakeup);
+ }
+#endif
+}
+
+/* ----------------------------------------------------------------------------
+ Initialise a generation that is to be collected
+ ------------------------------------------------------------------------- */
+
+// Prepare generation g for collection, for n_threads GC threads:
+// reset its mutable lists, turn the current blocks of each step into
+// from-space ("old_blocks"), reset the to-be-scavenged and large
+// object bookkeeping, allocate mark bitmaps for compacted steps, and
+// give every GC thread's workspace an initial to-space (todo) block.
+static void
+init_collected_gen (nat g, nat n_threads)
+{
+ nat s, t, i;
+ step_workspace *ws;
+ step *stp;
+ bdescr *bd;
+
+ // Throw away the current mutable list. Invariant: the mutable
+ // list always has at least one block; this means we can avoid a
+ // check for NULL in recordMutable().
+ if (g != 0) {
+ freeChain(generations[g].mut_list);
+ generations[g].mut_list = allocBlock();
+ // each capability also has a private mutable list; reset those too
+ for (i = 0; i < n_capabilities; i++) {
+ freeChain(capabilities[i].mut_lists[g]);
+ capabilities[i].mut_lists[g] = allocBlock();
+ }
+ }
+
+ for (s = 0; s < generations[g].n_steps; s++) {
+
+ // generation 0, step 0 doesn't need to-space
+ if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+
+ stp = &generations[g].steps[s];
+ ASSERT(stp->gen_no == g);
+
+ // deprecate the existing blocks: they become from-space, and the
+ // step starts the GC with an empty to-space
+ stp->old_blocks = stp->blocks;
+ stp->n_old_blocks = stp->n_blocks;
+ stp->blocks = NULL;
+ stp->n_blocks = 0;
+
+ // we don't have any to-be-scavenged blocks yet
+ stp->todos = NULL;
+ stp->todos_last = NULL;
+ stp->n_todos = 0;
+
+ // initialise the large object queues.
+ stp->scavenged_large_objects = NULL;
+ stp->n_scavenged_large_blocks = 0;
+
+ // mark the large objects as not evacuated yet
+ for (bd = stp->large_objects; bd; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED;
+ }
+
+ // for a compacted step, we need to allocate the bitmap
+ if (stp->is_compacted) {
+ nat bitmap_size; // in bytes
+ bdescr *bitmap_bdescr;
+ StgWord *bitmap;
+
+ // one mark bit per word of from-space
+ bitmap_size = stp->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
+
+ if (bitmap_size > 0) {
+ bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
+ / BLOCK_SIZE);
+ stp->bitmap = bitmap_bdescr;
+ bitmap = bitmap_bdescr->start;
+
+ debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
+ bitmap_size, bitmap);
+
+ // don't forget to fill it with zeros!
+ memset(bitmap, 0, bitmap_size);
+
+ // For each block in this step, point to its bitmap from the
+ // block descriptor.
+ for (bd=stp->old_blocks; bd != NULL; bd = bd->link) {
+ bd->u.bitmap = bitmap;
+ bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
+
+ // Also at this point we set the BF_COMPACTED flag
+ // for this block. The invariant is that
+ // BF_COMPACTED is always unset, except during GC
+ // when it is set on those blocks which will be
+ // compacted.
+ bd->flags |= BF_COMPACTED;
+ }
+ }
+ }
+ }
+
+ // For each GC thread, for each step, allocate a "todo" block to
+ // store evacuated objects to be scavenged, and a block to store
+ // evacuated objects that do not need to be scavenged.
+ for (t = 0; t < n_threads; t++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+
+ // we don't copy objects into g0s0, unless -G0
+ if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
+
+ // workspaces are indexed by the step's absolute number
+ ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
+
+ ws->scan_bd = NULL;
+
+ ws->todo_large_objects = NULL;
+
+ ws->part_list = NULL;
+ ws->n_part_blocks = 0;
+
+ // allocate the first to-space block; extra blocks will be
+ // chained on as necessary.
+ ws->todo_bd = NULL;
+ ws->buffer_todo_bd = NULL;
+ alloc_todo_block(ws,0);
+
+ ws->scavd_list = NULL;
+ ws->n_scavd_blocks = 0;
+ }
+ }
+}
+
+
+/* ----------------------------------------------------------------------------
+ Initialise a generation that is *not* to be collected
+ ------------------------------------------------------------------------- */
+
+// Prepare generation g, which is *not* being collected, for GC.
+// Objects promoted into g are allocated through the GC threads'
+// workspaces, so every workspace needs a todo block; where possible a
+// partially-full block is taken from the head of the step's block
+// list instead of allocating a fresh one.  Finally the per-capability
+// mutable lists are spliced onto the generation's main mutable list.
+static void
+init_uncollected_gen (nat g, nat threads)
+{
+ nat s, t, i;
+ step_workspace *ws;
+ step *stp;
+ bdescr *bd;
+
+ for (s = 0; s < generations[g].n_steps; s++) {
+ stp = &generations[g].steps[s];
+ stp->scavenged_large_objects = NULL;
+ stp->n_scavenged_large_blocks = 0;
+ }
+
+ for (t = 0; t < threads; t++) {
+ for (s = 0; s < generations[g].n_steps; s++) {
+
+ ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
+ stp = ws->step;
+
+ ws->buffer_todo_bd = NULL;
+ ws->todo_large_objects = NULL;
+
+ ws->part_list = NULL;
+ ws->n_part_blocks = 0;
+
+ ws->scavd_list = NULL;
+ ws->n_scavd_blocks = 0;
+
+ // If the block at the head of the list in this generation
+ // is less than 3/4 full, then use it as a todo block.
+ if (stp->blocks && isPartiallyFull(stp->blocks))
+ {
+ // unlink the head block from the step and hand it to this
+ // workspace as its current allocation (todo) block
+ ws->todo_bd = stp->blocks;
+ ws->todo_free = ws->todo_bd->free;
+ ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
+ stp->blocks = stp->blocks->link;
+ stp->n_blocks -= 1;
+ ws->todo_bd->link = NULL;
+
+ // this block is also the scan block; we must scan
+ // from the current end point.
+ ws->scan_bd = ws->todo_bd;
+ ws->scan_bd->u.scan = ws->scan_bd->free;
+
+ // subtract the contents of this block from the stats,
+ // because we'll count the whole block later.
+ copied -= ws->scan_bd->free - ws->scan_bd->start;
+ }
+ else
+ {
+ ws->scan_bd = NULL;
+ ws->todo_bd = NULL;
+ alloc_todo_block(ws,0);
+ }
+ }
+ }
+
+ // Move the private mutable lists from each capability onto the
+ // main mutable list for the generation.
+ for (i = 0; i < n_capabilities; i++) {
+ // walk to the last block of this capability's list...
+ for (bd = capabilities[i].mut_lists[g];
+ bd->link != NULL; bd = bd->link) {
+ /* nothing */
+ }
+ // ...splice the whole list onto the front of the generation's
+ // list, then give the capability a fresh empty block (invariant:
+ // a mutable list always has at least one block)
+ bd->link = generations[g].mut_list;
+ generations[g].mut_list = capabilities[i].mut_lists[g];
+ capabilities[i].mut_lists[g] = allocBlock();
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Initialise a gc_thread before GC
+ -------------------------------------------------------------------------- */
+
+// Reset the per-GC mutable state of a gc_thread to its starting
+// values, ready for the next collection.
+static void
+init_gc_thread (gc_thread *t)
+{
+    t->static_objects           = END_OF_STATIC_LIST;
+    t->scavenged_static_objects = END_OF_STATIC_LIST;
+    t->evac_step                = 0;
+    t->failed_to_evac           = rtsFalse;
+    t->eager_promotion          = rtsTrue;
+    t->thunk_selector_depth     = 0;
+
+    // per-GC statistics counters
+    t->copied                   = 0;
+    t->any_work                 = 0;
+    t->no_work                  = 0;
+    t->scav_find_work           = 0;
+}
+
+/* -----------------------------------------------------------------------------
+ Function we pass to GetRoots to evacuate roots.
+ -------------------------------------------------------------------------- */
+