+#if defined(THREADED_RTS)
+ nat i;
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
+ inc_running();
+ debugTrace(DEBUG_gc, "waking up gc thread %d", i);
+ if (gc_threads[i]->wakeup != GC_THREAD_STANDING_BY) barf("wakeup_gc_threads");
+
+ gc_threads[i]->wakeup = GC_THREAD_RUNNING;
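+ // Lock hand-over: a worker standing by is expected to be blocked
+ // acquiring its own gc_spin, so releasing gc_spin here lets it
+ // start scavenging; taking mut_spin gives us the lock that
+ // releaseGCThreads() hands back once GC is finished.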
+ ACQUIRE_SPIN_LOCK(&gc_threads[i]->mut_spin);
+ RELEASE_SPIN_LOCK(&gc_threads[i]->gc_spin);
+ }
+#endif
+}
+
+// After GC is complete, we must wait for all GC threads to reach the
+// GC_THREAD_WAITING_TO_CONTINUE state, otherwise they may still be
+// executing inside any_work(), and may even remain awake until the
+// next GC starts.
+static void
+shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS)
+{
+#if defined(THREADED_RTS)
+ nat i;
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
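+ // Busy-wait until the worker publishes GC_THREAD_WAITING_TO_CONTINUE;
+ // the barrier should keep the flag from being cached across iterations.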
+ while (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE) { write_barrier(); }
+ }
+#endif
+}
+
+#if defined(THREADED_RTS)
+void
+releaseGCThreads (Capability *cap USED_IF_THREADS)
+{
+ nat n_threads = RtsFlags.ParFlags.nNodes;
+ nat me = cap->no;
+ nat i;
+ for (i=0; i < n_threads; i++) {
+ if (i == me) continue;
+ if (gc_threads[i]->wakeup != GC_THREAD_WAITING_TO_CONTINUE)
+ barf("releaseGCThreads");
+
+ gc_threads[i]->wakeup = GC_THREAD_INACTIVE;
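+ // Undo the lock swap done in wakeup_gc_threads(): releasing
+ // mut_spin lets the worker (blocked acquiring it after GC)
+ // continue, and re-taking gc_spin restores the lock state
+ // expected at the start of the next GC.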
+ ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin);
+ RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin);
+ }
+}
+#endif
+
+/* ----------------------------------------------------------------------------
+ Initialise a generation that is to be collected
+ ------------------------------------------------------------------------- */
+
+static void
+init_collected_gen (nat g, nat n_threads)
+{
+ nat t, i;
+ gen_workspace *ws;
+ generation *gen;
+ bdescr *bd;
+
+ // Throw away the current mutable list. Invariant: the mutable
+ // list always has at least one block; this means we can avoid a
+ // check for NULL in recordMutable().
+ if (g != 0) {
+ freeChain(generations[g].mut_list);
+ generations[g].mut_list = allocBlock();
+ for (i = 0; i < n_capabilities; i++) {
+ freeChain(capabilities[i].mut_lists[g]);
+ capabilities[i].mut_lists[g] = allocBlock();
+ }
+ }
+
+ gen = &generations[g];
+ ASSERT(gen->no == g);
+
+ // we'll construct a new list of threads in this generation during
+ // GC; throw away the current list.
+ gen->old_threads = gen->threads;
+ gen->threads = END_TSO_QUEUE;
+
+ // deprecate the existing blocks
+ gen->old_blocks = gen->blocks;
+ gen->n_old_blocks = gen->n_blocks;
+ gen->blocks = NULL;
+ gen->n_blocks = 0;
+ gen->n_words = 0;
+ gen->live_estimate = 0;
+
+ // initialise the large object queues.
+ gen->scavenged_large_objects = NULL;
+ gen->n_scavenged_large_blocks = 0;
+
+ // mark the small objects as from-space
+ for (bd = gen->old_blocks; bd; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED;
+ }
+
+ // mark the large objects as from-space
+ for (bd = gen->large_objects; bd; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED;
+ }
+
+ // for a compacted generation, we need to allocate the bitmap
+ if (gen->mark) {
+ nat bitmap_size; // in bytes
+ bdescr *bitmap_bdescr;
+ StgWord *bitmap;
+
+ bitmap_size = gen->n_old_blocks * BLOCK_SIZE / (sizeof(W_)*BITS_PER_BYTE);
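+ // one mark bit per word of from-space; e.g. assuming the default
+ // 4096-byte blocks and 8-byte words, each old block needs
+ // 4096/8 = 512 bits, i.e. 64 bytes of bitmap.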
+
+ if (bitmap_size > 0) {
+ bitmap_bdescr = allocGroup((lnat)BLOCK_ROUND_UP(bitmap_size)
+ / BLOCK_SIZE);
+ gen->bitmap = bitmap_bdescr;
+ bitmap = bitmap_bdescr->start;
+
+ debugTrace(DEBUG_gc, "bitmap_size: %d, bitmap: %p",
+ bitmap_size, bitmap);
+
+ // don't forget to fill it with zeros!
+ memset(bitmap, 0, bitmap_size);
+
+ // For each block in this generation, point to its bitmap from the
+ // block descriptor.
+ for (bd=gen->old_blocks; bd != NULL; bd = bd->link) {
+ bd->u.bitmap = bitmap;
+ bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);
+
+ // Also at this point we set the BF_MARKED flag
+ // for this block. The invariant is that
+ // BF_MARKED is always unset, except during GC
+ // when it is set on those blocks which will be
+ // compacted.
+ if (!(bd->flags & BF_FRAGMENTED)) {
+ bd->flags |= BF_MARKED;
+ }
+ }
+ }
+ }
+
+ // For each GC thread, allocate a "todo" block in this generation to
+ // store evacuated objects to be scavenged, and a block to store
+ // evacuated objects that do not need to be scavenged.
+ for (t = 0; t < n_threads; t++) {
+ ws = &gc_threads[t]->gens[g];
+
+ ws->todo_large_objects = NULL;
+
+ ws->part_list = NULL;
+ ws->n_part_blocks = 0;
+
+ // allocate the first to-space block; extra blocks will be
+ // chained on as necessary.
+ ws->todo_bd = NULL;
+ ASSERT(looksEmptyWSDeque(ws->todo_q));
+ alloc_todo_block(ws,0);
+
+ ws->todo_overflow = NULL;
+ ws->n_todo_overflow = 0;
+
+ ws->scavd_list = NULL;
+ ws->n_scavd_blocks = 0;
+ }
+}
+
+
+/* ----------------------------------------------------------------------------
+ Initialise a generation that is *not* to be collected
+ ------------------------------------------------------------------------- */
+
+static void
+init_uncollected_gen (nat g, nat threads)
+{
+ nat t, n;
+ gen_workspace *ws;
+ generation *gen;
+ bdescr *bd;
+
+ // save the current mutable lists for this generation, and
+ // allocate a fresh block for each one. We'll traverse these
+ // mutable lists as roots early on in the GC.
+ generations[g].saved_mut_list = generations[g].mut_list;
+ generations[g].mut_list = allocBlock();
+ for (n = 0; n < n_capabilities; n++) {
+ capabilities[n].saved_mut_lists[g] = capabilities[n].mut_lists[g];
+ capabilities[n].mut_lists[g] = allocBlock();
+ }
+
+ gen = &generations[g];
+
+ gen->scavenged_large_objects = NULL;
+ gen->n_scavenged_large_blocks = 0;
+
+ for (t = 0; t < threads; t++) {
+ ws = &gc_threads[t]->gens[g];
+
+ ASSERT(looksEmptyWSDeque(ws->todo_q));
+ ws->todo_large_objects = NULL;
+
+ ws->part_list = NULL;
+ ws->n_part_blocks = 0;
+
+ ws->scavd_list = NULL;
+ ws->n_scavd_blocks = 0;
+
+ // If the block at the head of the list in this generation
+ // still has a useful amount of free space (see isPartiallyFull()),
+ // then use it as a todo block.
+ if (gen->blocks && isPartiallyFull(gen->blocks))
+ {
+ ws->todo_bd = gen->blocks;
+ ws->todo_free = ws->todo_bd->free;
+ ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W;
+ gen->blocks = gen->blocks->link;
+ gen->n_blocks -= 1;
+ gen->n_words -= ws->todo_bd->free - ws->todo_bd->start;
+ ws->todo_bd->link = NULL;
+ // we must scan from the current end point.
+ ws->todo_bd->u.scan = ws->todo_bd->free;
+ }
+ else
+ {
+ ws->todo_bd = NULL;
+ alloc_todo_block(ws,0);
+ }
+ }
+
+ // deal out any more partial blocks to the threads' part_lists
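+ // (dealt round-robin across the gc_threads; scan is set to free so
+ // that only objects evacuated into these blocks during this GC will
+ // be scavenged)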
+ t = 0;
+ while (gen->blocks && isPartiallyFull(gen->blocks))
+ {
+ bd = gen->blocks;
+ gen->blocks = bd->link;
+ ws = &gc_threads[t]->gens[g];
+ bd->link = ws->part_list;
+ ws->part_list = bd;
+ ws->n_part_blocks += 1;
+ bd->u.scan = bd->free;
+ gen->n_blocks -= 1;
+ gen->n_words -= bd->free - bd->start;
+ t++;
+ if (t == n_gc_threads) t = 0;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ Initialise a gc_thread before GC
+ -------------------------------------------------------------------------- */
+
+static void
+init_gc_thread (gc_thread *t)
+{
+ t->static_objects = END_OF_STATIC_LIST;
+ t->scavenged_static_objects = END_OF_STATIC_LIST;
+ t->scan_bd = NULL;
+ t->mut_lists = capabilities[t->thread_index].mut_lists;
+ t->evac_gen = 0;
+ t->failed_to_evac = rtsFalse;
+ t->eager_promotion = rtsTrue;
+ t->thunk_selector_depth = 0;
+ t->copied = 0;
+ t->scanned = 0;
+ t->any_work = 0;
+ t->no_work = 0;
+ t->scav_find_work = 0;
+}
+
+/* -----------------------------------------------------------------------------
+ Function we pass to evacuate roots.
+ -------------------------------------------------------------------------- */
+
+static void
+mark_root(void *user USED_IF_THREADS, StgClosure **root)
+{
+ // we stole a register for gct, but this function is called from
+ // *outside* the GC where the register variable is not in effect,
+ // so we need to save and restore it here. NB. only call
+ // mark_root() from the main GC thread, otherwise gct will be
+ // incorrect.
+ gc_thread *saved_gct;
+ saved_gct = gct;
+ SET_GCT(user);
+
+ evacuate(root);
+
+ SET_GCT(saved_gct);