* ------------------------------------------------------------------------- */
typedef struct step_ {
- unsigned int no; // step number
+ unsigned int no; // step number in this generation
+ unsigned int abs_no; // absolute step number
int is_compacted; // compact this step? (old gen only)
struct generation_ * gen; // generation this step belongs to
bdescr * large_objects; // large objects (doubly linked)
unsigned int n_large_blocks; // no. of blocks used by large objs
+
// ------------------------------------
// Fields below are used during GC only
-// During GC, if we are collecting this step, blocks and n_blocks
-// are copied into the following two fields. After GC, these blocks
-// are freed.
+// During GC, if we are collecting this step, blocks and n_blocks
+// are copied into old_blocks and n_old_blocks below. After GC,
+// these blocks are freed.
- bdescr * old_blocks; // bdescr of first from-space block
- unsigned int n_old_blocks; // number of blocks in from-space
-
- bdescr * todos; // blocks waiting to be scavenged
- unsigned int n_todos; // count of above
#if defined(THREADED_RTS)
+ char pad[128]; // make sure the following is
+ // on a separate cache line.
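+ // (128 bytes covers at least one cache line on current
+ // hardware, so the contended spinlocks below don't share a
+ // line with the read-mostly fields above)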
SpinLock sync_todo; // lock for todos
SpinLock sync_large_objects; // lock for large_objects
// and scavenged_large_objects
#endif
+ bdescr * old_blocks; // bdescr of first from-space block
+ unsigned int n_old_blocks; // number of blocks in from-space
+
+ bdescr * todos; // blocks waiting to be scavenged
+ unsigned int n_todos; // count of above
+
bdescr * scavenged_large_objects; // live large objs after GC (d-link)
unsigned int n_scavenged_large_blocks; // size (not count) of above
bdescr * bitmap; // bitmap for compacting collection
+
} step;
extern generation * RTS_VAR(g0);
extern step * RTS_VAR(g0s0);
extern generation * RTS_VAR(oldest_gen);
+extern step * RTS_VAR(all_steps);
+extern nat total_steps;
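+
+// A minimal sketch (illustration only) of the invariant behind the flat
+// array: abs_no is a step's offset in all_steps[], so a (generation, step)
+// pair and a flat index are interchangeable:
+//
+//   step *stp = &generations[g].steps[s];
+//   ASSERT(stp->abs_no == g * RtsFlags.GcFlags.steps + s);
+//   ASSERT(&all_steps[stp->abs_no] == stp);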
/* -----------------------------------------------------------------------------
Initialisation / De-initialisation
}
}
- ws = &gct->steps[stp->gen_no][stp->no];
+ ws = &gct->steps[stp->abs_no];
+ // this compiles to a single memory access to stp->abs_no only
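+ // (illustration: with the old 2-D layout,
+ //    ws = &gct->steps[stp->gen_no][stp->no];
+ //  needed to load stp->gen_no, stp->no, gct->steps and the row pointer;
+ //  steps[] is now a flexible array member at the end of gc_thread, so
+ //  &gct->steps[stp->abs_no] is
+ //    gct + offsetof(gc_thread, steps) + stp->abs_no * sizeof(step_workspace),
+ //  pure pointer arithmetic after the one load of stp->abs_no)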
/* chain a new block onto the to-space for the destination step if
* necessary.
*/
-
- ASSERT(ws->todo_free >= ws->todo_bd->free && ws->todo_free <= ws->todo_lim);
to = ws->todo_free;
if (to + size > ws->todo_lim) {
to = gc_alloc_todo_block(ws);
}
}
- ws = &gct->steps[new_stp->gen_no][new_stp->no];
+ ws = &gct->steps[new_stp->abs_no];
bd->flags |= BF_EVACUATED;
bd->step = new_stp;
bd->gen_no = new_stp->gen_no;
/* Thread-local data for each GC thread
*/
-gc_thread *gc_threads = NULL;
+gc_thread **gc_threads = NULL;
// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local
// Number of threads running in *this* GC. Affects how many
// Initialise all our gc_thread structures
for (t = 0; t < n_gc_threads; t++) {
- init_gc_thread(&gc_threads[t]);
+ init_gc_thread(gc_threads[t]);
}
// the main thread is running: this prevents any other threads from
copied = 0;
// this is the main thread
- gct = &gc_threads[0];
+ gct = gc_threads[0];
/* -----------------------------------------------------------------------
* follow all the roots that we know about:
bdescr *prev;
for (t = 0; t < n_gc_threads; t++) {
- thr = &gc_threads[t];
-
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- for (s = 0; s < generations[g].n_steps; s++) {
- ws = &thr->steps[g][s];
- if (g==0 && s==0) continue;
-
- // Not true?
- // ASSERT( ws->scan_bd == ws->todo_bd );
- ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
-
- // Push the final block
- if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
-
- ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
-
- prev = ws->scavd_list;
- for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
- bd->flags &= ~BF_EVACUATED; // now from-space
- prev = bd;
- }
- prev->link = ws->stp->blocks;
- ws->stp->blocks = ws->scavd_list;
- ws->stp->n_blocks += ws->n_scavd_blocks;
- ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks);
- }
+ thr = gc_threads[t];
+
+ // skip workspace 0: we don't copy objects into g0s0
+ for (s = 1; s < total_steps; s++) {
+ ws = &thr->steps[s];
+ // Not true?
+ // ASSERT( ws->scan_bd == ws->todo_bd );
+ ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
+
+ // Push the final block
+ if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
+
+ ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
+
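+ // Splice the scavenged list onto the front of stp->blocks.
+ // The walk below leaves 'prev' at the last block of scavd_list
+ // (note: 'prev' is dereferenced unconditionally afterwards, so
+ // scavd_list is assumed non-empty here).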
+ prev = ws->scavd_list;
+ for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
+ bd->flags &= ~BF_EVACUATED; // now from-space
+ prev = bd;
+ }
+ prev->link = ws->stp->blocks;
+ ws->stp->blocks = ws->scavd_list;
+ ws->stp->n_blocks += ws->n_scavd_blocks;
+ ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks);
}
}
}
Initialise the gc_thread structures.
-------------------------------------------------------------------------- */
-static void
-alloc_gc_thread (gc_thread *t, int n)
+static gc_thread *
+alloc_gc_thread (int n)
{
- nat g, s;
+ nat s;
step_workspace *ws;
+ gc_thread *t;
+
+ t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace),
+ "alloc_gc_thread");
#ifdef THREADED_RTS
t->id = 0;
t->papi_events = -1;
#endif
- t->steps = stgMallocBytes(RtsFlags.GcFlags.generations *
- sizeof(step_workspace *),
- "initialise_gc_thread");
-
- for (g = 0; g < RtsFlags.GcFlags.generations; g++)
+ for (s = 0; s < total_steps; s++)
{
- t->steps[g] = stgMallocBytes(generations[g].n_steps *
- sizeof(step_workspace),
- "initialise_gc_thread/2");
-
- for (s = 0; s < generations[g].n_steps; s++)
- {
- ws = &t->steps[g][s];
- ws->stp = &generations[g].steps[s];
- ws->gct = t;
-
- ws->scan_bd = NULL;
- ws->scan = NULL;
-
- ws->todo_bd = NULL;
- ws->buffer_todo_bd = NULL;
-
- ws->scavd_list = NULL;
- ws->n_scavd_blocks = 0;
- }
+ ws = &t->steps[s];
+ ws->stp = &all_steps[s];
+ ASSERT(s == ws->stp->abs_no);
+ ws->gct = t;
+
+ ws->scan_bd = NULL;
+ ws->scan = NULL;
+
+ ws->todo_bd = NULL;
+ ws->buffer_todo_bd = NULL;
+
+ ws->scavd_list = NULL;
+ ws->n_scavd_blocks = 0;
}
+
+ return t;
}
#if defined(THREADED_RTS)
nat i;
gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads *
- sizeof(gc_thread),
+ sizeof(gc_thread*),
"alloc_gc_threads");
for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) {
- alloc_gc_thread(&gc_threads[i], i);
+ gc_threads[i] = alloc_gc_thread(i);
}
#else
- gc_threads = stgMallocBytes (sizeof(gc_thread),
+ gc_threads = stgMallocBytes (sizeof(gc_thread*),
"alloc_gc_threads");
- alloc_gc_thread(gc_threads, 0);
+ gc_threads[0] = alloc_gc_thread(0);
#endif
}
}
// Start from 1: the main thread is 0
for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) {
createOSThread(&id, (OSThreadProc*)&gc_thread_entry,
- &gc_threads[i]);
+ gc_threads[i]);
}
done = rtsTrue;
}
nat i;
for (i=1; i < n_threads; i++) {
inc_running();
- ACQUIRE_LOCK(&gc_threads[i].wake_mutex);
- gc_threads[i].wakeup = rtsTrue;
- signalCondition(&gc_threads[i].wake_cond);
- RELEASE_LOCK(&gc_threads[i].wake_mutex);
+ ACQUIRE_LOCK(&gc_threads[i]->wake_mutex);
+ gc_threads[i]->wakeup = rtsTrue;
+ signalCondition(&gc_threads[i]->wake_cond);
+ RELEASE_LOCK(&gc_threads[i]->wake_mutex);
}
#endif
}
// we don't copy objects into g0s0, unless -G0
if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue;
- ws = &gc_threads[t].steps[g][s];
+ ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
ws->scan_bd = NULL;
ws->scan = NULL;
for (t = 0; t < threads; t++) {
for (s = 0; s < generations[g].n_steps; s++) {
- ws = &gc_threads[t].steps[g][s];
+ ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
stp = ws->stp;
ws->buffer_todo_bd = NULL;
#endif
nat thread_index; // a zero based index identifying the thread
- step_workspace ** steps; // 2-d array (gen,step) of workspaces
-
bdescr * free_blocks; // a buffer of free blocks for this thread
// during GC without accessing the block
// allocators spin lock.
#ifdef USE_PAPI
int papi_events;
#endif
-
+
+ // -------------------
+ // workspaces
+
+ // array of workspaces, indexed by stp->abs_no. This is placed
+ // directly at the end of the gc_thread structure so that we can get from
+ // the gc_thread pointer to a workspace using only pointer
+ // arithmetic, no memory access. This happens in the inner loop
+ // of the GC, see Evac.c:alloc_for_copy().
+ step_workspace steps[];
} gc_thread;
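+
+/* A minimal usage sketch (illustration only, given some 'step *stp'):
+ * with the C99 flexible array member above, one allocation covers the
+ * header and every workspace, and a lookup needs no extra indirection:
+ *
+ *   gc_thread *t = stgMallocBytes(sizeof(gc_thread)
+ *                                 + total_steps * sizeof(step_workspace),
+ *                                 "alloc_gc_thread");
+ *   step_workspace *ws = &t->steps[stp->abs_no];
+ */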
extern nat N;
extern rtsBool major_gc;
-extern gc_thread *gc_threads;
+extern gc_thread **gc_threads;
register gc_thread *gct __asm__("%rbx");
// extern gc_thread *gct; // this thread's gct TODO: make thread-local
scavenge_find_global_work (void)
{
bdescr *bd;
- int g, s;
+ int s;
rtsBool flag;
step_workspace *ws;
flag = rtsFalse;
- for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
- for (s = generations[g].n_steps-1; s >= 0; s--) {
- if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
- continue;
- }
- ws = &gct->steps[g][s];
+ for (s = total_steps-1; s >= 0; s--) {
+ if (s == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+ ws = &gct->steps[s];
- // If we have any large objects to scavenge, do them now.
- if (ws->todo_large_objects) {
- scavenge_large(ws);
- flag = rtsTrue;
- }
+ // If we have any large objects to scavenge, do them now.
+ if (ws->todo_large_objects) {
+ scavenge_large(ws);
+ flag = rtsTrue;
+ }
- if ((bd = grab_todo_block(ws)) != NULL) {
- // no need to assign this to ws->scan_bd, we're going
- // to scavenge the whole thing and then push it on
- // our scavd list. This saves pushing out the
- // scan_bd block, which might be partial.
- if (N == 0) {
- scavenge_block0(bd, bd->start);
- } else {
- scavenge_block(bd, bd->start);
- }
- push_scan_block(bd, ws);
- return rtsTrue;
- }
+ if ((bd = grab_todo_block(ws)) != NULL) {
+ // no need to assign this to ws->scan_bd, we're going
+ // to scavenge the whole thing and then push it on
+ // our scavd list. This saves pushing out the
+ // scan_bd block, which might be partial.
+ if (N == 0) {
+ scavenge_block0(bd, bd->start);
+ } else {
+ scavenge_block(bd, bd->start);
+ }
+ push_scan_block(bd, ws);
+ return rtsTrue;
+ }
- if (flag) return rtsTrue;
- }
+ if (flag) return rtsTrue;
}
return rtsFalse;
}
static rtsBool
scavenge_find_local_work (void)
{
- int g, s;
+ int s;
step_workspace *ws;
rtsBool flag;
flag = rtsFalse;
- for (g = RtsFlags.GcFlags.generations; --g >= 0; ) {
- for (s = generations[g].n_steps; --s >= 0; ) {
- if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
- continue;
- }
- ws = &gct->steps[g][s];
+ for (s = total_steps-1; s >= 0; s--) {
+ if (s == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+ ws = &gct->steps[s];
- if (ws->todo_bd != NULL)
- {
- ws->todo_bd->free = ws->todo_free;
+ if (ws->todo_bd != NULL)
+ {
+ ws->todo_bd->free = ws->todo_free;
+ }
+
+ // If we have a todo block and no scan block, start
+ // scanning the todo block.
+ if (ws->scan_bd == NULL && ws->todo_bd != NULL)
+ {
+ ws->scan_bd = ws->todo_bd;
+ ws->scan = ws->scan_bd->start;
+ }
+
+ // If we have a scan block with some work to do,
+ // scavenge everything up to the free pointer.
+ if (ws->scan != NULL && ws->scan < ws->scan_bd->free)
+ {
+ if (N == 0) {
+ scavenge_block0(ws->scan_bd, ws->scan);
+ } else {
+ scavenge_block(ws->scan_bd, ws->scan);
}
-
- // If we have a todo block and no scan block, start
- // scanning the todo block.
- if (ws->scan_bd == NULL && ws->todo_bd != NULL)
- {
- ws->scan_bd = ws->todo_bd;
- ws->scan = ws->scan_bd->start;
- }
-
- // If we have a scan block with some work to do,
- // scavenge everything up to the free pointer.
- if (ws->scan != NULL && ws->scan < ws->scan_bd->free)
- {
- if (N == 0) {
- scavenge_block0(ws->scan_bd, ws->scan);
- } else {
- scavenge_block(ws->scan_bd, ws->scan);
- }
- ws->scan = ws->scan_bd->free;
- flag = rtsTrue;
- }
-
- if (ws->scan_bd != NULL && ws->scan == ws->scan_bd->free
- && ws->scan_bd != ws->todo_bd)
- {
- // we're not going to evac any more objects into
- // this block, so push it now.
- push_scan_block(ws->scan_bd, ws);
- ws->scan_bd = NULL;
- ws->scan = NULL;
- // we might be able to scan the todo block now. But
- // don't do it right away: there might be full blocks
- // waiting to be scanned as a result of scavenge_block above.
- flag = rtsTrue;
- }
-
- if (flag) return rtsTrue;
- }
+ ws->scan = ws->scan_bd->free;
+ flag = rtsTrue;
+ }
+
+ if (ws->scan_bd != NULL && ws->scan == ws->scan_bd->free
+ && ws->scan_bd != ws->todo_bd)
+ {
+ // we're not going to evac any more objects into
+ // this block, so push it now.
+ push_scan_block(ws->scan_bd, ws);
+ ws->scan_bd = NULL;
+ ws->scan = NULL;
+ // we might be able to scan the todo block now. But
+ // don't do it right away: there might be full blocks
+ // waiting to be scanned as a result of scavenge_block above.
+ flag = rtsTrue;
+ }
+
+ if (flag) return rtsTrue;
}
return rtsFalse;
}
rtsBool
any_work (void)
{
- int g, s;
+ int s;
step_workspace *ws;
write_barrier();
// Check for global work in any step. We don't need to check for
// local work, because we have already exited scavenge_loop(),
// which means there is no local work for this thread.
- for (g = RtsFlags.GcFlags.generations-1; g >= 0; g--) {
- for (s = generations[g].n_steps-1; s >= 0; s--) {
- if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
- continue;
- }
- ws = &gct->steps[g][s];
- if (ws->todo_large_objects) return rtsTrue;
- if (ws->stp->todos) return rtsTrue;
- }
+ for (s = total_steps-1; s >= 0; s--) {
+ if (s == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+ ws = &gct->steps[s];
+ if (ws->todo_large_objects) return rtsTrue;
+ if (ws->stp->todos) return rtsTrue;
}
return rtsFalse;
generation *oldest_gen = NULL; /* oldest generation, for convenience */
step *g0s0 = NULL; /* generation 0, step 0, for convenience */
+nat total_steps = 0;
+step *all_steps = NULL; /* single array of steps */
+
ullong total_allocated = 0; /* total memory allocated during run */
nat n_nurseries = 0; /* == RtsFlags.ParFlags.nNodes, convenience */
initStep (step *stp, int g, int s)
{
stp->no = s;
+ stp->abs_no = RtsFlags.GcFlags.steps * g + s;
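+ // abs_no is also the step's offset in all_steps[], because
+ // generations[g].steps == all_steps + g * RtsFlags.GcFlags.steps
+ // (the oldest generation, with its single step, sits at the end)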
stp->blocks = NULL;
stp->n_blocks = 0;
stp->old_blocks = NULL;
{
nat g, s;
generation *gen;
- step *step_arr;
if (generations != NULL) {
// multi-init protection
it this way, because we need the invariant that two step pointers
can be directly compared to see which is the oldest.
Remember that the last generation has only one step. */
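+/* Illustration of that invariant: all_steps[] is laid out young-to-old,
+ * so comparing step pointers compares ages directly; with more than one
+ * generation,
+ *   &generations[0].steps[0] < &generations[1].steps[0]
+ * always holds. */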
- step_arr = stgMallocBytes(sizeof(struct step_)
- * (1 + ((RtsFlags.GcFlags.generations - 1)
- * RtsFlags.GcFlags.steps)),
- "initStorage: steps");
+ total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps;
+ all_steps = stgMallocBytes(total_steps * sizeof(struct step_),
+ "initStorage: steps");
/* Initialise all generations */
for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
/* Oldest generation: one step */
oldest_gen->n_steps = 1;
- oldest_gen->steps = step_arr + (RtsFlags.GcFlags.generations - 1)
+ oldest_gen->steps = all_steps + (RtsFlags.GcFlags.generations - 1)
* RtsFlags.GcFlags.steps;
/* set up all except the oldest generation with 2 steps */
for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
generations[g].n_steps = RtsFlags.GcFlags.steps;
- generations[g].steps = step_arr + g * RtsFlags.GcFlags.steps;
+ generations[g].steps = all_steps + g * RtsFlags.GcFlags.steps;
}
} else {
/* single generation, i.e. a two-space collector */
g0->n_steps = 1;
- g0->steps = step_arr;
+ g0->steps = all_steps;
}
#ifdef THREADED_RTS