#endif
#include "Trace.h"
#include "RetainerProfile.h"
+#include "LdvProfile.h"
#include "RaiseAsync.h"
#include "Papi.h"
#include "Stable.h"
#include "Evac.h"
#include "Scav.h"
#include "GCUtils.h"
+#include "MarkStack.h"
#include "MarkWeak.h"
#include "Sparks.h"
#include "Sweep.h"
#endif
/* -----------------------------------------------------------------------------
- The mark bitmap & stack.
+ The mark stack.
-------------------------------------------------------------------------- */
-#define MARK_STACK_BLOCKS 4
-
-bdescr *mark_stack_bdescr;
-StgPtr *mark_stack;
-StgPtr *mark_sp;
-StgPtr *mark_splim;
-
-// Flag and pointers used for falling back to a linear scan when the
-// mark stack overflows.
-rtsBool mark_stack_overflowed;
-bdescr *oldgen_scan_bd;
-StgPtr oldgen_scan;
+bdescr *mark_stack_top_bd; // topmost block in the mark stack
+bdescr *mark_stack_bd; // current block in the mark stack
+StgPtr mark_sp; // pointer to the next unallocated mark stack entry
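
/* Illustrative sketch (not part of this patch): how push and pop can work
 * on the block-chained representation above. Spent blocks stay chained via
 * u.back so they can be reused without reallocating. mark_stack_empty() is
 * the predicate used later in any_work(); the push/pop names and the use
 * of plain allocBlock() are assumptions here (a threaded build may need a
 * synchronised variant), so the real MarkStack.h may differ in detail.
 */
static rtsBool
mark_stack_empty (void)
{
    return mark_sp == mark_stack_bd->start && mark_stack_bd->link == NULL;
}

static void
push_mark_stack (StgPtr p)
{
    bdescr *bd;

    if (mark_sp == mark_stack_bd->start + BLOCK_SIZE_W) {
        // current block is full: move up the chain, reusing a spare
        // block if one is there, otherwise allocating a fresh one
        if (mark_stack_bd->u.back != NULL) {
            mark_stack_bd = mark_stack_bd->u.back;
        } else {
            bd = allocBlock();
            bd->link = mark_stack_bd;
            bd->u.back = NULL;
            mark_stack_bd->u.back = bd;
            mark_stack_top_bd = bd;
            mark_stack_bd = bd;
        }
        mark_sp = mark_stack_bd->start;
    }
    *mark_sp++ = (StgWord)p;
}

static StgPtr
pop_mark_stack (void)
{
    // caller must check mark_stack_empty() first
    if (mark_sp == mark_stack_bd->start) {
        // current block is empty: step down to the previous block
        mark_stack_bd = mark_stack_bd->link;
        mark_sp = mark_stack_bd->start + BLOCK_SIZE_W;
    }
    return (StgPtr)*--mark_sp;
}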
/* -----------------------------------------------------------------------------
GarbageCollect: the main entry point to the garbage collector.
n = initialise_N(force_major_gc);
#if defined(THREADED_RTS)
- work_stealing = RtsFlags.ParFlags.parGcLoadBalancing;
+ work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled &&
+ N >= RtsFlags.ParFlags.parGcLoadBalancingGen;
// It's not always a good idea to do load balancing in parallel
// GC. In particular, for a parallel program we don't want to
// lose locality by moving cached data into another CPU's cache
#ifdef DEBUG
// check for memory leaks if DEBUG is on
- memInventory(traceClass(DEBUG_gc));
+ memInventory(DEBUG_gc);
#endif
- // check stack sanity *before* GC
- IF_DEBUG(sanity, checkFreeListSanity());
- IF_DEBUG(sanity, checkMutableLists(rtsTrue));
+ // check sanity *before* GC
+ IF_DEBUG(sanity, checkSanity(rtsTrue));
// Initialise all our gc_thread structures
for (t = 0; t < n_gc_threads; t++) {
/* Allocate a mark stack if we're doing a major collection.
*/
if (major_gc && oldest_gen->steps[0].mark) {
- nat mark_stack_blocks;
- mark_stack_blocks = stg_max(MARK_STACK_BLOCKS,
- oldest_gen->steps[0].n_old_blocks / 100);
- mark_stack_bdescr = allocGroup(mark_stack_blocks);
- mark_stack = (StgPtr *)mark_stack_bdescr->start;
- mark_sp = mark_stack;
- mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W);
+ mark_stack_bd = allocBlock();
+ mark_stack_top_bd = mark_stack_bd;
+ mark_stack_bd->link = NULL;
+ mark_stack_bd->u.back = NULL;
+ mark_sp = mark_stack_bd->start;
} else {
- mark_stack_bdescr = NULL;
+ mark_stack_bd = NULL;
+ mark_stack_top_bd = NULL;
+ mark_sp = NULL;
}
// this is the main thread
// g0s0->old_blocks is the old nursery
// g0s0->blocks is to-space from the previous GC
if (RtsFlags.GcFlags.generations == 1) {
- if (g0s0->blocks != NULL) {
- freeChain(g0s0->blocks);
- g0s0->blocks = NULL;
+ if (g0->steps[0].blocks != NULL) {
+ freeChain(g0->steps[0].blocks);
+ g0->steps[0].blocks = NULL;
}
}
/* LARGE OBJECTS. The current live large objects are chained on
- * scavenged_large, having been moved during garbage
- * collection from large_objects. Any objects left on
+ * scavenged_large_objects, having been moved during garbage
+ * collection from large_objects. Any objects left on the
* large_objects list are therefore dead, so we free them here.
*/
- for (bd = stp->large_objects; bd != NULL; bd = next) {
- next = bd->link;
- freeGroup(bd);
- bd = next;
- }
-
+ freeChain(stp->large_objects);
stp->large_objects = stp->scavenged_large_objects;
stp->n_large_blocks = stp->n_scavenged_large_blocks;
-
+ ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
}
else // for older generations...
{
// add the new blocks we promoted during this GC
stp->n_large_blocks += stp->n_scavenged_large_blocks;
+ ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks);
}
}
}
// Free the small objects allocated via allocate(), since these will
// all have been copied into G0S1 now.
if (RtsFlags.GcFlags.generations > 1) {
- if (g0s0->blocks != NULL) {
- freeChain(g0s0->blocks);
- g0s0->blocks = NULL;
+ if (g0->steps[0].blocks != NULL) {
+ freeChain(g0->steps[0].blocks);
+ g0->steps[0].blocks = NULL;
}
- g0s0->n_blocks = 0;
- g0s0->n_words = 0;
+ g0->steps[0].n_blocks = 0;
+ g0->steps[0].n_words = 0;
}
- alloc_blocks = 0;
alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
// Start a new pinned_object_block
- pinned_object_block = NULL;
+ for (n = 0; n < n_capabilities; n++) {
+ capabilities[n].pinned_object_block = NULL;
+ }
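
/* Illustrative sketch: with pinned_object_block now per-Capability, pinned
 * allocation can bump-allocate from the Capability's own block without
 * taking a global lock. allocatePinned_sketch is a hypothetical
 * simplification of the real allocator: it omits retiring the old block
 * onto the large object list, which the real code must do.
 */
static StgPtr
allocatePinned_sketch (Capability *cap, nat n)
{
    StgPtr p;
    bdescr *bd;

    bd = cap->pinned_object_block;

    // start a new pinned block if there is none, or no room is left
    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
        bd = allocBlock();
        bd->free = bd->start;
        cap->pinned_object_block = bd;
    }

    p = bd->free;
    bd->free += n;
    return p;
}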
// Free the mark stack.
- if (mark_stack_bdescr != NULL) {
- freeGroup(mark_stack_bdescr);
+ if (mark_stack_top_bd != NULL) {
+ debugTrace(DEBUG_gc, "mark stack: %d blocks",
+ countBlocks(mark_stack_top_bd));
+ freeChain(mark_stack_top_bd);
}
// Free any bitmaps.
// Update the stable pointer hash table.
updateStablePtrTable(major_gc);
- // check sanity after GC
- IF_DEBUG(sanity, checkSanity());
+ // check sanity after GC
+ IF_DEBUG(sanity, checkSanity(rtsTrue));
// extra GC trace info
IF_DEBUG(gc, statDescribeGens());
#ifdef DEBUG
// check for memory leaks if DEBUG is on
- memInventory(traceClass(DEBUG_gc));
+ memInventory(DEBUG_gc);
#endif
#ifdef RTS_GTK_FRONTPANEL
void
freeGcThreads (void)
{
+ nat s;
if (gc_threads != NULL) {
#if defined(THREADED_RTS)
nat i;
- for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) {
+ for (i = 0; i < n_capabilities; i++) {
+ for (s = 0; s < total_steps; s++)
+ {
+ freeWSDeque(gc_threads[i]->steps[s].todo_q);
+ }
stgFree (gc_threads[i]);
}
stgFree (gc_threads);
#else
+ for (s = 0; s < total_steps; s++)
+ {
+ freeWSDeque(gc_threads[0]->steps[s].todo_q);
+ }
stgFree (gc_threads);
#endif
gc_threads = NULL;
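
/* For symmetry with the freeWSDeque() calls above, a sketch of where the
 * todo_q deques come from: each per-step workspace is given a
 * work-stealing deque when its gc_thread is initialised. The function
 * name and the initial size are illustrative, not the actual values.
 */
static void
init_gc_thread_queues_sketch (gc_thread *t)
{
    nat s;
    for (s = 0; s < total_steps; s++) {
        t->steps[s].todo_q = newWSDeque(128);
    }
}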
write_barrier();
// scavenge objects in compacted generation
- if (mark_stack_overflowed || oldgen_scan_bd != NULL ||
- (mark_stack_bdescr != NULL && !mark_stack_empty())) {
+ if (mark_stack_bd != NULL && !mark_stack_empty()) {
return rtsTrue;
}
{
nat r;
- debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index);
loop:
+ traceEvent(&capabilities[gct->thread_index], EVENT_GC_WORK);
+
#if defined(THREADED_RTS)
if (n_gc_threads > 1) {
scavenge_loop();
// scavenge_loop() only exits when there's no work to do
r = dec_running();
- debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)",
- gct->thread_index, r);
+ traceEvent(&capabilities[gct->thread_index], EVENT_GC_IDLE);
+
+ debugTrace(DEBUG_gc, "%d GC threads still running", r);
while (gc_running_threads != 0) {
// usleep(1);
// scavenge_loop() to perform any pending work.
}
- // All threads are now stopped
- debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index);
+ traceEvent(&capabilities[gct->thread_index], EVENT_GC_DONE);
}
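
/* Sketch of the termination protocol used above: gc_running_threads counts
 * the threads currently scavenging, maintained with the RTS atomic ops
 * (assuming atomic_dec from SMP.h). A thread with no local work decrements
 * the count and spins; any thread still running may push new work, so the
 * spinner re-checks any_work() and re-enters scavenge_loop() after
 * incrementing the count again. The collection is complete when the count
 * reaches zero.
 */
static StgWord
dec_running_sketch (void)
{
    ASSERT(gc_running_threads != 0);
    return atomic_dec(&gc_running_threads);
}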
#if defined(THREADED_RTS)
void
gcWorkerThread (Capability *cap)
{
- cap->in_gc = rtsTrue;
+ gc_thread *saved_gct;
+
+ // necessary if we stole a callee-saves register for gct:
+ saved_gct = gct;
gct = gc_threads[cap->no];
gct->id = osThreadId();
debugTrace(DEBUG_gc, "GC thread %d waiting to continue...",
           gct->thread_index);
ACQUIRE_SPIN_LOCK(&gct->mut_spin);
debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index);
+
+ SET_GCT(saved_gct);
}
#endif
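
/* Roughly what the save/restore above is for (a sketch; the real
 * GCThread.h has several configurations): gct may be an ordinary
 * thread-local, or it may be pinned to a global callee-saves register, in
 * which case the worker must restore the caller's value on exit. GCT_REG
 * is a hypothetical configuration macro and "r13" an example register.
 */
#if defined(GCT_REG)
register gc_thread *gct __asm__("r13");   // gcc global register variable
#else
extern __thread gc_thread *gct;           // one gct per OS thread
#endif

#define SET_GCT(to) (gct = (to))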
prodCapability(&capabilities[i], cap->running_task);
}
}
- for (j=0; j < 10000000; j++) {
+ for (j=0; j < 10; j++) {
retry = rtsFalse;
for (i=0; i < n_threads; i++) {
if (i == me) continue;
}
}
if (!retry) break;
+ yieldThread();
}
}
}
}
}
+ if (g == 0) {
+ for (i = 0; i < n_capabilities; i++) {
+ stp = &nurseries[i];
+ stp->old_threads = stp->threads;
+ stp->threads = END_TSO_QUEUE;
+ }
+ }
+
for (s = 0; s < generations[g].n_steps; s++) {
+ // generation 0, step 0 doesn't need to-space, unless -G1
+ if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
+ continue;
+ }
+
stp = &generations[g].steps[s];
ASSERT(stp->gen_no == g);
stp->old_threads = stp->threads;
stp->threads = END_TSO_QUEUE;
- // generation 0, step 0 doesn't need to-space
- if (g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1) {
- continue;
- }
-
// deprecate the existing blocks
stp->old_blocks = stp->blocks;
stp->n_old_blocks = stp->n_blocks;
size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
RtsFlags.GcFlags.minOldGenSize);
+ if (RtsFlags.GcFlags.heapSizeSuggestionAuto) {
+ RtsFlags.GcFlags.heapSizeSuggestion = size;
+ }
+
// minimum size for generation zero
min_alloc = stg_max((RtsFlags.GcFlags.pcFreeHeap * max) / 200,
RtsFlags.GcFlags.minAllocAreaSize);
static void
resize_nursery (void)
{
+ lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities;
+
if (RtsFlags.GcFlags.generations == 1)
{ // Two-space collector:
nat blocks;
* performance we get from 3L bytes, reducing to the same
* performance at 2L bytes.
*/
- blocks = g0s0->n_blocks;
+ blocks = generations[0].steps[0].n_blocks;
if ( RtsFlags.GcFlags.maxHeapSize != 0 &&
blocks * RtsFlags.GcFlags.oldGenFactor * 2 >
else
{
blocks *= RtsFlags.GcFlags.oldGenFactor;
- if (blocks < RtsFlags.GcFlags.minAllocAreaSize)
+ if (blocks < min_nursery)
{
- blocks = RtsFlags.GcFlags.minAllocAreaSize;
+ blocks = min_nursery;
}
}
resizeNurseries(blocks);
(((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) /
(100 + (long)g0s0_pcnt_kept);
- if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) {
- blocks = RtsFlags.GcFlags.minAllocAreaSize;
+ if (blocks < (long)min_nursery) {
+ blocks = min_nursery;
}
resizeNurseries((nat)blocks);
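
/* Worked example of the sizing formula above: with a heap size suggestion
 * of 1000 blocks, needed = 200 blocks of live data, and g0s0_pcnt_kept =
 * 25 (i.e. 25% of nursery allocation survives a GC), we get
 *
 *   blocks = (1000 - 200) * 100 / (100 + 25) = 640
 *
 * and indeed 640 (nursery) + 200 (live) + 640 * 25/100 (survivors of the
 * next collection) = 1000, so the suggestion covers the nursery, the live
 * data, and the data the next collection will retain.
 */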