From: Simon Marlow
Date: Mon, 9 Jun 2008 17:49:43 +0000 (+0000)
Subject: Experimental "mark-region" strategy for the old generation
X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=74ee9df9f9e79e7110e9d8541b84010f35c464c5

Experimental "mark-region" strategy for the old generation

Sometimes better than the default copying, enabled by +RTS -w
---

diff --git a/includes/Block.h b/includes/Block.h
index e2e691a..3d7a5c8 100644
--- a/includes/Block.h
+++ b/includes/Block.h
@@ -84,12 +84,15 @@ typedef struct bdescr_ {
 #define BF_LARGE      2
 /* Block is pinned */
 #define BF_PINNED     4
-/* Block is part of a compacted generation */
-#define BF_COMPACTED  8
+/* Block is to be marked, not copied */
+#define BF_MARKED     8
 /* Block is free, and on the free list  (TODO: is this used?) */
 #define BF_FREE       16
 /* Block is executable */
 #define BF_EXEC       32
+/* Block contains only a small amount of live data */
+#define BF_FRAGMENTED 64
+
 /* Finding the block descriptor for a given block -------------------------- */
diff --git a/includes/RtsFlags.h b/includes/RtsFlags.h
index 7d0b418..11133ef 100644
--- a/includes/RtsFlags.h
+++ b/includes/RtsFlags.h
@@ -39,6 +39,8 @@ struct GC_FLAGS {
     rtsBool compact;		/* True <=> "compact all the time" */
     double  compactThreshold;

+    rtsBool sweep;		/* use "mostly mark-sweep" instead of copying
+				 * for the oldest generation */
     rtsBool ringBell;
     rtsBool frontpanel;
diff --git a/includes/Storage.h b/includes/Storage.h
index ae066c1..34a5411 100644
--- a/includes/Storage.h
+++ b/includes/Storage.h
@@ -55,7 +55,6 @@ typedef struct step_ {
     unsigned int         no;		// step number in this generation
     unsigned int         abs_no;	// absolute step number

-    int                  is_compacted;	// compact this step? (old gen only)

     struct generation_ * gen;		// generation this step belongs to
     unsigned int         gen_no;	// generation number (cached)
@@ -87,8 +86,12 @@ typedef struct step_ {
 					//    and scavenged_large_objects
 #endif

+    int                  mark;		// mark (not copy)? (old gen only)
+    int                  compact;	// compact (not sweep)? (old gen only)
+
     bdescr *             old_blocks;	// bdescr of first from-space block
     unsigned int         n_old_blocks;	// number of blocks in from-space
+    unsigned int         live_estimate; // for sweeping: estimate of live data

     bdescr *             todos;	// blocks waiting to be scavenged
     bdescr *             todos_last;
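The header changes above carry the two ideas of the patch: a block in the oldest generation can be marked in place instead of copied (BF_MARKED reuses BF_COMPACTED's bit), and a block that the previous sweep found mostly empty is tagged BF_FRAGMENTED so the next major GC copies the survivors out of it rather than marking it again. A standalone sketch of that per-block decision, using the flag values defined above but a simplified block type (illustration only, not RTS code):

    #include <stdio.h>

    #define BF_MARKED     8    /* values as in includes/Block.h above */
    #define BF_FRAGMENTED 64

    typedef struct { unsigned flags; } block;

    /* BF_FRAGMENTED is checked first: init_collected_gen() only sets
     * BF_MARKED on blocks that are not fragmented. */
    static const char *policy(const block *bd)
    {
        if (bd->flags & BF_FRAGMENTED) return "evacuate (mostly empty)";
        if (bd->flags & BF_MARKED)     return "mark in place";
        return "evacuate (copying collection)";
    }

    int main(void)
    {
        block a = { BF_MARKED };
        block b = { BF_FRAGMENTED };
        printf("%s\n%s\n", policy(&a), policy(&b));
        return 0;
    }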
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index 81bac4e..0618386 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -147,6 +147,7 @@ void initRtsFlagsDefaults(void)
 #endif
     RtsFlags.GcFlags.compact		= rtsFalse;
     RtsFlags.GcFlags.compactThreshold	= 30.0;
+    RtsFlags.GcFlags.sweep		= rtsFalse;
 #ifdef RTS_GTK_FRONTPANEL
     RtsFlags.GcFlags.frontpanel		= rtsFalse;
 #endif
@@ -353,6 +354,7 @@ usage_text[] = {
 "  -c<n>    Auto-enable compaction of the oldest generation when live data is",
 "           at least <n>% of the maximum heap size set with -M (default: 30%)",
 "  -c       Enable compaction for all major collections",
+"  -w       Use mark-region for the oldest generation (experimental)",
 #if defined(THREADED_RTS)
 "  -I<sec>  Perform full GC after <sec> idle time (default: 0.3, 0 == off)",
 #endif
@@ -750,6 +752,10 @@
 		      error = rtsTrue;
 		  }
 		  break;

+	      case 'w':
+		  RtsFlags.GcFlags.sweep = rtsTrue;
+		  break;
+
 	      case 'F':
 	          RtsFlags.GcFlags.oldGenFactor = atof(rts_argv[arg]+2);
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index bb4d838..9f0a69d 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -84,7 +84,7 @@ thread (StgClosure **p)
     if (HEAP_ALLOCED(q)) {
 	bd = Bdescr(q);

-	if (bd->flags & BF_COMPACTED)
+	if (bd->flags & BF_MARKED)
 	{
 	    iptr = *q;
 	    switch (GET_CLOSURE_TAG((StgClosure *)iptr))
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 78f0f31..3b68c62 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -21,6 +21,7 @@
 #include "Compact.h"
 #include "Prelude.h"
 #include "LdvProfile.h"
+#include "Trace.h"

 #if defined(PROF_SPIN) && defined(THREADED_RTS) && defined(PARALLEL_GC)
 StgWord64 whitehole_spin = 0;
@@ -383,7 +384,7 @@ loop:

   bd = Bdescr((P_)q);

-  if ((bd->flags & (BF_LARGE | BF_COMPACTED | BF_EVACUATED)) != 0) {
+  if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED)) != 0) {

      // pointer into to-space: just return it.  It might be a pointer
      // into a generation that we aren't collecting (> N), or it
@@ -419,17 +420,16 @@ loop:
      /* If the object is in a step that we're compacting, then we
       * need to use an alternative evacuate procedure.
       */
-      if (bd->flags & BF_COMPACTED) {
-	  if (!is_marked((P_)q,bd)) {
-	      mark((P_)q,bd);
-	      if (mark_stack_full()) {
-		  mark_stack_overflowed = rtsTrue;
-		  reset_mark_stack();
-	      }
-	      push_mark_stack((P_)q);
-	  }
-	  return;
+      if (!is_marked((P_)q,bd)) {
+	  mark((P_)q,bd);
+	  if (mark_stack_full()) {
+              debugTrace(DEBUG_gc,"mark stack overflowed");
+	      mark_stack_overflowed = rtsTrue;
+	      reset_mark_stack();
+	  }
+	  push_mark_stack((P_)q);
       }
+      return;
   }

   stp = bd->step->to;
@@ -828,7 +828,7 @@ selector_chain:
         // (scavenge_mark_stack doesn't deal with IND).  BEWARE!  This
         // bit is very tricky to get right.  If you make changes
         // around here, test by compiling stage 3 with +RTS -c -RTS.
-        if (bd->flags & BF_COMPACTED) {
+        if (bd->flags & BF_MARKED) {
             // must call evacuate() to mark this closure if evac==rtsTrue
             *q = (StgClosure *)p;
             if (evac) evacuate(q);
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index b71079b..165c706 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -49,6 +49,7 @@
 #include "GCUtils.h"
 #include "MarkWeak.h"
 #include "Sparks.h"
+#include "Sweep.h"

 #include <string.h> // for memset()
 #include <unistd.h>
@@ -288,10 +289,13 @@ GarbageCollect ( rtsBool force_major_gc )
   /* Allocate a mark stack if we're doing a major collection.
    */
   if (major_gc) {
-      mark_stack_bdescr = allocGroup(MARK_STACK_BLOCKS);
+      nat mark_stack_blocks;
+      mark_stack_blocks = stg_max(MARK_STACK_BLOCKS,
+                                  oldest_gen->steps[0].n_old_blocks / 100);
+      mark_stack_bdescr = allocGroup(mark_stack_blocks);
       mark_stack = (StgPtr *)mark_stack_bdescr->start;
       mark_sp    = mark_stack;
-      mark_splim = mark_stack + (MARK_STACK_BLOCKS * BLOCK_SIZE_W);
+      mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W);
   } else {
       mark_stack_bdescr = NULL;
   }
@@ -485,9 +489,12 @@ GarbageCollect ( rtsBool force_major_gc )
       }
   }

-  // Finally: compaction of the oldest generation.
-  if (major_gc && oldest_gen->steps[0].is_compacted) {
-      compact(gct->scavenged_static_objects);
+  // Finally: compact or sweep the oldest generation.
+  if (major_gc && oldest_gen->steps[0].mark) {
+      if (oldest_gen->steps[0].compact)
+          compact(gct->scavenged_static_objects);
+      else
+          sweep(&oldest_gen->steps[0]);
   }

   IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse));
@@ -542,7 +549,7 @@ GarbageCollect ( rtsBool force_major_gc )
 	}

 	for (s = 0; s < generations[g].n_steps; s++) {
-	    bdescr *next;
+	    bdescr *next, *prev;
 	    stp = &generations[g].steps[s];

 	    // for generations we collected...
@@ -553,29 +560,48 @@ GarbageCollect ( rtsBool force_major_gc )
 	     * freed blocks will probaby be quickly recycled.
 	     */
 	    if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) {
-		if (stp->is_compacted)
+		if (stp->mark)
 		{
 		    // tack the new blocks on the end of the existing blocks
 		    if (stp->old_blocks != NULL) {
+
+			prev = NULL;
 			for (bd = stp->old_blocks; bd != NULL; bd = next) {
-			    stp->n_words += bd->free - bd->start;
-
-			    // NB. this step might not be compacted next
-			    // time, so reset the BF_COMPACTED flags.
-			    // They are set before GC if we're going to
-			    // compact.  (search for BF_COMPACTED above).
-			    bd->flags &= ~BF_COMPACTED;
-
-			    // between GCs, all blocks in the heap except
-			    // for the nursery have the BF_EVACUATED flag set.
-			    bd->flags |= BF_EVACUATED;
-
-			    next = bd->link;
-			    if (next == NULL) {
-				bd->link = stp->blocks;
-			    }
+
+			    next = bd->link;
+
+			    if (!(bd->flags & BF_MARKED))
+			    {
+				if (prev == NULL) {
+				    stp->old_blocks = next;
+				} else {
+				    prev->link = next;
+				}
+				freeGroup(bd);
+				stp->n_old_blocks--;
+			    }
+			    else
+			    {
+				stp->n_words += bd->free - bd->start;
+
+				// NB. this step might not be compacted next
+				// time, so reset the BF_MARKED flags.
+				// They are set before GC if we're going to
+				// compact.  (search for BF_MARKED above).
+				bd->flags &= ~BF_MARKED;
+
+				// between GCs, all blocks in the heap except
+				// for the nursery have the BF_EVACUATED flag set.
+				bd->flags |= BF_EVACUATED;
+
+				prev = bd;
+			    }
 			}
-			stp->blocks = stp->old_blocks;
+
+			if (prev != NULL) {
+			    prev->link = stp->blocks;
+			    stp->blocks = stp->old_blocks;
+			}
 		    }
 		    // add the new blocks to the block tally
 		    stp->n_blocks += stp->n_old_blocks;
@@ -1144,6 +1170,7 @@ init_collected_gen (nat g, nat n_threads)
 	stp->blocks = NULL;
 	stp->n_blocks = 0;
 	stp->n_words = 0;
+	stp->live_estimate = 0;

 	// we don't have any to-be-scavenged blocks yet
 	stp->todos = NULL;
@@ -1165,7 +1192,7 @@ init_collected_gen (nat g, nat n_threads)
 	}

 	// for a compacted step, we need to allocate the bitmap
-	if (stp->is_compacted) {
+	if (stp->mark) {
 	    nat bitmap_size; // in bytes
 	    bdescr *bitmap_bdescr;
 	    StgWord *bitmap;
@@ -1190,12 +1217,14 @@ init_collected_gen (nat g, nat n_threads)
 		    bd->u.bitmap = bitmap;
 		    bitmap += BLOCK_SIZE_W / (sizeof(W_)*BITS_PER_BYTE);

-		    // Also at this point we set the BF_COMPACTED flag
+		    // Also at this point we set the BF_MARKED flag
 		    // for this block.  The invariant is that
-		    // BF_COMPACTED is always unset, except during GC
+		    // BF_MARKED is always unset, except during GC
 		    // when it is set on those blocks which will be
 		    // compacted.
-		    bd->flags |= BF_COMPACTED;
+		    if (!(bd->flags & BF_FRAGMENTED)) {
+			bd->flags |= BF_MARKED;
+		    }
 		}
 	    }
 	}
@@ -1425,13 +1454,18 @@ resize_generations (void)
     nat g;

     if (major_gc && RtsFlags.GcFlags.generations > 1) {
-	nat live, size, min_alloc;
+	nat live, size, min_alloc, words;
 	nat max  = RtsFlags.GcFlags.maxHeapSize;
 	nat gens = RtsFlags.GcFlags.generations;

 	// live in the oldest generations
-	live = (oldest_gen->steps[0].n_words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W+
-	    oldest_gen->steps[0].n_large_blocks;
+        if (oldest_gen->steps[0].live_estimate != 0) {
+            words = oldest_gen->steps[0].live_estimate;
+        } else {
+            words = oldest_gen->steps[0].n_words;
+        }
+        live = (words + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W +
+            oldest_gen->steps[0].n_large_blocks;

 	// default max size for all generations except zero
 	size = stg_max(live * RtsFlags.GcFlags.oldGenFactor,
@@ -1448,13 +1482,19 @@ resize_generations (void)
 	     (max > 0 &&
 	      oldest_gen->steps[0].n_blocks >
 	      (RtsFlags.GcFlags.compactThreshold * max) / 100))) {
-	    oldest_gen->steps[0].is_compacted = 1;
+	    oldest_gen->steps[0].mark = 1;
+	    oldest_gen->steps[0].compact = 1;
 //	    debugBelch("compaction: on\n", live);
 	} else {
-	    oldest_gen->steps[0].is_compacted = 0;
+	    oldest_gen->steps[0].mark = 0;
+	    oldest_gen->steps[0].compact = 0;
 //	    debugBelch("compaction: off\n", live);
 	}

+	if (RtsFlags.GcFlags.sweep) {
+	    oldest_gen->steps[0].mark = 1;
+	}
+
 	// if we're going to go over the maximum heap size, reduce the
 	// size of the generations accordingly.  The calculation is
 	// different if compaction is turned on, because we don't need
@@ -1468,7 +1508,7 @@ resize_generations (void)
 		heapOverflow();
 	    }

-	    if (oldest_gen->steps[0].is_compacted) {
+	    if (oldest_gen->steps[0].compact) {
 		if ( (size + (size - 1) * (gens - 2) * 2) + min_alloc > max ) {
 		    size = (max - min_alloc) / ((gens - 1) * 2 - 1);
 		}
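The pivotal change in GC.c is the loop over old_blocks: after sweeping, any from-space block left unmarked is unlinked and freed instead of being threaded back onto the step's block list. Here is the list surgery in isolation, with simplified stand-in types (the RTS calls freeGroup() and also maintains n_old_blocks and n_words, as in the hunk above):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct blk { struct blk *link; int marked; } blk;

    /* Unlink and free every unmarked block; returns the new head and
     * decrements *n once per freed block. */
    static blk *drop_unmarked(blk *head, int *n)
    {
        blk *bd, *next, *prev = NULL;
        for (bd = head; bd != NULL; bd = next) {
            next = bd->link;
            if (!bd->marked) {
                if (prev == NULL) head = next; else prev->link = next;
                free(bd);            /* freeGroup(bd) in the RTS */
                (*n)--;
            } else {
                prev = bd;           /* survivor becomes the predecessor */
            }
        }
        return head;
    }

    int main(void)
    {
        int n = 3, i;
        blk *head = NULL;
        for (i = 0; i < 3; i++) {    /* three blocks, only the middle marked */
            blk *b = malloc(sizeof *b);
            b->marked = (i == 1);
            b->link = head;
            head = b;
        }
        head = drop_unmarked(head, &n);
        printf("%d block(s) survive\n", n);    /* prints 1 */
        while (head) { blk *t = head->link; free(head); head = t; }
        return 0;
    }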
diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c
index 48179f7..66806f4 100644
--- a/rts/sm/GCAux.c
+++ b/rts/sm/GCAux.c
@@ -71,7 +71,7 @@ isAlive(StgClosure *p)
     }

     // check the mark bit for compacted steps
-    if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) {
+    if ((bd->flags & BF_MARKED) && is_marked((P_)q,bd)) {
 	return p;
     }
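isAlive() now answers the liveness question for marked blocks by consulting the per-block mark bitmap via is_marked(). The real mark()/is_marked() pair lives in the RTS's Compact.h and works on bdescr bitmaps with one bit per heap word; the following stand-in shows just the bit arithmetic (names and types are simplified, not the RTS API):

    #include <stdio.h>
    #include <limits.h>

    #define WORD_BITS (sizeof(unsigned long) * CHAR_BIT)

    /* Set the mark bit for the word at this offset within the block. */
    static void mark_bit(unsigned long *bitmap, unsigned offset_in_words)
    {
        bitmap[offset_in_words / WORD_BITS] |= 1UL << (offset_in_words % WORD_BITS);
    }

    /* Test the mark bit for the word at this offset within the block. */
    static int is_marked_bit(const unsigned long *bitmap, unsigned offset_in_words)
    {
        return (bitmap[offset_in_words / WORD_BITS] >> (offset_in_words % WORD_BITS)) & 1;
    }

    int main(void)
    {
        unsigned long bitmap[16] = { 0 };   /* covers 16 * WORD_BITS words */
        mark_bit(bitmap, 130);
        printf("%d %d\n", is_marked_bit(bitmap, 130), is_marked_bit(bitmap, 131));
        return 0;                           /* prints: 1 0 */
    }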
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index b76fcf4..69e441d 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -87,6 +87,7 @@ initStep (step *stp, int g, int s)
     stp->blocks = NULL;
     stp->n_blocks = 0;
     stp->n_words = 0;
+    stp->live_estimate = 0;
     stp->old_blocks = NULL;
     stp->n_old_blocks = 0;
     stp->gen = &generations[g];
@@ -95,7 +96,8 @@ initStep (step *stp, int g, int s)
     stp->n_large_blocks = 0;
     stp->scavenged_large_objects = NULL;
     stp->n_scavenged_large_blocks = 0;
-    stp->is_compacted = 0;
+    stp->mark = 0;
+    stp->compact = 0;
     stp->bitmap = NULL;
 #ifdef THREADED_RTS
     initSpinLock(&stp->sync_todo);
@@ -230,11 +232,13 @@ initStorage( void )
   }

   /* The oldest generation has one step. */
-  if (RtsFlags.GcFlags.compact) {
+  if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
      if (RtsFlags.GcFlags.generations == 1) {
-	  errorBelch("WARNING: compaction is incompatible with -G1; disabled");
+	  errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
      } else {
-	  oldest_gen->steps[0].is_compacted = 1;
+	  oldest_gen->steps[0].mark = 1;
+	  if (RtsFlags.GcFlags.compact)
+	      oldest_gen->steps[0].compact = 1;
      }
   }

@@ -1032,6 +1036,7 @@ countOccupied(bdescr *bd)

     words = 0;
     for (; bd != NULL; bd = bd->link) {
+        ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
         words += bd->free - bd->start;
     }
     return words;
@@ -1079,14 +1084,27 @@ calcNeeded(void)
 	for (s = 0; s < generations[g].n_steps; s++) {
 	    if (g == 0 && s == 0) { continue; }
 	    stp = &generations[g].steps[s];
+
+            // we need at least this much space
+            needed += stp->n_blocks + stp->n_large_blocks;
+
+            // any additional space needed to collect this gen next time?
 	    if (g == 0 || // always collect gen 0
 		(generations[g].steps[0].n_blocks +
 		 generations[g].steps[0].n_large_blocks
-		 > generations[g].max_blocks
-		 && stp->is_compacted == 0)) {
-		needed += 2 * stp->n_blocks + stp->n_large_blocks;
-	    } else {
-		needed += stp->n_blocks + stp->n_large_blocks;
+		 > generations[g].max_blocks)) {
+                // we will collect this gen next time
+                if (stp->mark) {
+                    // bitmap:
+                    needed += stp->n_blocks / BITS_IN(W_);
+                    // mark stack:
+                    needed += stp->n_blocks / 100;
+                }
+                if (stp->compact) {
+                    continue; // no additional space needed for compaction
+                } else {
+                    needed += stp->n_blocks;
+                }
 	    }
 	}
     }
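Taken together, initStorage() above and resize_generations() in GC.c reduce to a small policy table for the oldest generation: -c (or crossing its auto-compaction threshold) sets both mark and compact; -w sets mark alone, which GarbageCollect() dispatches to sweep(). A compilable restatement under simplified inputs (the real threshold test compares n_blocks against a percentage of the -M limit):

    #include <stdio.h>

    typedef struct { int mark; int compact; } policy;

    static policy old_gen_policy(int flag_c, int flag_w, int over_threshold)
    {
        policy p = { 0, 0 };
        if (flag_c || over_threshold) { p.mark = 1; p.compact = 1; }
        if (flag_w) { p.mark = 1; }   /* -w alone: mark but don't compact */
        return p;
    }

    int main(void)
    {
        policy w = old_gen_policy(0, 1, 0);   /* +RTS -w  -> mark-region  */
        policy c = old_gen_policy(1, 0, 0);   /* +RTS -c  -> mark-compact */
        policy d = old_gen_policy(0, 0, 0);   /* default  -> copying      */
        printf("-w: mark=%d compact=%d\n", w.mark, w.compact);
        printf("-c: mark=%d compact=%d\n", c.mark, c.compact);
        printf("default: mark=%d compact=%d\n", d.mark, d.compact);
        return 0;
    }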
diff --git a/rts/sm/Sweep.c b/rts/sm/Sweep.c
new file mode 100644
index 0000000..979fe9c
--- /dev/null
+++ b/rts/sm/Sweep.c
@@ -0,0 +1,82 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2008
+ *
+ * Simple mark/sweep, collecting whole blocks.
+ *
+ * Documentation on the architecture of the Garbage Collector can be
+ * found in the online commentary:
+ *
+ *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
+ *
+ * ---------------------------------------------------------------------------*/
+
+#include "Rts.h"
+#include "Sweep.h"
+#include "Block.h"
+#include "Trace.h"
+
+void
+sweep(step *step)
+{
+    bdescr *bd, *prev, *next;
+    nat i;
+    nat freed, resid, fragd, blocks, live;
+
+    ASSERT(countBlocks(step->old_blocks) == step->n_old_blocks);
+
+    live = 0; // estimate of live data in this gen
+    freed = 0;
+    fragd = 0;
+    blocks = 0;
+    prev = NULL;
+    for (bd = step->old_blocks; bd != NULL; bd = next)
+    {
+        next = bd->link;
+
+        if (!(bd->flags & BF_MARKED)) {
+            prev = bd;
+            continue;
+        }
+
+        blocks++;
+        resid = 0;
+        for (i = 0; i < BLOCK_SIZE_W / BITS_IN(W_); i++)
+        {
+            if (bd->u.bitmap[i] != 0) resid++;
+        }
+        live += resid * BITS_IN(W_);
+
+        if (resid == 0)
+        {
+            freed++;
+            step->n_old_blocks--;
+            if (prev == NULL) {
+                step->old_blocks = next;
+            } else {
+                prev->link = next;
+            }
+            freeGroup(bd);
+        }
+        else
+        {
+            prev = bd;
+            if (resid < (BLOCK_SIZE_W * 3) / (BITS_IN(W_) * 4)) {
+                fragd++;
+                bd->flags |= BF_FRAGMENTED;
+            }
+        }
+    }
+
+    step->live_estimate = live;
+
+    trace(DEBUG_gc|TRACE_gc, "sweeping: %d blocks, %d were copied, %d freed (%d%%), %d are fragmented, live estimate: %d%%",
+          step->n_old_blocks + freed,
+          step->n_old_blocks - blocks + freed,
+          freed,
+          blocks == 0 ? 0 : (freed * 100) / blocks,
+          fragd,
+          (blocks - freed) == 0 ? 0 : ((live / BLOCK_SIZE_W) * 100) / (blocks - freed));
+
+    ASSERT(countBlocks(step->old_blocks) == step->n_old_blocks);
+}
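sweep() estimates residency at bitmap-word granularity: a bitmap word covers BITS_IN(W_) heap words, and any nonzero bitmap word counts its whole span as live, so live_estimate rounds up. The same scan in isolation, with illustrative constants standing in for the RTS's BLOCK_SIZE_W and BITS_IN(W_):

    #include <stdio.h>

    #define W_BITS       64                   /* illustrative word size */
    #define BLOCK_SIZE_W 1024                 /* illustrative words per block */
    #define BITMAP_WORDS (BLOCK_SIZE_W / W_BITS)

    /* Over-approximate live words in one block: each nonzero bitmap
     * word stands for up to W_BITS live heap words. */
    static unsigned block_residency(const unsigned long bitmap[BITMAP_WORDS])
    {
        unsigned i, resid = 0;
        for (i = 0; i < BITMAP_WORDS; i++)
            if (bitmap[i] != 0) resid++;
        return resid * W_BITS;
    }

    int main(void)
    {
        unsigned long bm[BITMAP_WORDS] = { 0 };
        bm[0] = 1;                            /* a single marked word */
        printf("estimate: %u words\n", block_residency(bm));  /* 64, not 1 */
        return 0;
    }

A block whose scan finds resid == 0 is freed outright, and one whose estimate falls below three quarters of the block is flagged BF_FRAGMENTED, steering the next GC to copy out of it (see init_collected_gen() above).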
diff --git a/rts/sm/Sweep.h b/rts/sm/Sweep.h
new file mode 100644
index 0000000..e7904a9
--- /dev/null
+++ b/rts/sm/Sweep.h
@@ -0,0 +1,14 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 2008
+ *
+ * Simple mark/sweep, collecting whole blocks.
+ *
+ * Documentation on the architecture of the Garbage Collector can be
+ * found in the online commentary:
+ *
+ *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC
+ *
+ * ---------------------------------------------------------------------------*/
+
+void sweep(step *gen);
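For the record, the mode is chosen per run with the RTS option added above; a hypothetical invocation (program name illustrative):

    ./Main +RTS -w -RTS

As the Storage.c hunk shows, -w (like -c) is rejected with a warning under -G1, and because resize_generations() sets compact as well as mark when -c or its auto-compaction threshold applies, compaction takes precedence over sweeping when both are requested.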