From 754e039a8a15d5774fe73872ff9ac593b46370e0 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Thu, 8 Oct 2009 15:14:42 +0000 Subject: [PATCH] Mark/compact: use a dynamically-sized mark stack, and don't do linear scan This improves the performance of the mark/compact and mark/region collectors, and paves the way for doing mark/region with smaller region sizes, in the style of Immix. --- rts/sm/Compact.c | 8 +++--- rts/sm/Compact.h | 30 ---------------------- rts/sm/Evac.c | 6 +---- rts/sm/GC.c | 46 +++++++++++++++------------------- rts/sm/GC.h | 12 +++------ rts/sm/MarkStack.h | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++ rts/sm/Scav.c | 52 +++----------------------------------- 7 files changed, 104 insertions(+), 121 deletions(-) create mode 100644 rts/sm/MarkStack.h diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index 892364d..c566aa0 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -854,15 +854,15 @@ update_fwd_compact( bdescr *blocks ) size = p - q; if (free + size > free_bd->start + BLOCK_SIZE_W) { - // unset the next bit in the bitmap to indicate that + // set the next bit in the bitmap to indicate that // this object needs to be pushed into the next // block. This saves us having to run down the // threaded info pointer list twice during the next pass. - unmark(q+1,bd); + mark(q+1,bd); free_bd = free_bd->link; free = free_bd->start; } else { - ASSERT(is_marked(q+1,bd)); + ASSERT(!is_marked(q+1,bd)); } unthread(q,(StgWord)free + GET_CLOSURE_TAG((StgClosure *)iptr)); @@ -921,7 +921,7 @@ update_bkwd_compact( step *stp ) } #endif - if (!is_marked(p+1,bd)) { + if (is_marked(p+1,bd)) { // don't forget to update the free ptr in the block desc. free_bd->free = free; free_bd = free_bd->link; diff --git a/rts/sm/Compact.h b/rts/sm/Compact.h index 7fe15e5..efd7351 100644 --- a/rts/sm/Compact.h +++ b/rts/sm/Compact.h @@ -16,36 +16,6 @@ BEGIN_RTS_PRIVATE -INLINE_HEADER rtsBool -mark_stack_empty(void) -{ - return mark_sp == mark_stack; -} - -INLINE_HEADER rtsBool -mark_stack_full(void) -{ - return mark_sp >= mark_splim; -} - -INLINE_HEADER void -reset_mark_stack(void) -{ - mark_sp = mark_stack; -} - -INLINE_HEADER void -push_mark_stack(StgPtr p) -{ - *mark_sp++ = p; -} - -INLINE_HEADER StgPtr -pop_mark_stack(void) -{ - return *--mark_sp; -} - INLINE_HEADER void mark(StgPtr p, bdescr *bd) { diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index 5836210..379fbba 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -20,6 +20,7 @@ #include "GCThread.h" #include "GCUtils.h" #include "Compact.h" +#include "MarkStack.h" #include "Prelude.h" #include "Trace.h" #include "LdvProfile.h" @@ -499,11 +500,6 @@ loop: */ if (!is_marked((P_)q,bd)) { mark((P_)q,bd); - if (mark_stack_full()) { - debugTrace(DEBUG_gc,"mark stack overflowed"); - mark_stack_overflowed = rtsTrue; - reset_mark_stack(); - } push_mark_stack((P_)q); } return; diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 97f391c..07cf2b5 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -44,6 +44,7 @@ #include "Evac.h" #include "Scav.h" #include "GCUtils.h" +#include "MarkStack.h" #include "MarkWeak.h" #include "Sparks.h" #include "Sweep.h" @@ -154,21 +155,12 @@ static void gcCAFs (void); #endif /* ----------------------------------------------------------------------------- - The mark bitmap & stack. + The mark stack. -------------------------------------------------------------------------- */ -#define MARK_STACK_BLOCKS 4 - -bdescr *mark_stack_bdescr; -StgPtr *mark_stack; -StgPtr *mark_sp; -StgPtr *mark_splim; - -// Flag and pointers used for falling back to a linear scan when the -// mark stack overflows. -rtsBool mark_stack_overflowed; -bdescr *oldgen_scan_bd; -StgPtr oldgen_scan; +bdescr *mark_stack_top_bd; // topmost block in the mark stack +bdescr *mark_stack_bd; // current block in the mark stack +StgPtr mark_sp; // pointer to the next unallocated mark stack entry /* ----------------------------------------------------------------------------- GarbageCollect: the main entry point to the garbage collector. @@ -304,15 +296,15 @@ GarbageCollect (rtsBool force_major_gc, /* Allocate a mark stack if we're doing a major collection. */ if (major_gc && oldest_gen->steps[0].mark) { - nat mark_stack_blocks; - mark_stack_blocks = stg_max(MARK_STACK_BLOCKS, - oldest_gen->steps[0].n_old_blocks / 100); - mark_stack_bdescr = allocGroup(mark_stack_blocks); - mark_stack = (StgPtr *)mark_stack_bdescr->start; - mark_sp = mark_stack; - mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W); + mark_stack_bd = allocBlock(); + mark_stack_top_bd = mark_stack_bd; + mark_stack_bd->link = NULL; + mark_stack_bd->u.back = NULL; + mark_sp = mark_stack_bd->start; } else { - mark_stack_bdescr = NULL; + mark_stack_bd = NULL; + mark_stack_top_bd = NULL; + mark_sp = NULL; } // this is the main thread @@ -707,8 +699,10 @@ SET_GCT(gc_threads[0]); pinned_object_block = NULL; // Free the mark stack. - if (mark_stack_bdescr != NULL) { - freeGroup(mark_stack_bdescr); + if (mark_stack_top_bd != NULL) { + debugTrace(DEBUG_gc, "mark stack: %d blocks", + countBlocks(mark_stack_top_bd)); + freeChain(mark_stack_top_bd); } // Free any bitmaps. @@ -985,8 +979,7 @@ any_work (void) write_barrier(); // scavenge objects in compacted generation - if (mark_stack_overflowed || oldgen_scan_bd != NULL || - (mark_stack_bdescr != NULL && !mark_stack_empty())) { + if (mark_stack_bd != NULL && !mark_stack_empty()) { return rtsTrue; } @@ -1134,7 +1127,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS) prodCapability(&capabilities[i], cap->running_task); } } - for (j=0; j < 10000000; j++) { + for (j=0; j < 10; j++) { retry = rtsFalse; for (i=0; i < n_threads; i++) { if (i == me) continue; @@ -1145,6 +1138,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS) } } if (!retry) break; + yieldThread(); } } } diff --git a/rts/sm/GC.h b/rts/sm/GC.h index 4b928e9..cddba00 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -26,14 +26,10 @@ void markCAFs ( evac_fn evac, void *user ); extern nat N; extern rtsBool major_gc; -extern bdescr *mark_stack_bdescr; -extern StgPtr *mark_stack; -extern StgPtr *mark_sp; -extern StgPtr *mark_splim; - -extern rtsBool mark_stack_overflowed; -extern bdescr *oldgen_scan_bd; -extern StgPtr oldgen_scan; +extern bdescr *mark_stack_bd; +extern bdescr *mark_stack_top_bd; +extern StgPtr mark_sp; +extern StgPtr mark_splim; extern long copied; diff --git a/rts/sm/MarkStack.h b/rts/sm/MarkStack.h new file mode 100644 index 0000000..bf445b3 --- /dev/null +++ b/rts/sm/MarkStack.h @@ -0,0 +1,71 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2009 + * + * Operations on the mark stack + * + * Documentation on the architecture of the Garbage Collector can be + * found in the online commentary: + * + * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC + * + * ---------------------------------------------------------------------------*/ + +#ifndef SM_MARKSTACK_H +#define SM_MARKSTACk_H + +BEGIN_RTS_PRIVATE + +INLINE_HEADER void +push_mark_stack(StgPtr p) +{ + bdescr *bd; + + *mark_sp++ = (StgWord)p; + + if (((W_)mark_sp & BLOCK_MASK) == 0) + { + if (mark_stack_bd->u.back != NULL) + { + mark_stack_bd = mark_stack_bd->u.back; + } + else + { + bd = allocBlock_sync(); + bd->link = mark_stack_bd; + bd->u.back = NULL; + mark_stack_bd->u.back = bd; // double-link the new block on + mark_stack_top_bd = bd; + mark_stack_bd = bd; + } + mark_sp = mark_stack_bd->start; + } +} + +INLINE_HEADER StgPtr +pop_mark_stack(void) +{ + if (((W_)mark_sp & BLOCK_MASK) == 0) + { + if (mark_stack_bd->link == NULL) + { + return NULL; + } + else + { + mark_stack_bd = mark_stack_bd->link; + mark_sp = mark_stack_bd->start + BLOCK_SIZE_W; + } + } + return (StgPtr)*--mark_sp; +} + +INLINE_HEADER rtsBool +mark_stack_empty(void) +{ + return (((W_)mark_sp & BLOCK_MASK) == 0 && mark_stack_bd->link == NULL); +} + +END_RTS_PRIVATE + +#endif /* SM_MARKSTACK_H */ diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index 4c75ed2..5e38777 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -19,6 +19,7 @@ #include "GCThread.h" #include "GCUtils.h" #include "Compact.h" +#include "MarkStack.h" #include "Evac.h" #include "Scav.h" #include "Apply.h" @@ -740,9 +741,7 @@ scavenge_mark_stack(void) gct->evac_step = &oldest_gen->steps[0]; saved_evac_step = gct->evac_step; -linear_scan: - while (!mark_stack_empty()) { - p = pop_mark_stack(); + while ((p = pop_mark_stack())) { ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl((StgClosure *)p); @@ -1055,49 +1054,7 @@ linear_scan: recordMutableGen_GC((StgClosure *)q, gct->evac_step->gen_no); } } - - // mark the next bit to indicate "scavenged" - mark(q+1, Bdescr(q)); - - } // while (!mark_stack_empty()) - - // start a new linear scan if the mark stack overflowed at some point - if (mark_stack_overflowed && oldgen_scan_bd == NULL) { - debugTrace(DEBUG_gc, "scavenge_mark_stack: starting linear scan"); - mark_stack_overflowed = rtsFalse; - oldgen_scan_bd = oldest_gen->steps[0].old_blocks; - oldgen_scan = oldgen_scan_bd->start; - } - - if (oldgen_scan_bd) { - // push a new thing on the mark stack - loop: - // find a closure that is marked but not scavenged, and start - // from there. - while (oldgen_scan < oldgen_scan_bd->free - && !is_marked(oldgen_scan,oldgen_scan_bd)) { - oldgen_scan++; - } - - if (oldgen_scan < oldgen_scan_bd->free) { - - // already scavenged? - if (is_marked(oldgen_scan+1,oldgen_scan_bd)) { - oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE; - goto loop; - } - push_mark_stack(oldgen_scan); - // ToDo: bump the linear scan by the actual size of the object - oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE; - goto linear_scan; - } - - oldgen_scan_bd = oldgen_scan_bd->link; - if (oldgen_scan_bd != NULL) { - oldgen_scan = oldgen_scan_bd->start; - goto loop; - } - } + } // while (p = pop_mark_stack()) } /* ----------------------------------------------------------------------------- @@ -1964,8 +1921,7 @@ loop: } // scavenge objects in compacted generation - if (mark_stack_overflowed || oldgen_scan_bd != NULL || - (mark_stack_bdescr != NULL && !mark_stack_empty())) { + if (mark_stack_bd != NULL && !mark_stack_empty()) { scavenge_mark_stack(); work_to_do = rtsTrue; } -- 1.7.10.4