From dbbf15c0f141357aa49b583286174867baadb821 Mon Sep 17 00:00:00 2001
From: Simon Marlow
Date: Wed, 16 Apr 2008 22:03:47 +0000
Subject: [PATCH] Allow work units smaller than a block to improve load balancing

---
 includes/Block.h   |    1 +
 includes/Storage.h |    3 +
 rts/sm/Evac.c      |    2 +-
 rts/sm/GC.c        |   50 ++++++++++-----
 rts/sm/GC.h        |   13 ++--
 rts/sm/GCUtils.c   |  178 ++++++++++++++++++++++++++++++++++------------------
 rts/sm/GCUtils.h   |   10 +--
 rts/sm/Scav.c      |   37 +++++------
 rts/sm/Scav.c-inc  |   17 +++--
 9 files changed, 197 insertions(+), 114 deletions(-)

diff --git a/includes/Block.h b/includes/Block.h
index 112092c..e2e691a 100644
--- a/includes/Block.h
+++ b/includes/Block.h
@@ -54,6 +54,7 @@ typedef struct bdescr_ {
   union {
       struct bdescr_ *back;     /* used (occasionally) for doubly-linked lists */
       StgWord *bitmap;
+      StgPtr  scan;             /* scan pointer for copying GC */
   } u;
   unsigned int gen_no;          /* generation */
   struct step_ *step;           /* step */
diff --git a/includes/Storage.h b/includes/Storage.h
index d109a17..3fcdfeb 100644
--- a/includes/Storage.h
+++ b/includes/Storage.h
@@ -91,6 +91,9 @@ typedef struct step_ {
     bdescr *     todos_last;
     unsigned int n_todos;            // count of above
 
+    bdescr *     part_blocks;        // partially-full scanned blocks
+    unsigned int n_part_blocks;      // count of above
+
     bdescr *     scavenged_large_objects;  // live large objs after GC (d-link)
     unsigned int n_scavenged_large_blocks; // size (not count) of above
 
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index f117526..daa6018 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -63,7 +63,7 @@ alloc_for_copy (nat size, step *stp)
      */
     to = ws->todo_free;
     if (to + size > ws->todo_lim) {
-        to = gc_alloc_todo_block(ws);
+        to = todo_block_full(size, ws);
    }
     ws->todo_free = to + size;
     ASSERT(ws->todo_free >= ws->todo_bd->free && ws->todo_free <= ws->todo_lim);
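The Evac.c change above is the whole patch as far as the copying fast path is concerned: alloc_for_copy still just bumps ws->todo_free against ws->todo_lim, but on overflow it now reports the size it needs, and the slow path may simply raise the limit within the same block instead of chaining a new one. A self-contained sketch of that shape (the names and sizes are illustrative, not the RTS's own; the real slow path is todo_block_full in GCUtils.c below):

    /* Bump-pointer allocation whose overflow hook extends the limit one
     * work unit at a time instead of always taking a fresh block. */
    #include <assert.h>
    #include <stdio.h>

    #define BLOCK_WORDS     512   /* stand-in for BLOCK_SIZE_W */
    #define WORK_UNIT_WORDS 128

    static long  block[BLOCK_WORDS];
    static long *free_p = block;                    /* ws->todo_free */
    static long *lim    = block + WORK_UNIT_WORDS;  /* ws->todo_lim  */

    static long *overflow(int size)   /* stand-in for todo_block_full */
    {
        assert(free_p + size <= block + BLOCK_WORDS);  /* model: always fits */
        lim += (size > WORK_UNIT_WORDS ? size : WORK_UNIT_WORDS);
        if (lim > block + BLOCK_WORDS)
            lim = block + BLOCK_WORDS;
        return free_p;
    }

    static long *alloc(int size)      /* mirrors alloc_for_copy's shape */
    {
        long *to = free_p;
        if (to + size > lim)
            to = overflow(size);      /* slow path */
        free_p = to + size;
        return to;
    }

    int main(void)
    {
        for (int i = 0; i < 100; i++)
            alloc(4);                 /* repeatedly crosses the soft limit */
        printf("allocated %ld words\n", (long)(free_p - block));
        return 0;
    }

Keeping the cheap test against a soft limit, rather than the block boundary, is what lets a work unit be smaller than a block without touching the fast path.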
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 4d0c4ef..a361175 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -422,22 +422,34 @@ GarbageCollect ( rtsBool force_major_gc )
             ws = &thr->steps[s];
             // Not true?
             // ASSERT( ws->scan_bd == ws->todo_bd );
-            ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 );
+            ASSERT( ws->scan_bd ? ws->scan_bd->u.scan == ws->scan_bd->free : 1 );
 
             // Push the final block
-            if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); }
+            if (ws->scan_bd) { push_scanned_block(ws->scan_bd, ws); }
 
             ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks);
 
-            prev = ws->scavd_list;
+            prev = ws->part_list;
+            for (bd = ws->part_list; bd != NULL; bd = bd->link) {
+                bd->flags &= ~BF_EVACUATED;    // now from-space
+                prev = bd;
+            }
+            if (prev != NULL) {
+                prev->link = ws->scavd_list;
+            }
             for (bd = ws->scavd_list; bd != NULL; bd = bd->link) {
                 bd->flags &= ~BF_EVACUATED;    // now from-space
                 prev = bd;
             }
-            prev->link = ws->stp->blocks;
-            ws->stp->blocks = ws->scavd_list;
-            ws->stp->n_blocks += ws->n_scavd_blocks;
-            ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks);
+            prev->link = ws->step->blocks;
+            if (ws->part_list != NULL) {
+                ws->step->blocks = ws->part_list;
+            } else {
+                ws->step->blocks = ws->scavd_list;
+            }
+            ws->step->n_blocks += ws->n_part_blocks;
+            ws->step->n_blocks += ws->n_scavd_blocks;
+            ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks);
         }
     }
 }
@@ -981,16 +993,18 @@ alloc_gc_thread (int n)
     for (s = 0; s < total_steps; s++)
     {
         ws = &t->steps[s];
-        ws->stp = &all_steps[s];
-        ASSERT(s == ws->stp->abs_no);
+        ws->step = &all_steps[s];
+        ASSERT(s == ws->step->abs_no);
         ws->gct = t;
 
         ws->scan_bd = NULL;
-        ws->scan = NULL;
 
         ws->todo_bd = NULL;
         ws->buffer_todo_bd = NULL;
 
+        ws->part_list = NULL;
+        ws->n_part_blocks = 0;
+
         ws->scavd_list = NULL;
         ws->n_scavd_blocks = 0;
     }
@@ -1304,15 +1318,17 @@ init_collected_gen (nat g, nat n_threads)
            ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
 
            ws->scan_bd = NULL;
-           ws->scan = NULL;
 
            ws->todo_large_objects = NULL;
 
+           ws->part_list = NULL;
+           ws->n_part_blocks = 0;
+
            // allocate the first to-space block; extra blocks will be
            // chained on as necessary.
            ws->todo_bd = NULL;
            ws->buffer_todo_bd = NULL;
-           gc_alloc_todo_block(ws);
+           alloc_todo_block(ws,0);
 
            ws->scavd_list = NULL;
            ws->n_scavd_blocks = 0;
@@ -1343,11 +1359,14 @@ init_uncollected_gen (nat g, nat threads)
        for (s = 0; s < generations[g].n_steps; s++) {
 
            ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s];
-           stp = ws->stp;
+           stp = ws->step;
 
            ws->buffer_todo_bd = NULL;
            ws->todo_large_objects = NULL;
 
+           ws->part_list = NULL;
+           ws->n_part_blocks = 0;
+
            ws->scavd_list = NULL;
            ws->n_scavd_blocks = 0;
@@ -1365,7 +1384,7 @@ init_uncollected_gen (nat g, nat threads)
                // this block is also the scan block; we must scan
                // from the current end point.
                ws->scan_bd = ws->todo_bd;
-               ws->scan = ws->scan_bd->free;
+               ws->scan_bd->u.scan = ws->scan_bd->free;
 
                // subtract the contents of this block from the stats,
                // because we'll count the whole block later.
@@ -1374,9 +1393,8 @@ init_uncollected_gen (nat g, nat threads)
            }
            else
            {
                ws->scan_bd = NULL;
-               ws->scan = NULL;
                ws->todo_bd = NULL;
-               gc_alloc_todo_block(ws);
+               alloc_todo_block(ws,0);
            }
        }
    }
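The first GC.c hunk splices each workspace's part_list in front of its scavd_list before handing the whole chain back to the step. A minimal model of that splice, with simplified types rather than the RTS's bdescr:

    #include <stdio.h>

    typedef struct blk_ { struct blk_ *link; } blk;

    /* Prepend part_list to scavd_list, as the GarbageCollect hunk does. */
    static blk *splice(blk *part_list, blk *scavd_list)
    {
        blk *prev = part_list;
        for (blk *bd = part_list; bd != NULL; bd = bd->link)
            prev = bd;                  /* walk to the tail of part_list */
        if (prev != NULL) {
            prev->link = scavd_list;    /* chain scavd_list behind it */
            return part_list;
        }
        return scavd_list;              /* part_list was empty */
    }

    int main(void)
    {
        blk a = {0}, b = {0}, c = {0};
        a.link = &b;                    /* part_list: a -> b */
        blk *blocks = splice(&a, &c);   /* scavd_list: just c */
        int n = 0;
        for (blk *bd = blocks; bd != NULL; bd = bd->link) n++;
        printf("%d blocks\n", n);       /* prints: 3 blocks */
        return 0;
    }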
diff --git a/rts/sm/GC.h b/rts/sm/GC.h
index 5c2bff9..bc14840 100644
--- a/rts/sm/GC.h
+++ b/rts/sm/GC.h
@@ -73,12 +73,11 @@
    ------------------------------------------------------------------------- */
 
 typedef struct step_workspace_ {
-    step * stp;                 // the step for this workspace
+    step * step;                // the step for this workspace
     struct gc_thread_ * gct;    // the gc_thread that contains this workspace
 
     // block that is currently being scanned
     bdescr *     scan_bd;
-    StgPtr       scan;          // the scan pointer
 
     // where objects to be scavenged go
     bdescr *     todo_bd;
@@ -91,10 +90,14 @@ typedef struct step_workspace_ {
     // where large objects to be scavenged go
     bdescr *     todo_large_objects;
 
-    // Objects that need not be, or have already been, scavenged.
+    // Objects that have already been scavenged.
     bdescr *     scavd_list;
     lnat         n_scavd_blocks;     // count of blocks in this list
 
+    // Partially-full, scavenged blocks
+    bdescr *     part_list;
+    unsigned int n_part_blocks;      // count of above
+
 } step_workspace;
 
 /* ----------------------------------------------------------------------------
@@ -137,7 +140,7 @@ typedef struct gc_thread_ {
                                 // variable).
 
     rtsBool failed_to_evac;     // failure to evacuate an object typically
-                                // Causes it to be recorded in the mutable
+                                // causes it to be recorded in the mutable
                                 // object list
 
     rtsBool eager_promotion;    // forces promotion to the evac gen
@@ -194,4 +197,6 @@ extern nat mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS;
 
 StgClosure * isAlive(StgClosure *p);
 
+#define WORK_UNIT_WORDS 128
+
 #endif /* GC_H */
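For scale (assuming the usual 4-kbyte blocks and 8-byte words, so BLOCK_SIZE_W = 512; neither value is fixed by this patch): WORK_UNIT_WORDS = 128 divides each block into four work units of roughly 1 kbyte of copied data each, so an otherwise idle GC thread can pick up work in 1-kbyte units instead of waiting for a whole block to fill.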
diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c
index 98e6f8a..184b540 100644
--- a/rts/sm/GCUtils.c
+++ b/rts/sm/GCUtils.c
@@ -51,7 +51,7 @@ grab_todo_block (step_workspace *ws)
     bdescr *bd;
     step *stp;
 
-    stp = ws->stp;
+    stp = ws->step;
     bd = NULL;
 
     if (ws->buffer_todo_bd)
@@ -76,90 +76,144 @@ grab_todo_block (step_workspace *ws)
     return bd;
 }
 
-static void
-push_todo_block (bdescr *bd, step *stp)
+void
+push_scanned_block (bdescr *bd, step_workspace *ws)
 {
+    ASSERT(bd != NULL);
     ASSERT(bd->link == NULL);
-    ACQUIRE_SPIN_LOCK(&stp->sync_todo);
-    if (stp->todos_last == NULL) {
-        stp->todos_last = bd;
-        stp->todos = bd;
-    } else {
-        stp->todos_last->link = bd;
-        stp->todos_last = bd;
+    ASSERT(bd->step == ws->step);
+    ASSERT(bd->u.scan == bd->free);
+
+    if (bd->start + BLOCK_SIZE_W - bd->free > WORK_UNIT_WORDS)
+    {
+        // a partially full block: put it on the part_list.
+        bd->link = ws->part_list;
+        ws->part_list = bd;
+        ws->n_part_blocks++;
+        IF_DEBUG(sanity,
+                 ASSERT(countBlocks(ws->part_list) == ws->n_part_blocks));
+    }
+    else
+    {
+        // put the scan block on the ws->scavd_list.
+        bd->link = ws->scavd_list;
+        ws->scavd_list = bd;
+        ws->n_scavd_blocks++;
+        IF_DEBUG(sanity,
+                 ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks));
     }
-    stp->n_todos++;
-    trace(TRACE_gc|DEBUG_gc, "step %d, n_todos: %d", stp->abs_no, stp->n_todos);
-    RELEASE_SPIN_LOCK(&stp->sync_todo);
 }
 
-void
-push_scan_block (bdescr *bd, step_workspace *ws)
+StgPtr
+todo_block_full (nat size, step_workspace *ws)
 {
+    bdescr *bd;
+
+    bd = ws->todo_bd;
+
+    ASSERT(bd != NULL);
     ASSERT(bd->link == NULL);
+    ASSERT(bd->step == ws->step);
+
+    bd->free = ws->todo_free;
+
+    // If the global todo list is not empty, or there's not much work
+    // in this block to push, then just increase the limit, provided
+    // there's enough room in this block to evacuate the current
+    // object.
+    if (ws->step->todos != NULL ||
+        (bd->free - bd->u.scan < WORK_UNIT_WORDS / 2)) {
+        if (bd->free + size < bd->start + BLOCK_SIZE_W) {
+            debugTrace(DEBUG_gc, "increasing limit for %p", bd->start);
+            ws->todo_lim = stg_min(bd->start + BLOCK_SIZE_W,
+                                   ws->todo_lim + stg_max(WORK_UNIT_WORDS,size));
+            return ws->todo_free;
+        }
+    }
+
+    ASSERT(bd->u.scan >= bd->start && bd->u.scan <= bd->free);
+
+    // If this block is not the scan block, we want to push it out and
+    // make room for a new todo block.
+    if (bd != ws->scan_bd)
+    {
+        // If this block does not have enough space to allocate the
+        // current object, but it also doesn't have any work to push,
+        // then push it on to the scanned list.  It cannot be empty,
+        // because then there would be enough room to copy the current
+        // object.
+        if (bd->u.scan == bd->free)
+        {
+            ASSERT(bd->free != bd->start);
+            push_scanned_block(bd, ws);
+        }
+        // Otherwise, push this block out to the global list.
+        else
+        {
+            step *stp;
+            stp = ws->step;
+            trace(TRACE_gc|DEBUG_gc, "push todo block %p (%d words), step %d, n_todos: %d",
+                  bd->start, bd->free - bd->u.scan, stp->abs_no, stp->n_todos);
+            // ToDo: use buffer_todo
+            ACQUIRE_SPIN_LOCK(&stp->sync_todo);
+            if (stp->todos_last == NULL) {
+                stp->todos_last = bd;
+                stp->todos = bd;
+            } else {
+                stp->todos_last->link = bd;
+                stp->todos_last = bd;
+            }
+            stp->n_todos++;
+            RELEASE_SPIN_LOCK(&stp->sync_todo);
+        }
+    }
 
-    // update stats: this is a block that has been copied & scavenged
-    gct->copied += bd->free - bd->start;
+    ws->todo_bd   = NULL;
+    ws->todo_free = NULL;
+    ws->todo_lim  = NULL;
 
-    // put the scan block on the ws->scavd_list.
-    bd->link = ws->scavd_list;
-    ws->scavd_list = bd;
-    ws->n_scavd_blocks++;
+    alloc_todo_block(ws, size);
 
-    IF_DEBUG(sanity,
-             ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks));
+    return ws->todo_free;
 }
 
 StgPtr
-gc_alloc_todo_block (step_workspace *ws)
+alloc_todo_block (step_workspace *ws, nat size)
 {
     bdescr *bd;
 
-    if (ws->todo_bd != NULL) {
-        ws->todo_bd->free = ws->todo_free;
+    // Grab a part block if we have one, and it has enough room
+    if (ws->part_list != NULL &&
+        ws->part_list->start + BLOCK_SIZE_W - ws->part_list->free > (int)size)
+    {
+        bd = ws->part_list;
+        ws->part_list = bd->link;
+        ws->n_part_blocks--;
     }
-
-    // If we already have a todo block, it must be full, so we push it
-    // out: first to the buffer_todo_bd, then to the step.  BUT, don't
-    // push out the block out if it is already the scan block.
-    if (ws->todo_bd != NULL && ws->scan_bd != ws->todo_bd) {
-        ASSERT(ws->todo_bd->link == NULL);
-        if (ws->buffer_todo_bd == NULL) {
-            // If the global todo list is empty, push this block
-            // out immediately rather than caching it in
-            // buffer_todo_bd, because there might be other threads
-            // waiting for work.
-            if (ws->stp->todos == NULL) {
-                push_todo_block(ws->todo_bd, ws->stp);
-            } else {
-                ws->buffer_todo_bd = ws->todo_bd;
-            }
-        } else {
-            ASSERT(ws->buffer_todo_bd->link == NULL);
-            push_todo_block(ws->buffer_todo_bd, ws->stp);
-            ws->buffer_todo_bd = ws->todo_bd;
+    else
+    {
+        bd = allocBlock_sync();
+        bd->gen_no = ws->step->gen_no;
+        bd->step = ws->step;
+        bd->u.scan = bd->start;
+
+        // blocks in to-space in generations up to and including N
+        // get the BF_EVACUATED flag.
+        if (ws->step->gen_no <= N) {
+            bd->flags = BF_EVACUATED;
+        } else {
+            bd->flags = 0;
         }
-        ws->todo_bd = NULL;
-    }
-
-    bd = allocBlock_sync();
+    }
 
-    bd->gen_no = ws->stp->gen_no;
-    bd->step = ws->stp;
     bd->link = NULL;
 
-    // blocks in to-space in generations up to and including N
-    // get the BF_EVACUATED flag.
-    if (ws->stp->gen_no <= N) {
-        bd->flags = BF_EVACUATED;
-    } else {
-        bd->flags = 0;
-    }
-
     ws->todo_bd = bd;
-    ws->todo_free = bd->start;
-    ws->todo_lim  = bd->start + BLOCK_SIZE_W;
+    ws->todo_free = bd->free;
+    ws->todo_lim  = stg_min(bd->start + BLOCK_SIZE_W,
+                            bd->free + stg_max(WORK_UNIT_WORDS,size));
+
+    debugTrace(DEBUG_gc, "alloc new todo block %p for step %d",
+               bd->start, ws->step->abs_no);
 
     return ws->todo_free;
 }
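The heart of the patch is the three-way choice in todo_block_full above. Condensed into a self-contained model (illustrative names only; the real function also updates ws->todo_bd, ws->todo_free and ws->todo_lim as shown in the diff):

    #include <stdio.h>

    #define WORK_UNIT_WORDS 128

    enum action { RAISE_LIMIT, KEEP_LOCAL, PUBLISH_GLOBAL };

    static enum action decide(int todos_nonempty, /* step->todos != NULL        */
                              int unscanned,      /* bd->free - bd->u.scan      */
                              int room,           /* words left in the block    */
                              int size,           /* allocation that overflowed */
                              int is_scan_block)  /* bd == ws->scan_bd          */
    {
        /* Too little work to be worth sharing, or everyone already has
         * work: just push the limit up, provided the object still fits. */
        if ((todos_nonempty || unscanned < WORK_UNIT_WORDS / 2) && room > size)
            return RAISE_LIMIT;
        if (is_scan_block || unscanned == 0)
            return KEEP_LOCAL;       /* part_list/scavd_list, or keep scanning */
        return PUBLISH_GLOBAL;       /* unscanned work: onto step->todos */
    }

    int main(void)
    {
        /* Barely-unscanned block with room left: extend, don't share. */
        printf("%d\n", decide(0, 30, 200, 16, 0) == RAISE_LIMIT);    /* 1 */
        /* Lots of unscanned work, no room left: publish it. */
        printf("%d\n", decide(0, 300, 8, 16, 0) == PUBLISH_GLOBAL);  /* 1 */
        return 0;
    }

The bias is deliberate: publishing takes a spin lock, so a block holding less than half a work unit of unscanned data is, whenever possible, extended in place instead.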
diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h
index a121dbd..34657c2 100644
--- a/rts/sm/GCUtils.h
+++ b/rts/sm/GCUtils.h
@@ -20,18 +20,18 @@ extern SpinLock gc_alloc_block_sync;
 bdescr *allocBlock_sync(void);
 void    freeChain_sync(bdescr *bd);
 
-void    push_scan_block      (bdescr *bd, step_workspace *ws);
+void    push_scanned_block   (bdescr *bd, step_workspace *ws);
 bdescr *grab_todo_block      (step_workspace *ws);
-StgPtr  gc_alloc_todo_block  (step_workspace *ws);
-bdescr *gc_alloc_scavd_block (step_workspace *ws);
+StgPtr  todo_block_full      (nat size, step_workspace *ws);
+StgPtr  alloc_todo_block     (step_workspace *ws, nat size);
 
-// Returns true if a block is 3/4 full.  This predicate is used to try
+// Returns true if a block is partially full.  This predicate is used to try
 // to re-use partial blocks wherever possible, and to reduce wastage.
 // We might need to tweak the actual value.
 INLINE_HEADER rtsBool
 isPartiallyFull(bdescr *bd)
 {
-    return (bd->free + BLOCK_SIZE_W/4 < bd->start + BLOCK_SIZE_W);
+    return (bd->free + WORK_UNIT_WORDS < bd->start + BLOCK_SIZE_W);
 }
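A worked example of the new predicate, again assuming BLOCK_SIZE_W = 512: a block whose free pointer sits 400 words in fails the test (400 + 128 < 512 is false), since the 112 remaining words cannot hold a full work unit; a block only 300 words full passes (300 + 128 < 512), because at least one more work unit still fits.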
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 6eba1e0..b7bd7f3 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -1355,7 +1355,7 @@ scavenge_large (step_workspace *ws)
     bdescr *bd;
     StgPtr p;
 
-    gct->evac_step = ws->stp;
+    gct->evac_step = ws->step;
 
     bd = ws->todo_large_objects;
 
@@ -1367,15 +1367,15 @@ scavenge_large (step_workspace *ws)
        // the front when evacuating.
        ws->todo_large_objects = bd->link;
 
-       ACQUIRE_SPIN_LOCK(&ws->stp->sync_large_objects);
-       dbl_link_onto(bd, &ws->stp->scavenged_large_objects);
-       ws->stp->n_scavenged_large_blocks += bd->blocks;
-       RELEASE_SPIN_LOCK(&ws->stp->sync_large_objects);
+       ACQUIRE_SPIN_LOCK(&ws->step->sync_large_objects);
+       dbl_link_onto(bd, &ws->step->scavenged_large_objects);
+       ws->step->n_scavenged_large_blocks += bd->blocks;
+       RELEASE_SPIN_LOCK(&ws->step->sync_large_objects);
 
        p = bd->start;
        if (scavenge_one(p)) {
-           if (ws->stp->gen_no > 0) {
-               recordMutableGen_GC((StgClosure *)p, ws->stp->gen);
+           if (ws->step->gen_no > 0) {
+               recordMutableGen_GC((StgClosure *)p, ws->step->gen);
            }
        }
     }
@@ -1439,30 +1439,27 @@ loop:
            if (ws->scan_bd == NULL && ws->todo_bd != NULL)
            {
                ws->scan_bd = ws->todo_bd;
-               ws->scan = ws->scan_bd->start;
            }
 
            // If we have a scan block with some work to do,
            // scavenge everything up to the free pointer.
-           if (ws->scan != NULL && ws->scan < ws->scan_bd->free)
+           if (ws->scan_bd != NULL && ws->scan_bd->u.scan < ws->scan_bd->free)
            {
                if (n_gc_threads == 1) {
-                   scavenge_block1(ws->scan_bd, ws->scan);
+                   scavenge_block1(ws->scan_bd);
                } else {
-                   scavenge_block(ws->scan_bd, ws->scan);
+                   scavenge_block(ws->scan_bd);
                }
-               ws->scan = ws->scan_bd->free;
                did_something = rtsTrue;
            }
 
-           if (ws->scan_bd != NULL && ws->scan == ws->scan_bd->free
-               && ws->scan_bd != ws->todo_bd)
+           if (ws->scan_bd != NULL && ws->scan_bd != ws->todo_bd)
            {
+               ASSERT(ws->scan_bd->u.scan == ws->scan_bd->free);
                // we're not going to evac any more objects into
                // this block, so push it now.
-               push_scan_block(ws->scan_bd, ws);
+               push_scanned_block(ws->scan_bd, ws);
                ws->scan_bd = NULL;
-               ws->scan = NULL;
                // we might be able to scan the todo block now.
                did_something = rtsTrue;
            }
@@ -1482,11 +1479,11 @@ loop:
                // our scavd list.  This saves pushing out the
                // scan_bd block, which might be partial.
                if (n_gc_threads == 1) {
-                   scavenge_block1(bd, bd->start);
+                   scavenge_block1(bd);
                } else {
-                   scavenge_block(bd, bd->start);
+                   scavenge_block(bd);
                }
-               push_scan_block(bd, ws);
+               push_scanned_block(bd, ws);
                did_something = rtsTrue;
                break;
            }
@@ -1561,7 +1558,7 @@ any_work (void)
        }
        ws = &gct->steps[s];
        if (ws->todo_large_objects) return rtsTrue;
-       if (ws->stp->todos) return rtsTrue;
+       if (ws->step->todos) return rtsTrue;
     }
 
     gct->no_work++;
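What makes the hand-off in the scavenging loop possible is that a block's progress now travels with the block itself (bd->u.scan) rather than with the worker (the old ws->scan), so whichever thread picks a block up can resume where the last one stopped. A toy model of the scan/free discipline, illustrative only:

    #include <stdio.h>

    /* Progress lives in the descriptor: whoever holds the block can
     * resume scanning at bd->scan, even if another thread started it. */
    typedef struct {
        int scan, free;         /* word offsets; scan <= free always */
    } bdesc;

    static void scavenge_block(bdesc *bd, int cap)
    {
        while (bd->scan < bd->free) {
            if (bd->free < cap / 2)
                bd->free++;     /* pretend each object evacuates another */
            bd->scan++;         /* one object scanned */
        }
    }

    int main(void)
    {
        bdesc bd = { 0, 10 };
        scavenge_block(&bd, 512);
        printf("scan=%d free=%d\n", bd.scan, bd.free);  /* scan == free */
        return 0;
    }

This is also the invariant the new ASSERTs check: a block is only pushed once bd->u.scan has caught up with bd->free.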
diff --git a/rts/sm/Scav.c-inc b/rts/sm/Scav.c-inc
index 09cdf36..6f85203 100644
--- a/rts/sm/Scav.c-inc
+++ b/rts/sm/Scav.c-inc
@@ -15,7 +15,7 @@
 // defined, the second time without.
 
 #ifndef PARALLEL_GC
-#define scavenge_block(a,b) scavenge_block1(a,b)
+#define scavenge_block(a) scavenge_block1(a)
 #define evacuate(a) evacuate1(a)
 #define recordMutableGen_GC(a,b) recordMutableGen(a,b)
 #else
@@ -24,7 +24,7 @@
 #undef recordMutableGen_GC
 #endif
 
-static void scavenge_block (bdescr *bd, StgPtr scan);
+static void scavenge_block (bdescr *bd);
 
 /* -----------------------------------------------------------------------------
    Scavenge a block from the given scan pointer up to bd->free.
@@ -39,7 +39,7 @@ static void scavenge_block (bdescr *bd, StgPtr scan);
    -------------------------------------------------------------------------- */
 
 static void
-scavenge_block (bdescr *bd, StgPtr scan)
+scavenge_block (bdescr *bd)
 {
   StgPtr p, q;
   StgInfoTable *info;
@@ -47,10 +47,10 @@ scavenge_block (bdescr *bd, StgPtr scan)
   rtsBool saved_eager_promotion;
   step_workspace *ws;
 
-  p = scan;
+  p = bd->u.scan;
 
   debugTrace(DEBUG_gc, "scavenging block %p (gen %d, step %d) @ %p",
-             bd->start, bd->gen_no, bd->step->no, scan);
+             bd->start, bd->gen_no, bd->step->no, p);
 
   gct->evac_step = bd->step;
   saved_evac_step = gct->evac_step;
@@ -448,7 +448,12 @@ scavenge_block (bdescr *bd, StgPtr scan)
   }
 
   debugTrace(DEBUG_gc, "   scavenged %ld bytes",
-             (unsigned long)((bd->free - scan) * sizeof(W_)));
+             (unsigned long)((bd->free - bd->u.scan) * sizeof(W_)));
+
+  // update stats: this is a block that has been copied & scavenged
+  gct->copied += bd->free - bd->u.scan;
+
+  bd->u.scan = bd->free;
 }
 
 #undef scavenge_block
-- 
1.7.10.4
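A note on the Scav.c-inc mechanism the last hunks lean on, sketched here with invented names: the file is textually included twice by its parent, once with PARALLEL_GC defined and once without, and a function-like macro renames the second copy. That is why the macro's arity had to change in step with scavenge_block's signature:

    /* scan_body.h (hypothetical file) -- included twice by its parent */
    #ifndef PARALLEL_GC
    #define scan_fn(a) scan_fn1(a)   /* sequential copy gets a new name */
    #endif

    static void scan_fn(int *p)      /* becomes scan_fn1 if !PARALLEL_GC */
    {
        (*p)++;                      /* stand-in for the scavenging loop */
    }

    #undef scan_fn

    /* parent file (hypothetical):
     *   #define PARALLEL_GC
     *   #include "scan_body.h"      -- compiles scan_fn
     *   #undef  PARALLEL_GC
     *   #include "scan_body.h"      -- compiles scan_fn1
     */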