X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FGC.c;h=a13cd33afadd346ed79e20ac72235ea4e62887eb;hb=a1b4e3b88a6987deed7bb7f1bd870b30eef1b475;hp=cce7f3f9e3bd3a987ad93b8b816a7d7607955620;hpb=a28e9d8435d1a5e8348e90f2772f44b15fcbbb91;p=ghc-hetmet.git diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index cce7f3f..a13cd33 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -26,7 +26,7 @@ #include "Prelude.h" #include "ParTicky.h" // ToDo: move into Rts.h #include "GCCompact.h" -#include "Signals.h" +#include "RtsSignals.h" #include "STM.h" #if defined(GRAN) || defined(PAR) # include "GranSimRts.h" @@ -104,6 +104,10 @@ static rtsBool major_gc; */ static nat evac_gen; +/* Whether to do eager promotion or not. + */ +static rtsBool eager_promotion; + /* Weak pointers */ StgWeak *old_weak_ptr_list; // also pending finaliser list @@ -140,6 +144,14 @@ static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC static lnat thunk_selector_depth = 0; #define MAX_THUNK_SELECTOR_DEPTH 8 +/* Mut-list stats */ +#ifdef DEBUG +static nat + mutlist_MUTVARS, + mutlist_MUTARRS, + mutlist_OTHERS; +#endif + /* ----------------------------------------------------------------------------- Static function declarations -------------------------------------------------------------------------- */ @@ -323,7 +335,7 @@ gc_alloc_scavd_block(step *stp) - free from-space in each step, and set from-space = to-space. - Locks held: sched_mutex + Locks held: all capabilities are held throughout GarbageCollect(). -------------------------------------------------------------------------- */ @@ -332,9 +344,11 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) { bdescr *bd; step *stp; - lnat live, allocated, collected = 0, copied = 0, scavd_copied = 0; + lnat live, allocated, copied = 0, scavd_copied = 0; lnat oldgen_saved_blocks = 0; - nat g, s; + nat g, s, i; + + ACQUIRE_SM_LOCK; #ifdef PROFILING CostCentreStack *prev_CCS; @@ -356,6 +370,17 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // tell the stats department that we've started a GC stat_startGC(); +#ifdef DEBUG + // check for memory leaks if DEBUG is on + memInventory(); +#endif + +#ifdef DEBUG + mutlist_MUTVARS = 0; + mutlist_MUTARRS = 0; + mutlist_OTHERS = 0; +#endif + // Init stats and print par specific (timing) info PAR_TICKY_PAR_START(); @@ -432,6 +457,10 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) if (g != 0) { freeChain(generations[g].mut_list); generations[g].mut_list = allocBlock(); + for (i = 0; i < n_capabilities; i++) { + freeChain(capabilities[i].mut_lists[g]); + capabilities[i].mut_lists[g] = allocBlock(); + } } for (s = 0; s < generations[g].n_steps; s++) { @@ -534,6 +563,19 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) stp->scavenged_large_objects = NULL; stp->n_scavenged_large_blocks = 0; } + + /* Move the private mutable lists from each capability onto the + * main mutable list for the generation. + */ + for (i = 0; i < n_capabilities; i++) { + for (bd = capabilities[i].mut_lists[g]; + bd->link != NULL; bd = bd->link) { + /* nothing */ + } + bd->link = generations[g].mut_list; + generations[g].mut_list = capabilities[i].mut_lists[g]; + capabilities[i].mut_lists[g] = allocBlock(); + } } /* Allocate a mark stack if we're doing a major collection. @@ -547,6 +589,8 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) mark_stack_bdescr = NULL; } + eager_promotion = rtsTrue; // for now + /* ----------------------------------------------------------------------- * follow all the roots that we know about: * - mutable lists from each generation > N @@ -688,7 +732,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) } } - /* Update the pointers from the "main thread" list - these are + /* Update the pointers from the task list - these are * treated as weak pointers because we want to allow a main thread * to get a BlockedOnDeadMVar exception in the same way as any other * thread. Note that the threads should all have been retained by @@ -696,14 +740,23 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) * updating pointers here. */ { - StgMainThread *m; + Task *task; StgTSO *tso; - for (m = main_threads; m != NULL; m = m->link) { - tso = (StgTSO *) isAlive((StgClosure *)m->tso); - if (tso == NULL) { - barf("main thread has been GC'd"); + for (task = all_tasks; task != NULL; task = task->all_link) { + if (!task->stopped && task->tso) { + ASSERT(task->tso->bound == task); + tso = (StgTSO *) isAlive((StgClosure *)task->tso); + if (tso == NULL) { + barf("task %p: main thread %d has been GC'd", +#ifdef THREADED_RTS + (void *)task->id, +#else + (void *)task, +#endif + task->tso->id); + } + task->tso = tso; } - m->tso = tso; } } @@ -759,9 +812,13 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // Count the mutable list as bytes "copied" for the purposes of // stats. Every mutable list is copied during every GC. if (g > 0) { + nat mut_list_size = 0; for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) { - copied += (bd->free - bd->start) * sizeof(StgWord); + mut_list_size += bd->free - bd->start; } + copied += mut_list_size; + + IF_DEBUG(gc, debugBelch("mut_list_size: %ld (%d vars, %d arrays, %d others)\n", mut_list_size * sizeof(W_), mutlist_MUTVARS, mutlist_MUTARRS, mutlist_OTHERS)); } for (s = 0; s < generations[g].n_steps; s++) { @@ -780,18 +837,6 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // for generations we collected... if (g <= N) { - // rough calculation of garbage collected, for stats output - if (stp->is_compacted) { - collected += (oldgen_saved_blocks - stp->n_old_blocks) * BLOCK_SIZE_W; - } else { - if (g == 0 && s == 0) { - collected += countNurseryBlocks() * BLOCK_SIZE_W; - collected += alloc_blocks; - } else { - collected += stp->n_old_blocks * BLOCK_SIZE_W; - } - } - /* free old memory and shift to-space into from-space for all * the collected steps (except the allocation area). These * freed blocks will probaby be quickly recycled. @@ -806,20 +851,21 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // tack the new blocks on the end of the existing blocks if (stp->old_blocks != NULL) { for (bd = stp->old_blocks; bd != NULL; bd = next) { - next = bd->link; - if (next == NULL) { - bd->link = stp->blocks; - } // NB. this step might not be compacted next // time, so reset the BF_COMPACTED flags. // They are set before GC if we're going to // compact. (search for BF_COMPACTED above). bd->flags &= ~BF_COMPACTED; + next = bd->link; + if (next == NULL) { + bd->link = stp->blocks; + } } stp->blocks = stp->old_blocks; } // add the new blocks to the block tally stp->n_blocks += stp->n_old_blocks; + ASSERT(countBlocks(stp->blocks) == stp->n_blocks); } else { freeChain(stp->old_blocks); for (bd = stp->blocks; bd != NULL; bd = bd->link) { @@ -972,8 +1018,9 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) for (g = 0; g <= N; g++) { for (s = 0; s < generations[g].n_steps; s++) { stp = &generations[g].steps[s]; - if (stp->is_compacted && stp->bitmap != NULL) { + if (stp->bitmap != NULL) { freeGroup(stp->bitmap); + stp->bitmap = NULL; } } } @@ -1101,15 +1148,15 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // Reset the nursery resetNurseries(); - RELEASE_LOCK(&sched_mutex); - // start any pending finalizers - scheduleFinalizers(old_weak_ptr_list); + RELEASE_SM_LOCK; + scheduleFinalizers(last_free_capability, old_weak_ptr_list); + ACQUIRE_SM_LOCK; // send exceptions to any threads which were about to die + RELEASE_SM_LOCK; resurrectThreads(resurrected_threads); - - ACQUIRE_LOCK(&sched_mutex); + ACQUIRE_SM_LOCK; // Update the stable pointer hash table. updateStablePtrTable(major_gc); @@ -1130,8 +1177,10 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) CCCS = prev_CCS; #endif - // check for memory leaks if sanity checking is on - IF_DEBUG(sanity, memInventory()); +#ifdef DEBUG + // check for memory leaks if DEBUG is on + memInventory(); +#endif #ifdef RTS_GTK_FRONTPANEL if (RtsFlags.GcFlags.frontpanel) { @@ -1140,13 +1189,15 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) #endif // ok, GC over: tell the stats department what happened. - stat_endGC(allocated, collected, live, copied, scavd_copied, N); + stat_endGC(allocated, live, copied, scavd_copied, N); #if defined(RTS_USER_SIGNALS) // unblock signals again unblockUserSignals(); #endif + RELEASE_SM_LOCK; + //PAR_TICKY_TP(); } @@ -1524,11 +1575,11 @@ copy(StgClosure *src, nat size, step *stp) * by evacuate()). */ if (stp->gen_no < evac_gen) { -#ifdef NO_EAGER_PROMOTION - failed_to_evac = rtsTrue; -#else - stp = &generations[evac_gen].steps[0]; -#endif + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } /* chain a new block onto the to-space for the destination step if @@ -1574,11 +1625,11 @@ copy_noscav(StgClosure *src, nat size, step *stp) * by evacuate()). */ if (stp->gen_no < evac_gen) { -#ifdef NO_EAGER_PROMOTION - failed_to_evac = rtsTrue; -#else - stp = &generations[evac_gen].steps[0]; -#endif + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } /* chain a new block onto the to-space for the destination step if @@ -1621,11 +1672,11 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) TICK_GC_WORDS_COPIED(size_to_copy); if (stp->gen_no < evac_gen) { -#ifdef NO_EAGER_PROMOTION - failed_to_evac = rtsTrue; -#else - stp = &generations[evac_gen].steps[0]; -#endif + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } if (stp->hp + size_to_reserve >= stp->hpLim) { @@ -1647,7 +1698,7 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) SET_EVACUAEE_FOR_LDV(src, size_to_reserve); // fill the slop if (size_to_reserve - size_to_copy_org > 0) - FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org)); + LDV_FILL_SLOP(stp->hp - 1, (int)(size_to_reserve - size_to_copy_org)); #endif return (StgClosure *)dest; } @@ -1702,11 +1753,11 @@ evacuate_large(StgPtr p) */ stp = bd->step->to; if (stp->gen_no < evac_gen) { -#ifdef NO_EAGER_PROMOTION - failed_to_evac = rtsTrue; -#else - stp = &generations[evac_gen].steps[0]; -#endif + if (eager_promotion) { + stp = &generations[evac_gen].steps[0]; + } else { + failed_to_evac = rtsTrue; + } } bd->step = stp; @@ -1892,7 +1943,8 @@ loop: switch (info->type) { - case MUT_VAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: case MVAR: return copy(q,sizeW_fromITBL(info),stp); @@ -1966,11 +2018,15 @@ loop: case THUNK_SELECTOR: { StgClosure *p; + const StgInfoTable *info_ptr; if (thunk_selector_depth > MAX_THUNK_SELECTOR_DEPTH) { return copy(q,THUNK_SELECTOR_sizeW(),stp); } + // stashed away for LDV profiling, see below + info_ptr = q->header.info; + p = eval_thunk_selector(info->layout.selector_offset, (StgSelector *)q); @@ -1981,6 +2037,14 @@ loop: // q is still BLACKHOLE'd. thunk_selector_depth++; val = evacuate(p); + thunk_selector_depth--; + +#ifdef PROFILING + // For the purposes of LDV profiling, we have destroyed + // the original selector thunk. + SET_INFO(q, info_ptr); + LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(q); +#endif // Update the THUNK_SELECTOR with an indirection to the // EVACUATED closure now at p. Why do this rather than @@ -1991,12 +2055,10 @@ loop: SET_INFO(q, &stg_IND_info); ((StgInd *)q)->indirectee = p; -#ifdef PROFILING - // We store the size of the just evacuated object in the - // LDV word so that the profiler can guess the position of - // the next object later. - SET_EVACUAEE_FOR_LDV(q, THUNK_SELECTOR_sizeW()); -#endif + // For the purposes of LDV profiling, we have created an + // indirection. + LDV_RECORD_CREATE(q); + return val; } } @@ -2061,7 +2123,8 @@ loop: // just copy the block return copy_noscav(q,arr_words_sizeW((StgArrWords *)q),stp); - case MUT_ARR_PTRS: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: case MUT_ARR_PTRS_FROZEN: case MUT_ARR_PTRS_FROZEN0: // just copy the block @@ -2112,7 +2175,7 @@ loop: } case BLOCKED_FETCH: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_NONUPD_SIZE); + ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOD_SIZE); to = copy(q,sizeofW(StgBlockedFetch),stp); IF_DEBUG(gc, debugBelch("@@ evacuate: %p (%s) to %p (%s)", @@ -2123,7 +2186,7 @@ loop: case REMOTE_REF: # endif case FETCH_ME: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE); + ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOAD_SIZE); to = copy(q,sizeofW(StgFetchMe),stp); IF_DEBUG(gc, debugBelch("@@ evacuate: %p (%s) to %p (%s)", @@ -2131,7 +2194,7 @@ loop: return to; case FETCH_ME_BQ: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE); + ASSERT(sizeofW(StgBlockedFetch) >= MIN_PAYLOAD_SIZE); to = copy(q,sizeofW(StgFetchMeBlockingQueue),stp); IF_DEBUG(gc, debugBelch("@@ evacuate: %p (%s) to %p (%s)", @@ -2843,13 +2906,22 @@ scavenge(step *stp) p += sizeofW(StgInd); break; - case MUT_VAR: - evac_gen = 0; + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; // mutable anyhow + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } p += sizeofW(StgMutVar); break; + } case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -2890,18 +2962,32 @@ scavenge(step *stp) p += arr_words_sizeW((StgArrWords *)p); break; - case MUT_ARR_PTRS: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: // follow everything { StgPtr next; - - evac_gen = 0; // repeatedly mutable + rtsBool saved_eager; + + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it "clean" and + // avoid traversing it during minor GCs. + saved_eager = eager_promotion; + eager_promotion = rtsFalse; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; // mutable anyhow. + eager_promotion = saved_eager; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; + } + + failed_to_evac = rtsTrue; // always put it on the mutable list. break; } @@ -2915,20 +3001,33 @@ scavenge(step *stp) for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } - // it's tempting to recordMutable() if failed_to_evac is - // false, but that breaks some assumptions (eg. every - // closure on the mutable list is supposed to have the MUT - // flag set, and MUT_ARR_PTRS_FROZEN doesn't). + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } break; } case TSO: { StgTSO *tso = (StgTSO *)p; - evac_gen = 0; + rtsBool saved_eager = eager_promotion; + + eager_promotion = rtsFalse; scavengeTSO(tso); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; // mutable anyhow. + eager_promotion = saved_eager; + + if (failed_to_evac) { + tso->flags |= TSO_DIRTY; + } else { + tso->flags &= ~TSO_DIRTY; + } + + failed_to_evac = rtsTrue; // always on the mutable list p += tso_sizeW(tso); break; } @@ -3009,9 +3108,6 @@ scavenge(step *stp) evac_gen = 0; tvar->current_value = evacuate((StgClosure*)tvar->current_value); tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry); -#if defined(SMP) - tvar->last_update_by = (StgTRecHeader *)evacuate((StgClosure*)tvar->last_update_by); -#endif evac_gen = saved_evac_gen; failed_to_evac = rtsTrue; // mutable p += sizeofW(StgTVar); @@ -3062,7 +3158,9 @@ scavenge(step *stp) */ if (failed_to_evac) { failed_to_evac = rtsFalse; - recordMutableGen((StgClosure *)q, stp->gen); + if (stp->gen_no > 0) { + recordMutableGen((StgClosure *)q, stp->gen); + } } } @@ -3209,12 +3307,21 @@ linear_scan: evacuate(((StgInd *)p)->indirectee); break; - case MUT_VAR: - evac_gen = 0; + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } break; + } case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -3248,17 +3355,31 @@ linear_scan: scavenge_AP((StgAP *)p); break; - case MUT_ARR_PTRS: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: // follow everything { StgPtr next; - - evac_gen = 0; // repeatedly mutable + rtsBool saved_eager; + + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it "clean" and + // avoid traversing it during minor GCs. + saved_eager = eager_promotion; + eager_promotion = rtsFalse; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } - evac_gen = saved_evac_gen; + eager_promotion = saved_eager; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; + } + failed_to_evac = rtsTrue; // mutable anyhow. break; } @@ -3267,22 +3388,39 @@ linear_scan: case MUT_ARR_PTRS_FROZEN0: // follow everything { - StgPtr next; + StgPtr next, q = p; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } break; } case TSO: { StgTSO *tso = (StgTSO *)p; - evac_gen = 0; + rtsBool saved_eager = eager_promotion; + + eager_promotion = rtsFalse; scavengeTSO(tso); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; + eager_promotion = saved_eager; + + if (failed_to_evac) { + tso->flags |= TSO_DIRTY; + } else { + tso->flags &= ~TSO_DIRTY; + } + + failed_to_evac = rtsTrue; // always on the mutable list break; } @@ -3356,9 +3494,6 @@ linear_scan: evac_gen = 0; tvar->current_value = evacuate((StgClosure*)tvar->current_value); tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry); -#if defined(SMP) - tvar->last_update_by = (StgTRecHeader *)evacuate((StgClosure*)tvar->last_update_by); -#endif evac_gen = saved_evac_gen; failed_to_evac = rtsTrue; // mutable break; @@ -3399,7 +3534,9 @@ linear_scan: if (failed_to_evac) { failed_to_evac = rtsFalse; - recordMutableGen((StgClosure *)q, &generations[evac_gen]); + if (evac_gen > 0) { + recordMutableGen((StgClosure *)q, &generations[evac_gen]); + } } // mark the next bit to indicate "scavenged" @@ -3411,7 +3548,7 @@ linear_scan: if (mark_stack_overflowed && oldgen_scan_bd == NULL) { IF_DEBUG(gc, debugBelch("scavenge_mark_stack: starting linear scan")); mark_stack_overflowed = rtsFalse; - oldgen_scan_bd = oldest_gen->steps[0].blocks; + oldgen_scan_bd = oldest_gen->steps[0].old_blocks; oldgen_scan = oldgen_scan_bd->start; } @@ -3429,12 +3566,12 @@ linear_scan: // already scavenged? if (is_marked(oldgen_scan+1,oldgen_scan_bd)) { - oldgen_scan += sizeofW(StgHeader) + MIN_NONUPD_SIZE; + oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE; goto loop; } push_mark_stack(oldgen_scan); // ToDo: bump the linear scan by the actual size of the object - oldgen_scan += sizeofW(StgHeader) + MIN_NONUPD_SIZE; + oldgen_scan += sizeofW(StgHeader) + MIN_PAYLOAD_SIZE; goto linear_scan; } @@ -3518,12 +3655,22 @@ scavenge_one(StgPtr p) break; } - case MUT_VAR: - evac_gen = 0; + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: { + StgPtr q = p; + rtsBool saved_eager_promotion = eager_promotion; + + eager_promotion = rtsFalse; ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; // mutable anyhow + eager_promotion = saved_eager_promotion; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_VAR_CLEAN_info; + } break; + } case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -3560,17 +3707,31 @@ scavenge_one(StgPtr p) // nothing to follow break; - case MUT_ARR_PTRS: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: { - // follow everything - StgPtr next; - - evac_gen = 0; // repeatedly mutable + StgPtr next, q; + rtsBool saved_eager; + + // We don't eagerly promote objects pointed to by a mutable + // array, but if we find the array only points to objects in + // the same or an older generation, we mark it "clean" and + // avoid traversing it during minor GCs. + saved_eager = eager_promotion; + eager_promotion = rtsFalse; + q = p; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } - evac_gen = saved_evac_gen; + eager_promotion = saved_eager; + + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_DIRTY_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_CLEAN_info; + } + failed_to_evac = rtsTrue; break; } @@ -3579,23 +3740,39 @@ scavenge_one(StgPtr p) case MUT_ARR_PTRS_FROZEN0: { // follow everything - StgPtr next; + StgPtr next, q=p; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } break; } case TSO: { StgTSO *tso = (StgTSO *)p; - - evac_gen = 0; // repeatedly mutable + rtsBool saved_eager = eager_promotion; + + eager_promotion = rtsFalse; scavengeTSO(tso); - evac_gen = saved_evac_gen; - failed_to_evac = rtsTrue; + eager_promotion = saved_eager; + + if (failed_to_evac) { + tso->flags |= TSO_DIRTY; + } else { + tso->flags &= ~TSO_DIRTY; + } + + failed_to_evac = rtsTrue; // always on the mutable list break; } @@ -3670,9 +3847,6 @@ scavenge_one(StgPtr p) evac_gen = 0; tvar->current_value = evacuate((StgClosure*)tvar->current_value); tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry); -#if defined(SMP) - tvar->last_update_by = (StgTRecHeader *)evacuate((StgClosure*)tvar->last_update_by); -#endif evac_gen = saved_evac_gen; failed_to_evac = rtsTrue; // mutable break; @@ -3781,10 +3955,55 @@ scavenge_mutable_list(generation *gen) for (q = bd->start; q < bd->free; q++) { p = (StgPtr)*q; ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + +#ifdef DEBUG + switch (get_itbl((StgClosure *)p)->type) { + case MUT_VAR_CLEAN: + barf("MUT_VAR_CLEAN on mutable list"); + case MUT_VAR_DIRTY: + mutlist_MUTVARS++; break; + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + mutlist_MUTARRS++; break; + default: + mutlist_OTHERS++; break; + } +#endif + + // Check whether this object is "clean", that is it + // definitely doesn't point into a young generation. + // Clean objects don't need to be scavenged. Some clean + // objects (MUT_VAR_CLEAN) are not kept on the mutable + // list at all; others, such as MUT_ARR_PTRS_CLEAN and + // TSO, are always on the mutable list. + // + switch (get_itbl((StgClosure *)p)->type) { + case MUT_ARR_PTRS_CLEAN: + recordMutableGen((StgClosure *)p,gen); + continue; + case TSO: { + StgTSO *tso = (StgTSO *)p; + if ((tso->flags & TSO_DIRTY) == 0) { + // A clean TSO: we don't have to traverse its + // stack. However, we *do* follow the link field: + // we don't want to have to mark a TSO dirty just + // because we put it on a different queue. + if (tso->why_blocked != BlockedOnBlackHole) { + tso->link = (StgTSO *)evacuate((StgClosure *)tso->link); + } + recordMutableGen((StgClosure *)p,gen); + continue; + } + } + default: + ; + } + if (scavenge_one(p)) { - /* didn't manage to promote everything, so put the - * object back on the list. - */ + // didn't manage to promote everything, so put the + // object back on the list. recordMutableGen((StgClosure *)p,gen); } } @@ -3946,6 +4165,32 @@ scavenge_stack(StgPtr p, StgPtr stack_end) switch (info->i.type) { case UPDATE_FRAME: + // In SMP, we can get update frames that point to indirections + // when two threads evaluate the same thunk. We do attempt to + // discover this situation in threadPaused(), but it's + // possible that the following sequence occurs: + // + // A B + // enter T + // enter T + // blackhole T + // update T + // GC + // + // Now T is an indirection, and the update frame is already + // marked on A's stack, so we won't traverse it again in + // threadPaused(). We could traverse the whole stack again + // before GC, but that seems like overkill. + // + // Scavenging this update frame as normal would be disastrous; + // the updatee would end up pointing to the value. So we turn + // the indirection into an IND_PERM, so that evacuate will + // copy the indirection into the old generation instead of + // discarding it. + if (get_itbl(((StgUpdateFrame *)p)->updatee)->type == IND) { + ((StgUpdateFrame *)p)->updatee->header.info = + (StgInfoTable *)&stg_IND_PERM_info; + } ((StgUpdateFrame *)p)->updatee = evacuate(((StgUpdateFrame *)p)->updatee); p += sizeofW(StgUpdateFrame); @@ -4074,7 +4319,9 @@ scavenge_large(step *stp) p = bd->start; if (scavenge_one(p)) { - recordMutableGen((StgClosure *)p, stp->gen); + if (stp->gen_no > 0) { + recordMutableGen((StgClosure *)p, stp->gen); + } } } } @@ -4186,74 +4433,6 @@ gcCAFs(void) /* ----------------------------------------------------------------------------- - Lazy black holing. - - Whenever a thread returns to the scheduler after possibly doing - some work, we have to run down the stack and black-hole all the - closures referred to by update frames. - -------------------------------------------------------------------------- */ - -static void -threadLazyBlackHole(StgTSO *tso) -{ - StgClosure *frame; - StgRetInfoTable *info; - StgClosure *bh; - StgPtr stack_end; - - stack_end = &tso->stack[tso->stack_size]; - - frame = (StgClosure *)tso->sp; - - while (1) { - info = get_ret_itbl(frame); - - switch (info->i.type) { - - case UPDATE_FRAME: - bh = ((StgUpdateFrame *)frame)->updatee; - - /* if the thunk is already blackholed, it means we've also - * already blackholed the rest of the thunks on this stack, - * so we can stop early. - * - * The blackhole made for a CAF is a CAF_BLACKHOLE, so they - * don't interfere with this optimisation. - */ - if (bh->header.info == &stg_BLACKHOLE_info) { - return; - } - - if (bh->header.info != &stg_CAF_BLACKHOLE_info) { -#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) - debugBelch("Unexpected lazy BHing required at 0x%04x\n",(int)bh); -#endif -#ifdef PROFILING - // @LDV profiling - // We pretend that bh is now dead. - LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); -#endif - SET_INFO(bh,&stg_BLACKHOLE_info); - - // We pretend that bh has just been created. - LDV_RECORD_CREATE(bh); - } - - frame = (StgClosure *) ((StgUpdateFrame *)frame + 1); - break; - - case STOP_FRAME: - return; - - // normal stack frames; do nothing except advance the pointer - default: - frame = (StgClosure *)((StgPtr)frame + stack_frame_sizeW(frame)); - } - } -} - - -/* ----------------------------------------------------------------------------- * Stack squeezing * * Code largely pinched from old RTS, then hacked to bits. We also do @@ -4264,12 +4443,11 @@ threadLazyBlackHole(StgTSO *tso) struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; }; static void -threadSqueezeStack(StgTSO *tso) +stackSqueeze(StgTSO *tso, StgPtr bottom) { StgPtr frame; rtsBool prev_was_update_frame; StgClosure *updatee = NULL; - StgPtr bottom; StgRetInfoTable *info; StgWord current_gap_size; struct stack_gap *gap; @@ -4280,8 +4458,6 @@ threadSqueezeStack(StgTSO *tso) // contains two values: the size of the gap, and the distance // to the next gap (or the stack top). - bottom = &(tso->stack[tso->stack_size]); - frame = tso->sp; ASSERT(frame < bottom); @@ -4299,20 +4475,6 @@ threadSqueezeStack(StgTSO *tso) { StgUpdateFrame *upd = (StgUpdateFrame *)frame; - if (upd->updatee->header.info == &stg_BLACKHOLE_info) { - - // found a BLACKHOLE'd update frame; we've been here - // before, in a previous GC, so just break out. - - // Mark the end of the gap, if we're in one. - if (current_gap_size != 0) { - gap = (struct stack_gap *)(frame-sizeofW(StgUpdateFrame)); - } - - frame += sizeofW(StgUpdateFrame); - goto done_traversing; - } - if (prev_was_update_frame) { TICK_UPD_SQUEEZED(); @@ -4345,31 +4507,6 @@ threadSqueezeStack(StgTSO *tso) // single update frame, or the topmost update frame in a series else { - StgClosure *bh = upd->updatee; - - // Do lazy black-holing - if (bh->header.info != &stg_BLACKHOLE_info && - bh->header.info != &stg_CAF_BLACKHOLE_info) { -#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) - debugBelch("Unexpected lazy BHing required at 0x%04x",(int)bh); -#endif -#ifdef DEBUG - // zero out the slop so that the sanity checker can tell - // where the next closure is. - DEBUG_FILL_SLOP(bh); -#endif -#ifdef PROFILING - // We pretend that bh is now dead. - // ToDo: is the slop filling the same as DEBUG_FILL_SLOP? - LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); -#endif - // Todo: maybe use SET_HDR() and remove LDV_RECORD_CREATE()? - SET_INFO(bh,&stg_BLACKHOLE_info); - - // We pretend that bh has just been created. - LDV_RECORD_CREATE(bh); - } - prev_was_update_frame = rtsTrue; updatee = upd->updatee; frame += sizeofW(StgUpdateFrame); @@ -4392,8 +4529,10 @@ threadSqueezeStack(StgTSO *tso) } } -done_traversing: - + if (current_gap_size != 0) { + gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame)); + } + // Now we have a stack with gaps in it, and we have to walk down // shoving the stack up to fill in the gaps. A diagram might // help: @@ -4451,12 +4590,110 @@ done_traversing: * turned on. * -------------------------------------------------------------------------- */ void -threadPaused(StgTSO *tso) +threadPaused(Capability *cap, StgTSO *tso) { - if ( RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue ) - threadSqueezeStack(tso); // does black holing too - else - threadLazyBlackHole(tso); + StgClosure *frame; + StgRetInfoTable *info; + StgClosure *bh; + StgPtr stack_end; + nat words_to_squeeze = 0; + nat weight = 0; + nat weight_pending = 0; + rtsBool prev_was_update_frame; + + stack_end = &tso->stack[tso->stack_size]; + + frame = (StgClosure *)tso->sp; + + while (1) { + // If we've already marked this frame, then stop here. + if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) { + goto end; + } + + info = get_ret_itbl(frame); + + switch (info->i.type) { + + case UPDATE_FRAME: + + SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info); + + bh = ((StgUpdateFrame *)frame)->updatee; + + if (closure_IND(bh) || bh->header.info == &stg_BLACKHOLE_info) { + IF_DEBUG(squeeze, debugBelch("suspending duplicate work: %ld words of stack\n", (StgPtr)frame - tso->sp)); + + // If this closure is already an indirection, then + // suspend the computation up to this point: + suspendComputation(cap,tso,(StgPtr)frame); + + // Now drop the update frame, and arrange to return + // the value to the frame underneath: + tso->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2; + tso->sp[1] = (StgWord)bh; + tso->sp[0] = (W_)&stg_enter_info; + + // And continue with threadPaused; there might be + // yet more computation to suspend. + threadPaused(cap,tso); + return; + } + + if (bh->header.info != &stg_CAF_BLACKHOLE_info) { +#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) + debugBelch("Unexpected lazy BHing required at 0x%04lx\n",(long)bh); +#endif + // zero out the slop so that the sanity checker can tell + // where the next closure is. + DEBUG_FILL_SLOP(bh); +#ifdef PROFILING + // @LDV profiling + // We pretend that bh is now dead. + LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); +#endif + SET_INFO(bh,&stg_BLACKHOLE_info); + + // We pretend that bh has just been created. + LDV_RECORD_CREATE(bh); + } + + frame = (StgClosure *) ((StgUpdateFrame *)frame + 1); + if (prev_was_update_frame) { + words_to_squeeze += sizeofW(StgUpdateFrame); + weight += weight_pending; + weight_pending = 0; + } + prev_was_update_frame = rtsTrue; + break; + + case STOP_FRAME: + goto end; + + // normal stack frames; do nothing except advance the pointer + default: + { + nat frame_size = stack_frame_sizeW(frame); + weight_pending += frame_size; + frame = (StgClosure *)((StgPtr)frame + frame_size); + prev_was_update_frame = rtsFalse; + } + } + } + +end: + IF_DEBUG(squeeze, + debugBelch("words_to_squeeze: %d, weight: %d, squeeze: %s\n", + words_to_squeeze, weight, + weight < words_to_squeeze ? "YES" : "NO")); + + // Should we squeeze or not? Arbitrary heuristic: we squeeze if + // the number of words we have to shift down is less than the + // number of stack words we squeeze away by doing so. + if (RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue && + weight < words_to_squeeze) { + stackSqueeze(tso, (StgPtr)frame); + } } /* -----------------------------------------------------------------------------