X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FGC.c;h=1641e3a44148fe90a2f54d2bbb8b371403243333;hb=d3d69395401d3e7be7a8794e0b6850e608a30a24;hp=06bd7859e9f5888d078cefafd6965d6b38f38b89;hpb=e30a37bc7bdb88e8a92696747b40a1f692284e0d;p=ghc-hetmet.git diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index 06bd785..1641e3a 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -1137,7 +1137,9 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) resetNurseries(); // start any pending finalizers + RELEASE_SM_LOCK; scheduleFinalizers(last_free_capability, old_weak_ptr_list); + ACQUIRE_SM_LOCK; // send exceptions to any threads which were about to die resurrectThreads(resurrected_threads); @@ -2951,10 +2953,14 @@ scavenge(step *stp) for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } - // it's tempting to recordMutable() if failed_to_evac is - // false, but that breaks some assumptions (eg. every - // closure on the mutable list is supposed to have the MUT - // flag set, and MUT_ARR_PTRS_FROZEN doesn't). + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } break; } @@ -3045,9 +3051,6 @@ scavenge(step *stp) evac_gen = 0; tvar->current_value = evacuate((StgClosure*)tvar->current_value); tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry); -#if defined(SMP) - tvar->last_update_by = (StgTRecHeader *)evacuate((StgClosure*)tvar->last_update_by); -#endif evac_gen = saved_evac_gen; failed_to_evac = rtsTrue; // mutable p += sizeofW(StgTVar); @@ -3305,12 +3308,20 @@ linear_scan: case MUT_ARR_PTRS_FROZEN0: // follow everything { - StgPtr next; + StgPtr next, q = p; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } break; } @@ -3394,9 +3405,6 @@ linear_scan: evac_gen = 0; tvar->current_value = evacuate((StgClosure*)tvar->current_value); tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry); -#if defined(SMP) - tvar->last_update_by = (StgTRecHeader *)evacuate((StgClosure*)tvar->last_update_by); -#endif evac_gen = saved_evac_gen; failed_to_evac = rtsTrue; // mutable break; @@ -3619,12 +3627,20 @@ scavenge_one(StgPtr p) case MUT_ARR_PTRS_FROZEN0: { // follow everything - StgPtr next; + StgPtr next, q=p; next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { *p = (StgWord)(StgPtr)evacuate((StgClosure *)*p); } + + // If we're going to put this object on the mutable list, then + // set its info ptr to MUT_ARR_PTRS_FROZEN0 to indicate that. + if (failed_to_evac) { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN0_info; + } else { + ((StgClosure *)q)->header.info = &stg_MUT_ARR_PTRS_FROZEN_info; + } break; } @@ -3710,9 +3726,6 @@ scavenge_one(StgPtr p) evac_gen = 0; tvar->current_value = evacuate((StgClosure*)tvar->current_value); tvar->first_wait_queue_entry = (StgTVarWaitQueue *)evacuate((StgClosure*)tvar->first_wait_queue_entry); -#if defined(SMP) - tvar->last_update_by = (StgTRecHeader *)evacuate((StgClosure*)tvar->last_update_by); -#endif evac_gen = saved_evac_gen; failed_to_evac = rtsTrue; // mutable break; @@ -3986,6 +3999,32 @@ scavenge_stack(StgPtr p, StgPtr stack_end) switch (info->i.type) { case UPDATE_FRAME: + // In SMP, we can get update frames that point to indirections + // when two threads evaluate the same thunk. We do attempt to + // discover this situation in threadPaused(), but it's + // possible that the following sequence occurs: + // + // A B + // enter T + // enter T + // blackhole T + // update T + // GC + // + // Now T is an indirection, and the update frame is already + // marked on A's stack, so we won't traverse it again in + // threadPaused(). We could traverse the whole stack again + // before GC, but that seems like overkill. + // + // Scavenging this update frame as normal would be disastrous; + // the updatee would end up pointing to the value. So we turn + // the indirection into an IND_PERM, so that evacuate will + // copy the indirection into the old generation instead of + // discarding it. + if (get_itbl(((StgUpdateFrame *)p)->updatee)->type == IND) { + ((StgUpdateFrame *)p)->updatee->header.info = + (StgInfoTable *)&stg_IND_PERM_info; + } ((StgUpdateFrame *)p)->updatee = evacuate(((StgUpdateFrame *)p)->updatee); p += sizeofW(StgUpdateFrame); @@ -4228,74 +4267,6 @@ gcCAFs(void) /* ----------------------------------------------------------------------------- - Lazy black holing. - - Whenever a thread returns to the scheduler after possibly doing - some work, we have to run down the stack and black-hole all the - closures referred to by update frames. - -------------------------------------------------------------------------- */ - -static void -threadLazyBlackHole(StgTSO *tso) -{ - StgClosure *frame; - StgRetInfoTable *info; - StgClosure *bh; - StgPtr stack_end; - - stack_end = &tso->stack[tso->stack_size]; - - frame = (StgClosure *)tso->sp; - - while (1) { - info = get_ret_itbl(frame); - - switch (info->i.type) { - - case UPDATE_FRAME: - bh = ((StgUpdateFrame *)frame)->updatee; - - /* if the thunk is already blackholed, it means we've also - * already blackholed the rest of the thunks on this stack, - * so we can stop early. - * - * The blackhole made for a CAF is a CAF_BLACKHOLE, so they - * don't interfere with this optimisation. - */ - if (bh->header.info == &stg_BLACKHOLE_info) { - return; - } - - if (bh->header.info != &stg_CAF_BLACKHOLE_info) { -#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) - debugBelch("Unexpected lazy BHing required at 0x%04lx\n",(long)bh); -#endif -#ifdef PROFILING - // @LDV profiling - // We pretend that bh is now dead. - LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); -#endif - SET_INFO(bh,&stg_BLACKHOLE_info); - - // We pretend that bh has just been created. - LDV_RECORD_CREATE(bh); - } - - frame = (StgClosure *) ((StgUpdateFrame *)frame + 1); - break; - - case STOP_FRAME: - return; - - // normal stack frames; do nothing except advance the pointer - default: - frame = (StgClosure *)((StgPtr)frame + stack_frame_sizeW(frame)); - } - } -} - - -/* ----------------------------------------------------------------------------- * Stack squeezing * * Code largely pinched from old RTS, then hacked to bits. We also do @@ -4306,12 +4277,11 @@ threadLazyBlackHole(StgTSO *tso) struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; }; static void -threadSqueezeStack(StgTSO *tso) +stackSqueeze(StgTSO *tso, StgPtr bottom) { StgPtr frame; rtsBool prev_was_update_frame; StgClosure *updatee = NULL; - StgPtr bottom; StgRetInfoTable *info; StgWord current_gap_size; struct stack_gap *gap; @@ -4322,8 +4292,6 @@ threadSqueezeStack(StgTSO *tso) // contains two values: the size of the gap, and the distance // to the next gap (or the stack top). - bottom = &(tso->stack[tso->stack_size]); - frame = tso->sp; ASSERT(frame < bottom); @@ -4341,20 +4309,6 @@ threadSqueezeStack(StgTSO *tso) { StgUpdateFrame *upd = (StgUpdateFrame *)frame; - if (upd->updatee->header.info == &stg_BLACKHOLE_info) { - - // found a BLACKHOLE'd update frame; we've been here - // before, in a previous GC, so just break out. - - // Mark the end of the gap, if we're in one. - if (current_gap_size != 0) { - gap = (struct stack_gap *)(frame-sizeofW(StgUpdateFrame)); - } - - frame += sizeofW(StgUpdateFrame); - goto done_traversing; - } - if (prev_was_update_frame) { TICK_UPD_SQUEEZED(); @@ -4387,31 +4341,6 @@ threadSqueezeStack(StgTSO *tso) // single update frame, or the topmost update frame in a series else { - StgClosure *bh = upd->updatee; - - // Do lazy black-holing - if (bh->header.info != &stg_BLACKHOLE_info && - bh->header.info != &stg_CAF_BLACKHOLE_info) { -#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) - debugBelch("Unexpected lazy BHing required at 0x%04lx",(long)bh); -#endif -#ifdef DEBUG - // zero out the slop so that the sanity checker can tell - // where the next closure is. - DEBUG_FILL_SLOP(bh); -#endif -#ifdef PROFILING - // We pretend that bh is now dead. - // ToDo: is the slop filling the same as DEBUG_FILL_SLOP? - LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); -#endif - // Todo: maybe use SET_HDR() and remove LDV_RECORD_CREATE()? - SET_INFO(bh,&stg_BLACKHOLE_info); - - // We pretend that bh has just been created. - LDV_RECORD_CREATE(bh); - } - prev_was_update_frame = rtsTrue; updatee = upd->updatee; frame += sizeofW(StgUpdateFrame); @@ -4434,8 +4363,10 @@ threadSqueezeStack(StgTSO *tso) } } -done_traversing: - + if (current_gap_size != 0) { + gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame)); + } + // Now we have a stack with gaps in it, and we have to walk down // shoving the stack up to fill in the gaps. A diagram might // help: @@ -4493,12 +4424,110 @@ done_traversing: * turned on. * -------------------------------------------------------------------------- */ void -threadPaused(StgTSO *tso) +threadPaused(Capability *cap, StgTSO *tso) { - if ( RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue ) - threadSqueezeStack(tso); // does black holing too - else - threadLazyBlackHole(tso); + StgClosure *frame; + StgRetInfoTable *info; + StgClosure *bh; + StgPtr stack_end; + nat words_to_squeeze = 0; + nat weight = 0; + nat weight_pending = 0; + rtsBool prev_was_update_frame; + + stack_end = &tso->stack[tso->stack_size]; + + frame = (StgClosure *)tso->sp; + + while (1) { + // If we've already marked this frame, then stop here. + if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) { + goto end; + } + + info = get_ret_itbl(frame); + + switch (info->i.type) { + + case UPDATE_FRAME: + + SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info); + + bh = ((StgUpdateFrame *)frame)->updatee; + + if (closure_IND(bh) || bh->header.info == &stg_BLACKHOLE_info) { + IF_DEBUG(squeeze, debugBelch("suspending duplicate work: %ld words of stack\n", (StgPtr)frame - tso->sp)); + + // If this closure is already an indirection, then + // suspend the computation up to this point: + suspendComputation(cap,tso,(StgPtr)frame); + + // Now drop the update frame, and arrange to return + // the value to the frame underneath: + tso->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2; + tso->sp[1] = (StgWord)bh; + tso->sp[0] = (W_)&stg_enter_info; + + // And continue with threadPaused; there might be + // yet more computation to suspend. + threadPaused(cap,tso); + return; + } + + if (bh->header.info != &stg_CAF_BLACKHOLE_info) { +#if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) + debugBelch("Unexpected lazy BHing required at 0x%04lx\n",(long)bh); +#endif + // zero out the slop so that the sanity checker can tell + // where the next closure is. + DEBUG_FILL_SLOP(bh); +#ifdef PROFILING + // @LDV profiling + // We pretend that bh is now dead. + LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); +#endif + SET_INFO(bh,&stg_BLACKHOLE_info); + + // We pretend that bh has just been created. + LDV_RECORD_CREATE(bh); + } + + frame = (StgClosure *) ((StgUpdateFrame *)frame + 1); + if (prev_was_update_frame) { + words_to_squeeze += sizeofW(StgUpdateFrame); + weight += weight_pending; + weight_pending = 0; + } + prev_was_update_frame = rtsTrue; + break; + + case STOP_FRAME: + goto end; + + // normal stack frames; do nothing except advance the pointer + default: + { + nat frame_size = stack_frame_sizeW(frame); + weight_pending += frame_size; + frame = (StgClosure *)((StgPtr)frame + frame_size); + prev_was_update_frame = rtsFalse; + } + } + } + +end: + IF_DEBUG(squeeze, + debugBelch("words_to_squeeze: %d, weight: %d, squeeze: %s\n", + words_to_squeeze, weight, + weight < words_to_squeeze ? "YES" : "NO")); + + // Should we squeeze or not? Arbitrary heuristic: we squeeze if + // the number of words we have to shift down is less than the + // number of stack words we squeeze away by doing so. + if (1 /*RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue && + weight < words_to_squeeze*/) { + stackSqueeze(tso, (StgPtr)frame); + } } /* -----------------------------------------------------------------------------