X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2Fsm%2FEvac.c;h=21017a63a0eb6d9db6014710d5264a712dd477c9;hp=f117526f3a77a3fc3d318e2d6bd8c78001f3098a;hb=7408b39235bccdcde48df2a73337ff976fbc09b7;hpb=34d0fee7fa3f29069c9a84df404182726f61b367 diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index f117526..21017a6 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- * - * (c) The GHC Team 1998-2006 + * (c) The GHC Team 1998-2008 * * Generational garbage collector: evacuation functions * @@ -11,20 +11,34 @@ * * ---------------------------------------------------------------------------*/ +#include "PosixSource.h" #include "Rts.h" -#include "Storage.h" -#include "MBlock.h" + #include "Evac.h" +#include "Storage.h" #include "GC.h" +#include "GCThread.h" #include "GCUtils.h" #include "Compact.h" +#include "MarkStack.h" #include "Prelude.h" +#include "Trace.h" #include "LdvProfile.h" -#if defined(PROF_SPIN) && defined(THREADED_RTS) +#if defined(PROF_SPIN) && defined(THREADED_RTS) && defined(PARALLEL_GC) StgWord64 whitehole_spin = 0; #endif +#if defined(THREADED_RTS) && !defined(PARALLEL_GC) +#define evacuate(p) evacuate1(p) +#define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p) +#endif + +#if !defined(PARALLEL_GC) +#define copy_tag_nolock(p, info, src, size, stp, tag) \ + copy_tag(p, info, src, size, stp, tag) +#endif + /* Used to avoid long recursion due to selector thunks */ #define MAX_THUNK_SELECTOR_DEPTH 16 @@ -37,35 +51,35 @@ STATIC_INLINE void evacuate_large(StgPtr p); -------------------------------------------------------------------------- */ STATIC_INLINE StgPtr -alloc_for_copy (nat size, step *stp) +alloc_for_copy (nat size, generation *gen) { StgPtr to; - step_workspace *ws; + gen_workspace *ws; /* Find out where we're going, using the handy "to" pointer in - * the step of the source object. If it turns out we need to + * the gen of the source object. If it turns out we need to * evacuate to an older generation, adjust it here (see comment * by evacuate()). */ - if (stp < gct->evac_step) { + if (gen < gct->evac_gen) { if (gct->eager_promotion) { - stp = gct->evac_step; + gen = gct->evac_gen; } else { gct->failed_to_evac = rtsTrue; } } - ws = &gct->steps[stp->abs_no]; - // this compiles to a single mem access to stp->abs_no only + ws = &gct->gens[gen->no]; + // this compiles to a single mem access to gen->abs_no only - /* chain a new block onto the to-space for the destination step if + /* chain a new block onto the to-space for the destination gen if * necessary. 
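
The promotion check at the top of alloc_for_copy() decides where the copy will land: when the natural destination generation is younger than the caller's minimum (gct->evac_gen), the object is either promoted outright (eager promotion) or the thread records failed_to_evac so the caller can repair the generational invariant via the mutable lists. A minimal sketch of that decision with the RTS types reduced to plain C (GCThread and choose_dest_gen are invented names):

    #include <stdbool.h>

    typedef struct {
        int  evac_gen;           /* minimum acceptable destination gen */
        bool eager_promotion;
        bool failed_to_evac;
    } GCThread;

    /* Pick the generation an object will actually be copied into. */
    static int choose_dest_gen(GCThread *gct, int dest)
    {
        if (dest < gct->evac_gen) {
            if (gct->eager_promotion)
                dest = gct->evac_gen;        /* promote all the way up */
            else
                gct->failed_to_evac = true;  /* caller fixes up mut lists */
        }
        return dest;
    }
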
*/ to = ws->todo_free; - if (to + size > ws->todo_lim) { - to = gc_alloc_todo_block(ws); + ws->todo_free += size; + if (ws->todo_free > ws->todo_lim) { + to = todo_block_full(size, ws); } - ws->todo_free = to + size; ASSERT(ws->todo_free >= ws->todo_bd->free && ws->todo_free <= ws->todo_lim); return to; @@ -75,18 +89,148 @@ alloc_for_copy (nat size, step *stp) The evacuate() code -------------------------------------------------------------------------- */ -#define PARALLEL_GC -#include "Evac.c-inc" +STATIC_INLINE GNUC_ATTR_HOT void +copy_tag(StgClosure **p, const StgInfoTable *info, + StgClosure *src, nat size, generation *gen, StgWord tag) +{ + StgPtr to, from; + nat i; + + to = alloc_for_copy(size,gen); + + from = (StgPtr)src; + to[0] = (W_)info; + for (i = 1; i < size; i++) { // unroll for small i + to[i] = from[i]; + } + +// if (to+size+2 < bd->start + BLOCK_SIZE_W) { +// __builtin_prefetch(to + size + 2, 1); +// } + +#if defined(PARALLEL_GC) + { + const StgInfoTable *new_info; + new_info = (const StgInfoTable *)cas((StgPtr)&src->header.info, (W_)info, MK_FORWARDING_PTR(to)); + if (new_info != info) { + return evacuate(p); // does the failed_to_evac stuff + } else { + *p = TAG_CLOSURE(tag,(StgClosure*)to); + } + } +#else + src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to); + *p = TAG_CLOSURE(tag,(StgClosure*)to); +#endif + +#ifdef PROFILING + // We store the size of the just evacuated object in the LDV word so that + // the profiler can guess the position of the next object later. + SET_EVACUAEE_FOR_LDV(from, size); +#endif +} + +#if defined(PARALLEL_GC) +STATIC_INLINE void +copy_tag_nolock(StgClosure **p, const StgInfoTable *info, + StgClosure *src, nat size, generation *gen, StgWord tag) +{ + StgPtr to, from; + nat i; + + to = alloc_for_copy(size,gen); + *p = TAG_CLOSURE(tag,(StgClosure*)to); + src->header.info = (const StgInfoTable *)MK_FORWARDING_PTR(to); + + from = (StgPtr)src; + to[0] = (W_)info; + for (i = 1; i < size; i++) { // unroll for small i + to[i] = from[i]; + } + +// if (to+size+2 < bd->start + BLOCK_SIZE_W) { +// __builtin_prefetch(to + size + 2, 1); +// } + +#ifdef PROFILING + // We store the size of the just evacuated object in the LDV word so that + // the profiler can guess the position of the next object later. + SET_EVACUAEE_FOR_LDV(from, size); +#endif +} +#endif + +/* Special version of copy() for when we only want to copy the info + * pointer of an object, but reserve some padding after it. This is + * used to optimise evacuation of BLACKHOLEs. 
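
The allocation fast path at the start of this hunk now bumps ws->todo_free unconditionally and only calls todo_block_full() when the bump overran todo_lim, replacing the old compare-then-bump sequence. The same shape in isolation, under invented stand-ins (Workspace for gen_workspace, refill_block for todo_block_full):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdlib.h>

    typedef struct {
        uintptr_t *free;    /* next unused word in the current block */
        uintptr_t *lim;     /* first word past the usable area       */
    } Workspace;

    /* Stand-in slow path: start a fresh block and allocate from it.
     * (The real GC chains the full block up for scavenging; that
     * bookkeeping is elided here.) */
    static uintptr_t *refill_block(Workspace *ws, size_t size)
    {
        enum { BLOCK_WORDS = 4096 };
        uintptr_t *blk = malloc(BLOCK_WORDS * sizeof *blk);
        ws->free = blk + size;
        ws->lim  = blk + BLOCK_WORDS;
        return blk;
    }

    static uintptr_t *alloc_words(Workspace *ws, size_t size)
    {
        uintptr_t *to = ws->free;
        ws->free += size;           /* optimistic bump */
        if (ws->free > ws->lim)     /* rare: block exhausted */
            to = refill_block(ws, size);
        return to;
    }
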
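copy_tag() above copies the object first and only then competes to install the forwarding pointer with cas(); the losing thread abandons its copy and follows the winner's pointer instead. A cut-down model of that race in C11 atomics (Obj and copy_out are invented; the low-bit encoding stands in for MK_FORWARDING_PTR):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stddef.h>

    typedef struct Obj {
        _Atomic uintptr_t header;   /* info pointer, or to-space addr | 1 */
        uintptr_t payload[2];
    } Obj;

    static Obj *copy_out(const Obj *src)     /* stand-in to-space copy */
    {
        static Obj arena[64];
        static size_t next;
        Obj *to = &arena[next++];
        to->payload[0] = src->payload[0];
        to->payload[1] = src->payload[1];
        return to;
    }

    static Obj *evacuate_obj(Obj *src)
    {
        uintptr_t info = atomic_load(&src->header);
        if (info & 1)                        /* already a forwarding ptr */
            return (Obj *)(info & ~(uintptr_t)1);
        Obj *to = copy_out(src);
        atomic_store(&to->header, info);     /* the copy keeps the real info */
        uintptr_t expected = info;
        if (atomic_compare_exchange_strong(&src->header, &expected,
                                           (uintptr_t)to | 1))
            return to;                       /* we won the race */
        /* lost: 'expected' now holds the winner's forwarding pointer */
        return (Obj *)(expected & ~(uintptr_t)1);
    }

As in the patch, the loser's copy is simply wasted; correctness only needs the header CAS to pick a single winner.
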
+ */ +static rtsBool +copyPart(StgClosure **p, StgClosure *src, nat size_to_reserve, + nat size_to_copy, generation *gen) +{ + StgPtr to, from; + nat i; + StgWord info; + +#if defined(PARALLEL_GC) +spin: + info = xchg((StgPtr)&src->header.info, (W_)&stg_WHITEHOLE_info); + if (info == (W_)&stg_WHITEHOLE_info) { +#ifdef PROF_SPIN + whitehole_spin++; +#endif + goto spin; + } + if (IS_FORWARDING_PTR(info)) { + src->header.info = (const StgInfoTable *)info; + evacuate(p); // does the failed_to_evac stuff + return rtsFalse; + } +#else + info = (W_)src->header.info; +#endif + + to = alloc_for_copy(size_to_reserve, gen); + *p = (StgClosure *)to; + + from = (StgPtr)src; + to[0] = info; + for (i = 1; i < size_to_copy; i++) { // unroll for small i + to[i] = from[i]; + } + +#if defined(PARALLEL_GC) + write_barrier(); +#endif + src->header.info = (const StgInfoTable*)MK_FORWARDING_PTR(to); + +#ifdef PROFILING + // We store the size of the just evacuated object in the LDV word so that + // the profiler can guess the position of the next object later. + SET_EVACUAEE_FOR_LDV(from, size_to_reserve); + // fill the slop + if (size_to_reserve - size_to_copy > 0) + LDV_FILL_SLOP(to + size_to_copy, (int)(size_to_reserve - size_to_copy)); +#endif + + return rtsTrue; +} + -#undef PARALLEL_GC -#include "Evac.c-inc" +/* Copy wrappers that don't tag the closure after copying */ +STATIC_INLINE GNUC_ATTR_HOT void +copy(StgClosure **p, const StgInfoTable *info, + StgClosure *src, nat size, generation *gen) +{ + copy_tag(p,info,src,size,gen,0); +} /* ----------------------------------------------------------------------------- Evacuate a large object This just consists of removing the object from the (doubly-linked) - step->large_objects list, and linking it on to the (singly-linked) - step->new_large_objects list, from where it will be scavenged later. + gen->large_objects list, and linking it on to the (singly-linked) + gen->new_large_objects list, from where it will be scavenged later. Convention: bd->flags has BF_EVACUATED set for a large object that has been evacuated, or unset otherwise. @@ -95,27 +239,24 @@ alloc_for_copy (nat size, step *stp) STATIC_INLINE void evacuate_large(StgPtr p) { - bdescr *bd = Bdescr(p); - step *stp, *new_stp; - step_workspace *ws; + bdescr *bd; + generation *gen, *new_gen; + gen_workspace *ws; - stp = bd->step; - ACQUIRE_SPIN_LOCK(&stp->sync_large_objects); - - // object must be at the beginning of the block (or be a ByteArray) - ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS || - (((W_)p & BLOCK_MASK) == 0)); + bd = Bdescr(p); + gen = bd->gen; + ACQUIRE_SPIN_LOCK(&gen->sync_large_objects); // already evacuated? if (bd->flags & BF_EVACUATED) { /* Don't forget to set the gct->failed_to_evac flag if we didn't get * the desired destination (see comments in evacuate()). 
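
copyPart() combines two tricks: mutual exclusion by xchg-ing a WHITEHOLE into the header (spinning while another thread holds it), and copying only a prefix of the object while reserving its full size in to-space, which is how BLACKHOLEs keep their slop. A compressed sketch of both, under invented conventions (alloc_words as the allocator, WHITEHOLE encoded as 0, forwarding as address|1); release ordering plays the role of write_barrier():

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stddef.h>

    #define WHITEHOLE ((uintptr_t)0)    /* sentinel: "being copied" */

    static uintptr_t *alloc_words(size_t n)   /* stand-in allocator */
    {
        static uintptr_t arena[1024];
        static size_t used;
        uintptr_t *p = &arena[used];
        used += n;
        return p;
    }

    static uintptr_t *copy_prefix(_Atomic uintptr_t *hdr, const uintptr_t *src,
                                  size_t reserve_n, size_t copy_n)
    {
        uintptr_t info;
        do {                                 /* claim the object */
            info = atomic_exchange(hdr, WHITEHOLE);
        } while (info == WHITEHOLE);         /* spin: someone else holds it */

        if (info & 1) {                      /* lost: already forwarded */
            atomic_store(hdr, info);         /* put the header back */
            return (uintptr_t *)(info & ~(uintptr_t)1);
        }

        uintptr_t *to = alloc_words(reserve_n);  /* reserve the padding too */
        to[0] = info;
        for (size_t i = 1; i < copy_n; i++)
            to[i] = src[i];
        /* publish the forwarding ptr only after the copy is visible */
        atomic_store_explicit(hdr, (uintptr_t)to | 1, memory_order_release);
        return to;
    }
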
*/ - if (stp < gct->evac_step) { + if (gen < gct->evac_gen) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } - RELEASE_SPIN_LOCK(&stp->sync_large_objects); + RELEASE_SPIN_LOCK(&gen->sync_large_objects); return; } @@ -123,31 +264,473 @@ evacuate_large(StgPtr p) if (bd->u.back) { bd->u.back->link = bd->link; } else { // first object in the list - stp->large_objects = bd->link; + gen->large_objects = bd->link; } if (bd->link) { bd->link->u.back = bd->u.back; } - /* link it on to the evacuated large object list of the destination step + /* link it on to the evacuated large object list of the destination gen */ - new_stp = stp->to; - if (new_stp < gct->evac_step) { + new_gen = bd->dest; + if (new_gen < gct->evac_gen) { if (gct->eager_promotion) { - new_stp = gct->evac_step; + new_gen = gct->evac_gen; } else { gct->failed_to_evac = rtsTrue; } } - ws = &gct->steps[new_stp->abs_no]; + ws = &gct->gens[new_gen->no]; + bd->flags |= BF_EVACUATED; - bd->step = new_stp; - bd->gen_no = new_stp->gen_no; - bd->link = ws->todo_large_objects; - ws->todo_large_objects = bd; + initBdescr(bd, new_gen, new_gen->to); + + // If this is a block of pinned objects, we don't have to scan + // these objects, because they aren't allowed to contain any + // pointers. For these blocks, we skip the scavenge stage and put + // them straight on the scavenged_large_objects list. + if (bd->flags & BF_PINNED) { + ASSERT(get_itbl((StgClosure *)p)->type == ARR_WORDS); + if (new_gen != gen) { ACQUIRE_SPIN_LOCK(&new_gen->sync_large_objects); } + dbl_link_onto(bd, &new_gen->scavenged_large_objects); + new_gen->n_scavenged_large_blocks += bd->blocks; + if (new_gen != gen) { RELEASE_SPIN_LOCK(&new_gen->sync_large_objects); } + } else { + bd->link = ws->todo_large_objects; + ws->todo_large_objects = bd; + } + + RELEASE_SPIN_LOCK(&gen->sync_large_objects); +} + +/* ---------------------------------------------------------------------------- + Evacuate + + This is called (eventually) for every live object in the system. + + The caller to evacuate specifies a desired generation in the + gct->evac_gen thread-local variable. The following conditions apply to + evacuating an object which resides in generation M when we're + collecting up to generation N + + if M >= gct->evac_gen + if M > N do nothing + else evac to gen->to + + if M < gct->evac_gen evac to gct->evac_gen, step 0 + + if the object is already evacuated, then we check which generation + it now resides in. + + if M >= gct->evac_gen do nothing + if M < gct->evac_gen set gct->failed_to_evac flag to indicate that we + didn't manage to evacuate this object into gct->evac_gen. + + + OPTIMISATION NOTES: + + evacuate() is the single most important function performance-wise + in the GC. Various things have been tried to speed it up, but as + far as I can tell the code generated by gcc 3.2 with -O2 is about + as good as it's going to get. We pass the argument to evacuate() + in a register using the 'regparm' attribute (see the prototype for + evacuate() near the top of this file). + + Changing evacuate() to take an (StgClosure **) rather than + returning the new pointer seems attractive, because we can avoid + writing back the pointer when it hasn't changed (eg. for a static + object, or an object in a generation > N). However, I tried it and + it doesn't help. One reason is that the (StgClosure **) pointer + gets spilled to the stack inside evacuate(), resulting in far more + extra reads/writes than we save. 
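
evacuate_large() never copies: it unlinks the block descriptor from the doubly-linked gen->large_objects list and pushes it onto the appropriate destination list (todo_large_objects, or scavenged_large_objects for pinned blocks). The two list operations in isolation, with bdescr cut down to the fields involved (u.back flattened to back):

    #include <stddef.h>

    typedef struct bdescr_ {
        struct bdescr_ *link;   /* forward link                  */
        struct bdescr_ *back;   /* back link (u.back in the RTS) */
    } bdescr;

    /* Remove bd from the doubly-linked list whose head is *list. */
    static void unlink_large(bdescr *bd, bdescr **list)
    {
        if (bd->back)
            bd->back->link = bd->link;  /* splice around bd */
        else
            *list = bd->link;           /* bd was the head */
        if (bd->link)
            bd->link->back = bd->back;
    }

    /* Push bd onto a doubly-linked list (dbl_link_onto in the RTS). */
    static void dbl_link_onto(bdescr *bd, bdescr **list)
    {
        bd->link = *list;
        bd->back = NULL;
        if (*list)
            (*list)->back = bd;
        *list = bd;
    }
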
+ ------------------------------------------------------------------------- */ + +REGPARM1 GNUC_ATTR_HOT void +evacuate(StgClosure **p) +{ + bdescr *bd = NULL; + generation *gen; + StgClosure *q; + const StgInfoTable *info; + StgWord tag; + + q = *p; + +loop: + /* The tag and the pointer are split, to be merged after evacing */ + tag = GET_CLOSURE_TAG(q); + q = UNTAG_CLOSURE(q); + + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); + + if (!HEAP_ALLOCED_GC(q)) { + + if (!major_gc) return; + + info = get_itbl(q); + switch (info->type) { + + case THUNK_STATIC: + if (info->srt_bitmap != 0) { + if (*THUNK_STATIC_LINK((StgClosure *)q) == NULL) { +#ifndef THREADED_RTS + *THUNK_STATIC_LINK((StgClosure *)q) = gct->static_objects; + gct->static_objects = (StgClosure *)q; +#else + StgPtr link; + link = (StgPtr)cas((StgPtr)THUNK_STATIC_LINK((StgClosure *)q), + (StgWord)NULL, + (StgWord)gct->static_objects); + if (link == NULL) { + gct->static_objects = (StgClosure *)q; + } +#endif + } + } + return; + + case FUN_STATIC: + if (info->srt_bitmap != 0 && + *FUN_STATIC_LINK((StgClosure *)q) == NULL) { +#ifndef THREADED_RTS + *FUN_STATIC_LINK((StgClosure *)q) = gct->static_objects; + gct->static_objects = (StgClosure *)q; +#else + StgPtr link; + link = (StgPtr)cas((StgPtr)FUN_STATIC_LINK((StgClosure *)q), + (StgWord)NULL, + (StgWord)gct->static_objects); + if (link == NULL) { + gct->static_objects = (StgClosure *)q; + } +#endif + } + return; + + case IND_STATIC: + /* If q->saved_info != NULL, then it's a revertible CAF - it'll be + * on the CAF list, so don't do anything with it here (we'll + * scavenge it later). + */ + if (((StgIndStatic *)q)->saved_info == NULL) { + if (*IND_STATIC_LINK((StgClosure *)q) == NULL) { +#ifndef THREADED_RTS + *IND_STATIC_LINK((StgClosure *)q) = gct->static_objects; + gct->static_objects = (StgClosure *)q; +#else + StgPtr link; + link = (StgPtr)cas((StgPtr)IND_STATIC_LINK((StgClosure *)q), + (StgWord)NULL, + (StgWord)gct->static_objects); + if (link == NULL) { + gct->static_objects = (StgClosure *)q; + } +#endif + } + } + return; + + case CONSTR_STATIC: + if (*STATIC_LINK(info,(StgClosure *)q) == NULL) { +#ifndef THREADED_RTS + *STATIC_LINK(info,(StgClosure *)q) = gct->static_objects; + gct->static_objects = (StgClosure *)q; +#else + StgPtr link; + link = (StgPtr)cas((StgPtr)STATIC_LINK(info,(StgClosure *)q), + (StgWord)NULL, + (StgWord)gct->static_objects); + if (link == NULL) { + gct->static_objects = (StgClosure *)q; + } +#endif + } + /* I am assuming that static_objects pointers are not + * written to other objects, and thus, no need to retag. */ + return; + + case CONSTR_NOCAF_STATIC: + /* no need to put these on the static linked list, they don't need + * to be scavenged. + */ + return; + + default: + barf("evacuate(static): strange closure type %d", (int)(info->type)); + } + } + + bd = Bdescr((P_)q); + + if ((bd->flags & (BF_LARGE | BF_MARKED | BF_EVACUATED)) != 0) { + + // pointer into to-space: just return it. It might be a pointer + // into a generation that we aren't collecting (> N), or it + // might just be a pointer into to-space. The latter doesn't + // happen often, but allowing it makes certain things a bit + // easier; e.g. scavenging an object is idempotent, so it's OK to + // have an object on the mutable list multiple times. + if (bd->flags & BF_EVACUATED) { + // We aren't copying this object, so we have to check + // whether it is already in the target generation. (this is + // the write barrier). 
+ if (bd->gen < gct->evac_gen) { + gct->failed_to_evac = rtsTrue; + TICK_GC_FAILED_PROMOTION(); + } + return; + } + + /* evacuate large objects by re-linking them onto a different list. + */ + if (bd->flags & BF_LARGE) { + info = get_itbl(q); + if (info->type == TSO && + ((StgTSO *)q)->what_next == ThreadRelocated) { + q = (StgClosure *)((StgTSO *)q)->_link; + *p = q; + goto loop; + } + evacuate_large((P_)q); + return; + } + + /* If the object is in a gen that we're compacting, then we + * need to use an alternative evacuate procedure. + */ + if (!is_marked((P_)q,bd)) { + mark((P_)q,bd); + push_mark_stack((P_)q); + } + return; + } + + gen = bd->dest; + + info = q->header.info; + if (IS_FORWARDING_PTR(info)) + { + /* Already evacuated, just return the forwarding address. + * HOWEVER: if the requested destination generation (gct->evac_gen) is + * older than the actual generation (because the object was + * already evacuated to a younger generation) then we have to + * set the gct->failed_to_evac flag to indicate that we couldn't + * manage to promote the object to the desired generation. + */ + /* + * Optimisation: the check is fairly expensive, but we can often + * shortcut it if either the required generation is 0, or the + * current object (the EVACUATED) is in a high enough generation. + * We know that an EVACUATED always points to an object in the + * same or an older generation. gen is the lowest generation that the + * current object would be evacuated to, so we only do the full + * check if gen is too low. + */ + StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info); + *p = TAG_CLOSURE(tag,e); + if (gen < gct->evac_gen) { // optimisation + if (Bdescr((P_)e)->gen < gct->evac_gen) { + gct->failed_to_evac = rtsTrue; + TICK_GC_FAILED_PROMOTION(); + } + } + return; + } + + switch (INFO_PTR_TO_STRUCT(info)->type) { + + case WHITEHOLE: + goto loop; + + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + case MVAR_CLEAN: + case MVAR_DIRTY: + copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen); + return; + + // For ints and chars of low value, save space by replacing references to + // these with closures with references to common, shared ones in the RTS. + // + // * Except when compiling into Windows DLLs which don't support cross-package + // data references very well. 
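
The commit's central representational change is visible here: an evacuated closure no longer becomes an EVACUATED object; its info word is overwritten by the to-space address with the low bit set, which IS_FORWARDING_PTR tests and UN_FORWARDING_PTR strips. That works because info pointers are always at least 2-byte aligned. A toy round-trip (macro names abbreviated from the real ones):

    #include <assert.h>
    #include <stdint.h>

    #define MK_FWD(p)  ((uintptr_t)(p) | 1)
    #define IS_FWD(w)  ((w) & 1)
    #define UN_FWD(w)  ((void *)((w) & ~(uintptr_t)1))

    int main(void)
    {
        static uintptr_t to_space_copy[3];   /* word-aligned: low bit free */
        uintptr_t hdr = MK_FWD(to_space_copy);
        assert(IS_FWD(hdr));
        assert(UN_FWD(hdr) == (void *)to_space_copy);
        return 0;
    }
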
+ // + case CONSTR_0_1: + { +#if defined(__PIC__) && defined(mingw32_HOST_OS) + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); +#else + StgWord w = (StgWord)q->payload[0]; + if (info == Czh_con_info && + // unsigned, so always true: (StgChar)w >= MIN_CHARLIKE && + (StgChar)w <= MAX_CHARLIKE) { + *p = TAG_CLOSURE(tag, + (StgClosure *)CHARLIKE_CLOSURE((StgChar)w) + ); + } + else if (info == Izh_con_info && + (StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) { + *p = TAG_CLOSURE(tag, + (StgClosure *)INTLIKE_CLOSURE((StgInt)w) + ); + } + else { + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); + } +#endif + return; + } + + case FUN_0_1: + case FUN_1_0: + case CONSTR_1_0: + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); + return; + + case THUNK_1_0: + case THUNK_0_1: + copy(p,info,q,sizeofW(StgThunk)+1,gen); + return; + + case THUNK_1_1: + case THUNK_2_0: + case THUNK_0_2: +#ifdef NO_PROMOTE_THUNKS +#error bitrotted +#endif + copy(p,info,q,sizeofW(StgThunk)+2,gen); + return; + + case FUN_1_1: + case FUN_2_0: + case FUN_0_2: + case CONSTR_1_1: + case CONSTR_2_0: + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen,tag); + return; + + case CONSTR_0_2: + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen,tag); + return; - RELEASE_SPIN_LOCK(&stp->sync_large_objects); + case THUNK: + copy(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen); + return; + + case FUN: + case IND_PERM: + case IND_OLDGEN_PERM: + case CONSTR: + copy_tag_nolock(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen,tag); + return; + + case WEAK: + case PRIM: + case MUT_PRIM: + copy_tag(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen,tag); + return; + + case BCO: + copy(p,info,q,bco_sizeW((StgBCO *)q),gen); + return; + + case CAF_BLACKHOLE: + case BLACKHOLE: + copyPart(p,q,BLACKHOLE_sizeW(),sizeofW(StgHeader),gen); + return; + + case THUNK_SELECTOR: + eval_thunk_selector(p, (StgSelector *)q, rtsTrue); + return; + + case IND: + case IND_OLDGEN: + // follow chains of indirections, don't evacuate them + q = ((StgInd*)q)->indirectee; + *p = q; + goto loop; + + case RET_BCO: + case RET_SMALL: + case RET_BIG: + case RET_DYN: + case UPDATE_FRAME: + case STOP_FRAME: + case CATCH_FRAME: + case CATCH_STM_FRAME: + case CATCH_RETRY_FRAME: + case ATOMICALLY_FRAME: + // shouldn't see these + barf("evacuate: stack frame at %p\n", q); + + case PAP: + copy(p,info,q,pap_sizeW((StgPAP*)q),gen); + return; + + case AP: + copy(p,info,q,ap_sizeW((StgAP*)q),gen); + return; + + case AP_STACK: + copy(p,info,q,ap_stack_sizeW((StgAP_STACK*)q),gen); + return; + + case ARR_WORDS: + // just copy the block + copy(p,info,q,arr_words_sizeW((StgArrWords *)q),gen); + return; + + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + // just copy the block + copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),gen); + return; + + case TSO: + { + StgTSO *tso = (StgTSO *)q; + + /* Deal with redirected TSOs (a TSO that's had its stack enlarged). 
+ */ + if (tso->what_next == ThreadRelocated) { + q = (StgClosure *)tso->_link; + *p = q; + goto loop; + } + + /* To evacuate a small TSO, we need to adjust the stack pointer + */ + { + StgTSO *new_tso; + StgPtr r, s; + rtsBool mine; + + mine = copyPart(p,(StgClosure *)tso, tso_sizeW(tso), + sizeofW(StgTSO), gen); + if (mine) { + new_tso = (StgTSO *)*p; + move_TSO(tso, new_tso); + for (r = tso->sp, s = new_tso->sp; + r < tso->stack+tso->stack_size;) { + *s++ = *r++; + } + } + return; + } + } + + case TREC_CHUNK: + copy(p,info,q,sizeofW(StgTRecChunk),gen); + return; + + default: + barf("evacuate: strange closure type %d", (int)(INFO_PTR_TO_STRUCT(info)->type)); + } + + barf("evacuate"); } /* ----------------------------------------------------------------------------- @@ -178,19 +761,30 @@ unchain_thunk_selectors(StgSelector *p, StgClosure *val) // invoke eval_thunk_selector(), the recursive calls will not // evacuate the value (because we want to select on the value, // not evacuate it), so in this case val is in from-space. - // ASSERT(!HEAP_ALLOCED(val) || Bdescr((P_)val)->gen_no > N || (Bdescr((P_)val)->flags & BF_EVACUATED)); + // ASSERT(!HEAP_ALLOCED_GC(val) || Bdescr((P_)val)->gen_no > N || (Bdescr((P_)val)->flags & BF_EVACUATED)); prev = (StgSelector*)((StgClosure *)p)->payload[0]; // Update the THUNK_SELECTOR with an indirection to the - // EVACUATED closure now at p. Why do this rather than - // upd_evacuee(q,p)? Because we have an invariant that an - // EVACUATED closure always points to an object in the - // same or an older generation (required by the short-cut - // test in the EVACUATED case, below). - ((StgInd *)p)->indirectee = val; - write_barrier(); - SET_INFO(p, &stg_IND_info); + // value. The value is still in from-space at this stage. + // + // (old note: Why not do upd_evacuee(q,p)? Because we have an + // invariant that an EVACUATED closure always points to an + // object in the same or an older generation (required by + // the short-cut test in the EVACUATED case, below). + if ((StgClosure *)p == val) { + // must be a loop; just leave a BLACKHOLE in place. This + // can happen when we have a chain of selectors that + // eventually loops back on itself. We can't leave an + // indirection pointing to itself, and we want the program + // to deadlock if it ever enters this closure, so + // BLACKHOLE is correct. + SET_INFO(p, &stg_BLACKHOLE_info); + } else { + ((StgInd *)p)->indirectee = val; + write_barrier(); + SET_INFO(p, &stg_IND_info); + } // For the purposes of LDV profiling, we have created an // indirection. @@ -221,15 +815,20 @@ eval_thunk_selector (StgClosure **q, StgSelector * p, rtsBool evac) selector_chain: bd = Bdescr((StgPtr)p); - if (HEAP_ALLOCED(p)) { + if (HEAP_ALLOCED_GC(p)) { // If the THUNK_SELECTOR is in to-space or in a generation that we // are not collecting, then bale out early. We won't be able to // save any space in any case, and updating with an indirection is // trickier in a non-collected gen: we would have to update the // mutable list. - if ((bd->gen_no > N) || (bd->flags & BF_EVACUATED)) { + if (bd->flags & BF_EVACUATED) { unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p); *q = (StgClosure *)p; + // shortcut, behave as for: if (evac) evacuate(q); + if (evac && bd->gen < gct->evac_gen) { + gct->failed_to_evac = rtsTrue; + TICK_GC_FAILED_PROMOTION(); + } return; } // we don't update THUNK_SELECTORS in the compacted @@ -238,7 +837,7 @@ selector_chain: // (scavenge_mark_stack doesn't deal with IND). BEWARE! 
This // bit is very tricky to get right. If you make changes // around here, test by compiling stage 3 with +RTS -c -RTS. - if (bd->flags & BF_COMPACTED) { + if (bd->flags & BF_MARKED) { // must call evacuate() to mark this closure if evac==rtsTrue *q = (StgClosure *)p; if (evac) evacuate(q); @@ -260,9 +859,10 @@ selector_chain: } while (info_ptr == (W_)&stg_WHITEHOLE_info); // make sure someone else didn't get here first... - if (INFO_PTR_TO_STRUCT(info_ptr)->type != THUNK_SELECTOR) { + if (IS_FORWARDING_PTR(info_ptr) || + INFO_PTR_TO_STRUCT(info_ptr)->type != THUNK_SELECTOR) { // v. tricky now. The THUNK_SELECTOR has been evacuated - // by another thread, and is now either EVACUATED or IND. + // by another thread, and is now either a forwarding ptr or IND. // We need to extract ourselves from the current situation // as cleanly as possible. // - unlock the closure @@ -297,7 +897,16 @@ selector_loop: // from-space during marking, for example. We rely on the property // that evacuate() doesn't mind if it gets passed a to-space pointer. - info = get_itbl(selectee); + info = (StgInfoTable*)selectee->header.info; + + if (IS_FORWARDING_PTR(info)) { + // We don't follow pointers into to-space; the constructor + // has already been evacuated, so we won't save any space + // leaks by evaluating this selector thunk anyhow. + goto bale_out; + } + + info = INFO_PTR_TO_STRUCT(info); switch (info->type) { case WHITEHOLE: goto bale_out; // about to be evacuated by another thread (or a loop). @@ -323,8 +932,12 @@ selector_loop: // the original selector thunk, p. SET_INFO(p, (StgInfoTable *)info_ptr); LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC((StgClosure *)p); +#if defined(THREADED_RTS) + SET_INFO(p, &stg_WHITEHOLE_info); +#else SET_INFO(p, &stg_BLACKHOLE_info); #endif +#endif // the closure in val is now the "value" of the // THUNK_SELECTOR in p. However, val may itself be a @@ -332,33 +945,44 @@ selector_loop: // evaluating until we find the real value, and then // update the whole chain to point to the value. val_loop: - info = get_itbl(UNTAG_CLOSURE(val)); - switch (info->type) { - case IND: - case IND_PERM: - case IND_OLDGEN: - case IND_OLDGEN_PERM: - case IND_STATIC: - val = ((StgInd *)val)->indirectee; - goto val_loop; - case THUNK_SELECTOR: - ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector; - prev_thunk_selector = p; - p = (StgSelector*)val; - goto selector_chain; - default: - ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector; - prev_thunk_selector = p; - - *q = val; - if (evac) evacuate(q); - val = *q; - // evacuate() cannot recurse through - // eval_thunk_selector(), because we know val is not - // a THUNK_SELECTOR. - unchain_thunk_selectors(prev_thunk_selector, val); - return; + info_ptr = (StgWord)UNTAG_CLOSURE(val)->header.info; + if (!IS_FORWARDING_PTR(info_ptr)) + { + info = INFO_PTR_TO_STRUCT(info_ptr); + switch (info->type) { + case IND: + case IND_PERM: + case IND_OLDGEN: + case IND_OLDGEN_PERM: + case IND_STATIC: + val = ((StgInd *)val)->indirectee; + goto val_loop; + case THUNK_SELECTOR: + ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector; + prev_thunk_selector = p; + p = (StgSelector*)val; + goto selector_chain; + default: + break; + } } + ((StgClosure*)p)->payload[0] = (StgClosure *)prev_thunk_selector; + prev_thunk_selector = p; + + *q = val; + + // update the other selectors in the chain *before* + // evacuating the value. This is necessary in the case + // where the value turns out to be one of the selectors + // in the chain (i.e. 
we have a loop), and evacuating it + // would corrupt the chain. + unchain_thunk_selectors(prev_thunk_selector, val); + + // evacuate() cannot recurse through + // eval_thunk_selector(), because we know val is not + // a THUNK_SELECTOR. + if (evac) evacuate(q); + return; } case IND: @@ -370,12 +994,6 @@ selector_loop: selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee ); goto selector_loop; - case EVACUATED: - // We don't follow pointers into to-space; the constructor - // has already been evacuated, so we won't save any space - // leaks by evaluating this selector thunk anyhow. - goto bale_out; - case THUNK_SELECTOR: { StgClosure *val; @@ -411,8 +1029,6 @@ selector_loop: case THUNK_0_2: case THUNK_STATIC: case CAF_BLACKHOLE: - case SE_CAF_BLACKHOLE: - case SE_BLACKHOLE: case BLACKHOLE: // not evaluated yet goto bale_out; @@ -431,24 +1047,8 @@ bale_out: // check whether it was updated in the meantime. *q = (StgClosure *)p; if (evac) { - copy(q,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->step->to); + copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest); } unchain_thunk_selectors(prev_thunk_selector, *q); return; } - -/* ----------------------------------------------------------------------------- - move_TSO is called to update the TSO structure after it has been - moved from one place to another. - -------------------------------------------------------------------------- */ - -void -move_TSO (StgTSO *src, StgTSO *dest) -{ - ptrdiff_t diff; - - // relocate the stack pointer... - diff = (StgPtr)dest - (StgPtr)src; // In *words* - dest->sp = (StgPtr)dest->sp + diff; -} -
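
eval_thunk_selector() threads every selector it visits into a chain through payload[0]; unchain_thunk_selectors() then walks the chain backwards, turning each selector into an indirection to the final value, except when the chain loops back on itself, where it leaves a BLACKHOLE so that entering the closure deadlocks instead of looping. The back-link-and-rewrite shape with simplified nodes (Node and unchain are invented):

    #include <stddef.h>

    typedef enum { SELECTOR, IND, BLACKHOLE } Kind;

    typedef struct Node {
        Kind kind;
        struct Node *payload;  /* back-link while chained; value once IND */
    } Node;

    /* Rewrite the chain ending at p so each selector points at val. */
    static void unchain(Node *p, Node *val)
    {
        while (p != NULL) {
            Node *prev = p->payload;     /* saved back-link */
            if (p == val) {
                p->kind = BLACKHOLE;     /* self-loop: no indirection */
            } else {
                p->kind = IND;
                p->payload = val;        /* indirection to the value */
            }
            p = prev;
        }
    }
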
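The move_TSO() deleted at the end of the patch is still called from the TSO case in evacuate() above, so it survives elsewhere in the RTS: after copyPart() relocates a TSO, the saved stack pointer is an interior pointer into the old object and must be shifted by the distance the TSO moved. A well-defined miniature of that fixup (MiniTSO is invented; the real code uses a raw word diff because the stack lives inline after the TSO header):

    #include <assert.h>
    #include <string.h>
    #include <stdint.h>

    typedef struct {
        uintptr_t *sp;          /* interior pointer into stack[] */
        uintptr_t  stack[64];
    } MiniTSO;

    /* After copying src into dest, re-aim dest->sp at dest's own stack. */
    static void fix_sp(const MiniTSO *src, MiniTSO *dest)
    {
        dest->sp = dest->stack + (src->sp - src->stack);
    }

    int main(void)
    {
        static MiniTSO a, b;
        a.sp = &a.stack[60];    /* stacks grow down from the top */
        memcpy(&b, &a, sizeof b);
        fix_sp(&a, &b);
        assert(b.sp == &b.stack[60]);
        return 0;
    }
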