X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=ghc%2Frts%2FGC.c;h=567597c85ef7485f799c87e7ab8583bf2de31eb1;hb=2ba64673dbe842a1ca1630d85ee6e155942272ed;hp=0b236bfc9f157054617172649e88032ddd4e30ee;hpb=f762be1b5a12b215595acdfb0343a6161e1a0e86;p=ghc-hetmet.git diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index 0b236bf..567597c 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------------- - * $Id: GC.c,v 1.132 2002/03/12 11:50:02 simonmar Exp $ + * $Id: GC.c,v 1.156 2003/06/19 12:47:08 simonmar Exp $ * - * (c) The GHC Team 1998-1999 + * (c) The GHC Team 1998-2003 * * Generational garbage collector * @@ -11,6 +11,7 @@ #include "Rts.h" #include "RtsFlags.h" #include "RtsUtils.h" +#include "Apply.h" #include "Storage.h" #include "StoragePriv.h" #include "Stats.h" @@ -19,7 +20,6 @@ #include "Sanity.h" #include "BlockAlloc.h" #include "MBlock.h" -#include "Main.h" #include "ProfHeap.h" #include "SchedAPI.h" #include "Weak.h" @@ -27,6 +27,7 @@ #include "Prelude.h" #include "ParTicky.h" // ToDo: move into Rts.h #include "GCCompact.h" +#include "Signals.h" #if defined(GRAN) || defined(PAR) # include "GranSimRts.h" # include "ParallelRts.h" @@ -45,6 +46,8 @@ #include "RetainerProfile.h" #include "LdvProfile.h" +#include + /* STATIC OBJECT LIST. * * During GC: @@ -79,8 +82,8 @@ * We build up a static object list while collecting generations 0..N, * which is then appended to the static object list of generation N+1. */ -StgClosure* static_objects; // live static objects -StgClosure* scavenged_static_objects; // static objects scavenged so far +static StgClosure* static_objects; // live static objects +StgClosure* scavenged_static_objects; // static objects scavenged so far /* N is the oldest generation being collected, where the generations * are numbered starting at 0. A major GC (indicated by the major_gc @@ -118,38 +121,53 @@ static rtsBool failed_to_evac; /* Old to-space (used for two-space collector only) */ -bdescr *old_to_blocks; +static bdescr *old_to_blocks; /* Data used for allocation area sizing. */ -lnat new_blocks; // blocks allocated during this GC -lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC +static lnat new_blocks; // blocks allocated during this GC +static lnat g0s0_pcnt_kept = 30; // percentage of g0s0 live at last minor GC /* Used to avoid long recursion due to selector thunks */ -lnat thunk_selector_depth = 0; -#define MAX_THUNK_SELECTOR_DEPTH 256 +static lnat thunk_selector_depth = 0; +#define MAX_THUNK_SELECTOR_DEPTH 8 /* ----------------------------------------------------------------------------- Static function declarations -------------------------------------------------------------------------- */ +static bdescr * gc_alloc_block ( step *stp ); static void mark_root ( StgClosure **root ); -static StgClosure * evacuate ( StgClosure *q ); + +// Use a register argument for evacuate, if available. 
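/*
 * (Editorial sketch, not part of the patch: the GNU C guard that
 * follows can equivalently be factored into a macro so the
 * declaration and the definition cannot drift apart.  GC_REGPARM and
 * evacuate_sketch are invented names for illustration only; regparm
 * is an x86 calling-convention attribute, ignored with a warning on
 * other targets.)
 */
#if defined(__GNUC__) && __GNUC__ >= 2
#define GC_REGPARM __attribute__((regparm(1)))  /* pass 1st arg in a register */
#else
#define GC_REGPARM                              /* expands to nothing */
#endif

static StgClosure * evacuate_sketch ( StgClosure *q ) GC_REGPARM;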
+#if __GNUC__ >= 2 +static StgClosure * evacuate (StgClosure *q) __attribute__((regparm(1))); +#else +static StgClosure * evacuate (StgClosure *q); +#endif + static void zero_static_object_list ( StgClosure* first_static ); static void zero_mutable_list ( StgMutClosure *first ); static rtsBool traverse_weak_ptr_list ( void ); static void mark_weak_ptr_list ( StgWeak **list ); -static void scavenge ( step * ); -static void scavenge_mark_stack ( void ); -static void scavenge_stack ( StgPtr p, StgPtr stack_end ); -static rtsBool scavenge_one ( StgPtr p ); -static void scavenge_large ( step * ); -static void scavenge_static ( void ); -static void scavenge_mutable_list ( generation *g ); -static void scavenge_mut_once_list ( generation *g ); +static StgClosure * eval_thunk_selector ( nat field, StgSelector * p ); + + +static void scavenge ( step * ); +static void scavenge_mark_stack ( void ); +static void scavenge_stack ( StgPtr p, StgPtr stack_end ); +static rtsBool scavenge_one ( StgPtr p ); +static void scavenge_large ( step * ); +static void scavenge_static ( void ); +static void scavenge_mutable_list ( generation *g ); +static void scavenge_mut_once_list ( generation *g ); + +static void scavenge_large_bitmap ( StgPtr p, + StgLargeBitmap *large_bitmap, + nat size ); #if 0 && defined(DEBUG) static void gcCAFs ( void ); @@ -203,20 +221,67 @@ pop_mark_stack(void) } /* ----------------------------------------------------------------------------- + Allocate a new to-space block in the given step. + -------------------------------------------------------------------------- */ + +static bdescr * +gc_alloc_block(step *stp) +{ + bdescr *bd = allocBlock(); + bd->gen_no = stp->gen_no; + bd->step = stp; + bd->link = NULL; + + // blocks in to-space in generations up to and including N + // get the BF_EVACUATED flag. + if (stp->gen_no <= N) { + bd->flags = BF_EVACUATED; + } else { + bd->flags = 0; + } + + // Start a new to-space block, chain it on after the previous one. + if (stp->hp_bd == NULL) { + stp->hp_bd = bd; + } else { + stp->hp_bd->free = stp->hp; + stp->hp_bd->link = bd; + stp->hp_bd = bd; + } + + stp->hp = bd->start; + stp->hpLim = stp->hp + BLOCK_SIZE_W; + + stp->n_to_blocks++; + new_blocks++; + + return bd; +} + +/* ----------------------------------------------------------------------------- GarbageCollect - For garbage collecting generation N (and all younger generations): + Rough outline of the algorithm: for garbage collecting generation N + (and all younger generations): - follow all pointers in the root set. the root set includes all - mutable objects in all steps in all generations. + mutable objects in all generations (mutable_list and mut_once_list). - for each pointer, evacuate the object it points to into either - + to-space in the next higher step in that generation, if one exists, - + if the object's generation == N, then evacuate it to the next - generation if one exists, or else to-space in the current - generation. - + if the object's generation < N, then evacuate it to to-space - in the next generation. + + + to-space of the step given by step->to, which is the next + highest step in this generation or the first step in the next + generation if this is the last step. + + + to-space of generations[evac_gen]->steps[0], if evac_gen != 0. + When we evacuate an object we attempt to evacuate + everything it points to into the same generation - this is + achieved by setting evac_gen to the desired generation. 
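	 (Editorial note: in the code, this attempt is tracked by the
	 failed_to_evac flag, which evacuate() sets when it cannot
	 reach evac_gen.)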
If + we can't do this, then an entry in the mut_once list has to + be made for the cross-generation pointer. + + + if the object is already in a generation > N, then leave + it alone. - repeatedly scavenge to-space from each step in each generation being collected until no more objects can be evacuated. @@ -245,6 +310,11 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) Now, Now)); #endif +#if defined(RTS_USER_SIGNALS) + // block signals + blockUserSignals(); +#endif + // tell the stats department that we've started a GC stat_startGC(); @@ -315,9 +385,9 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) */ new_blocks = 0; - /* Initialise to-space in all the generations/steps that we're - * collecting. - */ + // Initialise to-space in all the generations/steps that we're + // collecting. + // for (g = 0; g <= N; g++) { generations[g].mut_once_list = END_MUT_LIST; generations[g].mut_list = END_MUT_LIST; @@ -329,28 +399,26 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) continue; } - /* Get a free block for to-space. Extra blocks will be chained on - * as necessary. - */ - bd = allocBlock(); stp = &generations[g].steps[s]; ASSERT(stp->gen_no == g); - ASSERT(stp->hp ? Bdescr(stp->hp)->step == stp : rtsTrue); - bd->gen_no = g; - bd->step = stp; - bd->link = NULL; - bd->flags = BF_EVACUATED; // it's a to-space block - stp->hp = bd->start; - stp->hpLim = stp->hp + BLOCK_SIZE_W; - stp->hp_bd = bd; + + // start a new to-space for this step. + stp->hp = NULL; + stp->hp_bd = NULL; + stp->to_blocks = NULL; + + // allocate the first to-space block; extra blocks will be + // chained on as necessary. + bd = gc_alloc_block(stp); stp->to_blocks = bd; - stp->n_to_blocks = 1; stp->scan = bd->start; stp->scan_bd = bd; + + // initialise the large object queues. stp->new_large_objects = NULL; stp->scavenged_large_objects = NULL; stp->n_scavenged_large_blocks = 0; - new_blocks++; + // mark the large objects as not evacuated yet for (bd = stp->large_objects; bd; bd = bd->link) { bd->flags = BF_LARGE; @@ -388,24 +456,16 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) } /* make sure the older generations have at least one block to - * allocate into (this makes things easier for copy(), see below. + * allocate into (this makes things easier for copy(), see below). */ for (g = N+1; g < RtsFlags.GcFlags.generations; g++) { for (s = 0; s < generations[g].n_steps; s++) { stp = &generations[g].steps[s]; if (stp->hp_bd == NULL) { ASSERT(stp->blocks == NULL); - bd = allocBlock(); - bd->gen_no = g; - bd->step = stp; - bd->link = NULL; - bd->flags = 0; // *not* a to-space block or a large object - stp->hp = bd->start; - stp->hpLim = stp->hp + BLOCK_SIZE_W; - stp->hp_bd = bd; + bd = gc_alloc_block(stp); stp->blocks = bd; stp->n_blocks = 1; - new_blocks++; } /* Set the scan pointer for older generations: remember we * still have to scavenge objects that have been promoted. 
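       * (Editorial note: promoted objects are copied into the older
       * generation's existing blocks rather than a separate to-space,
       * so scanning must begin at the current allocation pointer.)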
*/ @@ -626,8 +686,8 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) for (s = 0; s < generations[g].n_steps; s++) { stp = &generations[g].steps[s]; if (!(g == 0 && s == 0 && RtsFlags.GcFlags.generations > 1)) { + ASSERT(Bdescr(stp->hp) == stp->hp_bd); stp->hp_bd->free = stp->hp; - stp->hp_bd->link = NULL; } } } @@ -750,6 +810,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // add the new blocks we promoted during this GC stp->n_blocks += stp->n_to_blocks; + stp->n_to_blocks = 0; stp->n_large_blocks += stp->n_scavenged_large_blocks; } } @@ -985,7 +1046,6 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // Reset the nursery resetNurseries(); - // let go of lock (so that it can be re-grabbed below). RELEASE_LOCK(&sched_mutex); // start any pending finalizers @@ -993,7 +1053,7 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // send exceptions to any threads which were about to die resurrectThreads(resurrected_threads); - + ACQUIRE_LOCK(&sched_mutex); // Update the stable pointer hash table. @@ -1027,6 +1087,11 @@ GarbageCollect ( void (*get_roots)(evac_fn), rtsBool force_major_gc ) // ok, GC over: tell the stats department what happened. stat_endGC(allocated, collected, live, copied, N); +#if defined(RTS_USER_SIGNALS) + // unblock signals again + unblockUserSignals(); +#endif + //PAR_TICKY_TP(); } @@ -1105,31 +1170,41 @@ traverse_weak_ptr_list(void) continue; } - ASSERT(get_itbl(w)->type == WEAK); - - /* Now, check whether the key is reachable. - */ - new = isAlive(w->key); - if (new != NULL) { - w->key = new; - // evacuate the value and finalizer - w->value = evacuate(w->value); - w->finalizer = evacuate(w->finalizer); - // remove this weak ptr from the old_weak_ptr list - *last_w = w->link; - // and put it on the new weak ptr list - next_w = w->link; - w->link = weak_ptr_list; - weak_ptr_list = w; - flag = rtsTrue; - IF_DEBUG(weak, belch("Weak pointer still alive at %p -> %p", - w, w->key)); - continue; - } - else { - last_w = &(w->link); - next_w = w->link; + switch (get_itbl(w)->type) { + + case EVACUATED: + next_w = (StgWeak *)((StgEvacuated *)w)->evacuee; + *last_w = next_w; continue; + + case WEAK: + /* Now, check whether the key is reachable. + */ + new = isAlive(w->key); + if (new != NULL) { + w->key = new; + // evacuate the value and finalizer + w->value = evacuate(w->value); + w->finalizer = evacuate(w->finalizer); + // remove this weak ptr from the old_weak_ptr list + *last_w = w->link; + // and put it on the new weak ptr list + next_w = w->link; + w->link = weak_ptr_list; + weak_ptr_list = w; + flag = rtsTrue; + IF_DEBUG(weak, belch("Weak pointer still alive at %p -> %p", + w, w->key)); + continue; + } + else { + last_w = &(w->link); + next_w = w->link; + continue; + } + + default: + barf("traverse_weak_ptr_list: not WEAK"); } } @@ -1242,6 +1317,9 @@ mark_weak_ptr_list ( StgWeak **list ) last_w = list; for (w = *list; w; w = w->link) { + // w might be WEAK, EVACUATED, or DEAD_WEAK (actually CON_STATIC) here + ASSERT(w->header.info == &stg_DEAD_WEAK_info + || get_itbl(w)->type == WEAK || get_itbl(w)->type == EVACUATED); (StgClosure *)w = evacuate((StgClosure *)w); *last_w = w; last_w = &(w->link); @@ -1265,27 +1343,35 @@ isAlive(StgClosure *p) while (1) { + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl(p); - /* ToDo: for static closures, check the static link field. - * Problem here is that we sometimes don't set the link field, eg. 
- * for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. - */ + // ignore static closures + // + // ToDo: for static closures, check the static link field. + // Problem here is that we sometimes don't set the link field, eg. + // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. + // + if (!HEAP_ALLOCED(p)) { + return p; + } - loop: - bd = Bdescr((P_)p); // ignore closures in generations that we're not collecting. - if (LOOKS_LIKE_STATIC(p) || bd->gen_no > N) { + bd = Bdescr((P_)p); + if (bd->gen_no > N) { + return p; + } + + // if it's a pointer into to-space, then we're done + if (bd->flags & BF_EVACUATED) { return p; } - // large objects have an evacuated flag + + // large objects use the evacuated flag if (bd->flags & BF_LARGE) { - if (bd->flags & BF_EVACUATED) { - return p; - } else { - return NULL; - } + return NULL; } + // check the mark bit for compacted steps if (bd->step->is_compacted && is_marked((P_)p,bd)) { return p; @@ -1309,8 +1395,9 @@ isAlive(StgClosure *p) case TSO: if (((StgTSO *)p)->what_next == ThreadRelocated) { p = (StgClosure *)((StgTSO *)p)->link; - goto loop; - } + continue; + } + return NULL; default: // dead. @@ -1325,34 +1412,15 @@ mark_root(StgClosure **root) *root = evacuate(*root); } -static void -addBlock(step *stp) -{ - bdescr *bd = allocBlock(); - bd->gen_no = stp->gen_no; - bd->step = stp; - - if (stp->gen_no <= N) { - bd->flags = BF_EVACUATED; - } else { - bd->flags = 0; - } - - stp->hp_bd->free = stp->hp; - stp->hp_bd->link = bd; - stp->hp = bd->start; - stp->hpLim = stp->hp + BLOCK_SIZE_W; - stp->hp_bd = bd; - stp->n_to_blocks++; - new_blocks++; -} - - static __inline__ void upd_evacuee(StgClosure *p, StgClosure *dest) { - p->header.info = &stg_EVACUATED_info; - ((StgEvacuated *)p)->evacuee = dest; + // Source object must be in from-space: + ASSERT((Bdescr((P_)p)->flags & BF_EVACUATED) == 0); + // not true: (ToDo: perhaps it should be) + // ASSERT(Bdescr((P_)dest)->flags & BF_EVACUATED); + p->header.info = &stg_EVACUATED_info; + ((StgEvacuated *)p)->evacuee = dest; } @@ -1383,7 +1451,7 @@ copy(StgClosure *src, nat size, step *stp) * necessary. */ if (stp->hp + size >= stp->hpLim) { - addBlock(stp); + gc_alloc_block(stp); } for(to = stp->hp, from = (P_)src; size>0; --size) { @@ -1426,7 +1494,7 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) } if (stp->hp + size_to_reserve >= stp->hpLim) { - addBlock(stp); + gc_alloc_block(stp); } for(to = stp->hp, from = (P_)src; size_to_copy>0; --size_to_copy) { @@ -1454,8 +1522,8 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) Evacuate a large object This just consists of removing the object from the (doubly-linked) - large_alloc_list, and linking it on to the (singly-linked) - new_large_objects list, from where it will be scavenged later. + step->large_objects list, and linking it on to the (singly-linked) + step->new_large_objects list, from where it will be scavenged later. Convention: bd->flags has BF_EVACUATED set for a large object that has been evacuated, or unset otherwise. @@ -1521,7 +1589,6 @@ evacuate_large(StgPtr p) the promotion until the next GC. -------------------------------------------------------------------------- */ - static StgClosure * mkMutCons(StgClosure *ptr, generation *gen) { @@ -1534,7 +1601,7 @@ mkMutCons(StgClosure *ptr, generation *gen) * necessary. 
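   * (Editorial note: the hp/hpLim overflow check just below is the
   * same bump-allocation pattern used by copy() and copyPart();
   * gc_alloc_block() chains a fresh block onto the step when the
   * current block is full.)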
*/ if (stp->hp + sizeofW(StgIndOldGen) >= stp->hpLim) { - addBlock(stp); + gc_alloc_block(stp); } q = (StgMutVar *)stp->hp; @@ -1570,6 +1637,23 @@ mkMutCons(StgClosure *ptr, generation *gen) if M < evac_gen set failed_to_evac flag to indicate that we didn't manage to evacuate this object into evac_gen. + + OPTIMISATION NOTES: + + evacuate() is the single most important function performance-wise + in the GC. Various things have been tried to speed it up, but as + far as I can tell the code generated by gcc 3.2 with -O2 is about + as good as it's going to get. We pass the argument to evacuate() + in a register using the 'regparm' attribute (see the prototype for + evacuate() near the top of this file). + + Changing evacuate() to take an (StgClosure **) rather than + returning the new pointer seems attractive, because we can avoid + writing back the pointer when it hasn't changed (eg. for a static + object, or an object in a generation > N). However, I tried it and + it doesn't help. One reason is that the (StgClosure **) pointer + gets spilled to the stack inside evacuate(), resulting in far more + extra reads/writes than we save. -------------------------------------------------------------------------- */ static StgClosure * @@ -1584,9 +1668,6 @@ loop: if (HEAP_ALLOCED(q)) { bd = Bdescr((P_)q); - // not a group head: find the group head - if (bd->blocks == 0) { bd = bd->link; } - if (bd->gen_no > N) { /* Can't evacuate this object, because it's in a generation * older than the ones we're collecting. Let's hope that it's @@ -1636,16 +1717,14 @@ loop: #endif // make sure the info pointer is into text space - ASSERT(q && (LOOKS_LIKE_GHC_INFO(GET_INFO(q)) - || IS_HUGS_CONSTR_INFO(GET_INFO(q)))); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); info = get_itbl(q); switch (info -> type) { case MUT_VAR: case MVAR: - to = copy(q,sizeW_fromITBL(info),stp); - return to; + return copy(q,sizeW_fromITBL(info),stp); case CONSTR_0_1: { @@ -1697,9 +1776,11 @@ loop: case WEAK: case FOREIGN: case STABLE_NAME: - case BCO: return copy(q,sizeW_fromITBL(info),stp); + case BCO: + return copy(q,bco_sizeW((StgBCO *)q),stp); + case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: case SE_BLACKHOLE: @@ -1712,143 +1793,32 @@ loop: case THUNK_SELECTOR: { - const StgInfoTable* selectee_info; - StgClosure* selectee = ((StgSelector*)q)->selectee; - - selector_loop: - selectee_info = get_itbl(selectee); - switch (selectee_info->type) { - case CONSTR: - case CONSTR_1_0: - case CONSTR_0_1: - case CONSTR_2_0: - case CONSTR_1_1: - case CONSTR_0_2: - case CONSTR_STATIC: - case CONSTR_NOCAF_STATIC: - { - StgWord offset = info->layout.selector_offset; - - // check that the size is in range - ASSERT(offset < - (StgWord32)(selectee_info->layout.payload.ptrs + - selectee_info->layout.payload.nptrs)); - - // perform the selection! - q = selectee->payload[offset]; - if (major_gc==rtsTrue) {TICK_GC_SEL_MAJOR();} else {TICK_GC_SEL_MINOR();} - - /* if we're already in to-space, there's no need to continue - * with the evacuation, just update the source address with - * a pointer to the (evacuated) constructor field. 
- */ - if (HEAP_ALLOCED(q)) { - bdescr *bd = Bdescr((P_)q); - if (bd->flags & BF_EVACUATED) { - if (bd->gen_no < evac_gen) { - failed_to_evac = rtsTrue; - TICK_GC_FAILED_PROMOTION(); - } - return q; - } - } + StgClosure *p; - /* otherwise, carry on and evacuate this constructor field, - * (but not the constructor itself) - */ - goto loop; + if (thunk_selector_depth > MAX_THUNK_SELECTOR_DEPTH) { + return copy(q,THUNK_SELECTOR_sizeW(),stp); } - case IND: - case IND_STATIC: - case IND_PERM: - case IND_OLDGEN: - case IND_OLDGEN_PERM: - selectee = ((StgInd *)selectee)->indirectee; - goto selector_loop; - - case EVACUATED: - selectee = ((StgEvacuated *)selectee)->evacuee; - goto selector_loop; - - case THUNK_SELECTOR: -# if 0 - /* Disabled 03 April 2001 by JRS; it seems to cause the GC (or - something) to go into an infinite loop when the nightly - stage2 compiles PrelTup.lhs. */ - - /* we can't recurse indefinitely in evacuate(), so set a - * limit on the number of times we can go around this - * loop. - */ - if (thunk_selector_depth < MAX_THUNK_SELECTOR_DEPTH) { - bdescr *bd; - bd = Bdescr((P_)selectee); - if (!bd->flags & BF_EVACUATED) { - thunk_selector_depth++; - selectee = evacuate(selectee); - thunk_selector_depth--; - goto selector_loop; - } - } else { - TICK_GC_SEL_ABANDONED(); - // and fall through... - } -# endif - - case AP_UPD: - case THUNK: - case THUNK_1_0: - case THUNK_0_1: - case THUNK_2_0: - case THUNK_1_1: - case THUNK_0_2: - case THUNK_STATIC: - case CAF_BLACKHOLE: - case SE_CAF_BLACKHOLE: - case SE_BLACKHOLE: - case BLACKHOLE: - case BLACKHOLE_BQ: - // not evaluated yet - break; - -#if defined(PAR) - // a copy of the top-level cases below - case RBH: // cf. BLACKHOLE_BQ - { - //StgInfoTable *rip = get_closure_info(q, &size, &ptrs, &nonptrs, &vhs, str); - to = copy(q,BLACKHOLE_sizeW(),stp); - //ToDo: derive size etc from reverted IP - //to = copy(q,size,stp); - // recordMutable((StgMutClosure *)to); - return to; - } - - case BLOCKED_FETCH: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_NONUPD_SIZE); - to = copy(q,sizeofW(StgBlockedFetch),stp); - return to; + p = eval_thunk_selector(info->layout.selector_offset, + (StgSelector *)q); -# ifdef DIST - case REMOTE_REF: -# endif - case FETCH_ME: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE); - to = copy(q,sizeofW(StgFetchMe),stp); - return to; - - case FETCH_ME_BQ: - ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE); - to = copy(q,sizeofW(StgFetchMeBlockingQueue),stp); - return to; + if (p == NULL) { + return copy(q,THUNK_SELECTOR_sizeW(),stp); + } else { + // q is still BLACKHOLE'd. + thunk_selector_depth++; + p = evacuate(p); + thunk_selector_depth--; + upd_evacuee(q,p); +#ifdef PROFILING + // We store the size of the just evacuated object in the + // LDV word so that the profiler can guess the position of + // the next object later. 
+ SET_EVACUAEE_FOR_LDV(q, THUNK_SELECTOR_sizeW()); #endif - - default: - barf("evacuate: THUNK_SELECTOR: strange selectee %d", - (int)(selectee_info->type)); - } + return p; + } } - return copy(q,THUNK_SELECTOR_sizeW(),stp); case IND: case IND_OLDGEN: @@ -1857,7 +1827,7 @@ loop: goto loop; case THUNK_STATIC: - if (info->srt_len > 0 && major_gc && + if (info->srt_bitmap != 0 && major_gc && THUNK_STATIC_LINK((StgClosure *)q) == NULL) { THUNK_STATIC_LINK((StgClosure *)q) = static_objects; static_objects = (StgClosure *)q; @@ -1865,7 +1835,7 @@ loop: return q; case FUN_STATIC: - if (info->srt_len > 0 && major_gc && + if (info->srt_bitmap != 0 && major_gc && FUN_STATIC_LINK((StgClosure *)q) == NULL) { FUN_STATIC_LINK((StgClosure *)q) = static_objects; static_objects = (StgClosure *)q; @@ -1909,17 +1879,16 @@ loop: case UPDATE_FRAME: case STOP_FRAME: case CATCH_FRAME: - case SEQ_FRAME: // shouldn't see these barf("evacuate: stack frame at %p\n", q); - case AP_UPD: case PAP: - /* PAPs and AP_UPDs are special - the payload is a copy of a chunk - * of stack, tagging and all. - */ + case AP: return copy(q,pap_sizeW((StgPAP*)q),stp); + case AP_STACK: + return copy(q,ap_stack_sizeW((StgAP_STACK*)q),stp); + case EVACUATED: /* Already evacuated, just return the forwarding address. * HOWEVER: if the requested destination generation (evac_gen) is @@ -1930,7 +1899,7 @@ loop: */ if (evac_gen > 0) { // optimisation StgClosure *p = ((StgEvacuated*)q)->evacuee; - if (Bdescr((P_)p)->gen_no < evac_gen) { + if (HEAP_ALLOCED(p) && Bdescr((P_)p)->gen_no < evac_gen) { failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } @@ -1961,8 +1930,18 @@ loop: * list it contains. */ { - StgTSO *new_tso = (StgTSO *)copy((StgClosure *)tso,tso_sizeW(tso),stp); + StgTSO *new_tso; + StgPtr p, q; + + new_tso = (StgTSO *)copyPart((StgClosure *)tso, + tso_sizeW(tso), + sizeofW(StgTSO), stp); move_TSO(tso, new_tso); + for (p = tso->sp, q = new_tso->sp; + p < tso->stack+tso->stack_size;) { + *q++ = *p++; + } + return (StgClosure *)new_tso; } } @@ -2016,133 +1995,400 @@ loop: } /* ----------------------------------------------------------------------------- - move_TSO is called to update the TSO structure after it has been - moved from one place to another. + Evaluate a THUNK_SELECTOR if possible. + + returns: NULL if we couldn't evaluate this THUNK_SELECTOR, or + a closure pointer if we evaluated it and this is the result. Note + that "evaluating" the THUNK_SELECTOR doesn't necessarily mean + reducing it to HNF, just that we have eliminated the selection. + The result might be another thunk, or even another THUNK_SELECTOR. + + If the return value is non-NULL, the original selector thunk has + been BLACKHOLE'd, and should be updated with an indirection or a + forwarding pointer. If the return value is NULL, then the selector + thunk is unchanged. -------------------------------------------------------------------------- */ -void -move_TSO(StgTSO *src, StgTSO *dest) +static StgClosure * +eval_thunk_selector( nat field, StgSelector * p ) { - ptrdiff_t diff; + StgInfoTable *info; + const StgInfoTable *info_ptr; + StgClosure *selectee; + + selectee = p->selectee; - // relocate the stack pointers... - diff = (StgPtr)dest - (StgPtr)src; // In *words* - dest->sp = (StgPtr)dest->sp + diff; - dest->su = (StgUpdateFrame *) ((P_)dest->su + diff); + // Save the real info pointer (NOTE: not the same as get_itbl()). 
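    // (Editorial note: get_itbl() yields the StgInfoTable view of the
    // header word, which is not the raw pointer itself when info
    // tables are laid out next to the code; the raw word is saved so
    // that bale_out below can restore it verbatim.)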
+ info_ptr = p->header.info; - relocate_stack(dest, diff); -} + // If the THUNK_SELECTOR is in a generation that we are not + // collecting, then bail out early. We won't be able to save any + // space in any case, and updating with an indirection is trickier + // in an old gen. + if (Bdescr((StgPtr)p)->gen_no > N) { + return NULL; + } -/* ----------------------------------------------------------------------------- - relocate_stack is called to update the linkage between - UPDATE_FRAMEs (and SEQ_FRAMEs etc.) when a stack is moved from one - place to another. - -------------------------------------------------------------------------- */ + // BLACKHOLE the selector thunk, since it is now under evaluation. + // This is important to stop us going into an infinite loop if + // this selector thunk eventually refers to itself. + SET_INFO(p,&stg_BLACKHOLE_info); + +selector_loop: + + // We don't want to end up in to-space, because this causes + // problems when the GC later tries to evacuate the result of + // eval_thunk_selector(). There are various ways this could + // happen: + // + // - following an IND_STATIC + // + // - when the old generation is compacted, the mark phase updates + // from-space pointers to be to-space pointers, and we can't + // reliably tell which we're following (eg. from an IND_STATIC). + // + // So we use the block-descriptor test to find out if we're in + // to-space. + // + if (HEAP_ALLOCED(selectee) && + Bdescr((StgPtr)selectee)->flags & BF_EVACUATED) { + goto bale_out; + } -StgTSO * -relocate_stack(StgTSO *dest, ptrdiff_t diff) -{ - StgUpdateFrame *su; - StgCatchFrame *cf; - StgSeqFrame *sf; + info = get_itbl(selectee); + switch (info->type) { + case CONSTR: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_2_0: + case CONSTR_1_1: + case CONSTR_0_2: + case CONSTR_STATIC: + case CONSTR_NOCAF_STATIC: + // check that the size is in range + ASSERT(field < (StgWord32)(info->layout.payload.ptrs + + info->layout.payload.nptrs)); + + // ToDo: shouldn't we test whether this pointer is in + // to-space? + return selectee->payload[field]; + + case IND: + case IND_PERM: + case IND_OLDGEN: + case IND_OLDGEN_PERM: + case IND_STATIC: + selectee = ((StgInd *)selectee)->indirectee; + goto selector_loop; - su = dest->su; + case EVACUATED: + // We don't follow pointers into to-space; the constructor + // has already been evacuated, so we won't save any space + // leaks by evaluating this selector thunk anyhow. + break; - while ((P_)su < dest->stack + dest->stack_size) { - switch (get_itbl(su)->type) { - - // GCC actually manages to common up these three cases! + case THUNK_SELECTOR: + { + StgClosure *val; - case UPDATE_FRAME: - su->link = (StgUpdateFrame *) ((StgPtr)su->link + diff); - su = su->link; - continue; + // check that we don't recurse too much, re-using the + // depth bound also used in evacuate(). + thunk_selector_depth++; + if (thunk_selector_depth > MAX_THUNK_SELECTOR_DEPTH) { + break; + } - case CATCH_FRAME: - cf = (StgCatchFrame *)su; - cf->link = (StgUpdateFrame *) ((StgPtr)cf->link + diff); - su = cf->link; - continue; + val = eval_thunk_selector(info->layout.selector_offset, + (StgSelector *)selectee); - case SEQ_FRAME: - sf = (StgSeqFrame *)su; - sf->link = (StgUpdateFrame *) ((StgPtr)sf->link + diff); - su = sf->link; - continue; + thunk_selector_depth--; - case STOP_FRAME: - // all done! - break; + if (val == NULL) { + break; + } else { + // We evaluated this selector thunk, so update it with + // an indirection. 
NOTE: we don't use UPD_IND here, + // because we are guaranteed that p is in a generation + // that we are collecting, and we never want to put the + // indirection on a mutable list. +#ifdef PROFILING + // For the purposes of LDV profiling, we have destroyed + // the original selector thunk. + SET_INFO(p, info_ptr); + LDV_recordDead_FILL_SLOP_DYNAMIC(selectee); +#endif + ((StgInd *)selectee)->indirectee = val; + SET_INFO(selectee,&stg_IND_info); +#ifdef PROFILING + // For the purposes of LDV profiling, we have created an + // indirection. + LDV_recordCreate(selectee); +#endif + selectee = val; + goto selector_loop; + } + } - default: - barf("relocate_stack %d", (int)(get_itbl(su)->type)); + case AP: + case THUNK: + case THUNK_1_0: + case THUNK_0_1: + case THUNK_2_0: + case THUNK_1_1: + case THUNK_0_2: + case THUNK_STATIC: + case CAF_BLACKHOLE: + case SE_CAF_BLACKHOLE: + case SE_BLACKHOLE: + case BLACKHOLE: + case BLACKHOLE_BQ: +#if defined(PAR) + case RBH: + case BLOCKED_FETCH: +# ifdef DIST + case REMOTE_REF: +# endif + case FETCH_ME: + case FETCH_ME_BQ: +#endif + // not evaluated yet + break; + + default: + barf("eval_thunk_selector: strange selectee %d", + (int)(info->type)); } - break; - } - return dest; +bale_out: + // We didn't manage to evaluate this thunk; restore the old info pointer + SET_INFO(p, info_ptr); + return NULL; } +/* ----------------------------------------------------------------------------- + move_TSO is called to update the TSO structure after it has been + moved from one place to another. + -------------------------------------------------------------------------- */ - -static inline void -scavenge_srt(const StgInfoTable *info) +void +move_TSO (StgTSO *src, StgTSO *dest) { - StgClosure **srt, **srt_end; + ptrdiff_t diff; - /* evacuate the SRT. If srt_len is zero, then there isn't an - * srt field in the info table. That's ok, because we'll - * never dereference it. - */ - srt = (StgClosure **)(info->srt); - srt_end = srt + info->srt_len; - for (; srt < srt_end; srt++) { - /* Special-case to handle references to closures hiding out in DLLs, since - double indirections required to get at those. The code generator knows - which is which when generating the SRT, so it stores the (indirect) - reference to the DLL closure in the table by first adding one to it. - We check for this here, and undo the addition before evacuating it. - - If the SRT entry hasn't got bit 0 set, the SRT entry points to a - closure that's fixed at link-time, and no extra magic is required. - */ -#ifdef ENABLE_WIN32_DLL_SUPPORT - if ( (unsigned long)(*srt) & 0x1 ) { - evacuate(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1))); - } else { - evacuate(*srt); - } -#else - evacuate(*srt); -#endif - } + // relocate the stack pointer... + diff = (StgPtr)dest - (StgPtr)src; // In *words* + dest->sp = (StgPtr)dest->sp + diff; } -/* ----------------------------------------------------------------------------- - Scavenge a TSO. - -------------------------------------------------------------------------- */ - +/* Similar to scavenge_large_bitmap(), but we don't write back the + * pointers we get back from evacuate(). 
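 * (Editorial note: SRT entries refer to static closures, which
 * evacuate() never moves, so the pointer it returns is unchanged and
 * writing it back would be redundant.)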
+ */ +static void +scavenge_large_srt_bitmap( StgLargeSRT *large_srt ) +{ + nat i, b, size; + StgWord bitmap; + StgClosure **p; + + b = 0; + bitmap = large_srt->l.bitmap[b]; + size = (nat)large_srt->l.size; + p = large_srt->srt; + for (i = 0; i < size; ) { + if ((bitmap & 1) != 0) { + evacuate(*p); + } + i++; + p++; + if (i % BITS_IN(W_) == 0) { + b++; + bitmap = large_srt->l.bitmap[b]; + } else { + bitmap = bitmap >> 1; + } + } +} + +/* evacuate the SRT. If srt_bitmap is zero, then there isn't an + * srt field in the info table. That's ok, because we'll + * never dereference it. + */ +static inline void +scavenge_srt (StgClosure **srt, nat srt_bitmap) +{ + nat bitmap; + StgClosure **p; + + bitmap = srt_bitmap; + p = srt; + + if (bitmap == (StgHalfWord)(-1)) { + scavenge_large_srt_bitmap( (StgLargeSRT *)srt ); + return; + } + + while (bitmap != 0) { + if ((bitmap & 1) != 0) { +#ifdef ENABLE_WIN32_DLL_SUPPORT + // Special-case to handle references to closures hiding out in DLLs, since + // double indirections required to get at those. The code generator knows + // which is which when generating the SRT, so it stores the (indirect) + // reference to the DLL closure in the table by first adding one to it. + // We check for this here, and undo the addition before evacuating it. + // + // If the SRT entry hasn't got bit 0 set, the SRT entry points to a + // closure that's fixed at link-time, and no extra magic is required. + if ( (unsigned long)(*srt) & 0x1 ) { + evacuate(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & ~0x1))); + } else { + evacuate(*p); + } +#else + evacuate(*p); +#endif + } + p++; + bitmap = bitmap >> 1; + } +} + + +static inline void +scavenge_thunk_srt(const StgInfoTable *info) +{ + StgThunkInfoTable *thunk_info; + + thunk_info = itbl_to_thunk_itbl(info); + scavenge_srt((StgClosure **)thunk_info->srt, thunk_info->i.srt_bitmap); +} + +static inline void +scavenge_fun_srt(const StgInfoTable *info) +{ + StgFunInfoTable *fun_info; + + fun_info = itbl_to_fun_itbl(info); + scavenge_srt((StgClosure **)fun_info->srt, fun_info->i.srt_bitmap); +} + +static inline void +scavenge_ret_srt(const StgInfoTable *info) +{ + StgRetInfoTable *ret_info; + + ret_info = itbl_to_ret_itbl(info); + scavenge_srt((StgClosure **)ret_info->srt, ret_info->i.srt_bitmap); +} + +/* ----------------------------------------------------------------------------- + Scavenge a TSO. 
+ -------------------------------------------------------------------------- */ + static void scavengeTSO (StgTSO *tso) { - // chase the link field for any TSOs on the same queue - (StgClosure *)tso->link = evacuate((StgClosure *)tso->link); - if ( tso->why_blocked == BlockedOnMVar - || tso->why_blocked == BlockedOnBlackHole - || tso->why_blocked == BlockedOnException + // chase the link field for any TSOs on the same queue + (StgClosure *)tso->link = evacuate((StgClosure *)tso->link); + if ( tso->why_blocked == BlockedOnMVar + || tso->why_blocked == BlockedOnBlackHole + || tso->why_blocked == BlockedOnException #if defined(PAR) - || tso->why_blocked == BlockedOnGA - || tso->why_blocked == BlockedOnGA_NoSend + || tso->why_blocked == BlockedOnGA + || tso->why_blocked == BlockedOnGA_NoSend #endif - ) { - tso->block_info.closure = evacuate(tso->block_info.closure); - } - if ( tso->blocked_exceptions != NULL ) { - tso->blocked_exceptions = - (StgTSO *)evacuate((StgClosure *)tso->blocked_exceptions); - } - // scavenge this thread's stack - scavenge_stack(tso->sp, &(tso->stack[tso->stack_size])); + ) { + tso->block_info.closure = evacuate(tso->block_info.closure); + } + if ( tso->blocked_exceptions != NULL ) { + tso->blocked_exceptions = + (StgTSO *)evacuate((StgClosure *)tso->blocked_exceptions); + } + + // scavenge this thread's stack + scavenge_stack(tso->sp, &(tso->stack[tso->stack_size])); +} + +/* ----------------------------------------------------------------------------- + Blocks of function args occur on the stack (at the top) and + in PAPs. + -------------------------------------------------------------------------- */ + +static inline StgPtr +scavenge_arg_block (StgFunInfoTable *fun_info, StgClosure **args) +{ + StgPtr p; + StgWord bitmap; + nat size; + + p = (StgPtr)args; + switch (fun_info->fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->bitmap); + size = BITMAP_SIZE(fun_info->bitmap); + goto small_bitmap; + case ARG_GEN_BIG: + size = ((StgLargeBitmap *)fun_info->bitmap)->size; + scavenge_large_bitmap(p, (StgLargeBitmap *)fun_info->bitmap, size); + p += size; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->fun_type]); + size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->fun_type]); + small_bitmap: + while (size > 0) { + if ((bitmap & 1) == 0) { + (StgClosure *)*p = evacuate((StgClosure *)*p); + } + p++; + bitmap = bitmap >> 1; + size--; + } + break; + } + return p; +} + +static inline StgPtr +scavenge_PAP (StgPAP *pap) +{ + StgPtr p; + StgWord bitmap, size; + StgFunInfoTable *fun_info; + + pap->fun = evacuate(pap->fun); + fun_info = get_fun_itbl(pap->fun); + ASSERT(fun_info->i.type != PAP); + + p = (StgPtr)pap->payload; + size = pap->n_args; + + switch (fun_info->fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->bitmap); + goto small_bitmap; + case ARG_GEN_BIG: + scavenge_large_bitmap(p, (StgLargeBitmap *)fun_info->bitmap, size); + p += size; + break; + case ARG_BCO: + scavenge_large_bitmap((StgPtr)pap->payload, BCO_BITMAP(pap->fun), size); + p += size; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->fun_type]); + small_bitmap: + size = pap->n_args; + while (size > 0) { + if ((bitmap & 1) == 0) { + (StgClosure *)*p = evacuate((StgClosure *)*p); + } + p++; + bitmap = bitmap >> 1; + size--; + } + break; + } + return p; } /* ----------------------------------------------------------------------------- @@ -2184,12 +2430,14 @@ scavenge(step *stp) continue; } + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl((StgClosure *)p); 
- ASSERT(p && (LOOKS_LIKE_GHC_INFO(info) || IS_HUGS_CONSTR_INFO(info))); + ASSERT(thunk_selector_depth == 0); + q = p; switch (info->type) { - + case MVAR: /* treat MVars specially, because we don't want to evacuate the * mut_link field in the middle of the closure. @@ -2207,9 +2455,15 @@ scavenge(step *stp) break; } - case THUNK_2_0: case FUN_2_0: - scavenge_srt(info); + scavenge_fun_srt(info); + ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + p += sizeofW(StgHeader) + 2; + break; + + case THUNK_2_0: + scavenge_thunk_srt(info); case CONSTR_2_0: ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); @@ -2217,54 +2471,66 @@ scavenge(step *stp) break; case THUNK_1_0: - scavenge_srt(info); + scavenge_thunk_srt(info); ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); p += sizeofW(StgHeader) + 2; // MIN_UPD_SIZE break; case FUN_1_0: - scavenge_srt(info); + scavenge_fun_srt(info); case CONSTR_1_0: ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); p += sizeofW(StgHeader) + 1; break; case THUNK_0_1: - scavenge_srt(info); + scavenge_thunk_srt(info); p += sizeofW(StgHeader) + 2; // MIN_UPD_SIZE break; case FUN_0_1: - scavenge_srt(info); + scavenge_fun_srt(info); case CONSTR_0_1: p += sizeofW(StgHeader) + 1; break; case THUNK_0_2: + scavenge_thunk_srt(info); + p += sizeofW(StgHeader) + 2; + break; + case FUN_0_2: - scavenge_srt(info); + scavenge_fun_srt(info); case CONSTR_0_2: p += sizeofW(StgHeader) + 2; break; case THUNK_1_1: + scavenge_thunk_srt(info); + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + p += sizeofW(StgHeader) + 2; + break; + case FUN_1_1: - scavenge_srt(info); + scavenge_fun_srt(info); case CONSTR_1_1: ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); p += sizeofW(StgHeader) + 2; break; case FUN: + scavenge_fun_srt(info); + goto gen_obj; + case THUNK: - scavenge_srt(info); + scavenge_thunk_srt(info); // fall through + gen_obj: case CONSTR: case WEAK: case FOREIGN: case STABLE_NAME: - case BCO: { StgPtr end; @@ -2276,6 +2542,16 @@ scavenge(step *stp) break; } + case BCO: { + StgBCO *bco = (StgBCO *)p; + (StgClosure *)bco->instrs = evacuate((StgClosure *)bco->instrs); + (StgClosure *)bco->literals = evacuate((StgClosure *)bco->literals); + (StgClosure *)bco->ptrs = evacuate((StgClosure *)bco->ptrs); + (StgClosure *)bco->itbls = evacuate((StgClosure *)bco->itbls); + p += bco_sizeW(bco); + break; + } + case IND_PERM: if (stp->gen->no != 0) { #ifdef PROFILING @@ -2346,20 +2622,22 @@ scavenge(step *stp) break; } - case AP_UPD: // same as PAPs - case PAP: - /* Treat a PAP just like a section of stack, not forgetting to - * evacuate the function pointer too... 
- */ - { - StgPAP* pap = (StgPAP *)p; + // A chunk of stack saved in a heap object + case AP_STACK: + { + StgAP_STACK *ap = (StgAP_STACK *)p; - pap->fun = evacuate(pap->fun); - scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args); - p += pap_sizeW(pap); + ap->fun = evacuate(ap->fun); + scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size); + p = (StgPtr)ap->payload + ap->size; break; } - + + case PAP: + case AP: + p = scavenge_PAP((StgPAP *)p); + break; + case ARR_WORDS: // nothing to follow p += arr_words_sizeW((StgArrWords *)p); @@ -2516,8 +2794,8 @@ linear_scan: while (!mark_stack_empty()) { p = pop_mark_stack(); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl((StgClosure *)p); - ASSERT(p && (LOOKS_LIKE_GHC_INFO(info) || IS_HUGS_CONSTR_INFO(info))); q = p; switch (info->type) { @@ -2538,8 +2816,13 @@ linear_scan: } case FUN_2_0: + scavenge_fun_srt(info); + ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + break; + case THUNK_2_0: - scavenge_srt(info); + scavenge_thunk_srt(info); case CONSTR_2_0: ((StgClosure *)p)->payload[1] = evacuate(((StgClosure *)p)->payload[1]); ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); @@ -2547,9 +2830,13 @@ linear_scan: case FUN_1_0: case FUN_1_1: + scavenge_fun_srt(info); + ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); + break; + case THUNK_1_0: case THUNK_1_1: - scavenge_srt(info); + scavenge_thunk_srt(info); case CONSTR_1_0: case CONSTR_1_1: ((StgClosure *)p)->payload[0] = evacuate(((StgClosure *)p)->payload[0]); @@ -2557,23 +2844,31 @@ linear_scan: case FUN_0_1: case FUN_0_2: + scavenge_fun_srt(info); + break; + case THUNK_0_1: case THUNK_0_2: - scavenge_srt(info); + scavenge_thunk_srt(info); + break; + case CONSTR_0_1: case CONSTR_0_2: break; case FUN: + scavenge_fun_srt(info); + goto gen_obj; + case THUNK: - scavenge_srt(info); + scavenge_thunk_srt(info); // fall through + gen_obj: case CONSTR: case WEAK: case FOREIGN: case STABLE_NAME: - case BCO: { StgPtr end; @@ -2584,6 +2879,15 @@ linear_scan: break; } + case BCO: { + StgBCO *bco = (StgBCO *)p; + (StgClosure *)bco->instrs = evacuate((StgClosure *)bco->instrs); + (StgClosure *)bco->literals = evacuate((StgClosure *)bco->literals); + (StgClosure *)bco->ptrs = evacuate((StgClosure *)bco->ptrs); + (StgClosure *)bco->itbls = evacuate((StgClosure *)bco->itbls); + break; + } + case IND_PERM: // don't need to do anything here: the only possible case // is that we're in a 1-space compacting collector, with @@ -2635,18 +2939,20 @@ linear_scan: break; } - case AP_UPD: // same as PAPs - case PAP: - /* Treat a PAP just like a section of stack, not forgetting to - * evacuate the function pointer too... 
- */ - { - StgPAP* pap = (StgPAP *)p; + // A chunk of stack saved in a heap object + case AP_STACK: + { + StgAP_STACK *ap = (StgAP_STACK *)p; - pap->fun = evacuate(pap->fun); - scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args); + ap->fun = evacuate(ap->fun); + scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size); break; } + + case PAP: + case AP: + scavenge_PAP((StgPAP *)p); + break; case MUT_ARR_PTRS: // follow everything @@ -2815,9 +3121,7 @@ scavenge_one(StgPtr p) nat saved_evac_gen = evac_gen; rtsBool no_luck; - ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO((StgClosure *)p)) - || IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p)))); - + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl((StgClosure *)p); switch (info->type) { @@ -2870,7 +3174,7 @@ scavenge_one(StgPtr p) case ARR_WORDS: // nothing to follow break; - + case MUT_ARR_PTRS: { // follow everything @@ -2911,15 +3215,21 @@ scavenge_one(StgPtr p) break; } - case AP_UPD: - case PAP: - { - StgPAP* pap = (StgPAP *)p; - pap->fun = evacuate(pap->fun); - scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args); + case AP_STACK: + { + StgAP_STACK *ap = (StgAP_STACK *)p; + + ap->fun = evacuate(ap->fun); + scavenge_stack((StgPtr)ap->payload, (StgPtr)ap->payload + ap->size); + p = (StgPtr)ap->payload + ap->size; break; } + case PAP: + case AP: + p = scavenge_PAP((StgPAP *)p); + break; + case IND_OLDGEN: // This might happen if for instance a MUT_CONS was pointing to a // THUNK which has since been updated. The IND_OLDGEN will @@ -2959,10 +3269,7 @@ scavenge_mut_once_list(generation *gen) for (; p != END_MUT_LIST; p = next, next = p->mut_link) { - // make sure the info pointer is into text space - ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO(p)) - || IS_HUGS_CONSTR_INFO(GET_INFO(p)))); - + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl(p); /* if (info->type==RBH) @@ -3064,10 +3371,7 @@ scavenge_mutable_list(generation *gen) for (; p != END_MUT_LIST; p = next, next = p->mut_link) { - // make sure the info pointer is into text space - ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO(p)) - || IS_HUGS_CONSTR_INFO(GET_INFO(p)))); - + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl(p); /* if (info->type==RBH) @@ -3252,14 +3556,13 @@ scavenge_static(void) list... */ while (p != END_OF_STATIC_LIST) { + ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl(p); /* if (info->type==RBH) info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure */ // make sure the info pointer is into text space - ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO(p)) - || IS_HUGS_CONSTR_INFO(GET_INFO(p)))); /* Take this object *off* the static_objects list, * and put it on the scavenged_static_objects list. 
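	 * (Editorial note: moving each object off static_objects and onto
	 * scavenged_static_objects as it is processed ensures every live
	 * static closure is scavenged at most once per GC, even when
	 * scavenging it links further static objects onto the list.)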
@@ -3290,8 +3593,11 @@ scavenge_static(void) } case THUNK_STATIC: + scavenge_thunk_srt(info); + break; + case FUN_STATIC: - scavenge_srt(info); + scavenge_fun_srt(info); break; case CONSTR_STATIC: @@ -3321,200 +3627,163 @@ scavenge_static(void) } /* ----------------------------------------------------------------------------- + scavenge a chunk of memory described by a bitmap + -------------------------------------------------------------------------- */ + +static void +scavenge_large_bitmap( StgPtr p, StgLargeBitmap *large_bitmap, nat size ) +{ + nat i, b; + StgWord bitmap; + + b = 0; + bitmap = large_bitmap->bitmap[b]; + for (i = 0; i < size; ) { + if ((bitmap & 1) == 0) { + (StgClosure *)*p = evacuate((StgClosure *)*p); + } + i++; + p++; + if (i % BITS_IN(W_) == 0) { + b++; + bitmap = large_bitmap->bitmap[b]; + } else { + bitmap = bitmap >> 1; + } + } +} + +static inline StgPtr +scavenge_small_bitmap (StgPtr p, nat size, StgWord bitmap) +{ + while (size > 0) { + if ((bitmap & 1) == 0) { + (StgClosure *)*p = evacuate((StgClosure *)*p); + } + p++; + bitmap = bitmap >> 1; + size--; + } + return p; +} + +/* ----------------------------------------------------------------------------- scavenge_stack walks over a section of stack and evacuates all the objects pointed to by it. We can use the same code for walking - PAPs, since these are just sections of copied stack. + AP_STACK_UPDs, since these are just sections of copied stack. -------------------------------------------------------------------------- */ + static void scavenge_stack(StgPtr p, StgPtr stack_end) { - StgPtr q; - const StgInfoTable* info; + const StgRetInfoTable* info; StgWord bitmap; + nat size; //IF_DEBUG(sanity, belch(" scavenging stack between %p and %p", p, stack_end)); /* * Each time around this loop, we are looking at a chunk of stack - * that starts with either a pending argument section or an - * activation record. + * that starts with an activation record. */ while (p < stack_end) { - q = *(P_ *)p; - - // If we've got a tag, skip over that many words on the stack - if (IS_ARG_TAG((W_)q)) { - p += ARG_SIZE(q); - p++; continue; - } - - /* Is q a pointer to a closure? - */ - if (! LOOKS_LIKE_GHC_INFO(q) ) { -#ifdef DEBUG - if ( 0 && LOOKS_LIKE_STATIC_CLOSURE(q) ) { // Is it a static closure? - ASSERT(closure_STATIC((StgClosure *)q)); - } - // otherwise, must be a pointer into the allocation space. -#endif - - (StgClosure *)*p = evacuate((StgClosure *)q); - p++; - continue; - } - - /* - * Otherwise, q must be the info pointer of an activation - * record. All activation records have 'bitmap' style layout - * info. - */ - info = get_itbl((StgClosure *)p); + info = get_ret_itbl((StgClosure *)p); - switch (info->type) { + switch (info->i.type) { - // Dynamic bitmap: the mask is stored on the stack - case RET_DYN: - bitmap = ((StgRetDyn *)p)->liveness; - p = (P_)&((StgRetDyn *)p)->payload[0]; - goto small_bitmap; - - // probably a slow-entry point return address: - case FUN: - case FUN_STATIC: - { -#if 0 - StgPtr old_p = p; - p++; p++; - IF_DEBUG(sanity, - belch("HWL: scavenge_stack: FUN(_STATIC) adjusting p from %p to %p (instead of %p)", - old_p, p, old_p+1)); -#else - p++; // what if FHS!=1 !? -- HWL -#endif - goto follow_srt; - } - - /* Specialised code for update frames, since they're so common. - * We *know* the updatee points to a BLACKHOLE, CAF_BLACKHOLE, - * or BLACKHOLE_BQ, so just inline the code to evacuate it here. 
- */ case UPDATE_FRAME: - { - StgUpdateFrame *frame = (StgUpdateFrame *)p; - + ((StgUpdateFrame *)p)->updatee + = evacuate(((StgUpdateFrame *)p)->updatee); p += sizeofW(StgUpdateFrame); - -#ifndef not_yet - frame->updatee = evacuate(frame->updatee); continue; -#else // specialised code for update frames, not sure if it's worth it. - StgClosure *to; - nat type = get_itbl(frame->updatee)->type; - - if (type == EVACUATED) { - frame->updatee = evacuate(frame->updatee); - continue; - } else { - bdescr *bd = Bdescr((P_)frame->updatee); - step *stp; - if (bd->gen_no > N) { - if (bd->gen_no < evac_gen) { - failed_to_evac = rtsTrue; - } - continue; - } - - // Don't promote blackholes - stp = bd->step; - if (!(stp->gen_no == 0 && - stp->no != 0 && - stp->no == stp->gen->n_steps-1)) { - stp = stp->to; - } - - switch (type) { - case BLACKHOLE: - case CAF_BLACKHOLE: - to = copyPart(frame->updatee, BLACKHOLE_sizeW(), - sizeofW(StgHeader), stp); - frame->updatee = to; - continue; - case BLACKHOLE_BQ: - to = copy(frame->updatee, BLACKHOLE_sizeW(), stp); - frame->updatee = to; - recordMutable((StgMutClosure *)to); - continue; - default: - /* will never be SE_{,CAF_}BLACKHOLE, since we - don't push an update frame for single-entry thunks. KSW 1999-01. */ - barf("scavenge_stack: UPDATE_FRAME updatee"); - } - } -#endif - } // small bitmap (< 32 entries, or 64 on a 64-bit machine) case STOP_FRAME: case CATCH_FRAME: - case SEQ_FRAME: - case RET_BCO: case RET_SMALL: case RET_VEC_SMALL: - bitmap = info->layout.bitmap; - p++; - // this assumes that the payload starts immediately after the info-ptr - small_bitmap: - while (bitmap != 0) { - if ((bitmap & 1) == 0) { - (StgClosure *)*p = evacuate((StgClosure *)*p); - } + bitmap = BITMAP_BITS(info->i.layout.bitmap); + size = BITMAP_SIZE(info->i.layout.bitmap); + // NOTE: the payload starts immediately after the info-ptr, we + // don't have an StgHeader in the same sense as a heap closure. p++; - bitmap = bitmap >> 1; - } - + p = scavenge_small_bitmap(p, size, bitmap); + follow_srt: - scavenge_srt(info); - continue; + scavenge_srt((StgClosure **)info->srt, info->i.srt_bitmap); + continue; + + case RET_BCO: { + StgBCO *bco; + nat size; + + p++; + (StgClosure *)*p = evacuate((StgClosure *)*p); + bco = (StgBCO *)*p; + p++; + size = BCO_BITMAP_SIZE(bco); + scavenge_large_bitmap(p, BCO_BITMAP(bco), size); + p += size; + continue; + } // large bitmap (> 32 entries, or > 64 on a 64-bit machine) case RET_BIG: case RET_VEC_BIG: - { - StgPtr q; - StgLargeBitmap *large_bitmap; - nat i; + { + nat size; - large_bitmap = info->layout.large_bitmap; + size = info->i.layout.large_bitmap->size; p++; + scavenge_large_bitmap(p, info->i.layout.large_bitmap, size); + p += size; + // and don't forget to follow the SRT + goto follow_srt; + } - for (i=0; isize; i++) { - bitmap = large_bitmap->bitmap[i]; - q = p + BITS_IN(W_); - while (bitmap != 0) { - if ((bitmap & 1) == 0) { - (StgClosure *)*p = evacuate((StgClosure *)*p); - } + // Dynamic bitmap: the mask is stored on the stack, and + // there are a number of non-pointers followed by a number + // of pointers above the bitmapped area. (see StgMacros.h, + // HEAP_CHK_GEN). 
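      // (Editorial sketch of the RET_DYN frame layout implied by the
      // code below, sizes in words:
      //
      //   liveness word
      //   RET_DYN_BITMAP_SIZE bitmapped slots
      //   GET_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE non-pointer words
      //   GET_PTRS(dyn) pointer words
      //
      // Only the bitmapped slots and the trailing pointer words are
      // scavenged; the non-pointer words are skipped.)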
+ case RET_DYN: + { + StgWord dyn; + dyn = ((StgRetDyn *)p)->liveness; + + // traverse the bitmap first + bitmap = GET_LIVENESS(dyn); + p = (P_)&((StgRetDyn *)p)->payload[0]; + size = RET_DYN_BITMAP_SIZE; + p = scavenge_small_bitmap(p, size, bitmap); + + // skip over the non-ptr words + p += GET_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE; + + // follow the ptr words + for (size = GET_PTRS(dyn); size > 0; size--) { + (StgClosure *)*p = evacuate((StgClosure *)*p); p++; - bitmap = bitmap >> 1; - } - if (i+1 < large_bitmap->size) { - while (p < q) { - (StgClosure *)*p = evacuate((StgClosure *)*p); - p++; - } - } } + continue; + } - // and don't forget to follow the SRT + case RET_FUN: + { + StgRetFun *ret_fun = (StgRetFun *)p; + StgFunInfoTable *fun_info; + + ret_fun->fun = evacuate(ret_fun->fun); + fun_info = get_fun_itbl(ret_fun->fun); + p = scavenge_arg_block(fun_info, ret_fun->payload); goto follow_srt; - } + } default: - barf("scavenge_stack: weird activation record found on stack: %d", (int)(info->type)); + barf("scavenge_stack: weird activation record found on stack: %d", (int)(info->i.type)); } - } + } } /*----------------------------------------------------------------------------- @@ -3686,65 +3955,63 @@ gcCAFs(void) static void threadLazyBlackHole(StgTSO *tso) { - StgUpdateFrame *update_frame; - StgBlockingQueue *bh; - StgPtr stack_end; - - stack_end = &tso->stack[tso->stack_size]; - update_frame = tso->su; - - while (1) { - switch (get_itbl(update_frame)->type) { - - case CATCH_FRAME: - update_frame = ((StgCatchFrame *)update_frame)->link; - break; - - case UPDATE_FRAME: - bh = (StgBlockingQueue *)update_frame->updatee; - - /* if the thunk is already blackholed, it means we've also - * already blackholed the rest of the thunks on this stack, - * so we can stop early. - * - * The blackhole made for a CAF is a CAF_BLACKHOLE, so they - * don't interfere with this optimisation. - */ - if (bh->header.info == &stg_BLACKHOLE_info) { - return; - } + StgClosure *frame; + StgRetInfoTable *info; + StgBlockingQueue *bh; + StgPtr stack_end; + + stack_end = &tso->stack[tso->stack_size]; + + frame = (StgClosure *)tso->sp; - if (bh->header.info != &stg_BLACKHOLE_BQ_info && - bh->header.info != &stg_CAF_BLACKHOLE_info) { + while (1) { + info = get_ret_itbl(frame); + + switch (info->i.type) { + + case UPDATE_FRAME: + bh = (StgBlockingQueue *)((StgUpdateFrame *)frame)->updatee; + + /* if the thunk is already blackholed, it means we've also + * already blackholed the rest of the thunks on this stack, + * so we can stop early. + * + * The blackhole made for a CAF is a CAF_BLACKHOLE, so they + * don't interfere with this optimisation. + */ + if (bh->header.info == &stg_BLACKHOLE_info) { + return; + } + + if (bh->header.info != &stg_BLACKHOLE_BQ_info && + bh->header.info != &stg_CAF_BLACKHOLE_info) { #if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG) - belch("Unexpected lazy BHing required at 0x%04x",(int)bh); + belch("Unexpected lazy BHing required at 0x%04x",(int)bh); #endif #ifdef PROFILING - // @LDV profiling - // We pretend that bh is now dead. - LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); + // @LDV profiling + // We pretend that bh is now dead. + LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh); #endif - SET_INFO(bh,&stg_BLACKHOLE_info); + SET_INFO(bh,&stg_BLACKHOLE_info); #ifdef PROFILING - // @LDV profiling - // We pretend that bh has just been created. - LDV_recordCreate(bh); + // @LDV profiling + // We pretend that bh has just been created. 
 
 /*-----------------------------------------------------------------------------
@@ -3686,65 +3955,63 @@ gcCAFs(void)
 static void
 threadLazyBlackHole(StgTSO *tso)
 {
-  StgUpdateFrame *update_frame;
-  StgBlockingQueue *bh;
-  StgPtr stack_end;
-
-  stack_end = &tso->stack[tso->stack_size];
-  update_frame = tso->su;
-
-  while (1) {
-    switch (get_itbl(update_frame)->type) {
-
-    case CATCH_FRAME:
-      update_frame = ((StgCatchFrame *)update_frame)->link;
-      break;
-
-    case UPDATE_FRAME:
-      bh = (StgBlockingQueue *)update_frame->updatee;
-
-      /* if the thunk is already blackholed, it means we've also
-       * already blackholed the rest of the thunks on this stack,
-       * so we can stop early.
-       *
-       * The blackhole made for a CAF is a CAF_BLACKHOLE, so they
-       * don't interfere with this optimisation.
-       */
-      if (bh->header.info == &stg_BLACKHOLE_info) {
-	return;
-      }
+    StgClosure *frame;
+    StgRetInfoTable *info;
+    StgBlockingQueue *bh;
+    StgPtr stack_end;
+
+    stack_end = &tso->stack[tso->stack_size];
+
+    frame = (StgClosure *)tso->sp;
 
-      if (bh->header.info != &stg_BLACKHOLE_BQ_info &&
-	  bh->header.info != &stg_CAF_BLACKHOLE_info) {
+    while (1) {
+	info = get_ret_itbl(frame);
+
+	switch (info->i.type) {
+
+	case UPDATE_FRAME:
+	    bh = (StgBlockingQueue *)((StgUpdateFrame *)frame)->updatee;
+
+	    /* if the thunk is already blackholed, it means we've also
+	     * already blackholed the rest of the thunks on this stack,
+	     * so we can stop early.
+	     *
+	     * The blackhole made for a CAF is a CAF_BLACKHOLE, so they
+	     * don't interfere with this optimisation.
+	     */
+	    if (bh->header.info == &stg_BLACKHOLE_info) {
+		return;
+	    }
+
+	    if (bh->header.info != &stg_BLACKHOLE_BQ_info &&
+		bh->header.info != &stg_CAF_BLACKHOLE_info) {
 #if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
-	belch("Unexpected lazy BHing required at 0x%04x",(int)bh);
+		belch("Unexpected lazy BHing required at 0x%04x",(int)bh);
 #endif
 #ifdef PROFILING
-	// @LDV profiling
-	// We pretend that bh is now dead.
-	LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+		// @LDV profiling
+		// We pretend that bh is now dead.
+		LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
 #endif
-	SET_INFO(bh,&stg_BLACKHOLE_info);
+		SET_INFO(bh,&stg_BLACKHOLE_info);
 #ifdef PROFILING
-	// @LDV profiling
-	// We pretend that bh has just been created.
-	LDV_recordCreate(bh);
+		// @LDV profiling
+		// We pretend that bh has just been created.
+		LDV_recordCreate(bh);
 #endif
-      }
-
-      update_frame = update_frame->link;
-      break;
-
-    case SEQ_FRAME:
-      update_frame = ((StgSeqFrame *)update_frame)->link;
-      break;
-
-    case STOP_FRAME:
-      return;
-    default:
-      barf("threadPaused");
+	    }
+
+	    frame = (StgClosure *) ((StgUpdateFrame *)frame + 1);
+	    break;
+
+	case STOP_FRAME:
+	    return;
+
+	    // normal stack frames; do nothing except advance the pointer
+	default:
+	    (StgPtr)frame += stack_frame_sizeW(frame);
+	}
     }
-  }
 }
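
A toy model of the lazy-blackholing step above, under the same early-exit assumption: each update frame names a thunk that is under evaluation, so its header can safely be smashed to BLACKHOLE, and once one updatee is found already blackholed, everything below it was handled on a previous pass. Types and names here are invented for the sketch.

#include <stdio.h>

typedef struct { const char *info; } Closure;

static const char *BLACKHOLE = "BLACKHOLE";

/* updatees[] lists the updatee of each update frame, top of stack first */
static void lazy_blackhole(Closure **updatees, int n)
{
    for (int i = 0; i < n; i++) {
        if (updatees[i]->info == BLACKHOLE)
            return;                   /* already done from here down */
        updatees[i]->info = BLACKHOLE;
    }
}

int main(void)
{
    Closure a = { "THUNK" }, b = { "THUNK" };
    Closure *upds[] = { &a, &b };
    lazy_blackhole(upds, 2);
    printf("%s %s\n", a.info, b.info);
    return 0;
}
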
 
 /* -----------------------------------------------------------------------------
@@ -3756,277 +4023,204 @@ threadLazyBlackHole(StgTSO *tso)
  *
  * -------------------------------------------------------------------------- */
 
+struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; };
+
 static void
 threadSqueezeStack(StgTSO *tso)
 {
-  lnat displacement = 0;
-  StgUpdateFrame *frame;
-  StgUpdateFrame *next_frame;		// Temporally next
-  StgUpdateFrame *prev_frame;		// Temporally previous
-  StgPtr bottom;
-  rtsBool prev_was_update_frame;
-#if DEBUG
-  StgUpdateFrame *top_frame;
-  nat upd_frames=0, stop_frames=0, catch_frames=0, seq_frames=0,
-      bhs=0, squeezes=0;
-  void printObj( StgClosure *obj ); // from Printer.c
+    StgPtr frame;
+    rtsBool prev_was_update_frame;
+    StgClosure *updatee = NULL;
+    StgPtr bottom;
+    StgRetInfoTable *info;
+    StgWord current_gap_size;
+    struct stack_gap *gap;
 
-  top_frame  = tso->su;
-#endif
-
-  bottom = &(tso->stack[tso->stack_size]);
-  frame  = tso->su;
+    // Stage 1:
+    //    Traverse the stack upwards, replacing adjacent update frames
+    //    with a single update frame and a "stack gap".  A stack gap
+    //    contains two values: the size of the gap, and the distance
+    //    to the next gap (or the stack top).
 
-  /* There must be at least one frame, namely the STOP_FRAME.
-   */
-  ASSERT((P_)frame < bottom);
+    bottom = &(tso->stack[tso->stack_size]);
 
-  /* Walk down the stack, reversing the links between frames so that
-   * we can walk back up as we squeeze from the bottom.  Note that
-   * next_frame and prev_frame refer to next and previous as they were
-   * added to the stack, rather than the way we see them in this
-   * walk. (It makes the next loop less confusing.)
-   *
-   * Stop if we find an update frame pointing to a black hole
-   * (see comment in threadLazyBlackHole()).
-   */
-
-  next_frame = NULL;
-  // bottom - sizeof(StgStopFrame) is the STOP_FRAME
-  while ((P_)frame < bottom - sizeofW(StgStopFrame)) {
-    prev_frame = frame->link;
-    frame->link = next_frame;
-    next_frame = frame;
-    frame = prev_frame;
-#if DEBUG
-    IF_DEBUG(sanity,
-	     if (!(frame>=top_frame && frame<=(StgUpdateFrame *)bottom)) {
-	       printObj((StgClosure *)prev_frame);
-	       barf("threadSqueezeStack: current frame is rubbish %p; previous was %p\n",
-		    frame, prev_frame);
-	     })
-    switch (get_itbl(frame)->type) {
-    case UPDATE_FRAME:
-	upd_frames++;
-	if (frame->updatee->header.info == &stg_BLACKHOLE_info)
-	    bhs++;
-	break;
-    case STOP_FRAME:
-	stop_frames++;
-	break;
-    case CATCH_FRAME:
-	catch_frames++;
-	break;
-    case SEQ_FRAME:
-	seq_frames++;
-	break;
-    default:
-	barf("Found non-frame during stack squeezing at %p (prev frame was %p)\n",
-	     frame, prev_frame);
-	printObj((StgClosure *)prev_frame);
-    }
-#endif
-    if (get_itbl(frame)->type == UPDATE_FRAME
-	&& frame->updatee->header.info == &stg_BLACKHOLE_info) {
-	break;
-    }
-  }
+    frame = tso->sp;
 
-  /* Now, we're at the bottom.  Frame points to the lowest update
-   * frame on the stack, and its link actually points to the frame
-   * above. We have to walk back up the stack, squeezing out empty
-   * update frames and turning the pointers back around on the way
-   * back up.
-   *
-   * The bottom-most frame (the STOP_FRAME) has not been altered, and
-   * we never want to eliminate it anyway.  Just walk one step up
-   * before starting to squeeze. When you get to the topmost frame,
-   * remember that there are still some words above it that might have
-   * to be moved.
-   */
-
-  prev_frame = frame;
-  frame = next_frame;
+    ASSERT(frame < bottom);
+
+    prev_was_update_frame = rtsFalse;
+    current_gap_size = 0;
+    gap = (struct stack_gap *) (tso->sp - sizeofW(StgUpdateFrame));
 
-  prev_was_update_frame = (get_itbl(prev_frame)->type == UPDATE_FRAME);
+    while (frame < bottom) {
+
+	info = get_ret_itbl((StgClosure *)frame);
+	switch (info->i.type) {
 
-  /*
-   * Loop through all of the frames (everything except the very
-   * bottom).  Things are complicated by the fact that we have
-   * CATCH_FRAMEs and SEQ_FRAMEs interspersed with the update frames.
-   * We can only squeeze when there are two consecutive UPDATE_FRAMEs.
-   */
-  while (frame != NULL) {
-    StgPtr sp;
-    StgPtr frame_bottom = (P_)frame + sizeofW(StgUpdateFrame);
-    rtsBool is_update_frame;
-
-    next_frame = frame->link;
-    is_update_frame = (get_itbl(frame)->type == UPDATE_FRAME);
+	case UPDATE_FRAME:
+	{
+	    StgUpdateFrame *upd = (StgUpdateFrame *)frame;
 
-    /* Check to see if
-     *   1. both the previous and current frame are update frames
-     *   2. the current frame is empty
-     */
-    if (prev_was_update_frame && is_update_frame &&
-	(P_)prev_frame == frame_bottom + displacement) {
-
-      // Now squeeze out the current frame
-      StgClosure *updatee_keep   = prev_frame->updatee;
-      StgClosure *updatee_bypass = frame->updatee;
-
-#if DEBUG
-      IF_DEBUG(gc, belch("@@ squeezing frame at %p", frame));
-      squeezes++;
-#endif
+	    if (upd->updatee->header.info == &stg_BLACKHOLE_info) {
 
-      /* Deal with blocking queues.  If both updatees have blocked
-       * threads, then we should merge the queues into the update
-       * frame that we're keeping.
-       *
-       * Alternatively, we could just wake them up: they'll just go
-       * straight to sleep on the proper blackhole!  This is less code
-       * and probably less bug prone, although it's probably much
-       * slower --SDM
-       */
-#if 0 // do it properly...
-#  if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
-#    error Unimplemented lazy BH warning.  (KSW 1999-01)
-#  endif
-      if (GET_INFO(updatee_bypass) == stg_BLACKHOLE_BQ_info
-	  || GET_INFO(updatee_bypass) == stg_CAF_BLACKHOLE_info
-	  ) {
-	// Sigh.  It has one.  Don't lose those threads!
-	if (GET_INFO(updatee_keep) == stg_BLACKHOLE_BQ_info) {
-	  // Urgh.  Two queues.  Merge them.
-	  P_ keep_tso = ((StgBlockingQueue *)updatee_keep)->blocking_queue;
-
-	  while (keep_tso->link != END_TSO_QUEUE) {
-	    keep_tso = keep_tso->link;
-	  }
-	  keep_tso->link = ((StgBlockingQueue *)updatee_bypass)->blocking_queue;
+		// found a BLACKHOLE'd update frame; we've been here
+		// before, in a previous GC, so just break out.
 
-	} else {
-	  // For simplicity, just swap the BQ for the BH
-	  P_ temp = updatee_keep;
-
-	  updatee_keep = updatee_bypass;
-	  updatee_bypass = temp;
-
-	  // Record the swap in the kept frame (below)
-	  prev_frame->updatee = updatee_keep;
-	}
-      }
-#endif
+		// Mark the end of the gap, if we're in one.
+		if (current_gap_size != 0) {
+		    gap = (struct stack_gap *)(frame-sizeofW(StgUpdateFrame));
+		}
+
+		frame += sizeofW(StgUpdateFrame);
+		goto done_traversing;
+	    }
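
The gap bookkeeping just introduced can be sketched standalone. Assuming, as in the Stage 1 comment above, that a gap record is written over the bottom-most update frame of a run of adjacent frames and links to the previously found gap (with the stack top terminating the chain), a later pass can walk the chain from the innermost gap outwards. Everything below is invented for illustration; the real gap_size is in words, not bytes.

#include <stdio.h>
#include <stddef.h>

struct stack_gap { size_t gap_size; struct stack_gap *next_gap; };

int main(void)
{
    unsigned char stack[256];
    /* pretend Stage 1 found gaps at offsets 64 and 160 */
    struct stack_gap *g1 = (struct stack_gap *)(stack + 64);
    struct stack_gap *g2 = (struct stack_gap *)(stack + 160);
    g1->gap_size = 2 * sizeof(struct stack_gap);     /* two squeezed frames */
    g1->next_gap = (struct stack_gap *)(stack + 0);  /* "stack top" sentinel */
    g2->gap_size = sizeof(struct stack_gap);
    g2->next_gap = g1;

    /* walk from the innermost gap out to the stack top */
    for (struct stack_gap *g = g2; (unsigned char *)g > stack;
         g = g->next_gap) {
        printf("gap of %zu bytes at offset %td\n",
               g->gap_size, (unsigned char *)g - stack);
    }
    return 0;
}
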
-      TICK_UPD_SQUEEZED();
-      /* wasn't there something about update squeezing and ticky to be
-       * sorted out?  oh yes: we aren't counting each enter properly
-       * in this case.  See the log somewhere.  KSW 1999-04-21
-       *
-       * Check two things: that the two update frames don't point to
-       * the same object, and that the updatee_bypass isn't already an
-       * indirection.  Both of these cases only happen when we're in a
-       * black hole-style loop (and there are multiple update frames
-       * on the stack pointing to the same closure), but they can both
-       * screw us up if we don't check.
-       */
-      if (updatee_bypass != updatee_keep && !closure_IND(updatee_bypass)) {
-	  // this wakes the threads up
-	  UPD_IND_NOLOCK(updatee_bypass, updatee_keep);
-      }
-
-      sp = (P_)frame - 1;	// sp = stuff to slide
-      displacement += sizeofW(StgUpdateFrame);
-
-    } else {
-      // No squeeze for this frame
-      sp = frame_bottom - 1;	// Keep the current frame
-
-      /* Do lazy black-holing.
-       */
-      if (is_update_frame) {
-	StgBlockingQueue *bh = (StgBlockingQueue *)frame->updatee;
-	if (bh->header.info != &stg_BLACKHOLE_info &&
-	    bh->header.info != &stg_BLACKHOLE_BQ_info &&
-	    bh->header.info != &stg_CAF_BLACKHOLE_info) {
+	    if (prev_was_update_frame) {
+
+		TICK_UPD_SQUEEZED();
+		/* wasn't there something about update squeezing and ticky to be
+		 * sorted out?  oh yes: we aren't counting each enter properly
+		 * in this case.  See the log somewhere.  KSW 1999-04-21
+		 *
+		 * Check two things: that the two update frames don't point to
+		 * the same object, and that the updatee_bypass isn't already an
+		 * indirection.  Both of these cases only happen when we're in a
+		 * black hole-style loop (and there are multiple update frames
+		 * on the stack pointing to the same closure), but they can both
+		 * screw us up if we don't check.
+		 */
+		if (upd->updatee != updatee && !closure_IND(upd->updatee)) {
+		    // this wakes the threads up
+		    UPD_IND_NOLOCK(upd->updatee, updatee);
+		}
+
+		// now mark this update frame as a stack gap.  The gap
+		// marker resides in the bottom-most update frame of
+		// the series of adjacent frames, and covers all the
+		// frames in this series.
+		current_gap_size += sizeofW(StgUpdateFrame);
+		((struct stack_gap *)frame)->gap_size = current_gap_size;
+		((struct stack_gap *)frame)->next_gap = gap;
+
+		frame += sizeofW(StgUpdateFrame);
+		continue;
+	    }
+
+	    // single update frame, or the topmost update frame in a series
+	    else {
+		StgBlockingQueue *bh = (StgBlockingQueue *)upd->updatee;
+
+		// Do lazy black-holing
+		if (bh->header.info != &stg_BLACKHOLE_info &&
+		    bh->header.info != &stg_BLACKHOLE_BQ_info &&
+		    bh->header.info != &stg_CAF_BLACKHOLE_info) {
 #if (!defined(LAZY_BLACKHOLING)) && defined(DEBUG)
-	  belch("Unexpected lazy BHing required at 0x%04x",(int)bh);
+		    belch("Unexpected lazy BHing required at 0x%04x",(int)bh);
 #endif
 #ifdef DEBUG
-	  /* zero out the slop so that the sanity checker can tell
-	   * where the next closure is.
-	   */
-	  {
-	    StgInfoTable *info = get_itbl(bh);
-	    nat np = info->layout.payload.ptrs, nw = info->layout.payload.nptrs, i;
-	    /* don't zero out slop for a THUNK_SELECTOR, because its layout
-	     * info is used for a different purpose, and it's exactly the
-	     * same size as a BLACKHOLE in any case.
-	     */
-	    if (info->type != THUNK_SELECTOR) {
-	      for (i = np; i < np + nw; i++) {
-		((StgClosure *)bh)->payload[i] = 0;
-	      }
-	    }
-	  }
+		    /* zero out the slop so that the sanity checker can tell
+		     * where the next closure is.
+		     */
+		    {
+			StgInfoTable *bh_info = get_itbl(bh);
+			nat np = bh_info->layout.payload.ptrs,
+			    nw = bh_info->layout.payload.nptrs, i;
+			/* don't zero out slop for a THUNK_SELECTOR,
+			 * because its layout info is used for a
+			 * different purpose, and it's exactly the
+			 * same size as a BLACKHOLE in any case.
+			 */
+			if (bh_info->type != THUNK_SELECTOR) {
+			    for (i = np; i < np + nw; i++) {
+				((StgClosure *)bh)->payload[i] = 0;
+			    }
+			}
+		    }
 #endif
 #ifdef PROFILING
-	  // @LDV profiling
-	  // We pretend that bh is now dead.
-	  LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
+		    // We pretend that bh is now dead.
+		    LDV_recordDead_FILL_SLOP_DYNAMIC((StgClosure *)bh);
 #endif
-	  //
-	  // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
-	  //
-	  SET_INFO(bh,&stg_BLACKHOLE_info);
+		    // Todo: maybe use SET_HDR() and remove LDV_recordCreate()?
+		    SET_INFO(bh,&stg_BLACKHOLE_info);
 #ifdef PROFILING
-	  // @LDV profiling
-	  // We pretend that bh has just been created.
-	  LDV_recordCreate(bh);
+		    // We pretend that bh has just been created.
+		    LDV_recordCreate(bh);
 #endif
+		}
+
+		prev_was_update_frame = rtsTrue;
+		updatee = upd->updatee;
+		frame += sizeofW(StgUpdateFrame);
+		continue;
+	    }
+	}
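
What the squeeze itself does can be shown with toy closures: the bypassed updatee becomes an indirection to the kept one, so a single update later fixes both. The real UPD_IND_NOLOCK additionally wakes any threads blocked on the bypassed closure, which this invented sketch omits.

#include <stdio.h>

typedef struct Closure {
    const char *info;
    struct Closure *target;          /* only meaningful for indirections */
} Closure;

static const char *IND_info = "IND";

static void squeeze(Closure *bypass, Closure *keep)
{
    /* same two checks as the comment above: distinct objects,
       and not already an indirection */
    if (bypass != keep && bypass->info != IND_info) {
        bypass->info = IND_info;
        bypass->target = keep;
    }
}

int main(void)
{
    Closure keep   = { "BLACKHOLE", NULL };
    Closure bypass = { "BLACKHOLE", NULL };
    squeeze(&bypass, &keep);
    printf("%s -> %s\n", bypass.info, keep.info);
    return 0;
}
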
 
-    }
+	default:
+	    prev_was_update_frame = rtsFalse;
 
-      // Fix the link in the current frame (should point to the frame below)
-      frame->link = prev_frame;
-      prev_was_update_frame = is_update_frame;
-    }
-
-    // Now slide all words from sp up to the next frame
-
-    if (displacement > 0) {
-      P_ next_frame_bottom;
+	    // we're not in a gap... check whether this is the end of a gap
+	    // (an update frame can't be the end of a gap).
+	    if (current_gap_size != 0) {
+		gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
+	    }
+	    current_gap_size = 0;
 
-      if (next_frame != NULL)
-	next_frame_bottom = (P_)next_frame + sizeofW(StgUpdateFrame);
-      else
-	next_frame_bottom = tso->sp - 1;
-
-#if 0
-      IF_DEBUG(gc,
-	       belch("sliding [%p, %p] by %ld", sp, next_frame_bottom,
-		     displacement))
-#endif
-
-      while (sp >= next_frame_bottom) {
-	sp[displacement] = *sp;
-	sp -= 1;
-      }
+	    frame += stack_frame_sizeW((StgClosure *)frame);
+	    continue;
+	}
     }
-    (P_)prev_frame = (P_)frame + displacement;
-    frame = next_frame;
-  }
-
-  tso->sp += displacement;
-  tso->su = prev_frame;
-#if 0
-  IF_DEBUG(gc,
-	   belch("@@ threadSqueezeStack: squeezed %d update-frames; found %d BHs; found %d update-, %d stop-, %d catch, %d seq-frames",
-		 squeezes, bhs, upd_frames, stop_frames, catch_frames, seq_frames))
-#endif
-}
 
+done_traversing:
+
+    // Now we have a stack with gaps in it, and we have to walk down
+    // shoving the stack up to fill in the gaps.  A diagram might
+    // help:
+    //
+    //    +| ********* |
+    //     | ********* | <- sp
+    //     |           |
+    //     |           | <- gap_start
+    //     | ......... |                |
+    //     | stack_gap | <- gap         | chunk_size
+    //     | ......... |                |
+    //     | ......... | <- gap_end     v
+    //     | ********* |
+    //     | ********* |
+    //     | ********* |
+    //    -| ********* |
+    //
+    // 'sp'  points to the current top-of-stack
+    // 'gap' points to the stack_gap structure inside the gap
+    // *****   indicates real stack data
+    // .....   indicates gap
+    // <empty> indicates unused
+    //
+    {
+	void *sp;
+	void *gap_start, *next_gap_start, *gap_end;
+	nat chunk_size;
+
+	next_gap_start = (void *)gap + sizeof(StgUpdateFrame);
+	sp = next_gap_start;
+
+	while ((StgPtr)gap > tso->sp) {
+
+	    // we're working in *bytes* now...
+	    gap_start = next_gap_start;
+	    gap_end = gap_start - gap->gap_size * sizeof(W_);
+
+	    gap = gap->next_gap;
+	    next_gap_start = (void *)gap + sizeof(StgUpdateFrame);
+
+	    chunk_size = gap_end - next_gap_start;
+	    sp -= chunk_size;
+	    memmove(sp, next_gap_start, chunk_size);
+	}
+
+	tso->sp = (StgPtr)sp;
+    }
+}
 
 /* -----------------------------------------------------------------------------
  * Pausing a thread