From: Simon Marlow Date: Wed, 2 Feb 2011 12:30:49 +0000 (+0000) Subject: A small GC optimisation X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=commitdiff_plain;h=bef3da1e26639303fccbf26c312d2833eedb486e A small GC optimisation Store the *number* of the destination generation in the Bdescr struct, so that in evacuate() we don't have to deref gen to get it. This is another improvement ported over from my GC branch. --- diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h index f39b99c..c73c9af 100644 --- a/includes/rts/storage/Block.h +++ b/includes/rts/storage/Block.h @@ -46,26 +46,45 @@ * on a 32-bit machine. */ +// Note: fields marked with [READ ONLY] must not be modified by the +// client of the block allocator API. All other fields can be +// freely modified. + #ifndef CMINUSMINUS typedef struct bdescr_ { - StgPtr start; /* start addr of memory */ - StgPtr free; /* first free byte of memory */ - struct bdescr_ *link; /* used for chaining blocks together */ + + StgPtr start; // [READ ONLY] start addr of memory + + StgPtr free; // first free byte of memory. + // NB. during use this value should lie + // between start and start + blocks * + // BLOCK_SIZE. Values outside this + // range are reserved for use by the + // block allocator. In particular, the + // value (StgPtr)(-1) is used to + // indicate that a block is unallocated. + + struct bdescr_ *link; // used for chaining blocks together + union { - struct bdescr_ *back; /* used (occasionally) for doubly-linked lists*/ - StgWord *bitmap; - StgPtr scan; /* scan pointer for copying GC */ + struct bdescr_ *back; // used (occasionally) for doubly-linked lists + StgWord *bitmap; // bitmap for marking GC + StgPtr scan; // scan pointer for copying GC } u; - struct generation_ *gen; /* generation */ - struct generation_ *dest; /* destination gen */ + struct generation_ *gen; // generation + + StgWord16 gen_no; // gen->no, cached + StgWord16 dest_no; // number of destination generation + StgWord16 _pad1; + + StgWord16 flags; // block flags, see below - StgWord32 blocks; /* no. of blocks (if grp head, 0 otherwise) */ + StgWord32 blocks; // [READ ONLY] no. of blocks in a group + // (if group head, 0 otherwise) - StgWord16 gen_no; - StgWord16 flags; /* block flags, see below */ #if SIZEOF_VOID_P == 8 - StgWord32 _padding[2]; + StgWord32 _padding[3]; #else StgWord32 _padding[0]; #endif diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index c412c96..7cee670 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -194,9 +194,9 @@ extern rtsBool keepCAFs; INLINE_HEADER void initBdescr(bdescr *bd, generation *gen, generation *dest) { - bd->gen = gen; - bd->gen_no = gen->no; - bd->dest = dest; + bd->gen = gen; + bd->gen_no = gen->no; + bd->dest_no = dest->no; } #endif /* RTS_STORAGE_GC_H */ diff --git a/rts/Arena.c b/rts/Arena.c index 6d7a65b..653eb69 100644 --- a/rts/Arena.c +++ b/rts/Arena.c @@ -86,7 +86,7 @@ arenaAlloc( Arena *arena, size_t size ) bd->gen_no = 0; bd->gen = NULL; - bd->dest = NULL; + bd->dest_no = 0; bd->flags = 0; bd->free = bd->start; bd->link = arena->current; diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index 9a5f278..18ace21 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -51,7 +51,7 @@ STATIC_INLINE void evacuate_large(StgPtr p); -------------------------------------------------------------------------- */ STATIC_INLINE StgPtr -alloc_for_copy (nat size, generation *gen) +alloc_for_copy (nat size, nat gen_no) { StgPtr to; gen_workspace *ws; @@ -61,17 +61,16 @@ alloc_for_copy (nat size, generation *gen) * evacuate to an older generation, adjust it here (see comment * by evacuate()). */ - if (gen < gct->evac_gen) { + if (gen_no < gct->evac_gen_no) { if (gct->eager_promotion) { - gen = gct->evac_gen; + gen_no = gct->evac_gen_no; } else { gct->failed_to_evac = rtsTrue; } } - ws = &gct->gens[gen->no]; - // this compiles to a single mem access to gen->abs_no only - + ws = &gct->gens[gen_no]; // zero memory references here + /* chain a new block onto the to-space for the destination gen if * necessary. */ @@ -91,12 +90,12 @@ alloc_for_copy (nat size, generation *gen) STATIC_INLINE GNUC_ATTR_HOT void copy_tag(StgClosure **p, const StgInfoTable *info, - StgClosure *src, nat size, generation *gen, StgWord tag) + StgClosure *src, nat size, nat gen_no, StgWord tag) { StgPtr to, from; nat i; - to = alloc_for_copy(size,gen); + to = alloc_for_copy(size,gen_no); from = (StgPtr)src; to[0] = (W_)info; @@ -133,12 +132,12 @@ copy_tag(StgClosure **p, const StgInfoTable *info, #if defined(PARALLEL_GC) STATIC_INLINE void copy_tag_nolock(StgClosure **p, const StgInfoTable *info, - StgClosure *src, nat size, generation *gen, StgWord tag) + StgClosure *src, nat size, nat gen_no, StgWord tag) { StgPtr to, from; nat i; - to = alloc_for_copy(size,gen); + to = alloc_for_copy(size,gen_no); from = (StgPtr)src; to[0] = (W_)info; @@ -170,7 +169,7 @@ copy_tag_nolock(StgClosure **p, const StgInfoTable *info, */ static rtsBool copyPart(StgClosure **p, StgClosure *src, nat size_to_reserve, - nat size_to_copy, generation *gen) + nat size_to_copy, nat gen_no) { StgPtr to, from; nat i; @@ -194,7 +193,7 @@ spin: info = (W_)src->header.info; #endif - to = alloc_for_copy(size_to_reserve, gen); + to = alloc_for_copy(size_to_reserve, gen_no); from = (StgPtr)src; to[0] = info; @@ -222,9 +221,9 @@ spin: /* Copy wrappers that don't tag the closure after copying */ STATIC_INLINE GNUC_ATTR_HOT void copy(StgClosure **p, const StgInfoTable *info, - StgClosure *src, nat size, generation *gen) + StgClosure *src, nat size, nat gen_no) { - copy_tag(p,info,src,size,gen,0); + copy_tag(p,info,src,size,gen_no,0); } /* ----------------------------------------------------------------------------- @@ -243,10 +242,12 @@ evacuate_large(StgPtr p) { bdescr *bd; generation *gen, *new_gen; + nat gen_no, new_gen_no; gen_workspace *ws; bd = Bdescr(p); gen = bd->gen; + gen_no = bd->gen_no; ACQUIRE_SPIN_LOCK(&gen->sync_large_objects); // already evacuated? @@ -254,7 +255,7 @@ evacuate_large(StgPtr p) /* Don't forget to set the gct->failed_to_evac flag if we didn't get * the desired destination (see comments in evacuate()). */ - if (gen < gct->evac_gen) { + if (gen_no < gct->evac_gen_no) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } @@ -274,16 +275,18 @@ evacuate_large(StgPtr p) /* link it on to the evacuated large object list of the destination gen */ - new_gen = bd->dest; - if (new_gen < gct->evac_gen) { + new_gen_no = bd->dest_no; + + if (new_gen_no < gct->evac_gen_no) { if (gct->eager_promotion) { - new_gen = gct->evac_gen; + new_gen_no = gct->evac_gen_no; } else { gct->failed_to_evac = rtsTrue; } } - ws = &gct->gens[new_gen->no]; + ws = &gct->gens[new_gen_no]; + new_gen = &generations[new_gen_no]; bd->flags |= BF_EVACUATED; initBdescr(bd, new_gen, new_gen->to); @@ -352,7 +355,7 @@ REGPARM1 GNUC_ATTR_HOT void evacuate(StgClosure **p) { bdescr *bd = NULL; - generation *gen; + nat gen_no; StgClosure *q; const StgInfoTable *info; StgWord tag; @@ -475,7 +478,7 @@ loop: // We aren't copying this object, so we have to check // whether it is already in the target generation. (this is // the write barrier). - if (bd->gen < gct->evac_gen) { + if (bd->gen_no < gct->evac_gen_no) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } @@ -499,7 +502,7 @@ loop: return; } - gen = bd->dest; + gen_no = bd->dest_no; info = q->header.info; if (IS_FORWARDING_PTR(info)) @@ -522,8 +525,8 @@ loop: */ StgClosure *e = (StgClosure*)UN_FORWARDING_PTR(info); *p = TAG_CLOSURE(tag,e); - if (gen < gct->evac_gen) { // optimisation - if (Bdescr((P_)e)->gen < gct->evac_gen) { + if (gen_no < gct->evac_gen_no) { // optimisation + if (Bdescr((P_)e)->gen_no < gct->evac_gen_no) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } @@ -540,7 +543,7 @@ loop: case MUT_VAR_DIRTY: case MVAR_CLEAN: case MVAR_DIRTY: - copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen); + copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_no); return; // For ints and chars of low value, save space by replacing references to @@ -552,7 +555,7 @@ loop: case CONSTR_0_1: { #if defined(__PIC__) && defined(mingw32_HOST_OS) - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_no,tag); #else StgWord w = (StgWord)q->payload[0]; if (info == Czh_con_info && @@ -569,7 +572,7 @@ loop: ); } else { - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_no,tag); } #endif return; @@ -578,12 +581,12 @@ loop: case FUN_0_1: case FUN_1_0: case CONSTR_1_0: - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+1,gen_no,tag); return; case THUNK_1_0: case THUNK_0_1: - copy(p,info,q,sizeofW(StgThunk)+1,gen); + copy(p,info,q,sizeofW(StgThunk)+1,gen_no); return; case THUNK_1_1: @@ -592,7 +595,7 @@ loop: #ifdef NO_PROMOTE_THUNKS #error bitrotted #endif - copy(p,info,q,sizeofW(StgThunk)+2,gen); + copy(p,info,q,sizeofW(StgThunk)+2,gen_no); return; case FUN_1_1: @@ -600,21 +603,21 @@ loop: case FUN_0_2: case CONSTR_1_1: case CONSTR_2_0: - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen_no,tag); return; case CONSTR_0_2: - copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen,tag); + copy_tag_nolock(p,info,q,sizeofW(StgHeader)+2,gen_no,tag); return; case THUNK: - copy(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen); + copy(p,info,q,thunk_sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_no); return; case FUN: case IND_PERM: case CONSTR: - copy_tag_nolock(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen,tag); + copy_tag_nolock(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_no,tag); return; case BLACKHOLE: @@ -632,7 +635,7 @@ loop: || i == &stg_WHITEHOLE_info || i == &stg_BLOCKING_QUEUE_CLEAN_info || i == &stg_BLOCKING_QUEUE_DIRTY_info) { - copy(p,info,q,sizeofW(StgInd),gen); + copy(p,info,q,sizeofW(StgInd),gen_no); return; } ASSERT(i != &stg_IND_info); @@ -646,11 +649,11 @@ loop: case WEAK: case PRIM: case MUT_PRIM: - copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen); + copy(p,info,q,sizeW_fromITBL(INFO_PTR_TO_STRUCT(info)),gen_no); return; case BCO: - copy(p,info,q,bco_sizeW((StgBCO *)q),gen); + copy(p,info,q,bco_sizeW((StgBCO *)q),gen_no); return; case THUNK_SELECTOR: @@ -678,20 +681,20 @@ loop: barf("evacuate: stack frame at %p\n", q); case PAP: - copy(p,info,q,pap_sizeW((StgPAP*)q),gen); + copy(p,info,q,pap_sizeW((StgPAP*)q),gen_no); return; case AP: - copy(p,info,q,ap_sizeW((StgAP*)q),gen); + copy(p,info,q,ap_sizeW((StgAP*)q),gen_no); return; case AP_STACK: - copy(p,info,q,ap_stack_sizeW((StgAP_STACK*)q),gen); + copy(p,info,q,ap_stack_sizeW((StgAP_STACK*)q),gen_no); return; case ARR_WORDS: // just copy the block - copy(p,info,q,arr_words_sizeW((StgArrWords *)q),gen); + copy(p,info,q,arr_words_sizeW((StgArrWords *)q),gen_no); return; case MUT_ARR_PTRS_CLEAN: @@ -699,11 +702,11 @@ loop: case MUT_ARR_PTRS_FROZEN: case MUT_ARR_PTRS_FROZEN0: // just copy the block - copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),gen); + copy(p,info,q,mut_arr_ptrs_sizeW((StgMutArrPtrs *)q),gen_no); return; case TSO: - copy(p,info,q,sizeofW(StgTSO),gen); + copy(p,info,q,sizeofW(StgTSO),gen_no); return; case STACK: @@ -718,7 +721,7 @@ loop: rtsBool mine; mine = copyPart(p,(StgClosure *)stack, stack_sizeW(stack), - sizeofW(StgStack), gen); + sizeofW(StgStack), gen_no); if (mine) { new_stack = (StgStack *)*p; move_STACK(stack, new_stack); @@ -732,7 +735,7 @@ loop: } case TREC_CHUNK: - copy(p,info,q,sizeofW(StgTRecChunk),gen); + copy(p,info,q,sizeofW(StgTRecChunk),gen_no); return; default: @@ -836,7 +839,7 @@ selector_chain: unchain_thunk_selectors(prev_thunk_selector, (StgClosure *)p); *q = (StgClosure *)p; // shortcut, behave as for: if (evac) evacuate(q); - if (evac && bd->gen < gct->evac_gen) { + if (evac && bd->gen_no < gct->evac_gen_no) { gct->failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } @@ -1075,7 +1078,7 @@ bale_out: // check whether it was updated in the meantime. *q = (StgClosure *)p; if (evac) { - copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest); + copy(q,(const StgInfoTable *)info_ptr,(StgClosure *)p,THUNK_SELECTOR_sizeW(),bd->dest_no); } unchain_thunk_selectors(prev_thunk_selector, *q); return; diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 38ae1a7..c990835 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -344,11 +344,11 @@ SET_GCT(gc_threads[0]); } // follow roots from the CAF list (used by GHCi) - gct->evac_gen = 0; + gct->evac_gen_no = 0; markCAFs(mark_root, gct); // follow all the roots that the application knows about. - gct->evac_gen = 0; + gct->evac_gen_no = 0; markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads, rtsTrue/*prune sparks*/); @@ -1062,7 +1062,7 @@ gcWorkerThread (Capability *cap) #endif // Every thread evacuates some roots. - gct->evac_gen = 0; + gct->evac_gen_no = 0; markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads, rtsTrue/*prune sparks*/); scavenge_capability_mut_lists(&capabilities[gct->thread_index]); @@ -1399,7 +1399,7 @@ init_gc_thread (gc_thread *t) t->scavenged_static_objects = END_OF_STATIC_LIST; t->scan_bd = NULL; t->mut_lists = capabilities[t->thread_index].mut_lists; - t->evac_gen = 0; + t->evac_gen_no = 0; t->failed_to_evac = rtsFalse; t->eager_promotion = rtsTrue; t->thunk_selector_depth = 0; diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h index c45eb1b..62dd1fb 100644 --- a/rts/sm/GCThread.h +++ b/rts/sm/GCThread.h @@ -146,7 +146,7 @@ typedef struct gc_thread_ { // -------------------- // evacuate flags - generation *evac_gen; // Youngest generation that objects + nat evac_gen_no; // Youngest generation that objects // should be evacuated to in // evacuate(). (Logically an // argument to evacuate, but it's diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c index 72f0ade..f4b576a 100644 --- a/rts/sm/MarkWeak.c +++ b/rts/sm/MarkWeak.c @@ -110,7 +110,7 @@ traverseWeakPtrList(void) /* doesn't matter where we evacuate values/finalizers to, since * these pointers are treated as roots (iff the keys are alive). */ - gct->evac_gen = 0; + gct->evac_gen_no = 0; last_w = &old_weak_ptr_list; for (w = old_weak_ptr_list; w != NULL; w = next_w) { diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index d7e16ea..9ac152a 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -362,11 +362,11 @@ scavenge_fun_srt(const StgInfoTable *info) /* ----------------------------------------------------------------------------- Scavenge a block from the given scan pointer up to bd->free. - evac_gen is set by the caller to be either zero (for a step in a + evac_gen_no is set by the caller to be either zero (for a step in a generation < N) or G where G is the generation of the step being scavenged. - We sometimes temporarily change evac_gen back to zero if we're + We sometimes temporarily change evac_gen_no back to zero if we're scavenging a mutable object where eager promotion isn't such a good idea. -------------------------------------------------------------------------- */ @@ -383,7 +383,7 @@ scavenge_block (bdescr *bd) bd->start, bd->gen_no, bd->u.scan); gct->scan_bd = bd; - gct->evac_gen = bd->gen; + gct->evac_gen_no = bd->gen_no; saved_eager_promotion = gct->eager_promotion; gct->failed_to_evac = rtsFalse; @@ -754,7 +754,7 @@ scavenge_mark_stack(void) StgInfoTable *info; rtsBool saved_eager_promotion; - gct->evac_gen = oldest_gen; + gct->evac_gen_no = oldest_gen->no; saved_eager_promotion = gct->eager_promotion; while ((p = pop_mark_stack())) { @@ -1041,8 +1041,8 @@ scavenge_mark_stack(void) if (gct->failed_to_evac) { gct->failed_to_evac = rtsFalse; - if (gct->evac_gen) { - recordMutableGen_GC((StgClosure *)q, gct->evac_gen->no); + if (gct->evac_gen_no) { + recordMutableGen_GC((StgClosure *)q, gct->evac_gen_no); } } } // while (p = pop_mark_stack()) @@ -1340,8 +1340,10 @@ void scavenge_mutable_list(bdescr *bd, generation *gen) { StgPtr p, q; + nat gen_no; - gct->evac_gen = gen; + gen_no = gen->no; + gct->evac_gen_no = gen_no; for (; bd != NULL; bd = bd->link) { for (q = bd->start; q < bd->free; q++) { p = (StgPtr)*q; @@ -1376,7 +1378,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen) // switch (get_itbl((StgClosure *)p)->type) { case MUT_ARR_PTRS_CLEAN: - recordMutableGen_GC((StgClosure *)p,gen->no); + recordMutableGen_GC((StgClosure *)p,gen_no); continue; case MUT_ARR_PTRS_DIRTY: { @@ -1394,7 +1396,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen) gct->eager_promotion = saved_eager_promotion; gct->failed_to_evac = rtsFalse; - recordMutableGen_GC((StgClosure *)p,gen->no); + recordMutableGen_GC((StgClosure *)p,gen_no); continue; } default: @@ -1404,7 +1406,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen) if (scavenge_one(p)) { // didn't manage to promote everything, so put the // object back on the list. - recordMutableGen_GC((StgClosure *)p,gen->no); + recordMutableGen_GC((StgClosure *)p,gen_no); } } } @@ -1446,7 +1448,7 @@ scavenge_static(void) /* Always evacuate straight to the oldest generation for static * objects */ - gct->evac_gen = oldest_gen; + gct->evac_gen_no = oldest_gen->no; /* keep going until we've scavenged all the objects on the linked list... */ @@ -1741,7 +1743,7 @@ scavenge_large (gen_workspace *ws) bdescr *bd; StgPtr p; - gct->evac_gen = ws->gen; + gct->evac_gen_no = ws->gen->no; bd = ws->todo_large_objects;