From f86e7206ea94b48b94fb61007a1c5d55b8c60f45 Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Wed, 16 Apr 2008 23:22:32 +0000 Subject: [PATCH] Reorganisation to fix problems related to the gct register variable - GCAux.c contains code not compiled with the gct register enabled, it is callable from outside the GC - marking functions are moved to their relevant subsystems, outside the GC - mark_root needs to save the gct register, as it is called from outside the GC --- includes/Stable.h | 4 +- includes/Storage.h | 7 +- rts/Capability.c | 50 ++++++++ rts/Capability.h | 4 + rts/RetainerProfile.c | 8 +- rts/RtsSignals.h | 2 +- rts/Sparks.c | 69 +++++++++++ rts/Sparks.h | 1 + rts/Stable.c | 10 +- rts/Stats.c | 2 - rts/posix/Signals.c | 6 +- rts/sm/Compact.c | 16 ++- rts/sm/Compact.h | 2 +- rts/sm/Evac.c | 1 + rts/sm/Evac.h | 4 - rts/sm/GC.c | 274 ++++---------------------------------------- rts/sm/GC.h | 174 ++-------------------------- rts/sm/GCAux.c | 140 ++++++++++++++++++++++ rts/sm/GCThread.h | 184 +++++++++++++++++++++++++++++ rts/sm/GCUtils.c | 1 + rts/sm/GCUtils.h | 4 - rts/sm/MarkWeak.c | 1 + rts/sm/Scav.c | 1 + rts/sm/Storage.c | 1 - rts/win32/ConsoleHandler.c | 2 +- 25 files changed, 515 insertions(+), 453 deletions(-) create mode 100644 rts/sm/GCAux.c create mode 100644 rts/sm/GCThread.h diff --git a/includes/Stable.h b/includes/Stable.h index 3eabb30..5acc6bc 100644 --- a/includes/Stable.h +++ b/includes/Stable.h @@ -59,8 +59,8 @@ extern void exitStablePtrTable ( void ); extern void enlargeStablePtrTable ( void ); extern StgWord lookupStableName ( StgPtr p ); -extern void markStablePtrTable ( evac_fn evac ); -extern void threadStablePtrTable ( evac_fn evac ); +extern void markStablePtrTable ( evac_fn evac, void *user ); +extern void threadStablePtrTable ( evac_fn evac, void *user ); extern void gcStablePtrTable ( void ); extern void updateStablePtrTable ( rtsBool full ); diff --git a/includes/Storage.h b/includes/Storage.h index 5b8acfa..90e364c 100644 
--- a/includes/Storage.h +++ b/includes/Storage.h @@ -536,16 +536,17 @@ extern void resizeNurseries ( nat blocks ); extern void resizeNurseriesFixed ( nat blocks ); extern lnat countNurseryBlocks ( void ); + /* ----------------------------------------------------------------------------- Functions from GC.c -------------------------------------------------------------------------- */ -typedef void (*evac_fn)(StgClosure **); +typedef void (*evac_fn)(void *user, StgClosure **root); extern void threadPaused ( Capability *cap, StgTSO * ); extern StgClosure * isAlive ( StgClosure *p ); -extern void markCAFs ( evac_fn evac ); -extern void GetRoots ( evac_fn evac ); +extern void markCAFs ( evac_fn evac, void *user ); +extern void GetRoots ( evac_fn evac, void *user ); /* ----------------------------------------------------------------------------- Stats 'n' DEBUG stuff diff --git a/rts/Capability.c b/rts/Capability.c index ffaa372..4950df6 100644 --- a/rts/Capability.c +++ b/rts/Capability.c @@ -759,3 +759,53 @@ freeCapability (Capability *cap) { #endif } +/* --------------------------------------------------------------------------- + Mark everything directly reachable from the Capabilities. When + using multiple GC threads, each GC thread marks all Capabilities + for which (c `mod` n == 0), for Capability c and thread n. + ------------------------------------------------------------------------ */ + +void +markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta) +{ + nat i; + Capability *cap; + Task *task; + + // Each GC thread is responsible for following roots from the + // Capability of the same number. There will usually be the same + // or fewer Capabilities as GC threads, but just in case there + // are more, we mark every Capability whose number is the GC + // thread's index plus a multiple of the number of GC threads. 
+ for (i = i0; i < n_capabilities; i += delta) { + cap = &capabilities[i]; + evac(user, (StgClosure **)(void *)&cap->run_queue_hd); + evac(user, (StgClosure **)(void *)&cap->run_queue_tl); +#if defined(THREADED_RTS) + evac(user, (StgClosure **)(void *)&cap->wakeup_queue_hd); + evac(user, (StgClosure **)(void *)&cap->wakeup_queue_tl); +#endif + for (task = cap->suspended_ccalling_tasks; task != NULL; + task=task->next) { + debugTrace(DEBUG_sched, + "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id); + evac(user, (StgClosure **)(void *)&task->suspended_tso); + } + +#if defined(THREADED_RTS) + markSparkQueue (evac, user, cap); +#endif + } + +#if !defined(THREADED_RTS) + evac(user, (StgClosure **)(void *)&blocked_queue_hd); + evac(user, (StgClosure **)(void *)&blocked_queue_tl); + evac(user, (StgClosure **)(void *)&sleeping_queue); +#endif +} + +void +markCapabilities (evac_fn evac, void *user) +{ + markSomeCapabilities(evac, user, 0, 1); +} diff --git a/rts/Capability.h b/rts/Capability.h index c50fe7f..71c0ff6 100644 --- a/rts/Capability.h +++ b/rts/Capability.h @@ -235,6 +235,10 @@ extern void grabCapability (Capability **pCap); // Free a capability on exit void freeCapability (Capability *cap); +// FOr the GC: +void markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta); +void markCapabilities (evac_fn evac, void *user); + /* ----------------------------------------------------------------------------- * INLINE functions... private below here * -------------------------------------------------------------------------- */ diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index dec886a..b17f24f 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -1800,7 +1800,7 @@ inner_loop: * Compute the retainer set for every object reachable from *tl. 
* -------------------------------------------------------------------------- */ static void -retainRoot( StgClosure **tl ) +retainRoot(void *user STG_UNUSED, StgClosure **tl) { StgClosure *c; @@ -1837,7 +1837,7 @@ computeRetainerSet( void ) RetainerSet tmpRetainerSet; #endif - GetRoots(retainRoot); // for scheduler roots + markCapabilities(retainRoot, NULL); // for scheduler roots // This function is called after a major GC, when key, value, and finalizer // all are guaranteed to be valid, or reachable. @@ -1846,10 +1846,10 @@ computeRetainerSet( void ) // for retainer profilng. for (weak = weak_ptr_list; weak != NULL; weak = weak->link) // retainRoot((StgClosure *)weak); - retainRoot((StgClosure **)&weak); + retainRoot(NULL, (StgClosure **)&weak); // Consider roots from the stable ptr table. - markStablePtrTable(retainRoot); + markStablePtrTable(retainRoot, NULL); // The following code resets the rs field of each unvisited mutable // object (computing sumOfNewCostExtra and updating costArray[] when diff --git a/rts/RtsSignals.h b/rts/RtsSignals.h index 6d9374a..721561e 100644 --- a/rts/RtsSignals.h +++ b/rts/RtsSignals.h @@ -73,7 +73,7 @@ extern void awaitUserSignals(void); * Evacuate the handler queue. _Assumes_ that console event delivery * has already been blocked. */ -extern void markSignalHandlers (evac_fn evac); +extern void markSignalHandlers (evac_fn evac, void *user); #endif /* RTS_USER_SIGNALS */ diff --git a/rts/Sparks.c b/rts/Sparks.c index 9a843fa..0f429e2 100644 --- a/rts/Sparks.c +++ b/rts/Sparks.c @@ -162,6 +162,74 @@ newSpark (StgRegTable *reg, StgClosure *p) return 1; } +/* ----------------------------------------------------------------------------- + * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an + * implicit slide i.e. 
after marking all sparks are at the beginning of the + * spark pool and the spark pool only contains sparkable closures + * -------------------------------------------------------------------------- */ + +void +markSparkQueue (evac_fn evac, void *user, Capability *cap) +{ + StgClosure **sparkp, **to_sparkp; + nat n, pruned_sparks; // stats only + StgSparkPool *pool; + + PAR_TICKY_MARK_SPARK_QUEUE_START(); + + n = 0; + pruned_sparks = 0; + + pool = &(cap->r.rSparks); + + ASSERT_SPARK_POOL_INVARIANTS(pool); + +#if defined(PARALLEL_HASKELL) + // stats only + n = 0; + pruned_sparks = 0; +#endif + + sparkp = pool->hd; + to_sparkp = pool->hd; + while (sparkp != pool->tl) { + ASSERT(*sparkp!=NULL); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp))); + // ToDo?: statistics gathering here (also for GUM!) + if (closure_SHOULD_SPARK(*sparkp)) { + evac(user, sparkp); + *to_sparkp++ = *sparkp; + if (to_sparkp == pool->lim) { + to_sparkp = pool->base; + } + n++; + } else { + pruned_sparks++; + } + sparkp++; + if (sparkp == pool->lim) { + sparkp = pool->base; + } + } + pool->tl = to_sparkp; + + PAR_TICKY_MARK_SPARK_QUEUE_END(n); + +#if defined(PARALLEL_HASKELL) + debugTrace(DEBUG_sched, + "marked %d sparks and pruned %d sparks on [%x]", + n, pruned_sparks, mytid); +#else + debugTrace(DEBUG_sched, + "marked %d sparks and pruned %d sparks", + n, pruned_sparks); +#endif + + debugTrace(DEBUG_sched, + "new spark queue len=%d; (hd=%p; tl=%p)\n", + sparkPoolSize(pool), pool->hd, pool->tl); +} + #else StgInt @@ -171,6 +239,7 @@ newSpark (StgRegTable *reg STG_UNUSED, StgClosure *p STG_UNUSED) return 1; } + #endif /* PARALLEL_HASKELL || THREADED_RTS */ diff --git a/rts/Sparks.h b/rts/Sparks.h index aa2baf5..57c02e6 100644 --- a/rts/Sparks.h +++ b/rts/Sparks.h @@ -14,6 +14,7 @@ StgClosure * findSpark (Capability *cap); void initSparkPools (void); void freeSparkPool (StgSparkPool *pool); void createSparkThread (Capability *cap, StgClosure *p); +void markSparkQueue (evac_fn evac, void 
*user, Capability *cap); INLINE_HEADER void discardSparks (StgSparkPool *pool); INLINE_HEADER nat sparkPoolSize (StgSparkPool *pool); diff --git a/rts/Stable.c b/rts/Stable.c index a6b8ddf..046fb3b 100644 --- a/rts/Stable.c +++ b/rts/Stable.c @@ -323,7 +323,7 @@ enlargeStablePtrTable(void) * -------------------------------------------------------------------------- */ void -markStablePtrTable(evac_fn evac) +markStablePtrTable(evac_fn evac, void *user) { snEntry *p, *end_stable_ptr_table; StgPtr q; @@ -347,7 +347,7 @@ markStablePtrTable(evac_fn evac) // if the ref is non-zero, treat addr as a root if (p->ref != 0) { - evac((StgClosure **)&p->addr); + evac(user, (StgClosure **)&p->addr); } } } @@ -362,7 +362,7 @@ markStablePtrTable(evac_fn evac) * -------------------------------------------------------------------------- */ void -threadStablePtrTable( evac_fn evac ) +threadStablePtrTable( evac_fn evac, void *user ) { snEntry *p, *end_stable_ptr_table; StgPtr q; @@ -372,12 +372,12 @@ threadStablePtrTable( evac_fn evac ) for (p = stable_ptr_table+1; p < end_stable_ptr_table; p++) { if (p->sn_obj != NULL) { - evac((StgClosure **)&p->sn_obj); + evac(user, (StgClosure **)&p->sn_obj); } q = p->addr; if (q && (q < (P_)stable_ptr_table || q >= (P_)end_stable_ptr_table)) { - evac((StgClosure **)&p->addr); + evac(user, (StgClosure **)&p->addr); } } } diff --git a/rts/Stats.c b/rts/Stats.c index a00b639..b03984d 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -17,8 +17,6 @@ #include "Profiling.h" #include "GetTime.h" #include "GC.h" -#include "GCUtils.h" -#include "Evac.h" #if USE_PAPI #include "Papi.h" diff --git a/rts/posix/Signals.c b/rts/posix/Signals.c index a902b80..27f09b0 100644 --- a/rts/posix/Signals.c +++ b/rts/posix/Signals.c @@ -392,19 +392,19 @@ startSignalHandlers(Capability *cap) #if !defined(THREADED_RTS) void -markSignalHandlers (evac_fn evac) +markSignalHandlers (evac_fn evac, void *user) { StgPtr *p; p = next_pending_handler; while (p != pending_handler_buf) { 
p--; - evac((StgClosure **)p); + evac(user, (StgClosure **)p); } } #else void -markSignalHandlers (evac_fn evac STG_UNUSED) +markSignalHandlers (evac_fn evac STG_UNUSED, void *user STG_UNUSED) { } #endif diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index 44b5242..8e5dd64 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -109,6 +109,12 @@ thread (StgClosure **p) } } +static void +thread_root (void *user STG_UNUSED, StgClosure **p) +{ + thread(p); +} + // This version of thread() takes a (void *), used to circumvent // warnings from gcc about pointer punning and strict aliasing. STATIC_INLINE void thread_ (void *p) { thread((StgClosure **)p); } @@ -955,13 +961,13 @@ update_bkwd_compact( step *stp ) } void -compact(void) +compact(StgClosure *static_objects) { nat g, s, blocks; step *stp; // 1. thread the roots - GetRoots((evac_fn)thread); + markCapabilities((evac_fn)thread_root, NULL); // the weak pointer lists... if (weak_ptr_list != NULL) { @@ -999,13 +1005,13 @@ compact(void) } // the static objects - thread_static(gct->scavenged_static_objects /* ToDo: ok? */); + thread_static(static_objects /* ToDo: ok? */); // the stable pointer table - threadStablePtrTable((evac_fn)thread); + threadStablePtrTable((evac_fn)thread_root, NULL); // the CAF list (used by GHCi) - markCAFs((evac_fn)thread); + markCAFs((evac_fn)thread_root, NULL); // 2. 
update forward ptrs for (g = 0; g < RtsFlags.GcFlags.generations; g++) { diff --git a/rts/sm/Compact.h b/rts/sm/Compact.h index 9b3ecb3..8f037c3 100644 --- a/rts/sm/Compact.h +++ b/rts/sm/Compact.h @@ -74,6 +74,6 @@ is_marked(StgPtr p, bdescr *bd) return (*bitmap_word & bit_mask); } -void compact(void); +extern void compact (StgClosure *static_objects); #endif /* GCCOMPACT_H */ diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index daa6018..b0b7ef5 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -16,6 +16,7 @@ #include "MBlock.h" #include "Evac.h" #include "GC.h" +#include "GCThread.h" #include "GCUtils.h" #include "Compact.h" #include "Prelude.h" diff --git a/rts/sm/Evac.h b/rts/sm/Evac.h index 893f79e..c0db814 100644 --- a/rts/sm/Evac.h +++ b/rts/sm/Evac.h @@ -31,7 +31,3 @@ REGPARM1 void evacuate (StgClosure **p); REGPARM1 void evacuate1 (StgClosure **p); extern lnat thunk_selector_depth; - -#if defined(PROF_SPIN) && defined(THREADED_RTS) -StgWord64 whitehole_spin; -#endif diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 7a6889c..b1584f1 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -43,6 +43,7 @@ #include "Papi.h" #include "GC.h" +#include "GCThread.h" #include "Compact.h" #include "Evac.h" #include "Scav.h" @@ -132,7 +133,7 @@ SpinLock recordMutableGen_sync; Static function declarations -------------------------------------------------------------------------- */ -static void mark_root (StgClosure **root); +static void mark_root (void *user, StgClosure **root); static void zero_static_object_list (StgClosure* first_static); static nat initialise_N (rtsBool force_major_gc); static void alloc_gc_threads (void); @@ -322,15 +323,15 @@ GarbageCollect ( rtsBool force_major_gc ) // follow roots from the CAF list (used by GHCi) gct->evac_step = 0; - markCAFs(mark_root); + markCAFs(mark_root, gct); // follow all the roots that the application knows about. 
gct->evac_step = 0; - GetRoots(mark_root); + markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads); #if defined(RTS_USER_SIGNALS) // mark the signal handlers (signals should be already blocked) - markSignalHandlers(mark_root); + markSignalHandlers(mark_root, gct); #endif // Mark the weak pointer list, and prepare to detect dead weak pointers. @@ -338,7 +339,7 @@ GarbageCollect ( rtsBool force_major_gc ) initWeakForGC(); // Mark the stable pointer table. - markStablePtrTable(mark_root); + markStablePtrTable(mark_root, gct); /* ------------------------------------------------------------------------- * Repeatedly scavenge all the areas we know about until there's no @@ -389,7 +390,7 @@ GarbageCollect ( rtsBool force_major_gc ) if (major_gc && oldest_gen->steps[0].is_compacted) { // save number of blocks for stats oldgen_saved_blocks = oldest_gen->steps[0].n_old_blocks; - compact(); + compact(gct->scavenged_static_objects); } IF_DEBUG(sanity, checkGlobalTSOList(rtsFalse)); @@ -738,212 +739,6 @@ GarbageCollect ( rtsBool force_major_gc ) } /* ----------------------------------------------------------------------------- - * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an - * implicit slide i.e. 
after marking all sparks are at the beginning of the - * spark pool and the spark pool only contains sparkable closures - * -------------------------------------------------------------------------- */ - -#ifdef THREADED_RTS -static void -markSparkQueue (evac_fn evac, Capability *cap) -{ - StgClosure **sparkp, **to_sparkp; - nat n, pruned_sparks; // stats only - StgSparkPool *pool; - - PAR_TICKY_MARK_SPARK_QUEUE_START(); - - n = 0; - pruned_sparks = 0; - - pool = &(cap->r.rSparks); - - ASSERT_SPARK_POOL_INVARIANTS(pool); - -#if defined(PARALLEL_HASKELL) - // stats only - n = 0; - pruned_sparks = 0; -#endif - - sparkp = pool->hd; - to_sparkp = pool->hd; - while (sparkp != pool->tl) { - ASSERT(*sparkp!=NULL); - ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp))); - // ToDo?: statistics gathering here (also for GUM!) - if (closure_SHOULD_SPARK(*sparkp)) { - evac(sparkp); - *to_sparkp++ = *sparkp; - if (to_sparkp == pool->lim) { - to_sparkp = pool->base; - } - n++; - } else { - pruned_sparks++; - } - sparkp++; - if (sparkp == pool->lim) { - sparkp = pool->base; - } - } - pool->tl = to_sparkp; - - PAR_TICKY_MARK_SPARK_QUEUE_END(n); - -#if defined(PARALLEL_HASKELL) - debugTrace(DEBUG_sched, - "marked %d sparks and pruned %d sparks on [%x]", - n, pruned_sparks, mytid); -#else - debugTrace(DEBUG_sched, - "marked %d sparks and pruned %d sparks", - n, pruned_sparks); -#endif - - debugTrace(DEBUG_sched, - "new spark queue len=%d; (hd=%p; tl=%p)\n", - sparkPoolSize(pool), pool->hd, pool->tl); -} -#endif - -/* --------------------------------------------------------------------------- - Where are the roots that we know about? 
- - - all the threads on the runnable queue - - all the threads on the blocked queue - - all the threads on the sleeping queue - - all the thread currently executing a _ccall_GC - - all the "main threads" - - ------------------------------------------------------------------------ */ - -void -GetRoots( evac_fn evac ) -{ - nat i; - Capability *cap; - Task *task; - - // Each GC thread is responsible for following roots from the - // Capability of the same number. There will usually be the same - // or fewer Capabilities as GC threads, but just in case there - // are more, we mark every Capability whose number is the GC - // thread's index plus a multiple of the number of GC threads. - for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) { - cap = &capabilities[i]; - evac((StgClosure **)(void *)&cap->run_queue_hd); - evac((StgClosure **)(void *)&cap->run_queue_tl); -#if defined(THREADED_RTS) - evac((StgClosure **)(void *)&cap->wakeup_queue_hd); - evac((StgClosure **)(void *)&cap->wakeup_queue_tl); -#endif - for (task = cap->suspended_ccalling_tasks; task != NULL; - task=task->next) { - debugTrace(DEBUG_sched, - "evac'ing suspended TSO %lu", (unsigned long)task->suspended_tso->id); - evac((StgClosure **)(void *)&task->suspended_tso); - } - -#if defined(THREADED_RTS) - markSparkQueue(evac,cap); -#endif - } - -#if !defined(THREADED_RTS) - evac((StgClosure **)(void *)&blocked_queue_hd); - evac((StgClosure **)(void *)&blocked_queue_tl); - evac((StgClosure **)(void *)&sleeping_queue); -#endif -} - -/* ----------------------------------------------------------------------------- - isAlive determines whether the given closure is still alive (after - a garbage collection) or not. It returns the new address of the - closure if it is alive, or NULL otherwise. - - NOTE: Use it before compaction only! - It untags and (if needed) retags pointers to closures. 
- -------------------------------------------------------------------------- */ - - -StgClosure * -isAlive(StgClosure *p) -{ - const StgInfoTable *info; - bdescr *bd; - StgWord tag; - StgClosure *q; - - while (1) { - /* The tag and the pointer are split, to be merged later when needed. */ - tag = GET_CLOSURE_TAG(p); - q = UNTAG_CLOSURE(p); - - ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); - info = get_itbl(q); - - // ignore static closures - // - // ToDo: for static closures, check the static link field. - // Problem here is that we sometimes don't set the link field, eg. - // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. - // - if (!HEAP_ALLOCED(q)) { - return p; - } - - // ignore closures in generations that we're not collecting. - bd = Bdescr((P_)q); - if (bd->gen_no > N) { - return p; - } - - // if it's a pointer into to-space, then we're done - if (bd->flags & BF_EVACUATED) { - return p; - } - - // large objects use the evacuated flag - if (bd->flags & BF_LARGE) { - return NULL; - } - - // check the mark bit for compacted steps - if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) { - return p; - } - - switch (info->type) { - - case IND: - case IND_STATIC: - case IND_PERM: - case IND_OLDGEN: // rely on compatible layout with StgInd - case IND_OLDGEN_PERM: - // follow indirections - p = ((StgInd *)q)->indirectee; - continue; - - case EVACUATED: - // alive! - return ((StgEvacuated *)q)->evacuee; - - case TSO: - if (((StgTSO *)q)->what_next == ThreadRelocated) { - p = (StgClosure *)((StgTSO *)q)->link; - continue; - } - return NULL; - - default: - // dead. - return NULL; - } - } -} - -/* ----------------------------------------------------------------------------- Figure out which generation to collect, initialise N and major_gc. Also returns the total number of blocks in generations that will be @@ -1111,7 +906,7 @@ gc_thread_work (void) // Every thread evacuates some roots. 
gct->evac_step = 0; - GetRoots(mark_root); + markSomeCapabilities(mark_root, gct, gct->thread_index, n_gc_threads); loop: scavenge_loop(); @@ -1461,13 +1256,24 @@ init_gc_thread (gc_thread *t) } /* ----------------------------------------------------------------------------- - Function we pass to GetRoots to evacuate roots. + Function we pass to evacuate roots. -------------------------------------------------------------------------- */ static void -mark_root(StgClosure **root) +mark_root(void *user, StgClosure **root) { - evacuate(root); + // we stole a register for gct, but this function is called from + // *outside* the GC where the register variable is not in effect, + // so we need to save and restore it here. NB. only call + // mark_root() from the main GC thread, otherwise gct will be + // incorrect. + gc_thread *saved_gct; + saved_gct = gct; + gct = user; + + evacuate(root); + + gct = saved_gct; } /* ----------------------------------------------------------------------------- @@ -1488,42 +1294,6 @@ zero_static_object_list(StgClosure* first_static) } } -/* ----------------------------------------------------------------------------- - Reverting CAFs - -------------------------------------------------------------------------- */ - -void -revertCAFs( void ) -{ - StgIndStatic *c; - - for (c = (StgIndStatic *)revertible_caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - SET_INFO(c, c->saved_info); - c->saved_info = NULL; - // could, but not necessary: c->static_link = NULL; - } - revertible_caf_list = NULL; -} - -void -markCAFs( evac_fn evac ) -{ - StgIndStatic *c; - - for (c = (StgIndStatic *)caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - evac(&c->indirectee); - } - for (c = (StgIndStatic *)revertible_caf_list; c != NULL; - c = (StgIndStatic *)c->static_link) - { - evac(&c->indirectee); - } -} - /* ---------------------------------------------------------------------------- Update the pointers from the task list diff --git 
a/rts/sm/GC.h b/rts/sm/GC.h index 62a4872..92e87d1 100644 --- a/rts/sm/GC.h +++ b/rts/sm/GC.h @@ -14,172 +14,8 @@ #ifndef GC_H #define GC_H -#include "OSThreads.h" - -/* ----------------------------------------------------------------------------- - General scheme - - ToDo: move this to the wiki when the implementation is done. - - We're only going to try to parallelise the copying GC for now. The - Plan is as follows. - - Each thread has a gc_thread structure (see below) which holds its - thread-local data. We'll keep a pointer to this in a thread-local - variable, or possibly in a register. - - In the gc_thread structure is a step_workspace for each step. The - primary purpose of the step_workspace is to hold evacuated objects; - when an object is evacuated, it is copied to the "todo" block in - the thread's workspace for the appropriate step. When the todo - block is full, it is pushed to the global step->todos list, which - is protected by a lock. (in fact we intervene a one-place buffer - here to reduce contention). - - A thread repeatedly grabs a block of work from one of the - step->todos lists, scavenges it, and keeps the scavenged block on - its own ws->scavd_list (this is to avoid unnecessary contention - returning the completed buffers back to the step: we can just - collect them all later). - - When there is no global work to do, we start scavenging the todo - blocks in the workspaces. This is where the scan_bd field comes - in: we can scan the contents of the todo block, when we have - scavenged the contents of the todo block (up to todo_bd->free), we - don't want to move this block immediately to the scavd_list, - because it is probably only partially full. So we remember that we - have scanned up to this point by saving the block in ws->scan_bd, - with the current scan pointer in ws->scan. Later, when more - objects have been copied to this block, we can come back and scan - the rest. 
When we visit this workspace again in the future, - scan_bd may still be the same as todo_bd, or it might be different: - if enough objects were copied into this block that it filled up, - then we will have allocated a new todo block, but *not* pushed the - old one to the step, because it is partially scanned. - - The reason to leave scanning the todo blocks until last is that we - want to deal with full blocks as far as possible. - ------------------------------------------------------------------------- */ - - -/* ----------------------------------------------------------------------------- - Step Workspace - - A step workspace exists for each step for each GC thread. The GC - thread takes a block from the todos list of the step into the - scanbd and then scans it. Objects referred to by those in the scan - block are copied into the todo or scavd blocks of the relevant step. - - ------------------------------------------------------------------------- */ - -typedef struct step_workspace_ { - step * step; // the step for this workspace - struct gc_thread_ * gct; // the gc_thread that contains this workspace - - // where objects to be scavenged go - bdescr * todo_bd; - StgPtr todo_free; // free ptr for todo_bd - StgPtr todo_lim; // lim for todo_bd - - bdescr * buffer_todo_bd; // buffer to reduce contention - // on the step's todos list - - // where large objects to be scavenged go - bdescr * todo_large_objects; - - // Objects that have already been, scavenged. - bdescr * scavd_list; - nat n_scavd_blocks; // count of blocks in this list - - // Partially-full, scavenged, blocks - bdescr * part_list; - unsigned int n_part_blocks; // count of above - -} step_workspace; - -/* ---------------------------------------------------------------------------- - GC thread object - - Every GC thread has one of these. It contains all the step specific - workspaces and other GC thread loacl information. 
At some later - point it maybe useful to move this other into the TLS store of the - GC threads - ------------------------------------------------------------------------- */ - -typedef struct gc_thread_ { -#ifdef THREADED_RTS - OSThreadId id; // The OS thread that this struct belongs to - Mutex wake_mutex; - Condition wake_cond; // So we can go to sleep between GCs - rtsBool wakeup; - rtsBool exit; -#endif - nat thread_index; // a zero based index identifying the thread - - bdescr * free_blocks; // a buffer of free blocks for this thread - // during GC without accessing the block - // allocators spin lock. - - StgClosure* static_objects; // live static objects - StgClosure* scavenged_static_objects; // static objects scavenged so far - - lnat gc_count; // number of GCs this thread has done - - // block that is currently being scanned - bdescr * scan_bd; - - // -------------------- - // evacuate flags - - step *evac_step; // Youngest generation that objects - // should be evacuated to in - // evacuate(). (Logically an - // argument to evacuate, but it's - // static a lot of the time so we - // optimise it into a per-thread - // variable). - - rtsBool failed_to_evac; // failure to evacuate an object typically - // Causes it to be recorded in the mutable - // object list - - rtsBool eager_promotion; // forces promotion to the evac gen - // instead of the to-space - // corresponding to the object - - lnat thunk_selector_depth; // ummm.... not used as of now - -#ifdef USE_PAPI - int papi_events; -#endif - - // ------------------- - // stats - - lnat copied; - lnat scanned; - lnat any_work; - lnat no_work; - lnat scav_find_work; - - // ------------------- - // workspaces - - // array of workspaces, indexed by stp->abs_no. This is placed - // directly at the end of the gc_thread structure so that we can get from - // the gc_thread pointer to a workspace using only pointer - // arithmetic, no memory access. 
This happens in the inner loop - // of the GC, see Evac.c:alloc_for_copy(). - step_workspace steps[]; -} gc_thread; - extern nat N; extern rtsBool major_gc; -extern nat n_gc_threads; - -extern gc_thread **gc_threads; -register gc_thread *gct __asm__("%rbx"); -// extern gc_thread *gct; // this thread's gct TODO: make thread-local extern bdescr *mark_stack_bdescr; extern StgPtr *mark_stack; @@ -196,7 +32,15 @@ extern long copied; extern nat mutlist_MUTVARS, mutlist_MUTARRS, mutlist_MVARS, mutlist_OTHERS; #endif -StgClosure * isAlive(StgClosure *p); +extern void markSomeCapabilities (evac_fn evac, void *user, nat i0, nat delta); + +#ifdef THREADED_RTS +extern SpinLock gc_alloc_block_sync; +#endif + +#if defined(PROF_SPIN) && defined(THREADED_RTS) +StgWord64 whitehole_spin; +#endif #define WORK_UNIT_WORDS 128 diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c new file mode 100644 index 0000000..52e0aef --- /dev/null +++ b/rts/sm/GCAux.c @@ -0,0 +1,140 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2008 + * + * Functions called from outside the GC need to be separate from GC.c, + * because GC.c is compiled with register variable(s). + * + * ---------------------------------------------------------------------------*/ + +#include "Rts.h" +#include "Storage.h" +#include "MBlock.h" +#include "GC.h" +#include "Compact.h" +#include "Task.h" +#include "Capability.h" +#include "Trace.h" +#include "Schedule.h" +// DO NOT include "GCThread.h", we don't want the register variable + +/* ----------------------------------------------------------------------------- + isAlive determines whether the given closure is still alive (after + a garbage collection) or not. It returns the new address of the + closure if it is alive, or NULL otherwise. + + NOTE: Use it before compaction only! + It untags and (if needed) retags pointers to closures. 
+ -------------------------------------------------------------------------- */ + +StgClosure * +isAlive(StgClosure *p) +{ + const StgInfoTable *info; + bdescr *bd; + StgWord tag; + StgClosure *q; + + while (1) { + /* The tag and the pointer are split, to be merged later when needed. */ + tag = GET_CLOSURE_TAG(p); + q = UNTAG_CLOSURE(p); + + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); + info = get_itbl(q); + + // ignore static closures + // + // ToDo: for static closures, check the static link field. + // Problem here is that we sometimes don't set the link field, eg. + // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. + // + if (!HEAP_ALLOCED(q)) { + return p; + } + + // ignore closures in generations that we're not collecting. + bd = Bdescr((P_)q); + if (bd->gen_no > N) { + return p; + } + + // if it's a pointer into to-space, then we're done + if (bd->flags & BF_EVACUATED) { + return p; + } + + // large objects use the evacuated flag + if (bd->flags & BF_LARGE) { + return NULL; + } + + // check the mark bit for compacted steps + if ((bd->flags & BF_COMPACTED) && is_marked((P_)q,bd)) { + return p; + } + + switch (info->type) { + + case IND: + case IND_STATIC: + case IND_PERM: + case IND_OLDGEN: // rely on compatible layout with StgInd + case IND_OLDGEN_PERM: + // follow indirections + p = ((StgInd *)q)->indirectee; + continue; + + case EVACUATED: + // alive! + return ((StgEvacuated *)q)->evacuee; + + case TSO: + if (((StgTSO *)q)->what_next == ThreadRelocated) { + p = (StgClosure *)((StgTSO *)q)->link; + continue; + } + return NULL; + + default: + // dead. 
+ return NULL; + } + } +} + +/* ----------------------------------------------------------------------------- + Reverting CAFs + -------------------------------------------------------------------------- */ + +void +revertCAFs( void ) +{ + StgIndStatic *c; + + for (c = (StgIndStatic *)revertible_caf_list; c != NULL; + c = (StgIndStatic *)c->static_link) + { + SET_INFO(c, c->saved_info); + c->saved_info = NULL; + // could, but not necessary: c->static_link = NULL; + } + revertible_caf_list = NULL; +} + +void +markCAFs (evac_fn evac, void *user) +{ + StgIndStatic *c; + + for (c = (StgIndStatic *)caf_list; c != NULL; + c = (StgIndStatic *)c->static_link) + { + evac(user, &c->indirectee); + } + for (c = (StgIndStatic *)revertible_caf_list; c != NULL; + c = (StgIndStatic *)c->static_link) + { + evac(user, &c->indirectee); + } +} diff --git a/rts/sm/GCThread.h b/rts/sm/GCThread.h new file mode 100644 index 0000000..ba12615 --- /dev/null +++ b/rts/sm/GCThread.h @@ -0,0 +1,184 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2006 + * + * Generational garbage collector + * + * Documentation on the architecture of the Garbage Collector can be + * found in the online commentary: + * + * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage/GC + * + * ---------------------------------------------------------------------------*/ + +#ifndef GCTHREAD_H +#define GCTHREAD_H + +#include "OSThreads.h" + +/* ----------------------------------------------------------------------------- + General scheme + + ToDo: move this to the wiki when the implementation is done. + + We're only going to try to parallelise the copying GC for now. The + Plan is as follows. + + Each thread has a gc_thread structure (see below) which holds its + thread-local data. We'll keep a pointer to this in a thread-local + variable, or possibly in a register. + + In the gc_thread structure is a step_workspace for each step. 
The + primary purpose of the step_workspace is to hold evacuated objects; + when an object is evacuated, it is copied to the "todo" block in + the thread's workspace for the appropriate step. When the todo + block is full, it is pushed to the global step->todos list, which + is protected by a lock. (in fact we intervene a one-place buffer + here to reduce contention). + + A thread repeatedly grabs a block of work from one of the + step->todos lists, scavenges it, and keeps the scavenged block on + its own ws->scavd_list (this is to avoid unnecessary contention + returning the completed buffers back to the step: we can just + collect them all later). + + When there is no global work to do, we start scavenging the todo + blocks in the workspaces. This is where the scan_bd field comes + in: we can scan the contents of the todo block, when we have + scavenged the contents of the todo block (up to todo_bd->free), we + don't want to move this block immediately to the scavd_list, + because it is probably only partially full. So we remember that we + have scanned up to this point by saving the block in ws->scan_bd, + with the current scan pointer in ws->scan. Later, when more + objects have been copied to this block, we can come back and scan + the rest. When we visit this workspace again in the future, + scan_bd may still be the same as todo_bd, or it might be different: + if enough objects were copied into this block that it filled up, + then we will have allocated a new todo block, but *not* pushed the + old one to the step, because it is partially scanned. + + The reason to leave scanning the todo blocks until last is that we + want to deal with full blocks as far as possible. + ------------------------------------------------------------------------- */ + + +/* ----------------------------------------------------------------------------- + Step Workspace + + A step workspace exists for each step for each GC thread. 
The GC + thread takes a block from the todos list of the step into the + scan_bd and then scans it. Objects referred to by those in the scan + block are copied into the todo or scavd blocks of the relevant step. + + ------------------------------------------------------------------------- */ + +typedef struct step_workspace_ { + step * step; // the step for this workspace + struct gc_thread_ * gct; // the gc_thread that contains this workspace + + // where objects to be scavenged go + bdescr * todo_bd; + StgPtr todo_free; // free ptr for todo_bd + StgPtr todo_lim; // lim for todo_bd + + bdescr * buffer_todo_bd; // buffer to reduce contention + // on the step's todos list + + // where large objects to be scavenged go + bdescr * todo_large_objects; + + // Objects that have already been scavenged. + bdescr * scavd_list; + nat n_scavd_blocks; // count of blocks in this list + + // Partially-full, scavenged, blocks + bdescr * part_list; + unsigned int n_part_blocks; // count of above + +} step_workspace; + +/* ---------------------------------------------------------------------------- + GC thread object + + Every GC thread has one of these. It contains all the step specific + workspaces and other GC thread local information. At some later + point it may be useful to move this other into the TLS store of the + GC threads + ------------------------------------------------------------------------- */ + +typedef struct gc_thread_ { +#ifdef THREADED_RTS + OSThreadId id; // The OS thread that this struct belongs to + Mutex wake_mutex; + Condition wake_cond; // So we can go to sleep between GCs + rtsBool wakeup; + rtsBool exit; +#endif + nat thread_index; // a zero based index identifying the thread + + bdescr * free_blocks; // a buffer of free blocks for this thread + // during GC without accessing the block + // allocators spin lock.
+ + StgClosure* static_objects; // live static objects + StgClosure* scavenged_static_objects; // static objects scavenged so far + + lnat gc_count; // number of GCs this thread has done + + // block that is currently being scanned + bdescr * scan_bd; + + // -------------------- + // evacuate flags + + step *evac_step; // Youngest generation that objects + // should be evacuated to in + // evacuate(). (Logically an + // argument to evacuate, but it's + // static a lot of the time so we + // optimise it into a per-thread + // variable). + + rtsBool failed_to_evac; // failure to evacuate an object typically + // Causes it to be recorded in the mutable + // object list + + rtsBool eager_promotion; // forces promotion to the evac gen + // instead of the to-space + // corresponding to the object + + lnat thunk_selector_depth; // ummm.... not used as of now + +#ifdef USE_PAPI + int papi_events; +#endif + + // ------------------- + // stats + + lnat copied; + lnat scanned; + lnat any_work; + lnat no_work; + lnat scav_find_work; + + // ------------------- + // workspaces + + // array of workspaces, indexed by stp->abs_no. This is placed + // directly at the end of the gc_thread structure so that we can get from + // the gc_thread pointer to a workspace using only pointer + // arithmetic, no memory access. This happens in the inner loop + // of the GC, see Evac.c:alloc_for_copy(). 
+ step_workspace steps[]; +} gc_thread; + + +extern nat n_gc_threads; + +extern gc_thread **gc_threads; +register gc_thread *gct __asm__("%rbx"); +// extern gc_thread *gct; // this thread's gct TODO: make thread-local + +#endif // GCTHREAD_H + diff --git a/rts/sm/GCUtils.c b/rts/sm/GCUtils.c index 36fc4f3..465954f 100644 --- a/rts/sm/GCUtils.c +++ b/rts/sm/GCUtils.c @@ -15,6 +15,7 @@ #include "RtsFlags.h" #include "Storage.h" #include "GC.h" +#include "GCThread.h" #include "GCUtils.h" #include "Printer.h" #include "Trace.h" diff --git a/rts/sm/GCUtils.h b/rts/sm/GCUtils.h index 34657c2..249e0cf 100644 --- a/rts/sm/GCUtils.h +++ b/rts/sm/GCUtils.h @@ -13,10 +13,6 @@ #include "SMP.h" -#ifdef THREADED_RTS -extern SpinLock gc_alloc_block_sync; -#endif - bdescr *allocBlock_sync(void); void freeChain_sync(bdescr *bd); diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c index eca5c54..2aa1a4e 100644 --- a/rts/sm/MarkWeak.c +++ b/rts/sm/MarkWeak.c @@ -15,6 +15,7 @@ #include "Storage.h" #include "MarkWeak.h" #include "GC.h" +#include "GCThread.h" #include "Evac.h" #include "Trace.h" #include "Schedule.h" diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index f92ef49..814744f 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -16,6 +16,7 @@ #include "Storage.h" #include "MBlock.h" #include "GC.h" +#include "GCThread.h" #include "GCUtils.h" #include "Compact.h" #include "Evac.h" diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 856362d..c987add 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -30,7 +30,6 @@ #include "OSMem.h" #include "Trace.h" #include "GC.h" -#include "GCUtils.h" #include "Evac.h" #include diff --git a/rts/win32/ConsoleHandler.c b/rts/win32/ConsoleHandler.c index 76ebea0..2cd10ec 100644 --- a/rts/win32/ConsoleHandler.c +++ b/rts/win32/ConsoleHandler.c @@ -199,7 +199,7 @@ void startSignalHandlers(Capability *cap) * Evacuate the handler stack. _Assumes_ that console event delivery * has already been blocked. 
*/ -void markSignalHandlers (evac_fn evac STG_UNUSED) +void markSignalHandlers (evac_fn evac STG_UNUSED, void *user STG_UNUSED) { // nothing to mark; the console handler is a StablePtr which is // already treated as a root by the GC. -- 1.7.10.4