X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=a3611757d8a5222885be6a6f3b9a8e3307817e49;hb=dbbf15c0f141357aa49b583286174867baadb821;hp=17bc2041ef1b9daac8e309a804dd1f7ead75ff1a;hpb=d5bd3e829c47c03157cf41cad581d2df44dfd81b;p=ghc-hetmet.git diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 17bc204..a361175 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -11,7 +11,7 @@ * * ---------------------------------------------------------------------------*/ -#include "PosixSource.h" +// #include "PosixSource.h" #include "Rts.h" #include "RtsFlags.h" #include "RtsUtils.h" @@ -40,6 +40,7 @@ #include "RetainerProfile.h" #include "RaiseAsync.h" #include "Sparks.h" +#include "Papi.h" #include "GC.h" #include "Compact.h" @@ -47,8 +48,10 @@ #include "Scav.h" #include "GCUtils.h" #include "MarkWeak.h" +#include "Sparks.h" #include // for memset() +#include /* ----------------------------------------------------------------------------- Global variables @@ -88,11 +91,6 @@ * We build up a static object list while collecting generations 0..N, * which is then appended to the static object list of generation N+1. */ -StgClosure* static_objects; // live static objects -StgClosure* scavenged_static_objects; // static objects scavenged so far -#ifdef THREADED_RTS -SpinLock static_objects_sync; -#endif /* N is the oldest generation being collected, where the generations * are numbered starting at 0. A major GC (indicated by the major_gc @@ -116,12 +114,19 @@ nat mutlist_MUTVARS, /* Thread-local data for each GC thread */ -gc_thread *gc_threads = NULL; -gc_thread *gct = NULL; // this thread's gct TODO: make thread-local +gc_thread **gc_threads = NULL; +// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local + +// Number of threads running in *this* GC. Affects how many +// step->todos[] lists we have to look in to find work. +nat n_gc_threads; // For stats: long copied; // *words* copied & scavenged during this GC -long scavd_copied; // *words* copied only during this GC + +#ifdef THREADED_RTS +SpinLock recordMutableGen_sync; +#endif /* ----------------------------------------------------------------------------- Static function declarations @@ -129,7 +134,7 @@ long scavd_copied; // *words* copied only during this GC static void mark_root (StgClosure **root); static void zero_static_object_list (StgClosure* first_static); -static void initialise_N (rtsBool force_major_gc); +static nat initialise_N (rtsBool force_major_gc); static void alloc_gc_threads (void); static void init_collected_gen (nat g, nat threads); static void init_uncollected_gen (nat g, nat threads); @@ -137,6 +142,12 @@ static void init_gc_thread (gc_thread *t); static void update_task_list (void); static void resize_generations (void); static void resize_nursery (void); +static void start_gc_threads (void); +static void gc_thread_work (void); +static nat inc_running (void); +static nat dec_running (void); +static void wakeup_gc_threads (nat n_threads); +static void shutdown_gc_threads (nat n_threads); #if 0 && defined(DEBUG) static void gcCAFs (void); @@ -172,9 +183,11 @@ GarbageCollect ( rtsBool force_major_gc ) step *stp; lnat live, allocated; lnat oldgen_saved_blocks = 0; - nat n_threads; // number of threads participating in GC + gc_thread *saved_gct; + nat g, s, t, n; - nat g, s, t; + // necessary if we stole a callee-saves register for gct: + saved_gct = gct; #ifdef PROFILING CostCentreStack *prev_CCS; @@ -191,16 +204,11 @@ GarbageCollect ( rtsBool force_major_gc ) } #endif - // tell the STM to discard any cached closures it's hoping to re-use - stmPreGCHook(); - // tell the stats department that we've started a GC stat_startGC(); -#ifdef DEBUG - // check for memory leaks if DEBUG is on - memInventory(); -#endif + // tell the STM to discard any cached closures it's hoping to re-use + stmPreGCHook(); #ifdef DEBUG mutlist_MUTVARS = 0; @@ -221,23 +229,29 @@ GarbageCollect ( rtsBool force_major_gc ) /* Figure out which generation to collect */ - initialise_N(force_major_gc); + n = initialise_N(force_major_gc); /* Allocate + initialise the gc_thread structures. */ alloc_gc_threads(); + /* Start threads, so they can be spinning up while we finish initialisation. + */ + start_gc_threads(); + /* How many threads will be participating in this GC? * We don't try to parallelise minor GC. */ #if defined(THREADED_RTS) - if (N == 0) { - n_threads = 1; + if (n < (4*1024*1024 / BLOCK_SIZE)) { + n_gc_threads = 1; } else { - n_threads = RtsFlags.ParFlags.gcThreads; + n_gc_threads = RtsFlags.ParFlags.gcThreads; } + trace(TRACE_gc|DEBUG_gc, "GC (gen %d): %dKB to collect, using %d thread(s)", + N, n * (BLOCK_SIZE / 1024), n_gc_threads); #else - n_threads = 1; + n_gc_threads = 1; #endif #ifdef RTS_GTK_FRONTPANEL @@ -246,25 +260,27 @@ GarbageCollect ( rtsBool force_major_gc ) } #endif +#ifdef DEBUG + // check for memory leaks if DEBUG is on + memInventory(traceClass(DEBUG_gc)); +#endif + // check stack sanity *before* GC (ToDo: check all threads) IF_DEBUG(sanity, checkFreeListSanity()); - /* Initialise the static object lists - */ - static_objects = END_OF_STATIC_LIST; - scavenged_static_objects = END_OF_STATIC_LIST; -#ifdef THREADED_RTS - initSpinLock(&static_objects_sync); -#endif + // Initialise all our gc_thread structures + for (t = 0; t < n_gc_threads; t++) { + init_gc_thread(gc_threads[t]); + } // Initialise all the generations/steps that we're collecting. for (g = 0; g <= N; g++) { - init_collected_gen(g,n_threads); + init_collected_gen(g,n_gc_threads); } // Initialise all the generations/steps that we're *not* collecting. for (g = N+1; g < RtsFlags.GcFlags.generations; g++) { - init_uncollected_gen(g,n_threads); + init_uncollected_gen(g,n_gc_threads); } /* Allocate a mark stack if we're doing a major collection. @@ -278,18 +294,8 @@ GarbageCollect ( rtsBool force_major_gc ) mark_stack_bdescr = NULL; } - // Initialise all our gc_thread structures - for (t = 0; t < n_threads; t++) { - init_gc_thread(&gc_threads[t]); - } - - // Initialise stats - copied = 0; - scavd_copied = 0; - - // start threads etc. - // For now, we just have one thread, and set gct to gc_threads[0] - gct = &gc_threads[0]; + // this is the main thread + gct = gc_threads[0]; /* ----------------------------------------------------------------------- * follow all the roots that we know about: @@ -299,25 +305,36 @@ GarbageCollect ( rtsBool force_major_gc ) * Also do them in reverse generation order, for the usual reason: * namely to reduce the likelihood of spurious old->new pointers. */ - { - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { generations[g].saved_mut_list = generations[g].mut_list; generations[g].mut_list = allocBlock(); - // mut_list always has at least one block. - } - for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { + // mut_list always has at least one block. + } + + // the main thread is running: this prevents any other threads from + // exiting prematurely, so we can start them now. + // NB. do this after the mutable lists have been saved above, otherwise + // the other GC threads will be writing into the old mutable lists. + inc_running(); + wakeup_gc_threads(n_gc_threads); + + for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { scavenge_mutable_list(&generations[g]); - } } // follow roots from the CAF list (used by GHCi) - gct->evac_gen = 0; + gct->evac_step = 0; markCAFs(mark_root); // follow all the roots that the application knows about. - gct->evac_gen = 0; + gct->evac_step = 0; GetRoots(mark_root); +#if defined(RTS_USER_SIGNALS) + // mark the signal handlers (signals should be already blocked) + markSignalHandlers(mark_root); +#endif + // Mark the weak pointer list, and prepare to detect dead weak pointers. markWeakPtrList(); initWeakForGC(); @@ -329,27 +346,32 @@ GarbageCollect ( rtsBool force_major_gc ) * Repeatedly scavenge all the areas we know about until there's no * more scavenging to be done. */ - { - rtsBool flag; - loop: - flag = rtsFalse; - - scavenge_loop(); - - // if any blackholes are alive, make the threads that wait on - // them alive too. - if (traverseBlackholeQueue()) - flag = rtsTrue; - - if (flag) { goto loop; } + for (;;) + { + gc_thread_work(); + // The other threads are now stopped. We might recurse back to + // here, but from now on this is the only thread. + + // if any blackholes are alive, make the threads that wait on + // them alive too. + if (traverseBlackholeQueue()) { + inc_running(); + continue; + } + + // must be last... invariant is that everything is fully + // scavenged at this point. + if (traverseWeakPtrList()) { // returns rtsTrue if evaced something + inc_running(); + continue; + } - // must be last... invariant is that everything is fully - // scavenged at this point. - if (traverseWeakPtrList()) { // returns rtsTrue if evaced something - goto loop; - } + // If we get to here, there's really nothing left to do. + break; } + shutdown_gc_threads(n_gc_threads); + // Update pointers from the Task list update_task_list(); @@ -392,36 +414,42 @@ GarbageCollect ( rtsBool force_major_gc ) step_workspace *ws; bdescr *prev; - for (t = 0; t < n_threads; t++) { - thr = &gc_threads[t]; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - ws = &thr->steps[g][s]; - if (g==0 && s==0) continue; - - ASSERT( ws->scan_bd == ws->todo_bd ); - ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 ); - - // Push the final block - if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); } - - // update stats: we haven't counted the block at the - // front of the scavd_list yet. - scavd_copied += ws->scavd_list->free - ws->scavd_list->start; - - ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); - - prev = ws->scavd_list; - for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; // now from-space - prev = bd; - } - prev->link = ws->stp->blocks; - ws->stp->blocks = ws->scavd_list; - ws->stp->n_blocks += ws->n_scavd_blocks; - ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks); - } + for (t = 0; t < n_gc_threads; t++) { + thr = gc_threads[t]; + + // not step 0 + for (s = 1; s < total_steps; s++) { + ws = &thr->steps[s]; + // Not true? + // ASSERT( ws->scan_bd == ws->todo_bd ); + ASSERT( ws->scan_bd ? ws->scan_bd->u.scan == ws->scan_bd->free : 1 ); + + // Push the final block + if (ws->scan_bd) { push_scanned_block(ws->scan_bd, ws); } + + ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); + + prev = ws->part_list; + for (bd = ws->part_list; bd != NULL; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; // now from-space + prev = bd; + } + if (prev != NULL) { + prev->link = ws->scavd_list; + } + for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; // now from-space + prev = bd; + } + prev->link = ws->step->blocks; + if (ws->part_list != NULL) { + ws->step->blocks = ws->part_list; + } else { + ws->step->blocks = ws->scavd_list; + } + ws->step->n_blocks += ws->n_part_blocks; + ws->step->n_blocks += ws->n_scavd_blocks; + ASSERT(countBlocks(ws->step->blocks) == ws->step->n_blocks); } } } @@ -441,6 +469,21 @@ GarbageCollect ( rtsBool force_major_gc ) /* run through all the generations/steps and tidy up */ + copied = 0; + { + nat i; + for (i=0; i < n_gc_threads; i++) { + if (n_gc_threads > 1) { + trace(TRACE_gc,"thread %d:", i); + trace(TRACE_gc," copied %ld", gc_threads[i]->copied * sizeof(W_)); + trace(TRACE_gc," any_work %ld", gc_threads[i]->any_work); + trace(TRACE_gc," no_work %ld", gc_threads[i]->no_work); + trace(TRACE_gc," scav_find_work %ld", gc_threads[i]->scav_find_work); + } + copied += gc_threads[i]->copied; + } + } + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { if (g <= N) { @@ -549,7 +592,9 @@ GarbageCollect ( rtsBool force_major_gc ) resize_generations(); // Guess the amount of live data for stats. - live = calcLive(); + live = calcLiveBlocks() * BLOCK_SIZE_W; + debugTrace(DEBUG_gc, "Slop: %ldKB", + (live - calcLiveWords()) / (1024/sizeof(W_))); // Free the small objects allocated via allocate(), since this will // all have been copied into G0S1 now. @@ -592,12 +637,19 @@ GarbageCollect ( rtsBool force_major_gc ) #ifdef PROFILING // resetStaticObjectForRetainerProfiling() must be called before // zeroing below. - resetStaticObjectForRetainerProfiling(); + if (n_gc_threads > 1) { + barf("profiling is currently broken with multi-threaded GC"); + // ToDo: fix the gct->scavenged_static_objects below + } + resetStaticObjectForRetainerProfiling(gct->scavenged_static_objects); #endif // zero the scavenged static object list if (major_gc) { - zero_static_object_list(scavenged_static_objects); + nat i; + for (i = 0; i < n_gc_threads; i++) { + zero_static_object_list(gc_threads[i]->scavenged_static_objects); + } } // Reset the nursery @@ -620,7 +672,7 @@ GarbageCollect ( rtsBool force_major_gc ) IF_DEBUG(sanity, checkSanity()); // extra GC trace info - IF_DEBUG(gc, statDescribeGens()); + if (traceClass(TRACE_gc|DEBUG_gc)) statDescribeGens(); #ifdef DEBUG // symbol-table based profiling @@ -634,7 +686,7 @@ GarbageCollect ( rtsBool force_major_gc ) #ifdef DEBUG // check for memory leaks if DEBUG is on - memInventory(); + memInventory(traceClass(DEBUG_gc)); #endif #ifdef RTS_GTK_FRONTPANEL @@ -644,7 +696,7 @@ GarbageCollect ( rtsBool force_major_gc ) #endif // ok, GC over: tell the stats department what happened. - stat_endGC(allocated, live, copied, scavd_copied, N); + stat_endGC(allocated, live, copied, N); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { @@ -654,8 +706,80 @@ GarbageCollect ( rtsBool force_major_gc ) #endif RELEASE_SM_LOCK; + + gct = saved_gct; } +/* ----------------------------------------------------------------------------- + * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an + * implicit slide i.e. after marking all sparks are at the beginning of the + * spark pool and the spark pool only contains sparkable closures + * -------------------------------------------------------------------------- */ + +#ifdef THREADED_RTS +static void +markSparkQueue (evac_fn evac, Capability *cap) +{ + StgClosure **sparkp, **to_sparkp; + nat n, pruned_sparks; // stats only + StgSparkPool *pool; + + PAR_TICKY_MARK_SPARK_QUEUE_START(); + + n = 0; + pruned_sparks = 0; + + pool = &(cap->r.rSparks); + + ASSERT_SPARK_POOL_INVARIANTS(pool); + +#if defined(PARALLEL_HASKELL) + // stats only + n = 0; + pruned_sparks = 0; +#endif + + sparkp = pool->hd; + to_sparkp = pool->hd; + while (sparkp != pool->tl) { + ASSERT(*sparkp!=NULL); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp))); + // ToDo?: statistics gathering here (also for GUM!) + if (closure_SHOULD_SPARK(*sparkp)) { + evac(sparkp); + *to_sparkp++ = *sparkp; + if (to_sparkp == pool->lim) { + to_sparkp = pool->base; + } + n++; + } else { + pruned_sparks++; + } + sparkp++; + if (sparkp == pool->lim) { + sparkp = pool->base; + } + } + pool->tl = to_sparkp; + + PAR_TICKY_MARK_SPARK_QUEUE_END(n); + +#if defined(PARALLEL_HASKELL) + debugTrace(DEBUG_sched, + "marked %d sparks and pruned %d sparks on [%x]", + n, pruned_sparks, mytid); +#else + debugTrace(DEBUG_sched, + "marked %d sparks and pruned %d sparks", + n, pruned_sparks); +#endif + + debugTrace(DEBUG_sched, + "new spark queue len=%d; (hd=%p; tl=%p)\n", + sparkPoolSize(pool), pool->hd, pool->tl); +} +#endif + /* --------------------------------------------------------------------------- Where are the roots that we know about? @@ -667,11 +791,6 @@ GarbageCollect ( rtsBool force_major_gc ) ------------------------------------------------------------------------ */ -/* This has to be protected either by the scheduler monitor, or by the - garbage collection monitor (probably the latter). - KH @ 25/10/99 -*/ - void GetRoots( evac_fn evac ) { @@ -679,26 +798,12 @@ GetRoots( evac_fn evac ) Capability *cap; Task *task; -#if defined(GRAN) - for (i=0; i<=RtsFlags.GranFlags.proc; i++) { - if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL))) - evac((StgClosure **)&run_queue_hds[i]); - if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL))) - evac((StgClosure **)&run_queue_tls[i]); - - if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL))) - evac((StgClosure **)&blocked_queue_hds[i]); - if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL))) - evac((StgClosure **)&blocked_queue_tls[i]); - if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL))) - evac((StgClosure **)&ccalling_threads[i]); - } - - markEventQueue(); - -#else /* !GRAN */ - - for (i = 0; i < n_capabilities; i++) { + // Each GC thread is responsible for following roots from the + // Capability of the same number. There will usually be the same + // or fewer Capabilities as GC threads, but just in case there + // are more, we mark every Capability whose number is the GC + // thread's index plus a multiple of the number of GC threads. + for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) { cap = &capabilities[i]; evac((StgClosure **)(void *)&cap->run_queue_hd); evac((StgClosure **)(void *)&cap->run_queue_tl); @@ -713,26 +818,16 @@ GetRoots( evac_fn evac ) evac((StgClosure **)(void *)&task->suspended_tso); } +#if defined(THREADED_RTS) + markSparkQueue(evac,cap); +#endif } - #if !defined(THREADED_RTS) evac((StgClosure **)(void *)&blocked_queue_hd); evac((StgClosure **)(void *)&blocked_queue_tl); evac((StgClosure **)(void *)&sleeping_queue); #endif -#endif - - // evac((StgClosure **)&blackhole_queue); - -#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) || defined(GRAN) - markSparkQueue(evac); -#endif - -#if defined(RTS_USER_SIGNALS) - // mark the signal handlers (signals should be already blocked) - markSignalHandlers(evac); -#endif } /* ----------------------------------------------------------------------------- @@ -823,38 +918,67 @@ isAlive(StgClosure *p) /* ----------------------------------------------------------------------------- Figure out which generation to collect, initialise N and major_gc. + + Also returns the total number of blocks in generations that will be + collected. -------------------------------------------------------------------------- */ -static void +static nat initialise_N (rtsBool force_major_gc) { - nat g; + int g; + nat s, blocks, blocks_total; + + blocks = 0; + blocks_total = 0; if (force_major_gc) { - N = RtsFlags.GcFlags.generations - 1; - major_gc = rtsTrue; + N = RtsFlags.GcFlags.generations - 1; } else { - N = 0; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - if (generations[g].steps[0].n_blocks + - generations[g].steps[0].n_large_blocks - >= generations[g].max_blocks) { - N = g; - } - } - major_gc = (N == RtsFlags.GcFlags.generations-1); + N = 0; + } + + for (g = RtsFlags.GcFlags.generations - 1; g >= 0; g--) { + blocks = 0; + for (s = 0; s < generations[g].n_steps; s++) { + blocks += generations[g].steps[s].n_blocks; + blocks += generations[g].steps[s].n_large_blocks; + } + if (blocks >= generations[g].max_blocks) { + N = stg_max(N,g); + } + if ((nat)g <= N) { + blocks_total += blocks; + } } + + blocks_total += countNurseryBlocks(); + + major_gc = (N == RtsFlags.GcFlags.generations-1); + return blocks_total; } /* ----------------------------------------------------------------------------- Initialise the gc_thread structures. -------------------------------------------------------------------------- */ -static void -alloc_gc_thread (gc_thread *t, int n) +static gc_thread * +alloc_gc_thread (int n) { - nat g, s; + nat s; step_workspace *ws; + gc_thread *t; + + t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace), + "alloc_gc_thread"); + +#ifdef THREADED_RTS + t->id = 0; + initCondition(&t->wake_cond); + initMutex(&t->wake_mutex); + t->wakeup = rtsFalse; + t->exit = rtsFalse; +#endif t->thread_index = n; t->free_blocks = NULL; @@ -862,32 +986,30 @@ alloc_gc_thread (gc_thread *t, int n) init_gc_thread(t); - t->steps = stgMallocBytes(RtsFlags.GcFlags.generations * - sizeof(step_workspace *), - "initialise_gc_thread"); +#ifdef USE_PAPI + t->papi_events = -1; +#endif - for (g = 0; g < RtsFlags.GcFlags.generations; g++) + for (s = 0; s < total_steps; s++) { - t->steps[g] = stgMallocBytes(generations[g].n_steps * - sizeof(step_workspace), - "initialise_gc_thread/2"); - - for (s = 0; s < generations[g].n_steps; s++) - { - ws = &t->steps[g][s]; - ws->stp = &generations[g].steps[s]; - ws->gct = t; - - ws->scan_bd = NULL; - ws->scan = NULL; - - ws->todo_bd = NULL; - ws->buffer_todo_bd = NULL; - - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - } + ws = &t->steps[s]; + ws->step = &all_steps[s]; + ASSERT(s == ws->step->abs_no); + ws->gct = t; + + ws->scan_bd = NULL; + + ws->todo_bd = NULL; + ws->buffer_todo_bd = NULL; + + ws->part_list = NULL; + ws->n_part_blocks = 0; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; } + + return t; } @@ -897,24 +1019,203 @@ alloc_gc_threads (void) if (gc_threads == NULL) { #if defined(THREADED_RTS) nat i; - gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads * - sizeof(gc_thread), + sizeof(gc_thread*), "alloc_gc_threads"); for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) { - alloc_gc_thread(&gc_threads[i], i); + gc_threads[i] = alloc_gc_thread(i); } #else - gc_threads = stgMallocBytes (sizeof(gc_thread), + gc_threads = stgMallocBytes (sizeof(gc_thread*), "alloc_gc_threads"); - alloc_gc_thread(gc_threads, 0); + gc_threads[0] = alloc_gc_thread(0); #endif } } /* ---------------------------------------------------------------------------- + Start GC threads + ------------------------------------------------------------------------- */ + +static nat gc_running_threads; + +#if defined(THREADED_RTS) +static Mutex gc_running_mutex; +#endif + +static nat +inc_running (void) +{ + nat n_running; + ACQUIRE_LOCK(&gc_running_mutex); + n_running = ++gc_running_threads; + RELEASE_LOCK(&gc_running_mutex); + ASSERT(n_running <= n_gc_threads); + return n_running; +} + +static nat +dec_running (void) +{ + nat n_running; + ACQUIRE_LOCK(&gc_running_mutex); + ASSERT(n_gc_threads != 0); + n_running = --gc_running_threads; + RELEASE_LOCK(&gc_running_mutex); + return n_running; +} + +// +// gc_thread_work(): Scavenge until there's no work left to do and all +// the running threads are idle. +// +static void +gc_thread_work (void) +{ + nat r; + + debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index); + + // gc_running_threads has already been incremented for us; either + // this is the main thread and we incremented it inside + // GarbageCollect(), or this is a worker thread and the main + // thread bumped gc_running_threads before waking us up. + + // Every thread evacuates some roots. + gct->evac_step = 0; + GetRoots(mark_root); + +loop: + scavenge_loop(); + // scavenge_loop() only exits when there's no work to do + r = dec_running(); + + debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)", + gct->thread_index, r); + + while (gc_running_threads != 0) { + usleep(1); + if (any_work()) { + inc_running(); + goto loop; + } + // any_work() does not remove the work from the queue, it + // just checks for the presence of work. If we find any, + // then we increment gc_running_threads and go back to + // scavenge_loop() to perform any pending work. + } + + // All threads are now stopped + debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index); +} + + +#if defined(THREADED_RTS) +static void +gc_thread_mainloop (void) +{ + while (!gct->exit) { + + // Wait until we're told to wake up + ACQUIRE_LOCK(&gct->wake_mutex); + gct->wakeup = rtsFalse; + while (!gct->wakeup) { + debugTrace(DEBUG_gc, "GC thread %d standing by...", + gct->thread_index); + waitCondition(&gct->wake_cond, &gct->wake_mutex); + } + RELEASE_LOCK(&gct->wake_mutex); + if (gct->exit) break; + +#ifdef USE_PAPI + // start performance counters in this thread... + if (gct->papi_events == -1) { + papi_init_eventset(&gct->papi_events); + } + papi_thread_start_gc1_count(gct->papi_events); +#endif + + gc_thread_work(); + +#ifdef USE_PAPI + // count events in this thread towards the GC totals + papi_thread_stop_gc1_count(gct->papi_events); +#endif + } +} +#endif + +#if defined(THREADED_RTS) +static void +gc_thread_entry (gc_thread *my_gct) +{ + gct = my_gct; + debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index); + gct->id = osThreadId(); + gc_thread_mainloop(); +} +#endif + +static void +start_gc_threads (void) +{ +#if defined(THREADED_RTS) + nat i; + OSThreadId id; + static rtsBool done = rtsFalse; + + gc_running_threads = 0; + initMutex(&gc_running_mutex); + + if (!done) { + // Start from 1: the main thread is 0 + for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) { + createOSThread(&id, (OSThreadProc*)&gc_thread_entry, + gc_threads[i]); + } + done = rtsTrue; + } +#endif +} + +static void +wakeup_gc_threads (nat n_threads USED_IF_THREADS) +{ +#if defined(THREADED_RTS) + nat i; + for (i=1; i < n_threads; i++) { + inc_running(); + ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + gc_threads[i]->wakeup = rtsTrue; + signalCondition(&gc_threads[i]->wake_cond); + RELEASE_LOCK(&gc_threads[i]->wake_mutex); + } +#endif +} + +// After GC is complete, we must wait for all GC threads to enter the +// standby state, otherwise they may still be executing inside +// any_work(), and may even remain awake until the next GC starts. +static void +shutdown_gc_threads (nat n_threads USED_IF_THREADS) +{ +#if defined(THREADED_RTS) + nat i; + rtsBool wakeup; + for (i=1; i < n_threads; i++) { + do { + ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + wakeup = gc_threads[i]->wakeup; + // wakeup is false while the thread is waiting + RELEASE_LOCK(&gc_threads[i]->wake_mutex); + } while (wakeup); + } +#endif +} + +/* ---------------------------------------------------------------------------- Initialise a generation that is to be collected ------------------------------------------------------------------------- */ @@ -956,6 +1257,7 @@ init_collected_gen (nat g, nat n_threads) // we don't have any to-be-scavenged blocks yet stp->todos = NULL; + stp->todos_last = NULL; stp->n_todos = 0; // initialise the large object queues. @@ -1013,25 +1315,23 @@ init_collected_gen (nat g, nat n_threads) // we don't copy objects into g0s0, unless -G0 if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue; - ws = &gc_threads[t].steps[g][s]; + ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; ws->scan_bd = NULL; - ws->scan = NULL; ws->todo_large_objects = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + // allocate the first to-space block; extra blocks will be // chained on as necessary. ws->todo_bd = NULL; ws->buffer_todo_bd = NULL; - gc_alloc_todo_block(ws); + alloc_todo_block(ws,0); - // allocate a block for "already scavenged" objects. This goes - // on the front of the stp->blocks list, so it won't be - // traversed by the scavenging sweep. ws->scavd_list = NULL; ws->n_scavd_blocks = 0; - gc_alloc_scavd_block(ws); } } } @@ -1058,17 +1358,25 @@ init_uncollected_gen (nat g, nat threads) for (t = 0; t < threads; t++) { for (s = 0; s < generations[g].n_steps; s++) { - ws = &gc_threads[t].steps[g][s]; - stp = ws->stp; + ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; + stp = ws->step; ws->buffer_todo_bd = NULL; ws->todo_large_objects = NULL; + ws->part_list = NULL; + ws->n_part_blocks = 0; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; + // If the block at the head of the list in this generation // is less than 3/4 full, then use it as a todo block. - if (isPartiallyFull(stp->blocks)) + if (stp->blocks && isPartiallyFull(stp->blocks)) { ws->todo_bd = stp->blocks; + ws->todo_free = ws->todo_bd->free; + ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; stp->blocks = stp->blocks->link; stp->n_blocks -= 1; ws->todo_bd->link = NULL; @@ -1076,7 +1384,7 @@ init_uncollected_gen (nat g, nat threads) // this block is also the scan block; we must scan // from the current end point. ws->scan_bd = ws->todo_bd; - ws->scan = ws->scan_bd->free; + ws->scan_bd->u.scan = ws->scan_bd->free; // subtract the contents of this block from the stats, // because we'll count the whole block later. @@ -1085,28 +1393,8 @@ init_uncollected_gen (nat g, nat threads) else { ws->scan_bd = NULL; - ws->scan = NULL; ws->todo_bd = NULL; - gc_alloc_todo_block(ws); - } - - // Do the same trick for the scavd block - if (isPartiallyFull(stp->blocks)) - { - ws->scavd_list = stp->blocks; - stp->blocks = stp->blocks->link; - stp->n_blocks -= 1; - ws->scavd_list->link = NULL; - ws->n_scavd_blocks = 1; - // subtract the contents of this block from the stats, - // because we'll count the whole block later. - scavd_copied -= ws->scavd_list->free - ws->scavd_list->start; - } - else - { - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - gc_alloc_scavd_block(ws); + alloc_todo_block(ws,0); } } } @@ -1131,10 +1419,17 @@ init_uncollected_gen (nat g, nat threads) static void init_gc_thread (gc_thread *t) { - t->evac_gen = 0; + t->static_objects = END_OF_STATIC_LIST; + t->scavenged_static_objects = END_OF_STATIC_LIST; + t->evac_step = 0; t->failed_to_evac = rtsFalse; t->eager_promotion = rtsTrue; t->thunk_selector_depth = 0; + t->copied = 0; + t->any_work = 0; + t->no_work = 0; + t->scav_find_work = 0; + } /* ----------------------------------------------------------------------------- @@ -1144,7 +1439,7 @@ init_gc_thread (gc_thread *t) static void mark_root(StgClosure **root) { - *root = evacuate(*root); + evacuate(root); } /* -----------------------------------------------------------------------------