X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=f248a75a394f8ebbb33177079bb223687dbf1b20;hb=a84385fa752d8fa800a2155cce95f47c68b74e2b;hp=17bc2041ef1b9daac8e309a804dd1f7ead75ff1a;hpb=d5bd3e829c47c03157cf41cad581d2df44dfd81b;p=ghc-hetmet.git diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 17bc204..f248a75 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -40,6 +40,7 @@ #include "RetainerProfile.h" #include "RaiseAsync.h" #include "Sparks.h" +#include "Papi.h" #include "GC.h" #include "Compact.h" @@ -47,6 +48,7 @@ #include "Scav.h" #include "GCUtils.h" #include "MarkWeak.h" +#include "Sparks.h" #include // for memset() @@ -117,11 +119,14 @@ nat mutlist_MUTVARS, /* Thread-local data for each GC thread */ gc_thread *gc_threads = NULL; -gc_thread *gct = NULL; // this thread's gct TODO: make thread-local +// gc_thread *gct = NULL; // this thread's gct TODO: make thread-local // For stats: long copied; // *words* copied & scavenged during this GC -long scavd_copied; // *words* copied only during this GC + +#ifdef THREADED_RTS +SpinLock recordMutableGen_sync; +#endif /* ----------------------------------------------------------------------------- Static function declarations @@ -137,6 +142,11 @@ static void init_gc_thread (gc_thread *t); static void update_task_list (void); static void resize_generations (void); static void resize_nursery (void); +static void start_gc_threads (void); +static void gc_thread_work (void); +static nat inc_running (void); +static nat dec_running (void); +static void wakeup_gc_threads (nat n_threads); #if 0 && defined(DEBUG) static void gcCAFs (void); @@ -173,9 +183,12 @@ GarbageCollect ( rtsBool force_major_gc ) lnat live, allocated; lnat oldgen_saved_blocks = 0; nat n_threads; // number of threads participating in GC - + gc_thread *saved_gct; nat g, s, t; + // necessary if we stole a callee-saves register for gct: + saved_gct = gct; + #ifdef PROFILING CostCentreStack *prev_CCS; #endif @@ -227,6 +240,10 @@ GarbageCollect ( rtsBool force_major_gc ) */ alloc_gc_threads(); + /* Start threads, so they can be spinning up while we finish initialisation. + */ + start_gc_threads(); + /* How many threads will be participating in this GC? * We don't try to parallelise minor GC. */ @@ -253,8 +270,11 @@ GarbageCollect ( rtsBool force_major_gc ) */ static_objects = END_OF_STATIC_LIST; scavenged_static_objects = END_OF_STATIC_LIST; + #ifdef THREADED_RTS initSpinLock(&static_objects_sync); + initSpinLock(&recordMutableGen_sync); + initSpinLock(&gc_alloc_block_sync); #endif // Initialise all the generations/steps that we're collecting. @@ -283,12 +303,15 @@ GarbageCollect ( rtsBool force_major_gc ) init_gc_thread(&gc_threads[t]); } + // the main thread is running: this prevents any other threads from + // exiting prematurely, so we can start them now. + inc_running(); + wakeup_gc_threads(n_threads); + // Initialise stats copied = 0; - scavd_copied = 0; - // start threads etc. - // For now, we just have one thread, and set gct to gc_threads[0] + // this is the main thread gct = &gc_threads[0]; /* ----------------------------------------------------------------------- @@ -311,11 +334,11 @@ GarbageCollect ( rtsBool force_major_gc ) } // follow roots from the CAF list (used by GHCi) - gct->evac_gen = 0; + gct->evac_step = 0; markCAFs(mark_root); // follow all the roots that the application knows about. - gct->evac_gen = 0; + gct->evac_step = 0; GetRoots(mark_root); // Mark the weak pointer list, and prepare to detect dead weak pointers. @@ -329,25 +352,28 @@ GarbageCollect ( rtsBool force_major_gc ) * Repeatedly scavenge all the areas we know about until there's no * more scavenging to be done. */ - { - rtsBool flag; - loop: - flag = rtsFalse; - - scavenge_loop(); - - // if any blackholes are alive, make the threads that wait on - // them alive too. - if (traverseBlackholeQueue()) - flag = rtsTrue; - - if (flag) { goto loop; } + for (;;) + { + gc_thread_work(); + // The other threads are now stopped. We might recurse back to + // here, but from now on this is the only thread. + + // if any blackholes are alive, make the threads that wait on + // them alive too. + if (traverseBlackholeQueue()) { + inc_running(); + continue; + } + + // must be last... invariant is that everything is fully + // scavenged at this point. + if (traverseWeakPtrList()) { // returns rtsTrue if evaced something + inc_running(); + continue; + } - // must be last... invariant is that everything is fully - // scavenged at this point. - if (traverseWeakPtrList()) { // returns rtsTrue if evaced something - goto loop; - } + // If we get to here, there's really nothing left to do. + break; } // Update pointers from the Task list @@ -400,16 +426,13 @@ GarbageCollect ( rtsBool force_major_gc ) ws = &thr->steps[g][s]; if (g==0 && s==0) continue; - ASSERT( ws->scan_bd == ws->todo_bd ); + // Not true? + // ASSERT( ws->scan_bd == ws->todo_bd ); ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 ); // Push the final block if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); } - // update stats: we haven't counted the block at the - // front of the scavd_list yet. - scavd_copied += ws->scavd_list->free - ws->scavd_list->start; - ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); prev = ws->scavd_list; @@ -644,7 +667,7 @@ GarbageCollect ( rtsBool force_major_gc ) #endif // ok, GC over: tell the stats department what happened. - stat_endGC(allocated, live, copied, scavd_copied, N); + stat_endGC(allocated, live, copied, N); #if defined(RTS_USER_SIGNALS) if (RtsFlags.MiscFlags.install_signal_handlers) { @@ -654,6 +677,8 @@ GarbageCollect ( rtsBool force_major_gc ) #endif RELEASE_SM_LOCK; + + gct = saved_gct; } /* --------------------------------------------------------------------------- @@ -679,25 +704,6 @@ GetRoots( evac_fn evac ) Capability *cap; Task *task; -#if defined(GRAN) - for (i=0; i<=RtsFlags.GranFlags.proc; i++) { - if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL))) - evac((StgClosure **)&run_queue_hds[i]); - if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL))) - evac((StgClosure **)&run_queue_tls[i]); - - if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL))) - evac((StgClosure **)&blocked_queue_hds[i]); - if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL))) - evac((StgClosure **)&blocked_queue_tls[i]); - if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL))) - evac((StgClosure **)&ccalling_threads[i]); - } - - markEventQueue(); - -#else /* !GRAN */ - for (i = 0; i < n_capabilities; i++) { cap = &capabilities[i]; evac((StgClosure **)(void *)&cap->run_queue_hd); @@ -715,17 +721,15 @@ GetRoots( evac_fn evac ) } - #if !defined(THREADED_RTS) evac((StgClosure **)(void *)&blocked_queue_hd); evac((StgClosure **)(void *)&blocked_queue_tl); evac((StgClosure **)(void *)&sleeping_queue); #endif -#endif // evac((StgClosure **)&blackhole_queue); -#if defined(THREADED_RTS) || defined(PARALLEL_HASKELL) || defined(GRAN) +#if defined(THREADED_RTS) markSparkQueue(evac); #endif @@ -856,12 +860,24 @@ alloc_gc_thread (gc_thread *t, int n) nat g, s; step_workspace *ws; +#ifdef THREADED_RTS + t->id = 0; + initCondition(&t->wake_cond); + initMutex(&t->wake_mutex); + t->wakeup = rtsFalse; + t->exit = rtsFalse; +#endif + t->thread_index = n; t->free_blocks = NULL; t->gc_count = 0; init_gc_thread(t); +#ifdef USE_PAPI + t->papi_events = -1; +#endif + t->steps = stgMallocBytes(RtsFlags.GcFlags.generations * sizeof(step_workspace *), "initialise_gc_thread"); @@ -897,7 +913,6 @@ alloc_gc_threads (void) if (gc_threads == NULL) { #if defined(THREADED_RTS) nat i; - gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads * sizeof(gc_thread), "alloc_gc_threads"); @@ -915,6 +930,159 @@ alloc_gc_threads (void) } /* ---------------------------------------------------------------------------- + Start GC threads + ------------------------------------------------------------------------- */ + +static nat gc_running_threads; + +#if defined(THREADED_RTS) +static Mutex gc_running_mutex; +#endif + +static nat +inc_running (void) +{ + nat n_running; + ACQUIRE_LOCK(&gc_running_mutex); + n_running = ++gc_running_threads; + RELEASE_LOCK(&gc_running_mutex); + return n_running; +} + +static nat +dec_running (void) +{ + nat n_running; + ACQUIRE_LOCK(&gc_running_mutex); + n_running = --gc_running_threads; + RELEASE_LOCK(&gc_running_mutex); + return n_running; +} + +// +// gc_thread_work(): Scavenge until there's no work left to do and all +// the running threads are idle. +// +static void +gc_thread_work (void) +{ + nat r; + + debugTrace(DEBUG_gc, "GC thread %d working", gct->thread_index); + + // gc_running_threads has already been incremented for us; either + // this is the main thread and we incremented it inside + // GarbageCollect(), or this is a worker thread and the main + // thread bumped gc_running_threads before waking us up. + +loop: + scavenge_loop(); + // scavenge_loop() only exits when there's no work to do + r = dec_running(); + + debugTrace(DEBUG_gc, "GC thread %d idle (%d still running)", + gct->thread_index, r); + + while (gc_running_threads != 0) { + if (any_work()) { + inc_running(); + goto loop; + } + // any_work() does not remove the work from the queue, it + // just checks for the presence of work. If we find any, + // then we increment gc_running_threads and go back to + // scavenge_loop() to perform any pending work. + } + + // All threads are now stopped + debugTrace(DEBUG_gc, "GC thread %d finished.", gct->thread_index); +} + + +#if defined(THREADED_RTS) +static void +gc_thread_mainloop (void) +{ + while (!gct->exit) { + + // Wait until we're told to wake up + ACQUIRE_LOCK(&gct->wake_mutex); + while (!gct->wakeup) { + debugTrace(DEBUG_gc, "GC thread %d standing by...", + gct->thread_index); + waitCondition(&gct->wake_cond, &gct->wake_mutex); + } + RELEASE_LOCK(&gct->wake_mutex); + gct->wakeup = rtsFalse; + if (gct->exit) break; + +#ifdef USE_PAPI + // start performance counters in this thread... + if (gct->papi_events == -1) { + papi_init_eventset(&gct->papi_events); + } + papi_thread_start_gc_count(gct->papi_events); +#endif + + gc_thread_work(); + +#ifdef USE_PAPI + // count events in this thread towards the GC totals + papi_thread_stop_gc_count(gct->papi_events); +#endif + } +} +#endif + +#if defined(THREADED_RTS) +static void +gc_thread_entry (gc_thread *my_gct) +{ + gct = my_gct; + debugTrace(DEBUG_gc, "GC thread %d starting...", gct->thread_index); + gct->id = osThreadId(); + gc_thread_mainloop(); +} +#endif + +static void +start_gc_threads (void) +{ +#if defined(THREADED_RTS) + nat i; + OSThreadId id; + static rtsBool done = rtsFalse; + + gc_running_threads = 0; + initMutex(&gc_running_mutex); + + if (!done) { + // Start from 1: the main thread is 0 + for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) { + createOSThread(&id, (OSThreadProc*)&gc_thread_entry, + &gc_threads[i]); + } + done = rtsTrue; + } +#endif +} + +static void +wakeup_gc_threads (nat n_threads USED_IF_THREADS) +{ +#if defined(THREADED_RTS) + nat i; + for (i=1; i < n_threads; i++) { + inc_running(); + ACQUIRE_LOCK(&gc_threads[i].wake_mutex); + gc_threads[i].wakeup = rtsTrue; + signalCondition(&gc_threads[i].wake_cond); + RELEASE_LOCK(&gc_threads[i].wake_mutex); + } +#endif +} + +/* ---------------------------------------------------------------------------- Initialise a generation that is to be collected ------------------------------------------------------------------------- */ @@ -1026,12 +1194,8 @@ init_collected_gen (nat g, nat n_threads) ws->buffer_todo_bd = NULL; gc_alloc_todo_block(ws); - // allocate a block for "already scavenged" objects. This goes - // on the front of the stp->blocks list, so it won't be - // traversed by the scavenging sweep. ws->scavd_list = NULL; ws->n_scavd_blocks = 0; - gc_alloc_scavd_block(ws); } } } @@ -1064,6 +1228,9 @@ init_uncollected_gen (nat g, nat threads) ws->buffer_todo_bd = NULL; ws->todo_large_objects = NULL; + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; + // If the block at the head of the list in this generation // is less than 3/4 full, then use it as a todo block. if (isPartiallyFull(stp->blocks)) @@ -1089,25 +1256,6 @@ init_uncollected_gen (nat g, nat threads) ws->todo_bd = NULL; gc_alloc_todo_block(ws); } - - // Do the same trick for the scavd block - if (isPartiallyFull(stp->blocks)) - { - ws->scavd_list = stp->blocks; - stp->blocks = stp->blocks->link; - stp->n_blocks -= 1; - ws->scavd_list->link = NULL; - ws->n_scavd_blocks = 1; - // subtract the contents of this block from the stats, - // because we'll count the whole block later. - scavd_copied -= ws->scavd_list->free - ws->scavd_list->start; - } - else - { - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - gc_alloc_scavd_block(ws); - } } } @@ -1131,7 +1279,7 @@ init_uncollected_gen (nat g, nat threads) static void init_gc_thread (gc_thread *t) { - t->evac_gen = 0; + t->evac_step = 0; t->failed_to_evac = rtsFalse; t->eager_promotion = rtsTrue; t->thunk_selector_depth = 0; @@ -1144,7 +1292,7 @@ init_gc_thread (gc_thread *t) static void mark_root(StgClosure **root) { - *root = evacuate(*root); + evacuate(root); } /* -----------------------------------------------------------------------------