X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=80ec6f2019de5fabe30cd52c962904bdfa221205;hb=2aa877f8588da099351ef51efca3605fd87ea768;hp=2fc3f4dd9b3a030fbeb9ee61ee6d1f0483f38bd9;hpb=cacd714cd545bc3bd07882904055cfee162bd7de;p=ghc-hetmet.git diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 2fc3f4d..80ec6f2 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -40,6 +40,7 @@ #include "RetainerProfile.h" #include "RaiseAsync.h" #include "Sparks.h" +#include "Papi.h" #include "GC.h" #include "Compact.h" @@ -117,9 +118,13 @@ nat mutlist_MUTVARS, /* Thread-local data for each GC thread */ -gc_thread *gc_threads = NULL; +gc_thread **gc_threads = NULL; // gc_thread *gct = NULL; // this thread's gct TODO: make thread-local +// Number of threads running in *this* GC. Affects how many +// step->todos[] lists we have to look in to find work. +nat n_gc_threads; + // For stats: long copied; // *words* copied & scavenged during this GC @@ -181,7 +186,6 @@ GarbageCollect ( rtsBool force_major_gc ) step *stp; lnat live, allocated; lnat oldgen_saved_blocks = 0; - nat n_threads; // number of threads participating in GC gc_thread *saved_gct; nat g, s, t; @@ -203,16 +207,11 @@ GarbageCollect ( rtsBool force_major_gc ) } #endif - // tell the STM to discard any cached closures it's hoping to re-use - stmPreGCHook(); - // tell the stats department that we've started a GC stat_startGC(); -#ifdef DEBUG - // check for memory leaks if DEBUG is on - memInventory(); -#endif + // tell the STM to discard any cached closures it's hoping to re-use + stmPreGCHook(); #ifdef DEBUG mutlist_MUTVARS = 0; @@ -248,12 +247,12 @@ GarbageCollect ( rtsBool force_major_gc ) */ #if defined(THREADED_RTS) if (N == 0) { - n_threads = 1; + n_gc_threads = 1; } else { - n_threads = RtsFlags.ParFlags.gcThreads; + n_gc_threads = RtsFlags.ParFlags.gcThreads; } #else - n_threads = 1; + n_gc_threads = 1; #endif #ifdef RTS_GTK_FRONTPANEL @@ -262,6 +261,11 @@ GarbageCollect ( rtsBool force_major_gc ) } #endif +#ifdef DEBUG + // check for memory leaks if DEBUG is on + memInventory(traceClass(DEBUG_gc)); +#endif + // check stack sanity *before* GC (ToDo: check all threads) IF_DEBUG(sanity, checkFreeListSanity()); @@ -270,20 +274,14 @@ GarbageCollect ( rtsBool force_major_gc ) static_objects = END_OF_STATIC_LIST; scavenged_static_objects = END_OF_STATIC_LIST; -#ifdef THREADED_RTS - initSpinLock(&static_objects_sync); - initSpinLock(&recordMutableGen_sync); - initSpinLock(&gc_alloc_block_sync); -#endif - // Initialise all the generations/steps that we're collecting. for (g = 0; g <= N; g++) { - init_collected_gen(g,n_threads); + init_collected_gen(g,n_gc_threads); } // Initialise all the generations/steps that we're *not* collecting. for (g = N+1; g < RtsFlags.GcFlags.generations; g++) { - init_uncollected_gen(g,n_threads); + init_uncollected_gen(g,n_gc_threads); } /* Allocate a mark stack if we're doing a major collection. @@ -298,20 +296,20 @@ GarbageCollect ( rtsBool force_major_gc ) } // Initialise all our gc_thread structures - for (t = 0; t < n_threads; t++) { - init_gc_thread(&gc_threads[t]); + for (t = 0; t < n_gc_threads; t++) { + init_gc_thread(gc_threads[t]); } // the main thread is running: this prevents any other threads from // exiting prematurely, so we can start them now. inc_running(); - wakeup_gc_threads(n_threads); + wakeup_gc_threads(n_gc_threads); // Initialise stats copied = 0; // this is the main thread - gct = &gc_threads[0]; + gct = gc_threads[0]; /* ----------------------------------------------------------------------- * follow all the roots that we know about: @@ -340,6 +338,11 @@ GarbageCollect ( rtsBool force_major_gc ) gct->evac_step = 0; GetRoots(mark_root); +#if defined(RTS_USER_SIGNALS) + // mark the signal handlers (signals should be already blocked) + markSignalHandlers(mark_root); +#endif + // Mark the weak pointer list, and prepare to detect dead weak pointers. markWeakPtrList(); initWeakForGC(); @@ -417,33 +420,30 @@ GarbageCollect ( rtsBool force_major_gc ) step_workspace *ws; bdescr *prev; - for (t = 0; t < n_threads; t++) { - thr = &gc_threads[t]; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - ws = &thr->steps[g][s]; - if (g==0 && s==0) continue; - - // Not true? - // ASSERT( ws->scan_bd == ws->todo_bd ); - ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 ); - - // Push the final block - if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); } - - ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); - - prev = ws->scavd_list; - for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { - bd->flags &= ~BF_EVACUATED; // now from-space - prev = bd; - } - prev->link = ws->stp->blocks; - ws->stp->blocks = ws->scavd_list; - ws->stp->n_blocks += ws->n_scavd_blocks; - ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks); - } + for (t = 0; t < n_gc_threads; t++) { + thr = gc_threads[t]; + + // not step 0 + for (s = 1; s < total_steps; s++) { + ws = &thr->steps[s]; + // Not true? + // ASSERT( ws->scan_bd == ws->todo_bd ); + ASSERT( ws->scan_bd ? ws->scan == ws->scan_bd->free : 1 ); + + // Push the final block + if (ws->scan_bd) { push_scan_block(ws->scan_bd, ws); } + + ASSERT(countBlocks(ws->scavd_list) == ws->n_scavd_blocks); + + prev = ws->scavd_list; + for (bd = ws->scavd_list; bd != NULL; bd = bd->link) { + bd->flags &= ~BF_EVACUATED; // now from-space + prev = bd; + } + prev->link = ws->stp->blocks; + ws->stp->blocks = ws->scavd_list; + ws->stp->n_blocks += ws->n_scavd_blocks; + ASSERT(countBlocks(ws->stp->blocks) == ws->stp->n_blocks); } } } @@ -571,7 +571,9 @@ GarbageCollect ( rtsBool force_major_gc ) resize_generations(); // Guess the amount of live data for stats. - live = calcLive(); + live = calcLiveBlocks() * BLOCK_SIZE_W; + debugTrace(DEBUG_gc, "Slop: %ldKB", + (live - calcLiveWords()) / (1024/sizeof(W_))); // Free the small objects allocated via allocate(), since this will // all have been copied into G0S1 now. @@ -656,7 +658,7 @@ GarbageCollect ( rtsBool force_major_gc ) #ifdef DEBUG // check for memory leaks if DEBUG is on - memInventory(); + memInventory(traceClass(DEBUG_gc)); #endif #ifdef RTS_GTK_FRONTPANEL @@ -680,6 +682,76 @@ GarbageCollect ( rtsBool force_major_gc ) gct = saved_gct; } +/* ----------------------------------------------------------------------------- + * Mark all nodes pointed to by sparks in the spark queues (for GC) Does an + * implicit slide i.e. after marking all sparks are at the beginning of the + * spark pool and the spark pool only contains sparkable closures + * -------------------------------------------------------------------------- */ + +#ifdef THREADED_RTS +static void +markSparkQueue (evac_fn evac, Capability *cap) +{ + StgClosure **sparkp, **to_sparkp; + nat n, pruned_sparks; // stats only + StgSparkPool *pool; + + PAR_TICKY_MARK_SPARK_QUEUE_START(); + + n = 0; + pruned_sparks = 0; + + pool = &(cap->r.rSparks); + + ASSERT_SPARK_POOL_INVARIANTS(pool); + +#if defined(PARALLEL_HASKELL) + // stats only + n = 0; + pruned_sparks = 0; +#endif + + sparkp = pool->hd; + to_sparkp = pool->hd; + while (sparkp != pool->tl) { + ASSERT(*sparkp!=NULL); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgClosure *)*sparkp))); + // ToDo?: statistics gathering here (also for GUM!) + if (closure_SHOULD_SPARK(*sparkp)) { + evac(sparkp); + *to_sparkp++ = *sparkp; + if (to_sparkp == pool->lim) { + to_sparkp = pool->base; + } + n++; + } else { + pruned_sparks++; + } + sparkp++; + if (sparkp == pool->lim) { + sparkp = pool->base; + } + } + pool->tl = to_sparkp; + + PAR_TICKY_MARK_SPARK_QUEUE_END(n); + +#if defined(PARALLEL_HASKELL) + debugTrace(DEBUG_sched, + "marked %d sparks and pruned %d sparks on [%x]", + n, pruned_sparks, mytid); +#else + debugTrace(DEBUG_sched, + "marked %d sparks and pruned %d sparks", + n, pruned_sparks); +#endif + + debugTrace(DEBUG_sched, + "new spark queue len=%d; (hd=%p; tl=%p)\n", + sparkPoolSize(pool), pool->hd, pool->tl); +} +#endif + /* --------------------------------------------------------------------------- Where are the roots that we know about? @@ -691,11 +763,6 @@ GarbageCollect ( rtsBool force_major_gc ) ------------------------------------------------------------------------ */ -/* This has to be protected either by the scheduler monitor, or by the - garbage collection monitor (probably the latter). - KH @ 25/10/99 -*/ - void GetRoots( evac_fn evac ) { @@ -703,7 +770,12 @@ GetRoots( evac_fn evac ) Capability *cap; Task *task; - for (i = 0; i < n_capabilities; i++) { + // Each GC thread is responsible for following roots from the + // Capability of the same number. There will usually be the same + // or fewer Capabilities as GC threads, but just in case there + // are more, we mark every Capability whose number is the GC + // thread's index plus a multiple of the number of GC threads. + for (i = gct->thread_index; i < n_capabilities; i += n_gc_threads) { cap = &capabilities[i]; evac((StgClosure **)(void *)&cap->run_queue_hd); evac((StgClosure **)(void *)&cap->run_queue_tl); @@ -718,6 +790,9 @@ GetRoots( evac_fn evac ) evac((StgClosure **)(void *)&task->suspended_tso); } +#if defined(THREADED_RTS) + markSparkQueue(evac,cap); +#endif } #if !defined(THREADED_RTS) @@ -725,17 +800,6 @@ GetRoots( evac_fn evac ) evac((StgClosure **)(void *)&blocked_queue_tl); evac((StgClosure **)(void *)&sleeping_queue); #endif - - // evac((StgClosure **)&blackhole_queue); - -#if defined(THREADED_RTS) - markSparkQueue(evac); -#endif - -#if defined(RTS_USER_SIGNALS) - // mark the signal handlers (signals should be already blocked) - markSignalHandlers(evac); -#endif } /* ----------------------------------------------------------------------------- @@ -853,11 +917,15 @@ initialise_N (rtsBool force_major_gc) Initialise the gc_thread structures. -------------------------------------------------------------------------- */ -static void -alloc_gc_thread (gc_thread *t, int n) +static gc_thread * +alloc_gc_thread (int n) { - nat g, s; + nat s; step_workspace *ws; + gc_thread *t; + + t = stgMallocBytes(sizeof(gc_thread) + total_steps * sizeof(step_workspace), + "alloc_gc_thread"); #ifdef THREADED_RTS t->id = 0; @@ -873,32 +941,28 @@ alloc_gc_thread (gc_thread *t, int n) init_gc_thread(t); - t->steps = stgMallocBytes(RtsFlags.GcFlags.generations * - sizeof(step_workspace *), - "initialise_gc_thread"); +#ifdef USE_PAPI + t->papi_events = -1; +#endif - for (g = 0; g < RtsFlags.GcFlags.generations; g++) + for (s = 0; s < total_steps; s++) { - t->steps[g] = stgMallocBytes(generations[g].n_steps * - sizeof(step_workspace), - "initialise_gc_thread/2"); - - for (s = 0; s < generations[g].n_steps; s++) - { - ws = &t->steps[g][s]; - ws->stp = &generations[g].steps[s]; - ws->gct = t; - - ws->scan_bd = NULL; - ws->scan = NULL; - - ws->todo_bd = NULL; - ws->buffer_todo_bd = NULL; - - ws->scavd_list = NULL; - ws->n_scavd_blocks = 0; - } + ws = &t->steps[s]; + ws->stp = &all_steps[s]; + ASSERT(s == ws->stp->abs_no); + ws->gct = t; + + ws->scan_bd = NULL; + ws->scan = NULL; + + ws->todo_bd = NULL; + ws->buffer_todo_bd = NULL; + + ws->scavd_list = NULL; + ws->n_scavd_blocks = 0; } + + return t; } @@ -909,17 +973,17 @@ alloc_gc_threads (void) #if defined(THREADED_RTS) nat i; gc_threads = stgMallocBytes (RtsFlags.ParFlags.gcThreads * - sizeof(gc_thread), + sizeof(gc_thread*), "alloc_gc_threads"); for (i = 0; i < RtsFlags.ParFlags.gcThreads; i++) { - alloc_gc_thread(&gc_threads[i], i); + gc_threads[i] = alloc_gc_thread(i); } #else - gc_threads = stgMallocBytes (sizeof(gc_thread), + gc_threads = stgMallocBytes (sizeof(gc_thread*), "alloc_gc_threads"); - alloc_gc_thread(gc_threads, 0); + gc_threads[0] = alloc_gc_thread(0); #endif } } @@ -970,6 +1034,10 @@ gc_thread_work (void) // GarbageCollect(), or this is a worker thread and the main // thread bumped gc_running_threads before waking us up. + // Every thread evacuates some roots. + gct->evac_step = 0; + GetRoots(mark_root); + loop: scavenge_loop(); // scavenge_loop() only exits when there's no work to do @@ -1011,7 +1079,20 @@ gc_thread_mainloop (void) gct->wakeup = rtsFalse; if (gct->exit) break; +#ifdef USE_PAPI + // start performance counters in this thread... + if (gct->papi_events == -1) { + papi_init_eventset(&gct->papi_events); + } + papi_thread_start_gc1_count(gct->papi_events); +#endif + gc_thread_work(); + +#ifdef USE_PAPI + // count events in this thread towards the GC totals + papi_thread_stop_gc1_count(gct->papi_events); +#endif } } #endif @@ -1042,7 +1123,7 @@ start_gc_threads (void) // Start from 1: the main thread is 0 for (i = 1; i < RtsFlags.ParFlags.gcThreads; i++) { createOSThread(&id, (OSThreadProc*)&gc_thread_entry, - &gc_threads[i]); + gc_threads[i]); } done = rtsTrue; } @@ -1056,10 +1137,10 @@ wakeup_gc_threads (nat n_threads USED_IF_THREADS) nat i; for (i=1; i < n_threads; i++) { inc_running(); - ACQUIRE_LOCK(&gc_threads[i].wake_mutex); - gc_threads[i].wakeup = rtsTrue; - signalCondition(&gc_threads[i].wake_cond); - RELEASE_LOCK(&gc_threads[i].wake_mutex); + ACQUIRE_LOCK(&gc_threads[i]->wake_mutex); + gc_threads[i]->wakeup = rtsTrue; + signalCondition(&gc_threads[i]->wake_cond); + RELEASE_LOCK(&gc_threads[i]->wake_mutex); } #endif } @@ -1163,7 +1244,7 @@ init_collected_gen (nat g, nat n_threads) // we don't copy objects into g0s0, unless -G0 if (g==0 && s==0 && RtsFlags.GcFlags.generations > 1) continue; - ws = &gc_threads[t].steps[g][s]; + ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; ws->scan_bd = NULL; ws->scan = NULL; @@ -1204,7 +1285,7 @@ init_uncollected_gen (nat g, nat threads) for (t = 0; t < threads; t++) { for (s = 0; s < generations[g].n_steps; s++) { - ws = &gc_threads[t].steps[g][s]; + ws = &gc_threads[t]->steps[g * RtsFlags.GcFlags.steps + s]; stp = ws->stp; ws->buffer_todo_bd = NULL; @@ -1215,9 +1296,11 @@ init_uncollected_gen (nat g, nat threads) // If the block at the head of the list in this generation // is less than 3/4 full, then use it as a todo block. - if (isPartiallyFull(stp->blocks)) + if (stp->blocks && isPartiallyFull(stp->blocks)) { ws->todo_bd = stp->blocks; + ws->todo_free = ws->todo_bd->free; + ws->todo_lim = ws->todo_bd->start + BLOCK_SIZE_W; stp->blocks = stp->blocks->link; stp->n_blocks -= 1; ws->todo_bd->link = NULL;