X-Git-Url: http://git.megacz.com/?a=blobdiff_plain;f=rts%2Fsm%2FGC.c;h=07cf2b51e5f3fa92b51f74bff1a170fb79b2a41d;hb=754e039a8a15d5774fe73872ff9ac593b46370e0;hp=74880064e485316d0e58065f4199e86d040f8fc1;hpb=2da67673279cf015ccc84270f6fd7935f64261a6;p=ghc-hetmet.git diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 7488006..07cf2b5 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -11,35 +11,32 @@ * * ---------------------------------------------------------------------------*/ -// #include "PosixSource.h" +#include "PosixSource.h" #include "Rts.h" -#include "RtsFlags.h" +#include "HsFFI.h" + +#include "Storage.h" #include "RtsUtils.h" #include "Apply.h" -#include "OSThreads.h" -#include "LdvProfile.h" #include "Updates.h" #include "Stats.h" #include "Schedule.h" #include "Sanity.h" #include "BlockAlloc.h" -#include "MBlock.h" #include "ProfHeap.h" -#include "SchedAPI.h" #include "Weak.h" #include "Prelude.h" -#include "ParTicky.h" // ToDo: move into Rts.h #include "RtsSignals.h" #include "STM.h" -#include "HsFFI.h" -#include "Linker.h" #if defined(RTS_GTK_FRONTPANEL) #include "FrontPanel.h" #endif #include "Trace.h" #include "RetainerProfile.h" +#include "LdvProfile.h" #include "RaiseAsync.h" #include "Papi.h" +#include "Stable.h" #include "GC.h" #include "GCThread.h" @@ -47,6 +44,7 @@ #include "Evac.h" #include "Scav.h" #include "GCUtils.h" +#include "MarkStack.h" #include "MarkWeak.h" #include "Sparks.h" #include "Sweep.h" @@ -157,21 +155,12 @@ static void gcCAFs (void); #endif /* ----------------------------------------------------------------------------- - The mark bitmap & stack. + The mark stack. -------------------------------------------------------------------------- */ -#define MARK_STACK_BLOCKS 4 - -bdescr *mark_stack_bdescr; -StgPtr *mark_stack; -StgPtr *mark_sp; -StgPtr *mark_splim; - -// Flag and pointers used for falling back to a linear scan when the -// mark stack overflows. -rtsBool mark_stack_overflowed; -bdescr *oldgen_scan_bd; -StgPtr oldgen_scan; +bdescr *mark_stack_top_bd; // topmost block in the mark stack +bdescr *mark_stack_bd; // current block in the mark stack +StgPtr mark_sp; // pointer to the next unallocated mark stack entry /* ----------------------------------------------------------------------------- GarbageCollect: the main entry point to the garbage collector. @@ -240,7 +229,8 @@ GarbageCollect (rtsBool force_major_gc, n = initialise_N(force_major_gc); #if defined(THREADED_RTS) - work_stealing = RtsFlags.ParFlags.parGcLoadBalancing; + work_stealing = RtsFlags.ParFlags.parGcLoadBalancingEnabled && + N >= RtsFlags.ParFlags.parGcLoadBalancingGen; // It's not always a good idea to do load balancing in parallel // GC. In particular, for a parallel program we don't want to // lose locality by moving cached data into another CPU's cache @@ -281,7 +271,7 @@ GarbageCollect (rtsBool force_major_gc, #ifdef DEBUG // check for memory leaks if DEBUG is on - memInventory(traceClass(DEBUG_gc)); + memInventory(DEBUG_gc); #endif // check stack sanity *before* GC @@ -306,15 +296,15 @@ GarbageCollect (rtsBool force_major_gc, /* Allocate a mark stack if we're doing a major collection. */ if (major_gc && oldest_gen->steps[0].mark) { - nat mark_stack_blocks; - mark_stack_blocks = stg_max(MARK_STACK_BLOCKS, - oldest_gen->steps[0].n_old_blocks / 100); - mark_stack_bdescr = allocGroup(mark_stack_blocks); - mark_stack = (StgPtr *)mark_stack_bdescr->start; - mark_sp = mark_stack; - mark_splim = mark_stack + (mark_stack_blocks * BLOCK_SIZE_W); + mark_stack_bd = allocBlock(); + mark_stack_top_bd = mark_stack_bd; + mark_stack_bd->link = NULL; + mark_stack_bd->u.back = NULL; + mark_sp = mark_stack_bd->start; } else { - mark_stack_bdescr = NULL; + mark_stack_bd = NULL; + mark_stack_top_bd = NULL; + mark_sp = NULL; } // this is the main thread @@ -709,8 +699,10 @@ SET_GCT(gc_threads[0]); pinned_object_block = NULL; // Free the mark stack. - if (mark_stack_bdescr != NULL) { - freeGroup(mark_stack_bdescr); + if (mark_stack_top_bd != NULL) { + debugTrace(DEBUG_gc, "mark stack: %d blocks", + countBlocks(mark_stack_top_bd)); + freeChain(mark_stack_top_bd); } // Free any bitmaps. @@ -784,7 +776,7 @@ SET_GCT(gc_threads[0]); #ifdef DEBUG // check for memory leaks if DEBUG is on - memInventory(traceClass(DEBUG_gc)); + memInventory(DEBUG_gc); #endif #ifdef RTS_GTK_FRONTPANEL @@ -937,6 +929,23 @@ initGcThreads (void) } } +void +freeGcThreads (void) +{ + if (gc_threads != NULL) { +#if defined(THREADED_RTS) + nat i; + for (i = 0; i < RtsFlags.ParFlags.nNodes; i++) { + stgFree (gc_threads[i]); + } + stgFree (gc_threads); +#else + stgFree (gc_threads); +#endif + gc_threads = NULL; + } +} + /* ---------------------------------------------------------------------------- Start GC threads ------------------------------------------------------------------------- */ @@ -970,8 +979,7 @@ any_work (void) write_barrier(); // scavenge objects in compacted generation - if (mark_stack_overflowed || oldgen_scan_bd != NULL || - (mark_stack_bdescr != NULL && !mark_stack_empty())) { + if (mark_stack_bd != NULL && !mark_stack_empty()) { return rtsTrue; } @@ -1052,6 +1060,11 @@ loop: void gcWorkerThread (Capability *cap) { + gc_thread *saved_gct; + + // necessary if we stole a callee-saves register for gct: + saved_gct = gct; + cap->in_gc = rtsTrue; gct = gc_threads[cap->no]; @@ -1091,14 +1104,17 @@ gcWorkerThread (Capability *cap) gct->thread_index); ACQUIRE_SPIN_LOCK(&gct->mut_spin); debugTrace(DEBUG_gc, "GC thread %d on my way...", gct->thread_index); + + SET_GCT(saved_gct); } #endif +#if defined(THREADED_RTS) + void waitForGcThreads (Capability *cap USED_IF_THREADS) { -#if defined(THREADED_RTS) nat n_threads = RtsFlags.ParFlags.nNodes; nat me = cap->no; nat i, j; @@ -1111,7 +1127,7 @@ waitForGcThreads (Capability *cap USED_IF_THREADS) prodCapability(&capabilities[i], cap->running_task); } } - for (j=0; j < 10000000; j++) { + for (j=0; j < 10; j++) { retry = rtsFalse; for (i=0; i < n_threads; i++) { if (i == me) continue; @@ -1122,11 +1138,13 @@ waitForGcThreads (Capability *cap USED_IF_THREADS) } } if (!retry) break; + yieldThread(); } } -#endif } +#endif // THREADED_RTS + static void start_gc_threads (void) { @@ -1168,10 +1186,10 @@ shutdown_gc_threads (nat n_threads USED_IF_THREADS, nat me USED_IF_THREADS) #endif } +#if defined(THREADED_RTS) void releaseGCThreads (Capability *cap USED_IF_THREADS) { -#if defined(THREADED_RTS) nat n_threads = RtsFlags.ParFlags.nNodes; nat me = cap->no; nat i; @@ -1184,8 +1202,8 @@ releaseGCThreads (Capability *cap USED_IF_THREADS) ACQUIRE_SPIN_LOCK(&gc_threads[i]->gc_spin); RELEASE_SPIN_LOCK(&gc_threads[i]->mut_spin); } -#endif } +#endif /* ---------------------------------------------------------------------------- Initialise a generation that is to be collected @@ -1599,6 +1617,8 @@ resize_generations (void) static void resize_nursery (void) { + lnat min_nursery = RtsFlags.GcFlags.minAllocAreaSize * n_capabilities; + if (RtsFlags.GcFlags.generations == 1) { // Two-space collector: nat blocks; @@ -1641,9 +1661,9 @@ resize_nursery (void) else { blocks *= RtsFlags.GcFlags.oldGenFactor; - if (blocks < RtsFlags.GcFlags.minAllocAreaSize) + if (blocks < min_nursery) { - blocks = RtsFlags.GcFlags.minAllocAreaSize; + blocks = min_nursery; } } resizeNurseries(blocks); @@ -1690,8 +1710,8 @@ resize_nursery (void) (((long)RtsFlags.GcFlags.heapSizeSuggestion - (long)needed) * 100) / (100 + (long)g0s0_pcnt_kept); - if (blocks < (long)RtsFlags.GcFlags.minAllocAreaSize) { - blocks = RtsFlags.GcFlags.minAllocAreaSize; + if (blocks < (long)min_nursery) { + blocks = min_nursery; } resizeNurseries((nat)blocks);