diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 6aedb96..f8a9e55 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -15,6 +15,7 @@
 
 #include "Rts.h"
 #include "Storage.h"
+#include "GCThread.h"
 #include "RtsUtils.h"
 #include "Stats.h"
 #include "BlockAlloc.h"
@@ -40,8 +41,8 @@ StgClosure *caf_list = NULL;
 StgClosure *revertible_caf_list = NULL;
 rtsBool keepCAFs;
 
-nat alloc_blocks_lim;    /* GC if n_large_blocks in any nursery
-                          * reaches this. */
+nat large_alloc_lim;    /* GC if n_new_large_words in any nursery
+                         * reaches this. */
 
 bdescr *exec_block;
 
@@ -77,15 +78,14 @@ initGeneration (generation *gen, int g)
     gen->n_old_blocks = 0;
     gen->large_objects = NULL;
     gen->n_large_blocks = 0;
-    gen->n_new_large_blocks = 0;
-    gen->mut_list = allocBlock();
+    gen->n_new_large_words = 0;
     gen->scavenged_large_objects = NULL;
     gen->n_scavenged_large_blocks = 0;
     gen->mark = 0;
     gen->compact = 0;
     gen->bitmap = NULL;
 #ifdef THREADED_RTS
-    initSpinLock(&gen->sync_large_objects);
+    initSpinLock(&gen->sync);
 #endif
     gen->threads = END_TSO_QUEUE;
     gen->old_threads = END_TSO_QUEUE;
@@ -107,7 +107,7 @@ initStorage( void )
    * doing something reasonable.
    */
   /* We use the NOT_NULL variant or gcc warns that the test is always true */
-  ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLACKHOLE_info));
+  ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
 
@@ -177,11 +177,11 @@ initStorage( void )
   allocNurseries();
 
   weak_ptr_list = NULL;
-  caf_list = NULL;
-  revertible_caf_list = NULL;
+  caf_list = END_OF_STATIC_LIST;
+  revertible_caf_list = END_OF_STATIC_LIST;
 
   /* initialise the allocate() interface */
-  alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize;
+  large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
 
   exec_block = NULL;
 
@@ -209,14 +209,14 @@ initStorage( void )
 void
 exitStorage (void)
 {
-    stat_exit(calcAllocated());
+    stat_exit(calcAllocated(rtsTrue));
 }
 
 void
-freeStorage (void)
+freeStorage (rtsBool free_heap)
 {
     stgFree(generations);
-    freeAllMBlocks();
+    if (free_heap) freeAllMBlocks();
 #if defined(THREADED_RTS)
     closeMutex(&sm_mutex);
 #endif
@@ -229,13 +229,12 @@ freeStorage (void)
 
    The entry code for every CAF does the following:
 
-      - builds a CAF_BLACKHOLE in the heap
-      - pushes an update frame pointing to the CAF_BLACKHOLE
-      - invokes UPD_CAF(), which:
-          - calls newCaf, below
-          - updates the CAF with a static indirection to the CAF_BLACKHOLE
+      - builds a BLACKHOLE in the heap
+      - pushes an update frame pointing to the BLACKHOLE
+      - calls newCaf, below
+      - updates the CAF with a static indirection to the BLACKHOLE
 
-   Why do we build a CAF_BLACKHOLE in the heap rather than just updating
+   Why do we build a BLACKHOLE in the heap rather than just updating
    the thunk directly? It's so that we only need one kind of update
    frame - otherwise we'd need a static version of the update frame too.
 
@@ -291,7 +290,9 @@ newCAF(StgRegTable *reg, StgClosure* caf)
 {
     // Put this CAF on the mutable list for the old generation.
     ((StgIndStatic *)caf)->saved_info = NULL;
-    recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no);
+    if (oldest_gen->no != 0) {
+        recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no);
+    }
   }
 }
 
@@ -330,32 +331,48 @@ newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf)
 static bdescr *
 allocNursery (bdescr *tail, nat blocks)
 {
-    bdescr *bd;
-    nat i;
+    bdescr *bd = NULL;
+    nat i, n;
 
-    // Allocate a nursery: we allocate fresh blocks one at a time and
-    // cons them on to the front of the list, not forgetting to update
-    // the back pointer on the tail of the list to point to the new block.
-    for (i=0; i < blocks; i++) {
-        // @LDV profiling
-        /*
-          processNursery() in LdvProfile.c assumes that every block group in
-          the nursery contains only a single block. So, if a block group is
-          given multiple blocks, change processNursery() accordingly.
-        */
-        bd = allocBlock();
-        bd->link = tail;
-        // double-link the nursery: we might need to insert blocks
-        if (tail != NULL) {
-            tail->u.back = bd;
-        }
-        initBdescr(bd, g0, g0);
-        bd->flags = 0;
-        bd->free = bd->start;
-        tail = bd;
+    // We allocate the nursery as a single contiguous block and then
+    // divide it into single blocks manually.  This way we guarantee
+    // that the nursery blocks are adjacent, so that the processor's
+    // automatic prefetching works across nursery blocks.  This is a
+    // tiny optimisation (~0.5%), but it's free.
+
+    while (blocks > 0) {
+        n = stg_min(blocks, BLOCKS_PER_MBLOCK);
+        blocks -= n;
+
+        bd = allocGroup(n);
+        for (i = 0; i < n; i++) {
+            initBdescr(&bd[i], g0, g0);
+
+            bd[i].blocks = 1;
+            bd[i].flags = 0;
+
+            if (i > 0) {
+                bd[i].u.back = &bd[i-1];
+            } else {
+                bd[i].u.back = NULL;
+            }
+
+            if (i+1 < n) {
+                bd[i].link = &bd[i+1];
+            } else {
+                bd[i].link = tail;
+                if (tail != NULL) {
+                    tail->u.back = &bd[i];
+                }
+            }
+
+            bd[i].free = bd[i].start;
+        }
+
+        tail = &bd[0];
     }
-    tail->u.back = NULL;
-    return tail;
+
+    return &bd[0];
 }
 
 static void
@@ -384,21 +401,31 @@ allocNurseries( void )
     assignNurseriesToCapabilities();
 }
 
-void
-resetNurseries( void )
+lnat // words allocated
+clearNurseries (void)
 {
+    lnat allocated = 0;
     nat i;
     bdescr *bd;
 
     for (i = 0; i < n_capabilities; i++) {
         for (bd = nurseries[i].blocks; bd; bd = bd->link) {
-            bd->free = bd->start;
+            allocated += (lnat)(bd->free - bd->start);
+            bd->free = bd->start;
             ASSERT(bd->gen_no == 0);
             ASSERT(bd->gen == g0);
             IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
         }
     }
+
+    return allocated;
+}
+
+void
+resetNurseries (void)
+{
     assignNurseriesToCapabilities();
+
 }
 
 lnat
@@ -478,12 +505,12 @@ resizeNurseries (nat blocks)
 
 /* -----------------------------------------------------------------------------
-   move_TSO is called to update the TSO structure after it has been
+   move_STACK is called to update the StgStack structure after it has been
    moved from one place to another.
   -------------------------------------------------------------------------- */
 
 void
-move_TSO (StgTSO *src, StgTSO *dest)
+move_STACK (StgStack *src, StgStack *dest)
 {
     ptrdiff_t diff;
 
@@ -493,45 +520,6 @@ move_TSO (StgTSO *src, StgTSO *dest)
 }
 
 /* -----------------------------------------------------------------------------
-   split N blocks off the front of the given bdescr, returning the
-   new block group.  We add the remainder to the large_blocks list
-   in the same step as the original block.
-
-   -------------------------------------------------------------------------- */
-
-bdescr *
-splitLargeBlock (bdescr *bd, nat blocks)
-{
-    bdescr *new_bd;
-
-    ACQUIRE_SM_LOCK;
-
-    ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks);
-
-    // subtract the original number of blocks from the counter first
-    bd->gen->n_large_blocks -= bd->blocks;
-
-    new_bd = splitBlockGroup (bd, blocks);
-    initBdescr(new_bd, bd->gen, bd->gen->to);
-    new_bd->flags = BF_LARGE | (bd->flags & BF_EVACUATED);
-    // if new_bd is in an old generation, we have to set BF_EVACUATED
-    new_bd->free = bd->free;
-    dbl_link_onto(new_bd, &bd->gen->large_objects);
-
-    ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W);
-
-    // add the new number of blocks to the counter.  Due to the gaps
-    // for block descriptors, new_bd->blocks + bd->blocks might not be
-    // equal to the original bd->blocks, which is why we do it this way.
-    bd->gen->n_large_blocks += bd->blocks + new_bd->blocks;
-
-    ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks);
-
-    RELEASE_SM_LOCK;
-
-    return new_bd;
-}
-
-/* -----------------------------------------------------------------------------
    allocate()
 
    This allocates memory in the current thread - it is intended for
@@ -571,7 +559,7 @@ allocate (Capability *cap, lnat n)
         bd = allocGroup(req_blocks);
         dbl_link_onto(bd, &g0->large_objects);
         g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
-        g0->n_new_large_blocks += bd->blocks;
+        g0->n_new_large_words += n;
         RELEASE_SM_LOCK;
         initBdescr(bd, g0, g0);
         bd->flags = BF_LARGE;
@@ -669,14 +657,29 @@ allocatePinned (Capability *cap, lnat n)
     // If we don't have a block of pinned objects yet, or the current
     // one isn't large enough to hold the new object, allocate a new one.
     if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
+        // The pinned_object_block remains attached to the capability
+        // until it is full, even if a GC occurs.  We want this
+        // behaviour because otherwise the unallocated portion of the
+        // block would be forever slop, and under certain workloads
+        // (allocating a few ByteStrings per GC) we accumulate a lot
+        // of slop.
+        //
+        // So, the pinned_object_block is initially marked
+        // BF_EVACUATED so the GC won't touch it.  When it is full,
+        // we place it on the large_objects list, and at the start of
+        // the next GC the BF_EVACUATED flag will be cleared, and the
+        // block will be promoted as usual (if anything in it is
+        // live).
         ACQUIRE_SM_LOCK;
-        cap->pinned_object_block = bd = allocBlock();
-        dbl_link_onto(bd, &g0->large_objects);
-        g0->n_large_blocks++;
-        g0->n_new_large_blocks++;
+        if (bd != NULL) {
+            dbl_link_onto(bd, &g0->large_objects);
+            g0->n_large_blocks++;
+            g0->n_new_large_words += bd->free - bd->start;
+        }
+        cap->pinned_object_block = bd = allocBlock();
         RELEASE_SM_LOCK;
         initBdescr(bd, g0, g0);
-        bd->flags = BF_PINNED | BF_LARGE;
+        bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED;
         bd->free = bd->start;
     }
 
@@ -699,11 +702,9 @@ void
 dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 {
     Capability *cap = regTableToCapability(reg);
-    bdescr *bd;
     if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
         p->header.info = &stg_MUT_VAR_DIRTY_info;
-        bd = Bdescr((StgPtr)p);
-        if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
+        recordClosureMutated(cap,p);
     }
 }
 
@@ -716,24 +717,39 @@ dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
 void
 setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
 {
-    bdescr *bd;
-    if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
-        tso->flags |= TSO_LINK_DIRTY;
-        bd = Bdescr((StgPtr)tso);
-        if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
+    if (tso->dirty == 0) {
+        tso->dirty = 1;
+        recordClosureMutated(cap,(StgClosure*)tso);
     }
     tso->_link = target;
 }
 
 void
+setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
+{
+    if (tso->dirty == 0) {
+        tso->dirty = 1;
+        recordClosureMutated(cap,(StgClosure*)tso);
+    }
+    tso->block_info.prev = target;
+}
+
+void
 dirty_TSO (Capability *cap, StgTSO *tso)
 {
-    bdescr *bd;
-    if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) {
-        bd = Bdescr((StgPtr)tso);
-        if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no);
+    if (tso->dirty == 0) {
+        tso->dirty = 1;
+        recordClosureMutated(cap,(StgClosure*)tso);
+    }
+}
+
+void
+dirty_STACK (Capability *cap, StgStack *stack)
+{
+    if (stack->dirty == 0) {
+        stack->dirty = 1;
+        recordClosureMutated(cap,(StgClosure*)stack);
     }
-    tso->dirty = 1;
 }
 
 /*
@@ -747,10 +763,7 @@ dirty_TSO (Capability *cap, StgTSO *tso)
 void
 dirty_MVAR(StgRegTable *reg, StgClosure *p)
 {
-    Capability *cap = regTableToCapability(reg);
-    bdescr *bd;
-    bd = Bdescr((StgPtr)p);
-    if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no);
+    recordClosureMutated(regTableToCapability(reg),p);
 }
 
 /* -----------------------------------------------------------------------------
@@ -766,56 +779,27 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p)
  * -------------------------------------------------------------------------- */
 
 lnat
-calcAllocated( void )
+calcAllocated (rtsBool include_nurseries)
 {
-  nat allocated;
-  bdescr *bd;
+  nat allocated = 0;
   nat i;
 
-  allocated = countNurseryBlocks() * BLOCK_SIZE_W;
-
-  for (i = 0; i < n_capabilities; i++) {
-      Capability *cap;
-      for ( bd = capabilities[i].r.rCurrentNursery->link;
-            bd != NULL; bd = bd->link ) {
-          allocated -= BLOCK_SIZE_W;
-      }
-      cap = &capabilities[i];
-      if (cap->r.rCurrentNursery->free <
-          cap->r.rCurrentNursery->start + BLOCK_SIZE_W) {
-          allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W)
-              - cap->r.rCurrentNursery->free;
-      }
-      if (cap->pinned_object_block != NULL) {
-          allocated -= (cap->pinned_object_block->start + BLOCK_SIZE_W)
-              - cap->pinned_object_block->free;
+  // When called from GC.c, we already have the allocation count for
+  // the nursery from clearNurseries(), so we don't need to walk
+  // through these block lists again.
+ if (include_nurseries) + { + for (i = 0; i < n_capabilities; i++) { + allocated += countOccupied(nurseries[i].blocks); } } - allocated += g0->n_new_large_blocks * BLOCK_SIZE_W; + // add in sizes of new large and pinned objects + allocated += g0->n_new_large_words; return allocated; } -/* Approximate the amount of live data in the heap. To be called just - * after garbage collection (see GarbageCollect()). - */ -lnat calcLiveBlocks (void) -{ - nat g; - lnat live = 0; - generation *gen; - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - /* approximate amount of live data (doesn't take into account slop - * at end of each block). - */ - gen = &generations[g]; - live += gen->n_large_blocks + gen->n_blocks; - } - return live; -} - lnat countOccupied (bdescr *bd) { lnat words; @@ -828,18 +812,60 @@ lnat countOccupied (bdescr *bd) return words; } +lnat genLiveWords (generation *gen) +{ + return gen->n_words + countOccupied(gen->large_objects); +} + +lnat genLiveBlocks (generation *gen) +{ + return gen->n_blocks + gen->n_large_blocks; +} + +lnat gcThreadLiveWords (nat i, nat g) +{ + lnat words; + + words = countOccupied(gc_threads[i]->gens[g].todo_bd); + words += countOccupied(gc_threads[i]->gens[g].part_list); + words += countOccupied(gc_threads[i]->gens[g].scavd_list); + + return words; +} + +lnat gcThreadLiveBlocks (nat i, nat g) +{ + lnat blocks; + + blocks = countBlocks(gc_threads[i]->gens[g].todo_bd); + blocks += gc_threads[i]->gens[g].n_part_blocks; + blocks += gc_threads[i]->gens[g].n_scavd_blocks; + + return blocks; +} + // Return an accurate count of the live data in the heap, excluding // generation 0. lnat calcLiveWords (void) { nat g; lnat live; - generation *gen; - + live = 0; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen = &generations[g]; - live += gen->n_words + countOccupied(gen->large_objects); + live += genLiveWords(&generations[g]); + } + return live; +} + +lnat calcLiveBlocks (void) +{ + nat g; + lnat live; + + live = 0; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + live += genLiveBlocks(&generations[g]); } return live; }
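
What follows are standalone sketches of the mechanisms this patch touches. All of them use invented names and are illustrations of the ideas, not RTS code.

First, the CAF story from the comment near the top of the diff: entering a CAF builds a BLACKHOLE in the heap, pushes an update frame over the evaluation, and overwrites the static closure with an indirection into the heap, so that the ordinary update-frame machinery can later replace the BLACKHOLE with the value. A toy model follows; Closure and the info strings are made up, and the update frame itself is not modelled:

    #include <assert.h>
    #include <stddef.h>

    typedef struct Closure_ {
        const char *info;             /* stand-in for the info pointer */
        struct Closure_ *indirectee;
    } Closure;

    static Closure heap_bh;                         /* the heap BLACKHOLE */
    static Closure my_caf = { "THUNK_STATIC", NULL };

    /* What the CAF entry code plus newCAF arrange, in miniature. */
    static Closure *enter_caf (Closure *caf, Closure *value)
    {
        heap_bh.info = "BLACKHOLE";    /* 1. build a BLACKHOLE in the heap  */
        /* 2. an update frame pointing at heap_bh is pushed (not modelled)  */
        caf->info = "IND_STATIC";      /* 3. static indirection...          */
        caf->indirectee = &heap_bh;    /*    ...into the heap               */
        /* 4. evaluation runs; the update frame overwrites the BLACKHOLE:  */
        heap_bh.info = "IND";
        heap_bh.indirectee = value;
        return value;
    }

    int main (void)
    {
        Closure v = { "CONSTR", NULL };
        enter_caf(&my_caf, &v);
        assert(my_caf.indirectee->indirectee == &v);  /* CAF now reaches v */
        return 0;
    }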
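
The rewritten allocNursery is the subtle part of the patch: each allocGroup() result is carved into single-block bdescrs, doubly linked via link and u.back, and each group is consed onto the front of the chain. This sketch replays that linking over a plain array so the invariants can be checked outside the RTS; Block, GROUP_SIZE, and alloc_chain stand in for bdescr, BLOCKS_PER_MBLOCK, and the allocNursery loop (the sketch leaks its groups, which is fine for a test):

    #include <assert.h>
    #include <stddef.h>
    #include <stdlib.h>

    typedef struct Block_ {
        struct Block_ *link;    /* forward pointer (bd->link)    */
        struct Block_ *back;    /* backward pointer (bd->u.back) */
    } Block;

    #define GROUP_SIZE 16       /* stand-in for BLOCKS_PER_MBLOCK */

    static Block *alloc_chain (Block *tail, size_t blocks)
    {
        Block *bd = NULL;
        size_t i, n;

        while (blocks > 0) {
            n = blocks < GROUP_SIZE ? blocks : GROUP_SIZE;
            blocks -= n;

            bd = calloc(n, sizeof(Block));   /* stands in for allocGroup(n) */
            for (i = 0; i < n; i++) {
                bd[i].back = (i > 0) ? &bd[i-1] : NULL;
                if (i+1 < n) {
                    bd[i].link = &bd[i+1];
                } else {
                    bd[i].link = tail;       /* last block points at old head */
                    if (tail != NULL) tail->back = &bd[i];
                }
            }
            tail = &bd[0];                   /* new group becomes the head */
        }
        return &bd[0];
    }

    int main (void)
    {
        Block *head = alloc_chain(NULL, 40);
        size_t count = 0;
        for (Block *b = head; b != NULL; b = b->link) {
            assert(b->link == NULL || b->link->back == b);  /* links agree */
            count++;
        }
        assert(count == 40);
        return 0;
    }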
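
Several hunks replace block-based allocation accounting (n_new_large_blocks) with word-based accounting (n_new_large_words): allocate() now adds the exact request size n, and a pinned block is charged for its used portion only when it is retired. The point is precision, since counting whole blocks charges every large object for its rounding slop. A small illustration, with an invented BLOCK_SIZE_W:

    #include <stdio.h>

    #define BLOCK_SIZE_W 1024   /* words per block (illustrative value) */

    int main (void)
    {
        long n = BLOCK_SIZE_W + 1;                               /* one large object */
        long req_blocks = (n + BLOCK_SIZE_W - 1) / BLOCK_SIZE_W; /* = 2 blocks */
        long old_stat = req_blocks * BLOCK_SIZE_W;               /* blocks -> words */
        long new_stat = n;                                       /* exact word count */
        printf("old: %ld words, new: %ld words (slop %ld)\n",
               old_stat, new_stat, old_stat - new_stat);
        return 0;
    }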
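
The allocatePinned comment is worth restating as code: the capability's pinned block survives GCs (shielded by BF_EVACUATED) until it is actually full, and only at retirement is it moved to the large_objects list and its used words added to the allocation count. A simplified model, with invented names (PinnedBlock, Cap, retire, WORDS_PER_BLOCK) in place of bdescr, the capability struct, and the real lists; like the RTS bump path, it assumes n_words fits in one block and omits error handling:

    #include <stddef.h>
    #include <stdlib.h>

    #define WORDS_PER_BLOCK 1024

    typedef struct {
        size_t *start, *free;       /* bump region, like bd->start / bd->free */
    } PinnedBlock;

    typedef struct {
        PinnedBlock *pinned;        /* like cap->pinned_object_block */
        size_t n_new_large_words;   /* words accounted at retirement */
    } Cap;

    /* Retire a full block: only now do its used words count as allocated,
     * mirroring the deferred accounting in the hunk above. */
    static void retire (Cap *cap, PinnedBlock *b)
    {
        cap->n_new_large_words += (size_t)(b->free - b->start);
        /* real code: dbl_link_onto(bd, &g0->large_objects); g0->n_large_blocks++; */
    }

    static size_t *alloc_pinned (Cap *cap, size_t n_words)
    {
        PinnedBlock *b = cap->pinned;
        if (b == NULL || b->free + n_words > b->start + WORDS_PER_BLOCK) {
            if (b != NULL) retire(cap, b);
            b = malloc(sizeof *b);
            b->start = b->free = malloc(WORDS_PER_BLOCK * sizeof(size_t));
            /* real code also sets BF_PINNED | BF_LARGE | BF_EVACUATED here,
             * so the GC skips the block while it is still being filled */
            cap->pinned = b;
        }
        size_t *p = b->free;        /* bump-allocate within the block */
        b->free += n_words;
        return p;
    }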
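
Finally, the rewritten dirty_MUT_VAR, setTSOLink, dirty_TSO, and dirty_STACK all share one pattern: test a dirty flag, set it, and hand the closure to recordClosureMutated, rather than consulting Bdescr() and generation numbers at every write. A minimal sketch of that barrier, with MutBuf and record_mutated as invented stand-ins for the capability's mutated-closure list (the sketch ignores realloc failure):

    #include <stdlib.h>

    typedef struct Obj_ { int dirty; } Obj;

    typedef struct {
        Obj  **buf;        /* per-capability list of mutated objects */
        size_t len, cap;
    } MutBuf;

    static void record_mutated (MutBuf *mb, Obj *o)
    {
        if (mb->len == mb->cap) {
            mb->cap = mb->cap ? mb->cap * 2 : 64;
            mb->buf = realloc(mb->buf, mb->cap * sizeof *mb->buf);
        }
        mb->buf[mb->len++] = o;
    }

    /* The write barrier: the dirty flag guarantees each object is recorded
     * at most once per GC cycle, so the buffer grows with the number of
     * distinct mutated objects, not with the number of writes. */
    static void dirty_obj (MutBuf *mb, Obj *o)
    {
        if (o->dirty == 0) {
            o->dirty = 1;          /* cleared again by the GC */
            record_mutated(mb, o);
        }
    }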