From db0c13a482893243cc829bbc253862e65f437cbe Mon Sep 17 00:00:00 2001 From: Simon Marlow Date: Tue, 21 Dec 2010 15:29:56 +0000 Subject: [PATCH] Count allocations more accurately The allocation stats (+RTS -s etc.) used to count the slop at the end of each nursery block (except the last) as allocated space; now we count the allocated words accurately. This should make allocation figures more predictable, too. This has the side effect of reducing the apparent allocations by a small amount (~1%), so remember to take this into account when looking at nofib results. --- includes/Cmm.h | 2 +- includes/mkDerivedConstants.c | 2 +- includes/rts/storage/GC.h | 5 ++-- includes/stg/MiscClosures.h | 2 +- rts/Linker.c | 2 +- rts/sm/GC.c | 20 ++++++------- rts/sm/Storage.c | 63 +++++++++++++++++++++-------------------- rts/sm/Storage.h | 5 ++-- 8 files changed, 52 insertions(+), 49 deletions(-) diff --git a/includes/Cmm.h b/includes/Cmm.h index 6abe760..0ba14fb 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -383,7 +383,7 @@ // allocate() - this includes many of the primops. 
#define MAYBE_GC(liveness,reentry) \ if (bdescr_link(CurrentNursery) == NULL || \ - generation_n_new_large_blocks(W_[g0]) >= CInt[alloc_blocks_lim]) { \ + generation_n_new_large_words(W_[g0]) >= CLong[large_alloc_lim]) { \ R9 = liveness; \ R10 = reentry; \ HpAlloc = 0; \ diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c index 0ed7ec6..d00e428 100644 --- a/includes/mkDerivedConstants.c +++ b/includes/mkDerivedConstants.c @@ -246,7 +246,7 @@ main(int argc, char *argv[]) struct_size(generation); struct_field(generation, mut_list); - struct_field(generation, n_new_large_blocks); + struct_field(generation, n_new_large_words); struct_size(CostCentreStack); struct_field(CostCentreStack, ccsID); diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index 9616d72..5eadd2d 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -67,7 +67,8 @@ typedef struct generation_ { bdescr * large_objects; // large objects (doubly linked) unsigned int n_large_blocks; // no. 
of blocks used by large objs - unsigned int n_new_large_blocks; // count freshly allocated large objects + unsigned long n_new_large_words; // words of new large objects + // (for allocation stats) unsigned int max_blocks; // max blocks bdescr *mut_list; // mut objects in this gen (not G0) @@ -154,7 +155,7 @@ void * allocateExec(unsigned int len, void **exec_addr); void freeExec (void *p); // Used by GC checks in external .cmm code: -extern nat alloc_blocks_lim; +extern nat large_alloc_lim; /* ----------------------------------------------------------------------------- Performing Garbage Collection diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index c52a3c9..e6cfc47 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -474,7 +474,7 @@ extern StgWord stg_stack_save_entries[]; // Storage.c extern unsigned int RTS_VAR(g0); -extern unsigned int RTS_VAR(alloc_blocks_lim); +extern unsigned int RTS_VAR(large_alloc_lim); extern StgWord RTS_VAR(weak_ptr_list); extern StgWord RTS_VAR(atomic_modify_mutvar_mutex); diff --git a/rts/Linker.c b/rts/Linker.c index 0bbd869..d8ec9f0 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -979,7 +979,7 @@ typedef struct _RtsSymbolVal { SymI_HasProto(stg_yieldzh) \ SymI_NeedsProto(stg_interp_constr_entry) \ SymI_HasProto(stg_arg_bitmaps) \ - SymI_HasProto(alloc_blocks_lim) \ + SymI_HasProto(large_alloc_lim) \ SymI_HasProto(g0) \ SymI_HasProto(allocate) \ SymI_HasProto(allocateExec) \ diff --git a/rts/sm/GC.c b/rts/sm/GC.c index ec5f700..7c5f99e 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -221,7 +221,7 @@ GarbageCollect (rtsBool force_major_gc, /* Approximate how much we allocated. * Todo: only when generating stats? 
*/ - allocated = calcAllocated(); + allocated = calcAllocated(rtsFalse/* don't count the nursery yet */); /* Figure out which generation to collect */ @@ -648,7 +648,7 @@ SET_GCT(gc_threads[0]); freeChain(gen->large_objects); gen->large_objects = gen->scavenged_large_objects; gen->n_large_blocks = gen->n_scavenged_large_blocks; - gen->n_new_large_blocks = 0; + gen->n_new_large_words = 0; ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks); } else // for generations > N @@ -674,10 +674,6 @@ SET_GCT(gc_threads[0]); // Calculate the amount of live data for stats. live = calcLiveWords(); - // Free the small objects allocated via allocate(), since this will - // all have been copied into G0S1 now. - alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; - // Start a new pinned_object_block for (n = 0; n < n_capabilities; n++) { capabilities[n].pinned_object_block = NULL; @@ -699,9 +695,14 @@ SET_GCT(gc_threads[0]); } } + // Reset the nursery: make the blocks empty + allocated += clearNurseries(); + resize_nursery(); - // mark the garbage collected CAFs as dead + resetNurseries(); + + // mark the garbage collected CAFs as dead #if 0 && defined(DEBUG) // doesn't work at the moment if (major_gc) { gcCAFs(); } #endif @@ -724,10 +725,7 @@ SET_GCT(gc_threads[0]); } } - // Reset the nursery - resetNurseries(); - - // send exceptions to any threads which were about to die + // send exceptions to any threads which were about to die RELEASE_SM_LOCK; resurrectThreads(resurrected_threads); ACQUIRE_SM_LOCK; diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 4247d28..b16d81a 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -40,8 +40,8 @@ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; rtsBool keepCAFs; -nat alloc_blocks_lim; /* GC if n_large_blocks in any nursery - * reaches this. */ +nat large_alloc_lim; /* GC if n_new_large_words in g0 + * reaches this. 
*/ bdescr *exec_block; @@ -77,7 +77,7 @@ initGeneration (generation *gen, int g) gen->n_old_blocks = 0; gen->large_objects = NULL; gen->n_large_blocks = 0; - gen->n_new_large_blocks = 0; + gen->n_new_large_words = 0; gen->mut_list = allocBlock(); gen->scavenged_large_objects = NULL; gen->n_scavenged_large_blocks = 0; @@ -181,7 +181,7 @@ initStorage( void ) revertible_caf_list = END_OF_STATIC_LIST; /* initialise the allocate() interface */ - alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; + large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W; exec_block = NULL; @@ -209,7 +209,7 @@ initStorage( void ) void exitStorage (void) { - stat_exit(calcAllocated()); + stat_exit(calcAllocated(rtsTrue)); } void @@ -401,21 +401,31 @@ allocNurseries( void ) assignNurseriesToCapabilities(); } -void -resetNurseries( void ) +lnat // words allocated +clearNurseries (void) { + lnat allocated = 0; nat i; bdescr *bd; for (i = 0; i < n_capabilities; i++) { for (bd = nurseries[i].blocks; bd; bd = bd->link) { - bd->free = bd->start; + allocated += (lnat)(bd->free - bd->start); + bd->free = bd->start; ASSERT(bd->gen_no == 0); ASSERT(bd->gen == g0); IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE)); } } + + return allocated; +} + +void +resetNurseries (void) +{ assignNurseriesToCapabilities(); + } lnat @@ -549,7 +559,7 @@ allocate (Capability *cap, lnat n) bd = allocGroup(req_blocks); dbl_link_onto(bd, &g0->large_objects); g0->n_large_blocks += bd->blocks; // might be larger than req_blocks - g0->n_new_large_blocks += bd->blocks; + g0->n_new_large_words += n; RELEASE_SM_LOCK; initBdescr(bd, g0, g0); bd->flags = BF_LARGE; @@ -651,13 +661,13 @@ allocatePinned (Capability *cap, lnat n) cap->pinned_object_block = bd = allocBlock(); dbl_link_onto(bd, &g0->large_objects); g0->n_large_blocks++; - g0->n_new_large_blocks++; RELEASE_SM_LOCK; initBdescr(bd, g0, g0); bd->flags = BF_PINNED | BF_LARGE; bd->free = bd->start; } + g0->n_new_large_words += n; p = bd->free; bd->free += 
n; return p; @@ -754,33 +764,26 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p) * -------------------------------------------------------------------------- */ lnat -calcAllocated( void ) +calcAllocated (rtsBool include_nurseries) { - nat allocated; + nat allocated = 0; bdescr *bd; nat i; - allocated = countNurseryBlocks() * BLOCK_SIZE_W; - - for (i = 0; i < n_capabilities; i++) { - Capability *cap; - for ( bd = capabilities[i].r.rCurrentNursery->link; - bd != NULL; bd = bd->link ) { - allocated -= BLOCK_SIZE_W; - } - cap = &capabilities[i]; - if (cap->r.rCurrentNursery->free < - cap->r.rCurrentNursery->start + BLOCK_SIZE_W) { - allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W) - - cap->r.rCurrentNursery->free; - } - if (cap->pinned_object_block != NULL) { - allocated -= (cap->pinned_object_block->start + BLOCK_SIZE_W) - - cap->pinned_object_block->free; + // When called from GC.c, we already have the allocation count for + // the nursery from clearNurseries(), so we don't need to walk + // through these block lists again. 
+ if (include_nurseries) + { + for (i = 0; i < n_capabilities; i++) { + for (bd = nurseries[i].blocks; bd; bd = bd->link) { + allocated += (lnat)(bd->free - bd->start); + } } } - allocated += g0->n_new_large_blocks * BLOCK_SIZE_W; + // add in sizes of new large and pinned objects + allocated += g0->n_new_large_words; return allocated; } diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index 3ff3380..fdb6a46 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -29,7 +29,7 @@ INLINE_HEADER rtsBool doYouWantToGC( Capability *cap ) { return (cap->r.rCurrentNursery->link == NULL || - g0->n_large_blocks >= alloc_blocks_lim); + g0->n_new_large_words >= large_alloc_lim); } /* for splitting blocks groups in two */ @@ -124,6 +124,7 @@ void dirty_MVAR(StgRegTable *reg, StgClosure *p); extern nursery *nurseries; void resetNurseries ( void ); +lnat clearNurseries ( void ); void resizeNurseries ( nat blocks ); void resizeNurseriesFixed ( nat blocks ); lnat countNurseryBlocks ( void ); @@ -132,7 +133,7 @@ lnat countNurseryBlocks ( void ); Stats 'n' DEBUG stuff -------------------------------------------------------------------------- */ -lnat calcAllocated (void); +lnat calcAllocated (rtsBool count_nurseries); lnat calcLiveBlocks (void); lnat calcLiveWords (void); lnat countOccupied (bdescr *bd); -- 1.7.10.4