X-Git-Url: http://git.megacz.com/?p=ghc-hetmet.git;a=blobdiff_plain;f=rts%2Fsm%2FStorage.c;h=f8a9e559bf6ea86e6dae3d5692e1eaced21c0cbe;hp=97615e9d1b5b0df06c9c4179b1a4081238b21413;hb=6cec61d14a324285dbb8ce73d4c7215f1f8d6766;hpb=a2a67cd520b9841114d69a87a423dabcb3b4368e diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 97615e9..f8a9e55 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -15,6 +15,7 @@ #include "Rts.h" #include "Storage.h" +#include "GCThread.h" #include "RtsUtils.h" #include "Stats.h" #include "BlockAlloc.h" @@ -40,24 +41,16 @@ StgClosure *caf_list = NULL; StgClosure *revertible_caf_list = NULL; rtsBool keepCAFs; -bdescr *pinned_object_block; /* allocate pinned objects into this block */ -nat alloc_blocks; /* number of allocate()d blocks since GC */ -nat alloc_blocks_lim; /* approximate limit on alloc_blocks */ +nat large_alloc_lim; /* GC if n_large_blocks in any nursery + * reaches this. */ -static bdescr *exec_block; +bdescr *exec_block; generation *generations = NULL; /* all the generations */ generation *g0 = NULL; /* generation 0, for convenience */ generation *oldest_gen = NULL; /* oldest generation, for convenience */ -step *g0s0 = NULL; /* generation 0, step 0, for convenience */ -nat total_steps = 0; -step *all_steps = NULL; /* single array of steps */ - -ullong total_allocated = 0; /* total memory allocated during run */ - -nat n_nurseries = 0; /* == RtsFlags.ParFlags.nNodes, convenience */ -step *nurseries = NULL; /* array of nurseries, >1 only if THREADED_RTS */ +nursery *nurseries = NULL; /* array of nurseries, size == n_capabilities */ #ifdef THREADED_RTS /* @@ -70,37 +63,38 @@ Mutex sm_mutex; static void allocNurseries ( void ); static void -initStep (step *stp, int g, int s) +initGeneration (generation *gen, int g) { - stp->no = s; - stp->abs_no = RtsFlags.GcFlags.steps * g + s; - stp->blocks = NULL; - stp->n_blocks = 0; - stp->n_words = 0; - stp->live_estimate = 0; - stp->old_blocks = NULL; - stp->n_old_blocks = 0; - stp->gen = &generations[g]; - stp->gen_no = g; - stp->large_objects = NULL; - stp->n_large_blocks = 0; - stp->scavenged_large_objects = NULL; - stp->n_scavenged_large_blocks = 0; - stp->mark = 0; - stp->compact = 0; - stp->bitmap = NULL; + gen->no = g; + gen->collections = 0; + gen->par_collections = 0; + gen->failed_promotions = 0; + gen->max_blocks = 0; + gen->blocks = NULL; + gen->n_blocks = 0; + gen->n_words = 0; + gen->live_estimate = 0; + gen->old_blocks = NULL; + gen->n_old_blocks = 0; + gen->large_objects = NULL; + gen->n_large_blocks = 0; + gen->n_new_large_words = 0; + gen->scavenged_large_objects = NULL; + gen->n_scavenged_large_blocks = 0; + gen->mark = 0; + gen->compact = 0; + gen->bitmap = NULL; #ifdef THREADED_RTS - initSpinLock(&stp->sync_large_objects); + initSpinLock(&gen->sync); #endif - stp->threads = END_TSO_QUEUE; - stp->old_threads = END_TSO_QUEUE; + gen->threads = END_TSO_QUEUE; + gen->old_threads = END_TSO_QUEUE; } void initStorage( void ) { - nat g, s; - generation *gen; + nat g, n; if (generations != NULL) { // multi-init protection @@ -113,7 +107,7 @@ initStorage( void ) * doing something reasonable. 
*/ /* We use the NOT_NULL variant or gcc warns that the test is always true */ - ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLACKHOLE_info)); + ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info)); ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure)); ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure)); @@ -143,95 +137,36 @@ initStorage( void ) * sizeof(struct generation_), "initStorage: gens"); - /* allocate all the steps into an array. It is important that we do - it this way, because we need the invariant that two step pointers - can be directly compared to see which is the oldest. - Remember that the last generation has only one step. */ - total_steps = 1 + (RtsFlags.GcFlags.generations - 1) * RtsFlags.GcFlags.steps; - all_steps = stgMallocBytes(total_steps * sizeof(struct step_), - "initStorage: steps"); - /* Initialise all generations */ for(g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen = &generations[g]; - gen->no = g; - gen->mut_list = allocBlock(); - gen->collections = 0; - gen->par_collections = 0; - gen->failed_promotions = 0; - gen->max_blocks = 0; + initGeneration(&generations[g], g); } /* A couple of convenience pointers */ g0 = &generations[0]; oldest_gen = &generations[RtsFlags.GcFlags.generations-1]; - /* Allocate step structures in each generation */ - if (RtsFlags.GcFlags.generations > 1) { - /* Only for multiple-generations */ - - /* Oldest generation: one step */ - oldest_gen->n_steps = 1; - oldest_gen->steps = all_steps + (RtsFlags.GcFlags.generations - 1) - * RtsFlags.GcFlags.steps; - - /* set up all except the oldest generation with 2 steps */ - for(g = 0; g < RtsFlags.GcFlags.generations-1; g++) { - generations[g].n_steps = RtsFlags.GcFlags.steps; - generations[g].steps = all_steps + g * RtsFlags.GcFlags.steps; - } - - } else { - /* single generation, i.e. a two-space collector */ - g0->n_steps = 1; - g0->steps = all_steps; - } - -#ifdef THREADED_RTS - n_nurseries = n_capabilities; -#else - n_nurseries = 1; -#endif - nurseries = stgMallocBytes (n_nurseries * sizeof(struct step_), - "initStorage: nurseries"); - - /* Initialise all steps */ - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - initStep(&generations[g].steps[s], g, s); - } - } - - for (s = 0; s < n_nurseries; s++) { - initStep(&nurseries[s], 0, s); - } + nurseries = stgMallocBytes(n_capabilities * sizeof(struct nursery_), + "initStorage: nurseries"); /* Set up the destination pointers in each younger gen. step */ for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) { - for (s = 0; s < generations[g].n_steps-1; s++) { - generations[g].steps[s].to = &generations[g].steps[s+1]; - } - generations[g].steps[s].to = &generations[g+1].steps[0]; - } - oldest_gen->steps[0].to = &oldest_gen->steps[0]; - - for (s = 0; s < n_nurseries; s++) { - nurseries[s].to = generations[0].steps[0].to; + generations[g].to = &generations[g+1]; } + oldest_gen->to = oldest_gen; /* The oldest generation has one step. */ if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) { if (RtsFlags.GcFlags.generations == 1) { errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled"); } else { - oldest_gen->steps[0].mark = 1; + oldest_gen->mark = 1; if (RtsFlags.GcFlags.compact) - oldest_gen->steps[0].compact = 1; + oldest_gen->compact = 1; } } generations[0].max_blocks = 0; - g0s0 = &generations[0].steps[0]; /* The allocation area. 
Policy: keep the allocation area * small to begin with, even if we have a large suggested heap @@ -242,12 +177,11 @@ initStorage( void ) allocNurseries(); weak_ptr_list = NULL; - caf_list = NULL; - revertible_caf_list = NULL; + caf_list = END_OF_STATIC_LIST; + revertible_caf_list = END_OF_STATIC_LIST; /* initialise the allocate() interface */ - alloc_blocks = 0; - alloc_blocks_lim = RtsFlags.GcFlags.minAllocAreaSize; + large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W; exec_block = NULL; @@ -258,6 +192,13 @@ initStorage( void ) N = 0; + // allocate a block for each mut list + for (n = 0; n < n_capabilities; n++) { + for (g = 1; g < RtsFlags.GcFlags.generations; g++) { + capabilities[n].mut_lists[g] = allocBlock(); + } + } + initGcThreads(); IF_DEBUG(gc, statDescribeGens()); @@ -268,15 +209,14 @@ initStorage( void ) void exitStorage (void) { - stat_exit(calcAllocated()); + stat_exit(calcAllocated(rtsTrue)); } void -freeStorage (void) +freeStorage (rtsBool free_heap) { - stgFree(g0s0); // frees all the steps stgFree(generations); - freeAllMBlocks(); + if (free_heap) freeAllMBlocks(); #if defined(THREADED_RTS) closeMutex(&sm_mutex); #endif @@ -289,20 +229,19 @@ freeStorage (void) The entry code for every CAF does the following: - - builds a CAF_BLACKHOLE in the heap - - pushes an update frame pointing to the CAF_BLACKHOLE - - invokes UPD_CAF(), which: - - calls newCaf, below - - updates the CAF with a static indirection to the CAF_BLACKHOLE + - builds a BLACKHOLE in the heap + - pushes an update frame pointing to the BLACKHOLE + - calls newCaf, below + - updates the CAF with a static indirection to the BLACKHOLE - Why do we build a BLACKHOLE in the heap rather than just updating + Why do we build an BLACKHOLE in the heap rather than just updating the thunk directly? It's so that we only need one kind of update frame - otherwise we'd need a static version of the update frame too. newCaf() does the following: - - it puts the CAF on the oldest generation's mut-once list. - This is so that we can treat the CAF as a root when collecting + - it puts the CAF on the oldest generation's mutable list. + This is so that we treat the CAF as a root when collecting younger generations. For GHCI, we have additional requirements when dealing with CAFs: @@ -326,10 +265,8 @@ freeStorage (void) -------------------------------------------------------------------------- */ void -newCAF(StgClosure* caf) +newCAF(StgRegTable *reg, StgClosure* caf) { - ACQUIRE_SM_LOCK; - if(keepCAFs) { // HACK: @@ -343,23 +280,27 @@ newCAF(StgClosure* caf) // do another hack here and do an address range test on caf to figure // out whether it is from a dynamic library. ((StgIndStatic *)caf)->saved_info = (StgInfoTable *)caf->header.info; + + ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex ((StgIndStatic *)caf)->static_link = caf_list; caf_list = caf; + RELEASE_SM_LOCK; } else { - /* Put this CAF on the mutable list for the old generation. - * This is a HACK - the IND_STATIC closure doesn't really have - * a mut_link field, but we pretend it has - in fact we re-use - * the STATIC_LINK field for the time being, because when we - * come to do a major GC we won't need the mut_link field - * any more and can use it as a STATIC_LINK. - */ + // Put this CAF on the mutable list for the old generation. 
((StgIndStatic *)caf)->saved_info = NULL; - recordMutableGen(caf, oldest_gen->no); + if (oldest_gen->no != 0) { + recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no); + } } - - RELEASE_SM_LOCK; +} + +// External API for setting the keepCAFs flag. see #3900. +void +setKeepCAFs (void) +{ + keepCAFs = 1; } // An alternate version of newCaf which is used for dynamically loaded @@ -372,7 +313,7 @@ newCAF(StgClosure* caf) // The linker hackily arranges that references to newCaf from dynamic // code end up pointing to newDynCAF. void -newDynCAF(StgClosure *caf) +newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf) { ACQUIRE_SM_LOCK; @@ -388,53 +329,62 @@ newDynCAF(StgClosure *caf) -------------------------------------------------------------------------- */ static bdescr * -allocNursery (step *stp, bdescr *tail, nat blocks) +allocNursery (bdescr *tail, nat blocks) { - bdescr *bd; - nat i; + bdescr *bd = NULL; + nat i, n; + + // We allocate the nursery as a single contiguous block and then + // divide it into single blocks manually. This way we guarantee + // that the nursery blocks are adjacent, so that the processor's + // automatic prefetching works across nursery blocks. This is a + // tiny optimisation (~0.5%), but it's free. + + while (blocks > 0) { + n = stg_min(blocks, BLOCKS_PER_MBLOCK); + blocks -= n; + + bd = allocGroup(n); + for (i = 0; i < n; i++) { + initBdescr(&bd[i], g0, g0); + + bd[i].blocks = 1; + bd[i].flags = 0; + + if (i > 0) { + bd[i].u.back = &bd[i-1]; + } else { + bd[i].u.back = NULL; + } - // Allocate a nursery: we allocate fresh blocks one at a time and - // cons them on to the front of the list, not forgetting to update - // the back pointer on the tail of the list to point to the new block. - for (i=0; i < blocks; i++) { - // @LDV profiling - /* - processNursery() in LdvProfile.c assumes that every block group in - the nursery contains only a single block. So, if a block group is - given multiple blocks, change processNursery() accordingly. 
- */ - bd = allocBlock(); - bd->link = tail; - // double-link the nursery: we might need to insert blocks - if (tail != NULL) { - tail->u.back = bd; - } - bd->step = stp; - bd->gen_no = 0; - bd->flags = 0; - bd->free = bd->start; - tail = bd; + if (i+1 < n) { + bd[i].link = &bd[i+1]; + } else { + bd[i].link = tail; + if (tail != NULL) { + tail->u.back = &bd[i]; + } + } + + bd[i].free = bd[i].start; + } + + tail = &bd[0]; } - tail->u.back = NULL; - return tail; + + return &bd[0]; } static void assignNurseriesToCapabilities (void) { -#ifdef THREADED_RTS nat i; - for (i = 0; i < n_nurseries; i++) { + for (i = 0; i < n_capabilities; i++) { capabilities[i].r.rNursery = &nurseries[i]; capabilities[i].r.rCurrentNursery = nurseries[i].blocks; capabilities[i].r.rCurrentAlloc = NULL; } -#else /* THREADED_RTS */ - MainCapability.r.rNursery = &nurseries[0]; - MainCapability.r.rCurrentNursery = nurseries[0].blocks; - MainCapability.r.rCurrentAlloc = NULL; -#endif } static void @@ -442,34 +392,40 @@ allocNurseries( void ) { nat i; - for (i = 0; i < n_nurseries; i++) { + for (i = 0; i < n_capabilities; i++) { nurseries[i].blocks = - allocNursery(&nurseries[i], NULL, - RtsFlags.GcFlags.minAllocAreaSize); - nurseries[i].n_blocks = RtsFlags.GcFlags.minAllocAreaSize; - nurseries[i].old_blocks = NULL; - nurseries[i].n_old_blocks = 0; + allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize); + nurseries[i].n_blocks = + RtsFlags.GcFlags.minAllocAreaSize; } assignNurseriesToCapabilities(); } -void -resetNurseries( void ) +lnat // words allocated +clearNurseries (void) { + lnat allocated = 0; nat i; bdescr *bd; - step *stp; - for (i = 0; i < n_nurseries; i++) { - stp = &nurseries[i]; - for (bd = stp->blocks; bd; bd = bd->link) { - bd->free = bd->start; + for (i = 0; i < n_capabilities; i++) { + for (bd = nurseries[i].blocks; bd; bd = bd->link) { + allocated += (lnat)(bd->free - bd->start); + bd->free = bd->start; ASSERT(bd->gen_no == 0); - ASSERT(bd->step == stp); + ASSERT(bd->gen == g0); IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE)); } } + + return allocated; +} + +void +resetNurseries (void) +{ assignNurseriesToCapabilities(); + } lnat @@ -478,25 +434,25 @@ countNurseryBlocks (void) nat i; lnat blocks = 0; - for (i = 0; i < n_nurseries; i++) { + for (i = 0; i < n_capabilities; i++) { blocks += nurseries[i].n_blocks; } return blocks; } static void -resizeNursery ( step *stp, nat blocks ) +resizeNursery ( nursery *nursery, nat blocks ) { bdescr *bd; nat nursery_blocks; - nursery_blocks = stp->n_blocks; + nursery_blocks = nursery->n_blocks; if (nursery_blocks == blocks) return; if (nursery_blocks < blocks) { debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks", blocks); - stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks); + nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks); } else { bdescr *next_bd; @@ -504,7 +460,7 @@ resizeNursery ( step *stp, nat blocks ) debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks", blocks); - bd = stp->blocks; + bd = nursery->blocks; while (nursery_blocks > blocks) { next_bd = bd->link; next_bd->u.back = NULL; @@ -512,16 +468,16 @@ resizeNursery ( step *stp, nat blocks ) freeGroup(bd); bd = next_bd; } - stp->blocks = bd; + nursery->blocks = bd; // might have gone just under, by freeing a large block, so make // up the difference. 
if (nursery_blocks < blocks) { - stp->blocks = allocNursery(stp, stp->blocks, blocks-nursery_blocks); + nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks); } } - stp->n_blocks = blocks; - ASSERT(countBlocks(stp->blocks) == stp->n_blocks); + nursery->n_blocks = blocks; + ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks); } // @@ -531,7 +487,7 @@ void resizeNurseriesFixed (nat blocks) { nat i; - for (i = 0; i < n_nurseries; i++) { + for (i = 0; i < n_capabilities; i++) { resizeNursery(&nurseries[i], blocks); } } @@ -544,17 +500,17 @@ resizeNurseries (nat blocks) { // If there are multiple nurseries, then we just divide the number // of available blocks between them. - resizeNurseriesFixed(blocks / n_nurseries); + resizeNurseriesFixed(blocks / n_capabilities); } /* ----------------------------------------------------------------------------- - move_TSO is called to update the TSO structure after it has been + move_STACK is called to update the TSO structure after it has been moved from one place to another. -------------------------------------------------------------------------- */ void -move_TSO (StgTSO *src, StgTSO *dest) +move_STACK (StgStack *src, StgStack *dest) { ptrdiff_t diff; @@ -564,32 +520,25 @@ move_TSO (StgTSO *src, StgTSO *dest) } /* ----------------------------------------------------------------------------- - The allocate() interface + allocate() - allocateInGen() function allocates memory directly into a specific - generation. It always succeeds, and returns a chunk of memory n - words long. n can be larger than the size of a block if necessary, - in which case a contiguous block group will be allocated. + This allocates memory in the current thread - it is intended for + use primarily from STG-land where we have a Capability. It is + better than allocate() because it doesn't require taking the + sm_mutex lock in the common case. - allocate(n) is equivalent to allocateInGen(g0). + Memory is allocated directly from the nursery if possible (but not + from the current nursery block, so as not to interfere with + Hp/HpLim). 
-------------------------------------------------------------------------- */ StgPtr -allocateInGen (generation *g, lnat n) +allocate (Capability *cap, lnat n) { - step *stp; bdescr *bd; - StgPtr ret; - - ACQUIRE_SM_LOCK; - - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - - stp = &g->steps[0]; + StgPtr p; - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) - { + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; // Attempting to allocate an object larger than maxHeapSize @@ -606,109 +555,16 @@ allocateInGen (generation *g, lnat n) stg_exit(EXIT_HEAPOVERFLOW); } + ACQUIRE_SM_LOCK bd = allocGroup(req_blocks); - dbl_link_onto(bd, &stp->large_objects); - stp->n_large_blocks += bd->blocks; // might be larger than req_blocks - alloc_blocks += bd->blocks; - bd->gen_no = g->no; - bd->step = stp; + dbl_link_onto(bd, &g0->large_objects); + g0->n_large_blocks += bd->blocks; // might be larger than req_blocks + g0->n_new_large_words += n; + RELEASE_SM_LOCK; + initBdescr(bd, g0, g0); bd->flags = BF_LARGE; bd->free = bd->start + n; - ret = bd->start; - } - else - { - // small allocation (blocks; - if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { - bd = allocBlock(); - bd->gen_no = g->no; - bd->step = stp; - bd->flags = 0; - bd->link = stp->blocks; - stp->blocks = bd; - stp->n_blocks++; - alloc_blocks++; - } - ret = bd->free; - bd->free += n; - } - - RELEASE_SM_LOCK; - - return ret; -} - -StgPtr -allocate (lnat n) -{ - return allocateInGen(g0,n); -} - -lnat -allocatedBytes( void ) -{ - lnat allocated; - - allocated = alloc_blocks * BLOCK_SIZE_W; - if (pinned_object_block != NULL) { - allocated -= (pinned_object_block->start + BLOCK_SIZE_W) - - pinned_object_block->free; - } - - return allocated; -} - -// split N blocks off the front of the given bdescr, returning the -// new block group. We treat the remainder as if it -// had been freshly allocated in generation 0. -bdescr * -splitLargeBlock (bdescr *bd, nat blocks) -{ - bdescr *new_bd; - - // subtract the original number of blocks from the counter first - bd->step->n_large_blocks -= bd->blocks; - - new_bd = splitBlockGroup (bd, blocks); - - dbl_link_onto(new_bd, &g0s0->large_objects); - g0s0->n_large_blocks += new_bd->blocks; - new_bd->gen_no = g0s0->no; - new_bd->step = g0s0; - new_bd->flags = BF_LARGE; - new_bd->free = bd->free; - ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W); - - // add the new number of blocks to the counter. Due to the gaps - // for block descriptor, new_bd->blocks + bd->blocks might not be - // equal to the original bd->blocks, which is why we do it this way. - bd->step->n_large_blocks += bd->blocks; - - return new_bd; -} - -/* ----------------------------------------------------------------------------- - allocateLocal() - - This allocates memory in the current thread - it is intended for - use primarily from STG-land where we have a Capability. It is - better than allocate() because it doesn't require taking the - sm_mutex lock in the common case. - - Memory is allocated directly from the nursery if possible (but not - from the current nursery block, so as not to interfere with - Hp/HpLim). 
- -------------------------------------------------------------------------- */ - -StgPtr -allocateLocal (Capability *cap, lnat n) -{ - bdescr *bd; - StgPtr p; - - if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - return allocateInGen(g0,n); + return bd->start; } /* small allocation (r.rNursery->n_blocks++; RELEASE_SM_LOCK; - bd->gen_no = 0; - bd->step = cap->r.rNursery; + initBdescr(bd, g0, g0); bd->flags = 0; - // NO: alloc_blocks++; - // calcAllocated() uses the size of the nursery, and we've - // already bumpted nursery->n_blocks above. We'll GC - // pretty quickly now anyway, because MAYBE_GC() will + // If we had to allocate a new block, then we'll GC + // pretty quickly now, because MAYBE_GC() will // notice that CurrentNursery->link is NULL. } else { // we have a block in the nursery: take it and put @@ -754,6 +607,8 @@ allocateLocal (Capability *cap, lnat n) } p = bd->free; bd->free += n; + + IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa)); return p; } @@ -762,7 +617,7 @@ allocateLocal (Capability *cap, lnat n) We allocate small pinned objects into a single block, allocating a new block when the current one overflows. The block is chained - onto the large_object_list of generation 0 step 0. + onto the large_object_list of generation 0. NOTE: The GC can't in general handle pinned objects. This interface is only safe to use for ByteArrays, which have no @@ -781,40 +636,55 @@ allocateLocal (Capability *cap, lnat n) ------------------------------------------------------------------------- */ StgPtr -allocatePinned( lnat n ) +allocatePinned (Capability *cap, lnat n) { StgPtr p; - bdescr *bd = pinned_object_block; + bdescr *bd; // If the request is for a large object, then allocate() // will give us a pinned object anyway. if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { - p = allocate(n); + p = allocate(cap, n); Bdescr(p)->flags |= BF_PINNED; return p; } - ACQUIRE_SM_LOCK; - TICK_ALLOC_HEAP_NOCTR(n); CCS_ALLOC(CCCS,n); + bd = cap->pinned_object_block; + // If we don't have a block of pinned objects yet, or the current // one isn't large enough to hold the new object, allocate a new one. if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) { - pinned_object_block = bd = allocBlock(); - dbl_link_onto(bd, &g0s0->large_objects); - g0s0->n_large_blocks++; - bd->gen_no = 0; - bd->step = g0s0; - bd->flags = BF_PINNED | BF_LARGE; + // The pinned_object_block remains attached to the capability + // until it is full, even if a GC occurs. We want this + // behaviour because otherwise the unallocated portion of the + // block would be forever slop, and under certain workloads + // (allocating a few ByteStrings per GC) we accumulate a lot + // of slop. + // + // So, the pinned_object_block is initially marked + // BF_EVACUATED so the GC won't touch it. When it is full, + // we place it on the large_objects list, and at the start of + // the next GC the BF_EVACUATED flag will be cleared, and the + // block will be promoted as usual (if anything in it is + // live). 
+ ACQUIRE_SM_LOCK; + if (bd != NULL) { + dbl_link_onto(bd, &g0->large_objects); + g0->n_large_blocks++; + g0->n_new_large_words += bd->free - bd->start; + } + cap->pinned_object_block = bd = allocBlock(); + RELEASE_SM_LOCK; + initBdescr(bd, g0, g0); + bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED; bd->free = bd->start; - alloc_blocks++; } p = bd->free; bd->free += n; - RELEASE_SM_LOCK; return p; } @@ -832,11 +702,9 @@ void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) { Capability *cap = regTableToCapability(reg); - bdescr *bd; if (p->header.info == &stg_MUT_VAR_CLEAN_info) { p->header.info = &stg_MUT_VAR_DIRTY_info; - bd = Bdescr((StgPtr)p); - if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no); + recordClosureMutated(cap,p); } } @@ -849,24 +717,39 @@ dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target) { - bdescr *bd; - if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) { - tso->flags |= TSO_LINK_DIRTY; - bd = Bdescr((StgPtr)tso); - if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no); + if (tso->dirty == 0) { + tso->dirty = 1; + recordClosureMutated(cap,(StgClosure*)tso); } tso->_link = target; } void +setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target) +{ + if (tso->dirty == 0) { + tso->dirty = 1; + recordClosureMutated(cap,(StgClosure*)tso); + } + tso->block_info.prev = target; +} + +void dirty_TSO (Capability *cap, StgTSO *tso) { - bdescr *bd; - if ((tso->flags & (TSO_DIRTY|TSO_LINK_DIRTY)) == 0) { - bd = Bdescr((StgPtr)tso); - if (bd->gen_no > 0) recordMutableCap((StgClosure*)tso,cap,bd->gen_no); + if (tso->dirty == 0) { + tso->dirty = 1; + recordClosureMutated(cap,(StgClosure*)tso); + } +} + +void +dirty_STACK (Capability *cap, StgStack *stack) +{ + if (stack->dirty == 0) { + stack->dirty = 1; + recordClosureMutated(cap,(StgClosure*)stack); } - tso->flags |= TSO_DIRTY; } /* @@ -880,10 +763,7 @@ dirty_TSO (Capability *cap, StgTSO *tso) void dirty_MVAR(StgRegTable *reg, StgClosure *p) { - Capability *cap = regTableToCapability(reg); - bdescr *bd; - bd = Bdescr((StgPtr)p); - if (bd->gen_no > 0) recordMutableCap(p,cap,bd->gen_no); + recordClosureMutated(regTableToCapability(reg),p); } /* ----------------------------------------------------------------------------- @@ -895,83 +775,32 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p) * * Approximate how much we've allocated: number of blocks in the * nursery + blocks allocated via allocate() - unused nusery blocks. - * This leaves a little slop at the end of each block, and doesn't - * take into account large objects (ToDo). + * This leaves a little slop at the end of each block. * -------------------------------------------------------------------------- */ lnat -calcAllocated( void ) +calcAllocated (rtsBool include_nurseries) { - nat allocated; - bdescr *bd; + nat allocated = 0; + nat i; - allocated = allocatedBytes(); - allocated += countNurseryBlocks() * BLOCK_SIZE_W; - + // When called from GC.c, we already have the allocation count for + // the nursery from resetNurseries(), so we don't need to walk + // through these block lists again. 
+ if (include_nurseries) { -#ifdef THREADED_RTS - nat i; - for (i = 0; i < n_nurseries; i++) { - Capability *cap; - for ( bd = capabilities[i].r.rCurrentNursery->link; - bd != NULL; bd = bd->link ) { - allocated -= BLOCK_SIZE_W; - } - cap = &capabilities[i]; - if (cap->r.rCurrentNursery->free < - cap->r.rCurrentNursery->start + BLOCK_SIZE_W) { - allocated -= (cap->r.rCurrentNursery->start + BLOCK_SIZE_W) - - cap->r.rCurrentNursery->free; + for (i = 0; i < n_capabilities; i++) { + allocated += countOccupied(nurseries[i].blocks); } } -#else - bdescr *current_nursery = MainCapability.r.rCurrentNursery; - for ( bd = current_nursery->link; bd != NULL; bd = bd->link ) { - allocated -= BLOCK_SIZE_W; - } - if (current_nursery->free < current_nursery->start + BLOCK_SIZE_W) { - allocated -= (current_nursery->start + BLOCK_SIZE_W) - - current_nursery->free; - } -#endif - } + // add in sizes of new large and pinned objects + allocated += g0->n_new_large_words; - total_allocated += allocated; return allocated; } -/* Approximate the amount of live data in the heap. To be called just - * after garbage collection (see GarbageCollect()). - */ -lnat -calcLiveBlocks(void) -{ - nat g, s; - lnat live = 0; - step *stp; - - if (RtsFlags.GcFlags.generations == 1) { - return g0s0->n_large_blocks + g0s0->n_blocks; - } - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - /* approximate amount of live data (doesn't take into account slop - * at end of each block). - */ - if (g == 0 && s == 0) { - continue; - } - stp = &generations[g].steps[s]; - live += stp->n_large_blocks + stp->n_blocks; - } - } - return live; -} - -lnat -countOccupied(bdescr *bd) +lnat countOccupied (bdescr *bd) { lnat words; @@ -983,26 +812,60 @@ countOccupied(bdescr *bd) return words; } +lnat genLiveWords (generation *gen) +{ + return gen->n_words + countOccupied(gen->large_objects); +} + +lnat genLiveBlocks (generation *gen) +{ + return gen->n_blocks + gen->n_large_blocks; +} + +lnat gcThreadLiveWords (nat i, nat g) +{ + lnat words; + + words = countOccupied(gc_threads[i]->gens[g].todo_bd); + words += countOccupied(gc_threads[i]->gens[g].part_list); + words += countOccupied(gc_threads[i]->gens[g].scavd_list); + + return words; +} + +lnat gcThreadLiveBlocks (nat i, nat g) +{ + lnat blocks; + + blocks = countBlocks(gc_threads[i]->gens[g].todo_bd); + blocks += gc_threads[i]->gens[g].n_part_blocks; + blocks += gc_threads[i]->gens[g].n_scavd_blocks; + + return blocks; +} + // Return an accurate count of the live data in the heap, excluding // generation 0. -lnat -calcLiveWords(void) +lnat calcLiveWords (void) { - nat g, s; + nat g; lnat live; - step *stp; - - if (RtsFlags.GcFlags.generations == 1) { - return g0s0->n_words + countOccupied(g0s0->large_objects); + + live = 0; + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + live += genLiveWords(&generations[g]); } - + return live; +} + +lnat calcLiveBlocks (void) +{ + nat g; + lnat live; + live = 0; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (g == 0 && s == 0) continue; - stp = &generations[g].steps[s]; - live += stp->n_words + countOccupied(stp->large_objects); - } + live += genLiveBlocks(&generations[g]); } return live; } @@ -1010,44 +873,39 @@ calcLiveWords(void) /* Approximate the number of blocks that will be needed at the next * garbage collection. * - * Assume: all data currently live will remain live. 
Steps that will - * be collected next time will therefore need twice as many blocks - * since all the data will be copied. + * Assume: all data currently live will remain live. Generationss + * that will be collected next time will therefore need twice as many + * blocks since all the data will be copied. */ extern lnat calcNeeded(void) { lnat needed = 0; - nat g, s; - step *stp; + nat g; + generation *gen; for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (g == 0 && s == 0) { continue; } - stp = &generations[g].steps[s]; - - // we need at least this much space - needed += stp->n_blocks + stp->n_large_blocks; - - // any additional space needed to collect this gen next time? - if (g == 0 || // always collect gen 0 - (generations[g].steps[0].n_blocks + - generations[g].steps[0].n_large_blocks - > generations[g].max_blocks)) { - // we will collect this gen next time - if (stp->mark) { - // bitmap: - needed += stp->n_blocks / BITS_IN(W_); - // mark stack: - needed += stp->n_blocks / 100; - } - if (stp->compact) { - continue; // no additional space needed for compaction - } else { - needed += stp->n_blocks; - } - } - } + gen = &generations[g]; + + // we need at least this much space + needed += gen->n_blocks + gen->n_large_blocks; + + // any additional space needed to collect this gen next time? + if (g == 0 || // always collect gen 0 + (gen->n_blocks + gen->n_large_blocks > gen->max_blocks)) { + // we will collect this gen next time + if (gen->mark) { + // bitmap: + needed += gen->n_blocks / BITS_IN(W_); + // mark stack: + needed += gen->n_blocks / 100; + } + if (gen->compact) { + continue; // no additional space needed for compaction + } else { + needed += gen->n_blocks; + } + } } return needed; } @@ -1178,268 +1036,8 @@ void freeExec (void *addr) #endif /* mingw32_HOST_OS */ -/* ----------------------------------------------------------------------------- - Debugging - - memInventory() checks for memory leaks by counting up all the - blocks we know about and comparing that to the number of blocks - allegedly floating around in the system. - -------------------------------------------------------------------------- */ - #ifdef DEBUG -// Useful for finding partially full blocks in gdb -void findSlop(bdescr *bd); -void findSlop(bdescr *bd) -{ - lnat slop; - - for (; bd != NULL; bd = bd->link) { - slop = (bd->blocks * BLOCK_SIZE_W) - (bd->free - bd->start); - if (slop > (1024/sizeof(W_))) { - debugBelch("block at %p (bdescr %p) has %ldKB slop\n", - bd->start, bd, slop / (1024/sizeof(W_))); - } - } -} - -nat -countBlocks(bdescr *bd) -{ - nat n; - for (n=0; bd != NULL; bd=bd->link) { - n += bd->blocks; - } - return n; -} - -// (*1) Just like countBlocks, except that we adjust the count for a -// megablock group so that it doesn't include the extra few blocks -// that would be taken up by block descriptors in the second and -// subsequent megablock. This is so we can tally the count with the -// number of blocks allocated in the system, for memInventory(). 
-static nat -countAllocdBlocks(bdescr *bd) -{ - nat n; - for (n=0; bd != NULL; bd=bd->link) { - n += bd->blocks; - // hack for megablock groups: see (*1) above - if (bd->blocks > BLOCKS_PER_MBLOCK) { - n -= (MBLOCK_SIZE / BLOCK_SIZE - BLOCKS_PER_MBLOCK) - * (bd->blocks/(MBLOCK_SIZE/BLOCK_SIZE)); - } - } - return n; -} - -static lnat -stepBlocks (step *stp) -{ - ASSERT(countBlocks(stp->blocks) == stp->n_blocks); - ASSERT(countBlocks(stp->large_objects) == stp->n_large_blocks); - return stp->n_blocks + stp->n_old_blocks + - countAllocdBlocks(stp->large_objects); -} - -// If memInventory() calculates that we have a memory leak, this -// function will try to find the block(s) that are leaking by marking -// all the ones that we know about, and search through memory to find -// blocks that are not marked. In the debugger this can help to give -// us a clue about what kind of block leaked. In the future we might -// annotate blocks with their allocation site to give more helpful -// info. -static void -findMemoryLeak (void) -{ - nat g, s, i; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (i = 0; i < n_capabilities; i++) { - markBlocks(capabilities[i].mut_lists[g]); - } - markBlocks(generations[g].mut_list); - for (s = 0; s < generations[g].n_steps; s++) { - markBlocks(generations[g].steps[s].blocks); - markBlocks(generations[g].steps[s].large_objects); - } - } - - for (i = 0; i < n_nurseries; i++) { - markBlocks(nurseries[i].blocks); - markBlocks(nurseries[i].large_objects); - } - -#ifdef PROFILING - // TODO: - // if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { - // markRetainerBlocks(); - // } -#endif - - // count the blocks allocated by the arena allocator - // TODO: - // markArenaBlocks(); - - // count the blocks containing executable memory - markBlocks(exec_block); - - reportUnmarkedBlocks(); -} - - -void -memInventory (rtsBool show) -{ - nat g, s, i; - step *stp; - lnat gen_blocks[RtsFlags.GcFlags.generations]; - lnat nursery_blocks, retainer_blocks, - arena_blocks, exec_blocks; - lnat live_blocks = 0, free_blocks = 0; - rtsBool leak; - - // count the blocks we current have - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - gen_blocks[g] = 0; - for (i = 0; i < n_capabilities; i++) { - gen_blocks[g] += countBlocks(capabilities[i].mut_lists[g]); - } - gen_blocks[g] += countAllocdBlocks(generations[g].mut_list); - for (s = 0; s < generations[g].n_steps; s++) { - stp = &generations[g].steps[s]; - gen_blocks[g] += stepBlocks(stp); - } - } - - nursery_blocks = 0; - for (i = 0; i < n_nurseries; i++) { - nursery_blocks += stepBlocks(&nurseries[i]); - } - - retainer_blocks = 0; -#ifdef PROFILING - if (RtsFlags.ProfFlags.doHeapProfile == HEAP_BY_RETAINER) { - retainer_blocks = retainerStackBlocks(); - } -#endif - - // count the blocks allocated by the arena allocator - arena_blocks = arenaBlocks(); - - // count the blocks containing executable memory - exec_blocks = countAllocdBlocks(exec_block); - - /* count the blocks on the free list */ - free_blocks = countFreeList(); - - live_blocks = 0; - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - live_blocks += gen_blocks[g]; - } - live_blocks += nursery_blocks + - + retainer_blocks + arena_blocks + exec_blocks; - -#define MB(n) (((n) * BLOCK_SIZE_W) / ((1024*1024)/sizeof(W_))) - - leak = live_blocks + free_blocks != mblocks_allocated * BLOCKS_PER_MBLOCK; - - if (show || leak) - { - if (leak) { - debugBelch("Memory leak detected:\n"); - } else { - debugBelch("Memory inventory:\n"); - } - for (g = 0; g < 
RtsFlags.GcFlags.generations; g++) { - debugBelch(" gen %d blocks : %5lu blocks (%lu MB)\n", g, - gen_blocks[g], MB(gen_blocks[g])); - } - debugBelch(" nursery : %5lu blocks (%lu MB)\n", - nursery_blocks, MB(nursery_blocks)); - debugBelch(" retainer : %5lu blocks (%lu MB)\n", - retainer_blocks, MB(retainer_blocks)); - debugBelch(" arena blocks : %5lu blocks (%lu MB)\n", - arena_blocks, MB(arena_blocks)); - debugBelch(" exec : %5lu blocks (%lu MB)\n", - exec_blocks, MB(exec_blocks)); - debugBelch(" free : %5lu blocks (%lu MB)\n", - free_blocks, MB(free_blocks)); - debugBelch(" total : %5lu blocks (%lu MB)\n", - live_blocks + free_blocks, MB(live_blocks+free_blocks)); - if (leak) { - debugBelch("\n in system : %5lu blocks (%lu MB)\n", - mblocks_allocated * BLOCKS_PER_MBLOCK, mblocks_allocated); - } - } - - if (leak) { - debugBelch("\n"); - findMemoryLeak(); - } - ASSERT(n_alloc_blocks == live_blocks); - ASSERT(!leak); -} - - -/* Full heap sanity check. */ -void -checkSanity( void ) -{ - nat g, s; - - if (RtsFlags.GcFlags.generations == 1) { - checkHeap(g0s0->blocks); - checkLargeObjects(g0s0->large_objects); - } else { - - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - for (s = 0; s < generations[g].n_steps; s++) { - if (g == 0 && s == 0) { continue; } - ASSERT(countBlocks(generations[g].steps[s].blocks) - == generations[g].steps[s].n_blocks); - ASSERT(countBlocks(generations[g].steps[s].large_objects) - == generations[g].steps[s].n_large_blocks); - checkHeap(generations[g].steps[s].blocks); - checkLargeObjects(generations[g].steps[s].large_objects); - } - } - - for (s = 0; s < n_nurseries; s++) { - ASSERT(countBlocks(nurseries[s].blocks) - == nurseries[s].n_blocks); - ASSERT(countBlocks(nurseries[s].large_objects) - == nurseries[s].n_large_blocks); - } - - checkFreeListSanity(); - } - -#if defined(THREADED_RTS) - // check the stacks too in threaded mode, because we don't do a - // full heap sanity check in this case (see checkHeap()) - checkMutableLists(rtsTrue); -#else - checkMutableLists(rtsFalse); -#endif -} - -/* Nursery sanity check */ -void -checkNurserySanity( step *stp ) -{ - bdescr *bd, *prev; - nat blocks = 0; - - prev = NULL; - for (bd = stp->blocks; bd != NULL; bd = bd->link) { - ASSERT(bd->u.back == prev); - prev = bd; - blocks += bd->blocks; - } - ASSERT(blocks == stp->n_blocks); -} - // handy function for use in gdb, because Bdescr() is inlined. extern bdescr *_bdescr( StgPtr p );
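
For orientation, the central API change in this patch is that allocation is now routed through the current Capability: allocate(), allocatePinned() and newCAF() take a Capability (or its StgRegTable) and work on that Capability's nursery, pinned block and mutable lists, so sm_mutex is only taken briefly in the uncommon cases (large-object requests, or grabbing a fresh nursery block). The sketch below shows how an RTS-side caller might use the post-patch interface; it is illustrative only, exampleAlloc is a hypothetical helper name rather than anything in the patch, and the includes assume the usual rts/ headers.

/* Illustrative sketch only -- not part of the patch above. */
#include "Rts.h"
#include "Capability.h"
#include "Storage.h"

static StgPtr exampleAlloc (Capability *cap, lnat n_words)
{
    StgPtr p;

    /* Small requests are served from cap's own nursery blocks (but not
     * from the current Hp/HpLim block); only requests of
     * LARGE_OBJECT_THRESHOLD words or more briefly take sm_mutex and
     * are chained onto g0->large_objects. */
    p = allocate(cap, n_words);

    /* Pinned data (e.g. ByteArray# payloads handed to foreign code)
     * would instead use allocatePinned(cap, n_words), which accumulates
     * objects in cap->pinned_object_block and moves that block onto
     * generation 0's large_objects list once it fills up. */

    return p;
}

The write barriers follow the same pattern: dirty_MUT_VAR(), dirty_TSO(), dirty_STACK() and dirty_MVAR() now simply call recordClosureMutated() on the Capability's own mutable list rather than inspecting the block descriptor's generation number.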